inflate/
lib.rs

1// Copyright 2014-2015 The Servo Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution.
3//
4// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
5// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
6// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
7// option. This file may not be copied, modified, or distributed
8// except according to those terms.
9
10//! A [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html) decoder written in rust.
11//!
12//! This library provides functionality to decompress data compressed with the DEFLATE algorithm,
13//! both with and without a [zlib](https://tools.ietf.org/html/rfc1950) header/trailer.
14//!
15//! # Examples
16//! The easiest way to get `std::Vec<u8>` containing the decompressed bytes is to use either
17//! `inflate::inflate_bytes` or `inflate::inflate_bytes_zlib` (depending on whether
18//! the encoded data has zlib headers and trailers or not). The following example
19//! decodes the DEFLATE encoded string "Hello, world" and prints it:
20//!
21//! ```rust
22//! use inflate::inflate_bytes;
23//! use std::str::from_utf8;
24//!
25//! let encoded = [243, 72, 205, 201, 201, 215, 81, 40, 207, 47, 202, 73, 1, 0];
26//! let decoded = inflate_bytes(&encoded).unwrap();
27//! println!("{}", from_utf8(&decoded).unwrap()); // prints "Hello, world"
28//! ```
29//!
30//! If you need more flexibility, then the library also provides an implementation
31//! of `std::io::Writer` in `inflate::writer`. Below is an example using an
32//! `inflate::writer::InflateWriter` to decode the DEFLATE encoded string "Hello, world":
33//!
34//! ```rust
35//! use inflate::InflateWriter;
36//! use std::io::Write;
37//! use std::str::from_utf8;
38//!
39//! let encoded = [243, 72, 205, 201, 201, 215, 81, 40, 207, 47, 202, 73, 1, 0];
40//! let mut decoder = InflateWriter::new(Vec::new());
41//! decoder.write(&encoded).unwrap();
42//! let decoded = decoder.finish().unwrap();
43//! println!("{}", from_utf8(&decoded).unwrap()); // prints "Hello, world"
44//! ```
45//!
46//! Finally, if you need even more flexibility, or if you only want to depend on
47//! `core`, you can use the `inflate::InflateStream` API. The below example
48//! decodes an array of DEFLATE encoded bytes:
49//!
50//! ```rust
51//! use inflate::InflateStream;
52//!
53//! let data = [0x73, 0x49, 0x4d, 0xcb, 0x49, 0x2c, 0x49, 0x55, 0x00, 0x11, 0x00];
54//! let mut inflater = InflateStream::new();
55//! let mut out = Vec::<u8>::new();
56//! let mut n = 0;
57//! while n < data.len() {
58//!     let res = inflater.update(&data[n..]);
59//!     if let Ok((num_bytes_read, result)) = res {
60//!         n += num_bytes_read;
61//!         out.extend(result.iter().cloned());
62//!     } else {
63//!         res.unwrap();
64//!     }
65//! }
66//! ```
67
68extern crate adler32;
69
70use std::cmp;
71use std::slice;
72
73mod checksum;
74use checksum::{Checksum, adler32_from_bytes};
75
76mod writer;
77pub use self::writer::{InflateWriter};
78
79mod utils;
80pub use self::utils::{inflate_bytes, inflate_bytes_zlib, inflate_bytes_zlib_no_checksum};
81
82mod reader;
83pub use self::reader::{DeflateDecoder, DeflateDecoderBuf};
84
85static BIT_REV_U8: [u8; 256] = [
86    0b0000_0000, 0b1000_0000, 0b0100_0000, 0b1100_0000,
87    0b0010_0000, 0b1010_0000, 0b0110_0000, 0b1110_0000,
88    0b0001_0000, 0b1001_0000, 0b0101_0000, 0b1101_0000,
89    0b0011_0000, 0b1011_0000, 0b0111_0000, 0b1111_0000,
90
91    0b0000_1000, 0b1000_1000, 0b0100_1000, 0b1100_1000,
92    0b0010_1000, 0b1010_1000, 0b0110_1000, 0b1110_1000,
93    0b0001_1000, 0b1001_1000, 0b0101_1000, 0b1101_1000,
94    0b0011_1000, 0b1011_1000, 0b0111_1000, 0b1111_1000,
95
96    0b0000_0100, 0b1000_0100, 0b0100_0100, 0b1100_0100,
97    0b0010_0100, 0b1010_0100, 0b0110_0100, 0b1110_0100,
98    0b0001_0100, 0b1001_0100, 0b0101_0100, 0b1101_0100,
99    0b0011_0100, 0b1011_0100, 0b0111_0100, 0b1111_0100,
100
101    0b0000_1100, 0b1000_1100, 0b0100_1100, 0b1100_1100,
102    0b0010_1100, 0b1010_1100, 0b0110_1100, 0b1110_1100,
103    0b0001_1100, 0b1001_1100, 0b0101_1100, 0b1101_1100,
104    0b0011_1100, 0b1011_1100, 0b0111_1100, 0b1111_1100,
105
106
107    0b0000_0010, 0b1000_0010, 0b0100_0010, 0b1100_0010,
108    0b0010_0010, 0b1010_0010, 0b0110_0010, 0b1110_0010,
109    0b0001_0010, 0b1001_0010, 0b0101_0010, 0b1101_0010,
110    0b0011_0010, 0b1011_0010, 0b0111_0010, 0b1111_0010,
111
112    0b0000_1010, 0b1000_1010, 0b0100_1010, 0b1100_1010,
113    0b0010_1010, 0b1010_1010, 0b0110_1010, 0b1110_1010,
114    0b0001_1010, 0b1001_1010, 0b0101_1010, 0b1101_1010,
115    0b0011_1010, 0b1011_1010, 0b0111_1010, 0b1111_1010,
116
117    0b0000_0110, 0b1000_0110, 0b0100_0110, 0b1100_0110,
118    0b0010_0110, 0b1010_0110, 0b0110_0110, 0b1110_0110,
119    0b0001_0110, 0b1001_0110, 0b0101_0110, 0b1101_0110,
120    0b0011_0110, 0b1011_0110, 0b0111_0110, 0b1111_0110,
121
122    0b0000_1110, 0b1000_1110, 0b0100_1110, 0b1100_1110,
123    0b0010_1110, 0b1010_1110, 0b0110_1110, 0b1110_1110,
124    0b0001_1110, 0b1001_1110, 0b0101_1110, 0b1101_1110,
125    0b0011_1110, 0b1011_1110, 0b0111_1110, 0b1111_1110,
126
127
128    0b0000_0001, 0b1000_0001, 0b0100_0001, 0b1100_0001,
129    0b0010_0001, 0b1010_0001, 0b0110_0001, 0b1110_0001,
130    0b0001_0001, 0b1001_0001, 0b0101_0001, 0b1101_0001,
131    0b0011_0001, 0b1011_0001, 0b0111_0001, 0b1111_0001,
132
133    0b0000_1001, 0b1000_1001, 0b0100_1001, 0b1100_1001,
134    0b0010_1001, 0b1010_1001, 0b0110_1001, 0b1110_1001,
135    0b0001_1001, 0b1001_1001, 0b0101_1001, 0b1101_1001,
136    0b0011_1001, 0b1011_1001, 0b0111_1001, 0b1111_1001,
137
138    0b0000_0101, 0b1000_0101, 0b0100_0101, 0b1100_0101,
139    0b0010_0101, 0b1010_0101, 0b0110_0101, 0b1110_0101,
140    0b0001_0101, 0b1001_0101, 0b0101_0101, 0b1101_0101,
141    0b0011_0101, 0b1011_0101, 0b0111_0101, 0b1111_0101,
142
143    0b0000_1101, 0b1000_1101, 0b0100_1101, 0b1100_1101,
144    0b0010_1101, 0b1010_1101, 0b0110_1101, 0b1110_1101,
145    0b0001_1101, 0b1001_1101, 0b0101_1101, 0b1101_1101,
146    0b0011_1101, 0b1011_1101, 0b0111_1101, 0b1111_1101,
147
148
149    0b0000_0011, 0b1000_0011, 0b0100_0011, 0b1100_0011,
150    0b0010_0011, 0b1010_0011, 0b0110_0011, 0b1110_0011,
151    0b0001_0011, 0b1001_0011, 0b0101_0011, 0b1101_0011,
152    0b0011_0011, 0b1011_0011, 0b0111_0011, 0b1111_0011,
153
154    0b0000_1011, 0b1000_1011, 0b0100_1011, 0b1100_1011,
155    0b0010_1011, 0b1010_1011, 0b0110_1011, 0b1110_1011,
156    0b0001_1011, 0b1001_1011, 0b0101_1011, 0b1101_1011,
157    0b0011_1011, 0b1011_1011, 0b0111_1011, 0b1111_1011,
158
159    0b0000_0111, 0b1000_0111, 0b0100_0111, 0b1100_0111,
160    0b0010_0111, 0b1010_0111, 0b0110_0111, 0b1110_0111,
161    0b0001_0111, 0b1001_0111, 0b0101_0111, 0b1101_0111,
162    0b0011_0111, 0b1011_0111, 0b0111_0111, 0b1111_0111,
163
164    0b0000_1111, 0b1000_1111, 0b0100_1111, 0b1100_1111,
165    0b0010_1111, 0b1010_1111, 0b0110_1111, 0b1110_1111,
166    0b0001_1111, 0b1001_1111, 0b0101_1111, 0b1101_1111,
167    0b0011_1111, 0b1011_1111, 0b0111_1111, 0b1111_1111
168];
169
170#[derive(Clone, Copy)]
171struct BitState {
172    n: u8,
173    v: u32,
174}
175
176#[derive(Clone)]
177struct BitStream<'a> {
178    bytes: slice::Iter<'a, u8>,
179    used: usize,
180    state: BitState,
181}
182
183#[cfg(debug)]
184macro_rules! debug { ($($x:tt)*) => (println!($($x)*)) }
185#[cfg(not(debug))]
186macro_rules! debug { ($($x:tt)*) => (()) }
187
188impl<'a> BitStream<'a> {
189    fn new(bytes: &'a [u8], state: BitState) -> BitStream<'a> {
190        BitStream {
191            bytes: bytes.iter(),
192            used: 0,
193            state: state,
194        }
195    }
196
197    fn use_byte(&mut self) -> bool {
198        match self.bytes.next() {
199            Some(&b) => {
200                self.state.v |= (b as u32) << self.state.n;
201                self.state.n += 8;
202                self.used += 1;
203                true
204            }
205            None => false,
206        }
207    }
208
209    fn need(&mut self, n: u8) -> bool {
210        if self.state.n < n {
211            if !self.use_byte() {
212                return false;
213            }
214            if n > 8 && self.state.n < n {
215                assert!(n <= 16);
216                if !self.use_byte() {
217                    return false;
218                }
219            }
220        }
221        true
222    }
223
224    fn take16(&mut self, n: u8) -> Option<u16> {
225        if self.need(n) {
226            self.state.n -= n;
227            let v = self.state.v & ((1 << n) - 1);
228            self.state.v >>= n;
229            Some(v as u16)
230        } else {
231            None
232        }
233    }
234
235    fn take(&mut self, n: u8) -> Option<u8> {
236        assert!(n <= 8);
237        self.take16(n).map(|v: u16| v as u8)
238    }
239
240    fn fill(&mut self) -> BitState {
241        while self.state.n + 8 <= 32 && self.use_byte() {}
242        self.state
243    }
244
245    fn align_byte(&mut self) {
246        if self.state.n > 0 {
247            let n = self.state.n % 8;
248            self.take(n);
249        }
250    }
251
252    fn trailing_bytes(&mut self) -> (u8, [u8; 4]) {
253        let mut len = 0;
254        let mut bytes = [0; 4];
255        self.align_byte();
256        while self.state.n >= 8 {
257            bytes[len as usize] = self.state.v as u8;
258            len += 1;
259            self.state.n -= 8;
260            self.state.v >>= 8;
261        }
262        (len, bytes)
263    }
264}
265
266/// Generate huffman codes from the given set of lengths and run `$cb` on them except the first
267/// code for each length.
268///
269/// See also the [deflate specification](http://www.gzip.org/zlib/rfc-deflate.html#huffman)
270/// for an explanation of the algorithm.
271macro_rules! with_codes (($clens:expr, $max_bits:expr => $code_ty:ty, $cb:expr) => ({
272    // Count the number of codes for each bit length.
273    let mut bl_count = [0 as $code_ty; ($max_bits+1)];
274    for &bits in $clens.iter() {
275        if bits != 0 {
276            // This should be safe from overflow as the number of lengths read from the input
277            // is bounded by the number of bits the number of lengths is represented by in the
278            // deflate compressed data.
279            bl_count[bits as usize] += 1;
280        }
281    }
282
283    // Compute the first code value for each bit length.
284    let mut next_code = [0 as $code_ty; ($max_bits+1)];
285    let mut code = 0 as $code_ty;
286    // TODO use range_inclusive as soon as it is stable
287    //for bits in range_inclusive(1, $max_bits) {
288    for bits in 1..$max_bits + 1 {
289        code = try!(
290            code.checked_add(bl_count[bits as usize - 1])
291                .ok_or_else(|| "Error generating huffman codes: Invalid set of code lengths")
292        ) << 1;
293        next_code[bits as usize] = code;
294    }
295
296    // Compute the rest of the codes
297    for (i, &bits) in $clens.iter().enumerate() {
298        if bits != 0 {
299            let code = next_code[bits as usize];
300            // If there is an overflow here, the given set of code lengths won't allow enough
301            // unique codes to be generated.
302            let new_code = try!(
303                code.checked_add(1)
304                    .ok_or_else(|| "Error generating huffman codes: Invalid set of code lengths!")
305            );
306            next_code[bits as usize] = new_code;
307            match $cb(i as $code_ty, code, bits) {
308                Ok(()) => (),
309                Err(err) => return Err(err)
310            }
311        }
312    }
313}));
314
315struct CodeLengthReader {
316    patterns: Box<[u8; 128]>,
317    clens: Box<[u8; 19]>,
318    result: Vec<u8>,
319    num_lit: u16,
320    num_dist: u8,
321}
322
323impl CodeLengthReader {
324    fn new(clens: Box<[u8; 19]>, num_lit: u16, num_dist: u8) -> Result<CodeLengthReader, String> {
325        // Fill in the 7-bit patterns that match each code.
326        let mut patterns = Box::new([0xffu8; 128]);
327        with_codes!(clens, 7 => u8, |i: u8, code: u8, bits| -> _ {
328            /*let base = match BIT_REV_U8.get((code << (8 - bits)) as usize) {
329                Some(&base) => base,
330                None => return Err("invalid length code".to_owned())
331            }*/
332            let base = BIT_REV_U8[(code << (8 - bits)) as usize];
333            for rest in 0u8 .. 1u8 << (7 - bits) {
334                patterns[(base | (rest << bits)) as usize] = i;
335            }
336            Ok(())
337        });
338
339        Ok(CodeLengthReader {
340            patterns: patterns,
341            clens: clens,
342            result: Vec::with_capacity(num_lit as usize + num_dist as usize),
343            num_lit: num_lit,
344            num_dist: num_dist,
345        })
346    }
347
348    fn read(&mut self, stream: &mut BitStream) -> Result<bool, String> {
349        let total_len = self.num_lit as usize + self.num_dist as usize;
350        while self.result.len() < total_len {
351            if !stream.need(7) {
352                return Ok(false);
353            }
354            let save = stream.clone();
355            macro_rules! take (($n:expr) => (match stream.take($n) {
356                Some(v) => v,
357                None => {
358                    *stream = save;
359                    return Ok(false);
360                }
361            }));
362            let code = self.patterns[(stream.state.v & 0x7f) as usize];
363            stream.take(match self.clens.get(code as usize) {
364                Some(&len) => len,
365                None => return Err("invalid length code".to_owned()),
366            });
367            match code {
368                0...15 => self.result.push(code),
369                16 => {
370                    let last = match self.result.last() {
371                        Some(&v) => v,
372                        // 16 appeared before anything else
373                        None => return Err("invalid length code".to_owned()),
374                    };
375                    for _ in 0..3 + take!(2) {
376                        self.result.push(last);
377                    }
378                }
379                17 => {
380                    for _ in 0..3 + take!(3) {
381                        self.result.push(0);
382                    }
383                }
384                18 => {
385                    for _ in 0..11 + take!(7) {
386                        self.result.push(0);
387                    }
388                }
389                _ => unreachable!(),
390            }
391        }
392        Ok(true)
393    }
394
395    fn to_lit_and_dist(&self) -> Result<(DynHuffman16, DynHuffman16), String> {
396        let num_lit = self.num_lit as usize;
397        let lit = try!(DynHuffman16::new(&self.result[..num_lit]));
398        let dist = try!(DynHuffman16::new(&self.result[num_lit..]));
399        Ok((lit, dist))
400    }
401}
402
403struct Trie8bit<T> {
404    data: [T; 16],
405    children: [Option<Box<[T; 16]>>; 16],
406}
407
408struct DynHuffman16 {
409    patterns: Box<[u16; 256]>,
410    rest: Vec<Trie8bit<u16>>,
411}
412
413impl DynHuffman16 {
414    fn new(clens: &[u8]) -> Result<DynHuffman16, String> {
415        // Fill in the 8-bit patterns that match each code.
416        // Longer patterns go into the trie.
417        let mut patterns = Box::new([0xffffu16; 256]);
418        let mut rest = Vec::new();
419        with_codes!(clens, 15 => u16, |i: u16, code: u16, bits: u8| -> _ {
420            let entry = i | ((bits as u16) << 12);
421            if bits <= 8 {
422                let base = match BIT_REV_U8.get((code << (8 - bits)) as usize) {
423                    Some(&v) => v,
424                    None => return Err("invalid length code".to_owned())
425                };
426                for rest in 0u8 .. 1 << (8 - bits) {
427                    patterns[(base | (rest << (bits & 7))) as usize] = entry;
428                }
429            } else {
430                let low = match BIT_REV_U8.get((code >> (bits - 8)) as usize) {
431                    Some(&v) => v,
432                    None => return Err("invalid length code".to_owned())
433                };
434                let high = BIT_REV_U8[((code << (16 - bits)) & 0xff) as usize];
435                let (min_bits, idx) = if patterns[low as usize] != 0xffff {
436                    let bits_prev = (patterns[low as usize] >> 12) as u8;
437                    (cmp::min(bits_prev, bits), patterns[low as usize] & 0x7ff)
438                } else {
439                    rest.push(Trie8bit {
440                        data: [0xffff; 16],
441                        children: [
442                            None, None, None, None,
443                            None, None, None, None,
444                            None, None, None, None,
445                            None, None, None, None
446                        ]
447                    });
448                    (bits, (rest.len() - 1) as u16)
449                };
450                patterns[low as usize] = idx | 0x800 | ((min_bits as u16) << 12);
451                let trie_entry = match rest.get_mut(idx as usize) {
452                    Some(v) => v,
453                    None => return Err("invalid huffman code".to_owned())
454                };
455                if bits <= 12 {
456                    for rest in 0u8 .. 1 << (12 - bits) {
457                        trie_entry.data[(high | (rest << (bits - 8))) as usize] = entry;
458                    }
459                } else {
460                    let child = &mut trie_entry.children[(high & 0xf) as usize];
461                    if child.is_none() {
462                        *child = Some(Box::new([0xffff; 16]));
463                    }
464                    let child = &mut **child.as_mut().unwrap();
465                    let high_top = high >> 4;
466                    for rest in 0u8 .. 1 << (16 - bits) {
467                        child[(high_top | (rest << (bits - 12))) as usize] = entry;
468                    }
469                }
470            }
471            Ok(())
472        });
473        debug!("=== DYN HUFFMAN ===");
474        for _i in 0..256 {
475            debug!("{:08b} {:04x}", _i, patterns[BIT_REV_U8[_i] as usize]);
476        }
477        debug!("===================");
478        Ok(DynHuffman16 {
479            patterns: patterns,
480            rest: rest,
481        })
482    }
483
484    fn read<'a>(&self, stream: &mut BitStream<'a>) -> Result<Option<(BitStream<'a>, u16)>, String> {
485        let has8 = stream.need(8);
486        let entry = self.patterns[(stream.state.v & 0xff) as usize];
487        let bits = (entry >> 12) as u8;
488
489        Ok(if !has8 {
490            if bits <= stream.state.n {
491                let save = stream.clone();
492                stream.state.n -= bits;
493                stream.state.v >>= bits;
494                Some((save, entry & 0xfff))
495            } else {
496                None
497            }
498        } else if bits <= 8 {
499            let save = stream.clone();
500            stream.state.n -= bits;
501            stream.state.v >>= bits;
502            Some((save, entry & 0xfff))
503        } else {
504            let has16 = stream.need(16);
505            let trie = match self.rest.get((entry & 0x7ff) as usize) {
506                Some(trie) => trie,
507                None => return Err("invalid entry in stream".to_owned()),
508            };
509            let idx = stream.state.v >> 8;
510            let trie_entry = match trie.children[(idx & 0xf) as usize] {
511                Some(ref child) => child[((idx >> 4) & 0xf) as usize],
512                None => trie.data[(idx & 0xf) as usize],
513            };
514            let trie_bits = (trie_entry >> 12) as u8;
515            if has16 || trie_bits <= stream.state.n {
516                let save = stream.clone();
517                stream.state.n -= trie_bits;
518                stream.state.v >>= trie_bits;
519                Some((save, trie_entry & 0xfff))
520            } else {
521                None
522            }
523        })
524    }
525}
526
527enum State {
528    ZlibMethodAndFlags, // CMF
529    ZlibFlags(/* CMF */ u8), // FLG,
530    Bits(BitsNext, BitState),
531    LenDist((BitsNext, BitState), /* len */ u16, /* dist */ u16),
532    Uncompressed(/* len */ u16),
533    CheckCRC(/* len */ u8, /* bytes */ [u8; 4]),
534    Finished
535}
536
537use self::State::*;
538
539enum BitsNext {
540    BlockHeader,
541    BlockUncompressedLen,
542    BlockUncompressedNlen(/* len */ u16),
543    BlockDynHlit,
544    BlockDynHdist(/* hlit */ u8),
545    BlockDynHclen(/* hlit */ u8, /* hdist */ u8),
546    BlockDynClenCodeLengths(/* hlit */ u8, /* hdist */ u8, /* hclen */ u8, /* idx */ u8, /* clens */ Box<[u8; 19]>),
547    BlockDynCodeLengths(CodeLengthReader),
548    BlockDyn(/* lit/len */ DynHuffman16, /* dist */ DynHuffman16, /* prev_len */ u16)
549}
550
551use self::BitsNext::*;
552
553pub struct InflateStream {
554    buffer: Vec<u8>,
555    pos: u16,
556    state: Option<State>,
557    final_block: bool,
558    checksum: Checksum,
559    read_checksum: Option<u32>,
560}
561
562impl InflateStream {
563    #[allow(dead_code)]
564    /// Create a new stream for decoding raw deflate encoded data.
565    pub fn new() -> InflateStream {
566        let state = Bits(BlockHeader, BitState { n: 0, v: 0 });
567        let buffer = Vec::with_capacity(32 * 1024);
568        InflateStream::with_state_and_buffer(state, buffer, Checksum::none())
569    }
570
571    /// Create a new stream for decoding deflate encoded data with a zlib header and footer
572    pub fn from_zlib() -> InflateStream {
573        InflateStream::with_state_and_buffer(ZlibMethodAndFlags, Vec::new(), Checksum::zlib())
574    }
575
576    /// Create a new stream for decoding deflate encoded data with a zlib header and footer
577    ///
578    /// This version creates a decoder that does not checksum the data to validate it with the
579    /// checksum provided with the zlib wrapper.
580    pub fn from_zlib_no_checksum() -> InflateStream {
581        InflateStream::with_state_and_buffer(ZlibMethodAndFlags, Vec::new(), Checksum::none())
582    }
583
584    pub fn reset(&mut self) {
585        self.buffer.clear();
586        self.pos = 0;
587        self.state = Some(Bits(BlockHeader, BitState { n: 0, v: 0 }));
588        self.final_block = false;
589    }
590
591    pub fn reset_to_zlib(&mut self) {
592        self.reset();
593        self.state = Some(ZlibMethodAndFlags);
594    }
595
596    fn with_state_and_buffer(state: State, buffer: Vec<u8>, checksum: Checksum)
597                             -> InflateStream {
598        InflateStream {
599            buffer: buffer,
600            pos: 0,
601            state: Some(state),
602            final_block: false,
603            checksum: checksum,
604            read_checksum: None,
605        }
606    }
607
608    fn run_len_dist(&mut self, len: u16, dist: u16) -> Result<Option<u16>, String> {
609        debug!("RLE -{}; {} (cap={} len={})", dist, len,
610               self.buffer.capacity(), self.buffer.len());
611        if dist < 1 {
612            return Err("invalid run length in stream".to_owned());
613        }
614        // `buffer_size` is used for validating `unsafe` below, handle with care
615        let buffer_size = self.buffer.capacity() as u16;
616        let len = if self.pos < dist {
617            // Handle copying from ahead, until we hit the end reading.
618            let pos_end = self.pos + len;
619            let (pos_end, left) = if pos_end < dist {
620                (pos_end, 0)
621            } else {
622                (dist, pos_end - dist)
623            };
624            if dist > buffer_size {
625                return Err("run length distance is bigger than the window size".to_owned());
626            }
627            let forward = buffer_size - dist;
628            if pos_end + forward > self.buffer.len() as u16 {
629                return Err("invalid run length in stream".to_owned());
630            }
631
632            for i in self.pos as usize..pos_end as usize {
633                self.buffer[i] = self.buffer[i + forward as usize];
634            }
635            self.pos = pos_end;
636            left
637        } else {
638            len
639        };
640        // Handle copying from before, until we hit the end writing.
641        let pos_end = self.pos + len;
642        let (pos_end, left) = if pos_end <= buffer_size {
643            (pos_end, None)
644        } else {
645            (buffer_size, Some(pos_end - buffer_size))
646        };
647
648        if self.pos < dist && pos_end > self.pos {
649            return Err("invalid run length in stream".to_owned());
650        }
651
652        if self.buffer.len() < pos_end as usize {
653            // ensure the buffer length will not exceed the amount of allocated memory
654            assert!(pos_end <= buffer_size);
655            // ensure that the uninitialized chunk of memory will be fully overwritten
656            assert!(self.pos as usize <= self.buffer.len());
657            unsafe {
658                self.buffer.set_len(pos_end as usize);
659            }
660        }
661        assert!(dist > 0); // validation against reading uninitialized memory
662        for i in self.pos as usize..pos_end as usize {
663            self.buffer[i] = self.buffer[i - dist as usize];
664        }
665        self.pos = pos_end;
666        Ok(left)
667    }
668
669    fn next_state(&mut self, data: &[u8]) -> Result<usize, String> {
670        macro_rules! ok_bytes (($n:expr, $state:expr) => ({
671            self.state = Some($state);
672            Ok($n)
673        }));
674        let debug_byte = |_i, _b| debug!("[{:04x}] {:02x}", _i, _b);
675        macro_rules! push_or (($b:expr, $ret:expr) => (if self.pos < self.buffer.capacity() as u16 {
676            let b = $b;
677            debug_byte(self.pos, b);
678            if (self.pos as usize) < self.buffer.len() {
679                self.buffer[self.pos as usize] = b;
680            } else {
681                assert_eq!(self.pos as usize, self.buffer.len());
682                self.buffer.push(b);
683            }
684            self.pos += 1;
685        } else {
686            return $ret;
687        }));
688        macro_rules! run_len_dist (($len:expr, $dist:expr => ($bytes:expr, $next:expr, $state:expr)) => ({
689            let dist = $dist;
690            let left = try!(self.run_len_dist($len, dist));
691            if let Some(len) = left {
692                return ok_bytes!($bytes, LenDist(($next, $state), len, dist));
693            }
694        }));
695        match self.state.take().unwrap() {
696            ZlibMethodAndFlags => {
697                let b = match data.get(0) {
698                    Some(&x) => x,
699                    None => {
700                        self.state = Some(ZlibMethodAndFlags);
701                        return Ok(0);
702                    }
703                };
704                let (method, info) = (b & 0xF, b >> 4);
705                debug!("ZLIB CM=0x{:x} CINFO=0x{:x}", method, info);
706
707                // CM = 8 (DEFLATE) is the only method defined by the ZLIB specification.
708                match method {
709                    8 => {/* DEFLATE */}
710                    _ => return Err(format!("unknown ZLIB method CM=0x{:x}", method))
711                }
712
713                if info > 7 {
714                    return Err(format!("invalid ZLIB info CINFO=0x{:x}", info));
715                }
716
717                self.buffer = Vec::with_capacity(1 << (8 + info));
718
719                ok_bytes!(1, ZlibFlags(b))
720            }
721            ZlibFlags(cmf) => {
722                let b = match data.get(0) {
723                    Some(&x) => x,
724                    None => {
725                        self.state = Some(ZlibFlags(cmf));
726                        return Ok(0);
727                    }
728                };
729                let (_check, dict, _level) = (b & 0x1F, (b & 0x20) != 0, b >> 6);
730                debug!("ZLIB FCHECK=0x{:x} FDICT={} FLEVEL=0x{:x}", _check, dict, _level);
731
732                if (((cmf as u16) << 8) | b as u16) % 31 != 0 {
733                    return Err(format!("invalid ZLIB checksum CMF=0x{:x} FLG=0x{:x}", cmf, b));
734                }
735
736                if dict {
737                    return Err("unimplemented ZLIB FDICT=1".into());
738                }
739
740                ok_bytes!(1, Bits(BlockHeader, BitState { n: 0, v: 0 }))
741            }
742            Bits(next, state) => {
743                let mut stream = BitStream::new(data, state);
744                macro_rules! ok_state (($state:expr) => ({self.state = Some($state); Ok(stream.used)}));
745                macro_rules! ok (($next:expr) => (ok_state!(Bits($next, stream.fill()))));
746                macro_rules! take (
747                    ($n:expr => $next:expr) => (match stream.take($n) {
748                        Some(v) => v,
749                        None => return ok!($next)
750                    });
751                    ($n:expr) => (take!($n => next))
752                );
753                macro_rules! take16 (
754                    ($n:expr => $next:expr) => (match stream.take16($n) {
755                        Some(v) => v,
756                        None => return ok!($next)
757                    });
758                    ($n:expr) => (take16!($n => next))
759                );
760                macro_rules! len_dist (
761                    ($len:expr, $code:expr, $bits:expr => $next_early:expr, $next:expr) => ({
762                        let dist = 1 + if $bits == 0 { 0 } else { // new_base
763                            2 << $bits
764                        } + (($code as u16 - if $bits == 0 { 0 } else { // old_base
765                            $bits * 2 + 2
766                        }) << $bits) + take16!($bits => $next_early) as u16;
767                        run_len_dist!($len, dist => (stream.used, $next, stream.state));
768                    });
769                    ($len:expr, $code:expr, $bits:expr) => (
770                        len_dist!($len, $code, $bits => next, next)
771                    )
772                );
773                match next {
774                    BlockHeader => {
775                        if self.final_block {
776                            let (len, bytes) = stream.trailing_bytes();
777                            return ok_state!(CheckCRC(len, bytes));
778                        }
779                        let h = take!(3);
780                        let (final_, block_type) = ((h & 1) != 0, (h >> 1) & 0b11);
781
782                        self.final_block = final_;
783
784                        match block_type {
785                            0 => {
786                                // Skip to the next byte for an uncompressed block.
787                                stream.align_byte();
788                                ok!(BlockUncompressedLen)
789                            }
790                            1 => {
791                                // Unwrap is safe because the data is valid.
792                                let lit = DynHuffman16::new(&[
793                                    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 0-15
794                                    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 16-31
795                                    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 32-47
796                                    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 48-63
797                                    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 64-79
798                                    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 80-95
799                                    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 96-101
800                                    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 112-127
801                                    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, // 128-143
802                                    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 144-159
803                                    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 160-175
804                                    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 176-191
805                                    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 192-207
806                                    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 208-223
807                                    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 224-239
808                                    9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, // 240-255
809                                    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, // 256-271
810                                    7, 7, 7, 7, 7, 7, 7, 7, // 272-279
811                                    8, 8, 8, 8, 8, 8, 8, 8, // 280-287
812                                ]).unwrap();
813                                let dist = DynHuffman16::new(&[
814                                    5, 5, 5, 5, 5, 5, 5, 5,
815                                    5, 5, 5, 5, 5, 5, 5, 5,
816                                    5, 5, 5, 5, 5, 5, 5, 5,
817                                    5, 5, 5, 5, 5, 5, 5, 5
818                                ]).unwrap();
819                                ok!(BlockDyn(lit, dist, 0))
820                            }
821                            2 => ok!(BlockDynHlit),
822                            _ => {
823                                Err(format!("unimplemented DEFLATE block type 0b{:?}",
824                                             block_type))
825                            }
826                        }
827                    }
828                    BlockUncompressedLen => {
829                        let len = take16!(16);
830                        ok_state!(Bits(BlockUncompressedNlen(len), stream.state))
831                    }
832                    BlockUncompressedNlen(len) => {
833                        let nlen = take16!(16);
834                        assert_eq!(stream.state.n, 0);
835                        if !len != nlen {
836                            return Err(format!("invalid uncompressed block len: LEN: {:04x} NLEN: {:04x}", len, nlen));
837                        }
838                        ok_state!(Uncompressed(len))
839                    }
840                    BlockDynHlit => ok!(BlockDynHdist(take!(5) + 1)),
841                    BlockDynHdist(hlit) => ok!(BlockDynHclen(hlit, take!(5) + 1)),
842                    BlockDynHclen(hlit, hdist) => {
843                        ok!(BlockDynClenCodeLengths(hlit, hdist, take!(4) + 4, 0, Box::new([0; 19])))
844                    }
845                    BlockDynClenCodeLengths(hlit, hdist, hclen, i, mut clens) => {
846                        let v =
847                            match stream.take(3) {
848                                Some(v) => v,
849                                None => return ok!(BlockDynClenCodeLengths(hlit, hdist, hclen, i, clens)),
850                            };
851                        clens[[16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15][i as usize]] = v;
852                        if i < hclen - 1 {
853                            ok!(BlockDynClenCodeLengths(hlit, hdist, hclen, i + 1, clens))
854                        } else {
855                            ok!(BlockDynCodeLengths(try!(CodeLengthReader::new(clens, hlit as u16 + 256, hdist))))
856                        }
857                    }
858                    BlockDynCodeLengths(mut reader) => {
859                        let finished = try!(reader.read(&mut stream));
860                        if finished {
861                            let (lit, dist) = try!(reader.to_lit_and_dist());
862                            ok!(BlockDyn(lit, dist, 0))
863                        } else {
864                            ok!(BlockDynCodeLengths(reader))
865                        }
866                    }
867                    BlockDyn(huff_lit_len, huff_dist, mut prev_len) => {
868                        macro_rules! next (($save_len:expr) => (BlockDyn(huff_lit_len, huff_dist, $save_len)));
869                        loop {
870                            let len = if prev_len != 0 {
871                                let len = prev_len;
872                                prev_len = 0;
873                                len
874                            } else {
875                                let (save, code16) = match try!(huff_lit_len.read(&mut stream)) {
876                                    Some(data) => data,
877                                    None => return ok!(next!(0)),
878                                };
879                                let code = code16 as u8;
880                                debug!("{:09b}", code16);
881                                match code16 {
882                                    0...255 => {
883                                        push_or!(code, ok!({stream = save; next!(0)}));
884                                        continue;
885                                    }
886                                    256...285 => {}
887                                    _ => return Err(format!("bad DEFLATE len code {}", code)),
888                                }
889
890                                macro_rules! len (($code:expr, $bits:expr) => (
891                                    3 + if $bits == 0 { 0 } else { // new_base
892                                        4 << $bits
893                                    } + ((if $code == 29 {
894                                        256
895                                    } else {
896                                        $code as u16
897                                    } - if $bits == 0 { 0 } else { // old_base
898                                        $bits * 4 + 4
899                                    } - 1) << $bits) + take!($bits => {stream = save; next!(0)}) as u16
900                                ));
901                                match code {
902                                    0 => {
903                                        return if self.final_block {
904                                            let (len, bytes) = stream.trailing_bytes();
905                                            ok_state!(CheckCRC(len, bytes))
906                                        } else {
907                                            ok!(BlockHeader)
908                                        }
909                                    }
910                                    1...8 => len!(code, 0),
911                                    9...12 => len!(code, 1),
912                                    13...16 => len!(code, 2),
913                                    17...20 => len!(code, 3),
914                                    21...24 => len!(code, 4),
915                                    25...28 => len!(code, 5),
916                                    29 => len!(29, 0),
917                                    _ => return Err(format!("bad DEFLATE len code {}", code as u16 + 256)),
918                                }
919                            };
920
921                            let (save, dist_code) = match try!(huff_dist.read(&mut stream)) {
922                                Some(data) => data,
923                                None => return ok!(next!(len)),
924                            };
925                            debug!("  {:05b}", dist_code);
926                            macro_rules! len_dist_case (($bits:expr) => (
927                                len_dist!(len, dist_code, $bits => {stream = save; next!(len)}, next!(0))
928                            ));
929                            match dist_code {
930                                0...3 => len_dist_case!(0),
931                                4...5 => len_dist_case!(1),
932                                6...7 => len_dist_case!(2),
933                                8...9 => len_dist_case!(3),
934                                10...11 => len_dist_case!(4),
935                                12...13 => len_dist_case!(5),
936                                14...15 => len_dist_case!(6),
937                                16...17 => len_dist_case!(7),
938                                18...19 => len_dist_case!(8),
939                                20...21 => len_dist_case!(9),
940                                22...23 => len_dist_case!(10),
941                                24...25 => len_dist_case!(11),
942                                26...27 => len_dist_case!(12),
943                                28...29 => len_dist_case!(13),
944                                _ => return Err(format!("bad DEFLATE dist code {}", dist_code)),
945                            }
946                        }
947                    }
948                }
949            }
950            LenDist((next, state), len, dist) => {
951                run_len_dist!(len, dist => (0, next, state));
952                ok_bytes!(0, Bits(next, state))
953            }
954            Uncompressed(mut len) => {
955                for (i, &b) in data.iter().enumerate() {
956                    if len == 0 {
957                        return ok_bytes!(i, Bits(BlockHeader, BitState { n: 0, v: 0 }));
958                    }
959                    push_or!(b, ok_bytes!(i, Uncompressed(len)));
960                    len -= 1;
961                }
962                ok_bytes!(data.len(), Uncompressed(len))
963            }
964            CheckCRC(mut len, mut bytes) => {
965                if self.checksum.is_none() {
966                    // TODO: inform caller of unused bytes
967                    return ok_bytes!(0, Finished);
968                }
969
970                // Get the checksum value from the end of the stream.
971                let mut used = 0;
972                while len < 4 && used < data.len() {
973                    bytes[len as usize] = data[used];
974                    len += 1;
975                    used += 1;
976                }
977                if len < 4 {
978                    return ok_bytes!(used, CheckCRC(len, bytes));
979                }
980
981                self.read_checksum = Some(adler32_from_bytes(&bytes));
982                ok_bytes!(used, Finished)
983            }
984            Finished => {
985                // TODO: inform caller of unused bytes
986                ok_bytes!(data.len(), Finished)
987            }
988        }
989    }
990
991    /// Try to uncompress/decode the data in `data`.
992    ///
993    /// On success, returns how many bytes of the input data was decompressed, and a reference to
994    /// the buffer containing the decompressed data.
995    ///
996    /// This function may not uncompress all the provided data in one call, so it has to be called
997    /// repeatedly with the data that hasn't been decompressed yet as an input until the number of
998    /// bytes decoded returned is 0. (See the [top level crate documentation](index.html)
999    /// for an example.)
1000    ///
1001    /// # Errors
1002    /// If invalid input data is encountered, a string describing what went wrong is returned.
1003    pub fn update<'a>(&'a mut self, mut data: &[u8]) -> Result<(usize, &'a [u8]), String> {
1004        let original_size = data.len();
1005        let original_pos = self.pos as usize;
1006        let mut empty = false;
1007        while !empty &&
1008              ((self.pos as usize) < self.buffer.capacity() || self.buffer.capacity() == 0) {
1009            // next_state must be called at least once after the data is empty.
1010            empty = data.is_empty();
1011            match self.next_state(data) {
1012                Ok(n) => {
1013                    data = &data[n..];
1014                }
1015                Err(m) => return Err(m),
1016            }
1017        }
1018        let output = &self.buffer[original_pos..self.pos as usize];
1019        if self.pos as usize >= self.buffer.capacity() {
1020            self.pos = 0;
1021        }
1022
1023        // Update the checksum..
1024        self.checksum.update(output);
1025        // and validate if we are done decoding.
1026        if let Some(c) = self.read_checksum {
1027            try!(self.checksum.check(c));
1028        }
1029
1030        Ok((original_size - data.len(), output))
1031    }
1032
1033    /// Returns the calculated checksum value of the currently decoded data.
1034    ///
1035    /// Will return 0 for cases where the checksum is not validated.
1036    pub fn current_checksum(&self) -> u32 {
1037        self.checksum.current_value()
1038    }
1039}