flate2/
mem.rs

1use std::error::Error;
2use std::fmt;
3use std::io;
4use std::slice;
5
6use crate::ffi::{self, Backend, Deflate, DeflateBackend, Inflate, InflateBackend};
7use crate::Compression;
8
/// Raw in-memory compression stream for blocks of data.
///
/// This type is the building block for the I/O streams in the rest of this
/// crate. It requires more management than the [`Read`]/[`Write`] API but is
/// maximally flexible in terms of accepting input from any source and being
/// able to produce output to any memory location.
///
/// It is recommended to use the I/O stream adaptors over this type as they're
/// easier to use.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
#[derive(Debug)]
pub struct Compress {
    // Backend deflate state; the methods on `Compress` forward to it.
    inner: Deflate,
}
25
/// Raw in-memory decompression stream for blocks of data.
///
/// This type is the building block for the I/O streams in the rest of this
/// crate. It requires more management than the [`Read`]/[`Write`] API but is
/// maximally flexible in terms of accepting input from any source and being
/// able to produce output to any memory location.
///
/// It is recommended to use the I/O stream adaptors over this type as they're
/// easier to use.
///
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
#[derive(Debug)]
pub struct Decompress {
    // Backend inflate state; the methods on `Decompress` forward to it.
    inner: Inflate,
}
42
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
/// Values which indicate the form of flushing to be used when compressing
/// in-memory data.
///
/// Each named variant's discriminant is the matching `ffi::MZ_*` flush
/// constant.
pub enum FlushCompress {
    /// A typical parameter for passing to compression/decompression functions,
    /// this tells the underlying stream to decide how much data to accumulate
    /// before producing output in order to maximize compression.
    None = ffi::MZ_NO_FLUSH as isize,

    /// All pending output is flushed to the output buffer and the output is
    /// aligned on a byte boundary so that the decompressor can get all input
    /// data available so far.
    ///
    /// Flushing may degrade compression for some compression algorithms and so
    /// it should only be used when necessary. This will complete the current
    /// deflate block and follow it with an empty stored block.
    Sync = ffi::MZ_SYNC_FLUSH as isize,

    /// All pending output is flushed to the output buffer, but the output is
    /// not aligned to a byte boundary.
    ///
    /// All of the input data so far will be available to the decompressor (as
    /// with `FlushCompress::Sync`). This completes the current deflate block
    /// and follows it with an empty fixed codes block that is 10 bits long,
    /// and it assures that enough bytes are output in order for the
    /// decompressor to finish the block before the empty fixed code block.
    Partial = ffi::MZ_PARTIAL_FLUSH as isize,

    /// All output is flushed as with `FlushCompress::Sync` and the compression
    /// state is reset so decompression can restart from this point if previous
    /// compressed data has been damaged or if random access is desired.
    ///
    /// Using this option too often can seriously degrade compression.
    Full = ffi::MZ_FULL_FLUSH as isize,

    /// Pending input is processed and pending output is flushed.
    ///
    /// The return value may indicate that the stream is not yet done and more
    /// data has yet to be processed.
    Finish = ffi::MZ_FINISH as isize,

    // Hidden from the docs; presumably present only so that downstream code
    // cannot match this enum exhaustively.
    #[doc(hidden)]
    _Nonexhaustive,
}
87
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
/// Values which indicate the form of flushing to be used when
/// decompressing in-memory data.
///
/// Each named variant's discriminant is the matching `ffi::MZ_*` flush
/// constant.
pub enum FlushDecompress {
    /// A typical parameter for passing to compression/decompression functions,
    /// this tells the underlying stream to decide how much data to accumulate
    /// before producing output in order to maximize compression.
    None = ffi::MZ_NO_FLUSH as isize,

    /// All pending output is flushed to the output buffer and the output is
    /// aligned on a byte boundary so that the decompressor can get all input
    /// data available so far.
    ///
    /// Flushing may degrade compression for some compression algorithms and so
    /// it should only be used when necessary. This will complete the current
    /// deflate block and follow it with an empty stored block.
    Sync = ffi::MZ_SYNC_FLUSH as isize,

    /// Pending input is processed and pending output is flushed.
    ///
    /// The return value may indicate that the stream is not yet done and more
    /// data has yet to be processed.
    Finish = ffi::MZ_FINISH as isize,

    // Hidden from the docs; presumably present only so that downstream code
    // cannot match this enum exhaustively.
    #[doc(hidden)]
    _Nonexhaustive,
}
115
/// The inner state for an error when decompressing.
///
/// The derived `Default` leaves `needs_dictionary` as `None`, which is what
/// `decompress_failed` produces.
#[derive(Debug, Default)]
pub(crate) struct DecompressErrorInner {
    // `Some(adler)` when the failure reports the checksum of a required
    // dictionary (see `decompress_need_dict`); `None` otherwise.
    pub(crate) needs_dictionary: Option<u32>,
}
121
/// Error returned when a decompression object finds that the input stream of
/// bytes was not a valid input stream of bytes.
///
/// The wrapped state is crate-private; use
/// [`needs_dictionary`](DecompressError::needs_dictionary) to find out whether
/// the failure was caused by a missing preset dictionary.
#[derive(Debug)]
pub struct DecompressError(pub(crate) DecompressErrorInner);
126
127impl DecompressError {
128    /// Indicates whether decompression failed due to requiring a dictionary.
129    ///
130    /// The resulting integer is the Adler-32 checksum of the dictionary
131    /// required.
132    pub fn needs_dictionary(&self) -> Option<u32> {
133        self.0.needs_dictionary
134    }
135}
136
137#[inline]
138pub(crate) fn decompress_failed() -> Result<Status, DecompressError> {
139    Err(DecompressError(Default::default()))
140}
141
142#[inline]
143pub(crate) fn decompress_need_dict(adler: u32) -> Result<Status, DecompressError> {
144    Err(DecompressError(DecompressErrorInner {
145        needs_dictionary: Some(adler),
146    }))
147}
148
/// Error returned when a compression object is used incorrectly or otherwise
/// generates an error.
///
/// The error carries no further detail.
#[derive(Debug)]
// The unit payload is crate-private, so values can only be built inside this
// crate.
pub struct CompressError(pub(crate) ());
153
/// Possible status results of compressing some data or successfully
/// decompressing a block of data.
///
/// This is the `Ok` payload of the `compress*`/`decompress*` methods on
/// `Compress` and `Decompress`.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
pub enum Status {
    /// Indicates success.
    ///
    /// Means that more input may be needed but isn't available
    /// and/or there's more output to be written but the output buffer is full.
    Ok,

    /// Indicates that forward progress is not possible due to input or output
    /// buffers being empty.
    ///
    /// For compression it means the input buffer needs some more data or the
    /// output buffer needs to be freed up before trying again.
    ///
    /// For decompression this means that more input is needed to continue or
    /// the output buffer isn't large enough to contain the result. The function
    /// can be called again after fixing both.
    BufError,

    /// Indicates that all input has been consumed and all output bytes have
    /// been written. Decompression/compression should not be called again.
    ///
    /// For decompression with zlib streams the adler-32 of the decompressed
    /// data has also been verified.
    StreamEnd,
}
182
183impl Compress {
184    /// Creates a new object ready for compressing data that it's given.
185    ///
186    /// The `level` argument here indicates what level of compression is going
187    /// to be performed, and the `zlib_header` argument indicates whether the
188    /// output data should have a zlib header or not.
189    pub fn new(level: Compression, zlib_header: bool) -> Compress {
190        Compress {
191            inner: Deflate::make(level, zlib_header, ffi::MZ_DEFAULT_WINDOW_BITS as u8),
192        }
193    }
194
195    /// Creates a new object ready for compressing data that it's given.
196    ///
197    /// The `level` argument here indicates what level of compression is going
198    /// to be performed, and the `zlib_header` argument indicates whether the
199    /// output data should have a zlib header or not. The `window_bits` parameter
200    /// indicates the base-2 logarithm of the sliding window size and must be
201    /// between 9 and 15.
202    ///
203    /// # Panics
204    ///
205    /// If `window_bits` does not fall into the range 9 ..= 15,
206    /// `new_with_window_bits` will panic.
207    ///
208    /// # Note
209    ///
210    /// This constructor is only available when the `zlib` feature is used.
211    /// Other backends currently do not support custom window bits.
212    #[cfg(feature = "any_zlib")]
213    pub fn new_with_window_bits(
214        level: Compression,
215        zlib_header: bool,
216        window_bits: u8,
217    ) -> Compress {
218        Compress {
219            inner: Deflate::make(level, zlib_header, window_bits),
220        }
221    }
222
223    /// Returns the total number of input bytes which have been processed by
224    /// this compression object.
225    pub fn total_in(&self) -> u64 {
226        self.inner.total_in()
227    }
228
229    /// Returns the total number of output bytes which have been produced by
230    /// this compression object.
231    pub fn total_out(&self) -> u64 {
232        self.inner.total_out()
233    }
234
235    /// Specifies the compression dictionary to use.
236    ///
237    /// Returns the Adler-32 checksum of the dictionary.
238    #[cfg(feature = "any_zlib")]
239    pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result<u32, CompressError> {
240        let stream = &mut *self.inner.inner.stream_wrapper;
241        let rc = unsafe {
242            assert!(dictionary.len() < ffi::uInt::max_value() as usize);
243            ffi::deflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt)
244        };
245
246        match rc {
247            ffi::MZ_STREAM_ERROR => Err(CompressError(())),
248            ffi::MZ_OK => Ok(stream.adler as u32),
249            c => panic!("unknown return code: {}", c),
250        }
251    }
252
253    /// Quickly resets this compressor without having to reallocate anything.
254    ///
255    /// This is equivalent to dropping this object and then creating a new one.
256    pub fn reset(&mut self) {
257        self.inner.reset();
258    }
259
260    /// Dynamically updates the compression level.
261    ///
262    /// This can be used to switch between compression levels for different
263    /// kinds of data, or it can be used in conjunction with a call to reset
264    /// to reuse the compressor.
265    ///
266    /// This may return an error if there wasn't enough output space to complete
267    /// the compression of the available input data before changing the
268    /// compression level. Flushing the stream before calling this method
269    /// ensures that the function will succeed on the first call.
270    #[cfg(feature = "any_zlib")]
271    pub fn set_level(&mut self, level: Compression) -> Result<(), CompressError> {
272        use libc::c_int;
273        let stream = &mut *self.inner.inner.stream_wrapper;
274
275        let rc = unsafe { ffi::deflateParams(stream, level.0 as c_int, ffi::MZ_DEFAULT_STRATEGY) };
276
277        match rc {
278            ffi::MZ_OK => Ok(()),
279            ffi::MZ_BUF_ERROR => Err(CompressError(())),
280            c => panic!("unknown return code: {}", c),
281        }
282    }
283
284    /// Compresses the input data into the output, consuming only as much
285    /// input as needed and writing as much output as possible.
286    ///
287    /// The flush option can be any of the available `FlushCompress` parameters.
288    ///
289    /// To learn how much data was consumed or how much output was produced, use
290    /// the `total_in` and `total_out` functions before/after this is called.
291    pub fn compress(
292        &mut self,
293        input: &[u8],
294        output: &mut [u8],
295        flush: FlushCompress,
296    ) -> Result<Status, CompressError> {
297        self.inner.compress(input, output, flush)
298    }
299
300    /// Compresses the input data into the extra space of the output, consuming
301    /// only as much input as needed and writing as much output as possible.
302    ///
303    /// This function has the same semantics as `compress`, except that the
304    /// length of `vec` is managed by this function. This will not reallocate
305    /// the vector provided or attempt to grow it, so space for the output must
306    /// be reserved in the output vector by the caller before calling this
307    /// function.
308    pub fn compress_vec(
309        &mut self,
310        input: &[u8],
311        output: &mut Vec<u8>,
312        flush: FlushCompress,
313    ) -> Result<Status, CompressError> {
314        let cap = output.capacity();
315        let len = output.len();
316
317        unsafe {
318            let before = self.total_out();
319            let ret = {
320                let ptr = output.as_mut_ptr().offset(len as isize);
321                let out = slice::from_raw_parts_mut(ptr, cap - len);
322                self.compress(input, out, flush)
323            };
324            output.set_len((self.total_out() - before) as usize + len);
325            return ret;
326        }
327    }
328}
329
330impl Decompress {
331    /// Creates a new object ready for decompressing data that it's given.
332    ///
333    /// The `zlib_header` argument indicates whether the input data is expected
334    /// to have a zlib header or not.
335    pub fn new(zlib_header: bool) -> Decompress {
336        Decompress {
337            inner: Inflate::make(zlib_header, ffi::MZ_DEFAULT_WINDOW_BITS as u8),
338        }
339    }
340
341    /// Creates a new object ready for decompressing data that it's given.
342    ///
343    /// The `zlib_header` argument indicates whether the input data is expected
344    /// to have a zlib header or not. The `window_bits` parameter indicates the
345    /// base-2 logarithm of the sliding window size and must be between 9 and 15.
346    ///
347    /// # Panics
348    ///
349    /// If `window_bits` does not fall into the range 9 ..= 15,
350    /// `new_with_window_bits` will panic.
351    ///
352    /// # Note
353    ///
354    /// This constructor is only available when the `zlib` feature is used.
355    /// Other backends currently do not support custom window bits.
356    #[cfg(feature = "any_zlib")]
357    pub fn new_with_window_bits(zlib_header: bool, window_bits: u8) -> Decompress {
358        Decompress {
359            inner: Inflate::make(zlib_header, window_bits),
360        }
361    }
362
363    /// Returns the total number of input bytes which have been processed by
364    /// this decompression object.
365    pub fn total_in(&self) -> u64 {
366        self.inner.total_in()
367    }
368
369    /// Returns the total number of output bytes which have been produced by
370    /// this decompression object.
371    pub fn total_out(&self) -> u64 {
372        self.inner.total_out()
373    }
374
375    /// Decompresses the input data into the output, consuming only as much
376    /// input as needed and writing as much output as possible.
377    ///
378    /// The flush option can be any of the available `FlushDecompress` parameters.
379    ///
380    /// If the first call passes `FlushDecompress::Finish` it is assumed that
381    /// the input and output buffers are both sized large enough to decompress
382    /// the entire stream in a single call.
383    ///
384    /// A flush value of `FlushDecompress::Finish` indicates that there are no
385    /// more source bytes available beside what's already in the input buffer,
386    /// and the output buffer is large enough to hold the rest of the
387    /// decompressed data.
388    ///
389    /// To learn how much data was consumed or how much output was produced, use
390    /// the `total_in` and `total_out` functions before/after this is called.
391    ///
392    /// # Errors
393    ///
394    /// If the input data to this instance of `Decompress` is not a valid
395    /// zlib/deflate stream then this function may return an instance of
396    /// `DecompressError` to indicate that the stream of input bytes is corrupted.
397    pub fn decompress(
398        &mut self,
399        input: &[u8],
400        output: &mut [u8],
401        flush: FlushDecompress,
402    ) -> Result<Status, DecompressError> {
403        self.inner.decompress(input, output, flush)
404    }
405
406    /// Decompresses the input data into the extra space in the output vector
407    /// specified by `output`.
408    ///
409    /// This function has the same semantics as `decompress`, except that the
410    /// length of `vec` is managed by this function. This will not reallocate
411    /// the vector provided or attempt to grow it, so space for the output must
412    /// be reserved in the output vector by the caller before calling this
413    /// function.
414    ///
415    /// # Errors
416    ///
417    /// If the input data to this instance of `Decompress` is not a valid
418    /// zlib/deflate stream then this function may return an instance of
419    /// `DecompressError` to indicate that the stream of input bytes is corrupted.
420    pub fn decompress_vec(
421        &mut self,
422        input: &[u8],
423        output: &mut Vec<u8>,
424        flush: FlushDecompress,
425    ) -> Result<Status, DecompressError> {
426        let cap = output.capacity();
427        let len = output.len();
428
429        unsafe {
430            let before = self.total_out();
431            let ret = {
432                let ptr = output.as_mut_ptr().offset(len as isize);
433                let out = slice::from_raw_parts_mut(ptr, cap - len);
434                self.decompress(input, out, flush)
435            };
436            output.set_len((self.total_out() - before) as usize + len);
437            return ret;
438        }
439    }
440
441    /// Specifies the decompression dictionary to use.
442    #[cfg(feature = "any_zlib")]
443    pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result<u32, DecompressError> {
444        let stream = &mut *self.inner.inner.stream_wrapper;
445        let rc = unsafe {
446            assert!(dictionary.len() < ffi::uInt::max_value() as usize);
447            ffi::inflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt)
448        };
449
450        match rc {
451            ffi::MZ_STREAM_ERROR => Err(DecompressError(Default::default())),
452            ffi::MZ_DATA_ERROR => Err(DecompressError(DecompressErrorInner {
453                needs_dictionary: Some(stream.adler as u32),
454            })),
455            ffi::MZ_OK => Ok(stream.adler as u32),
456            c => panic!("unknown return code: {}", c),
457        }
458    }
459
460    /// Performs the equivalent of replacing this decompression state with a
461    /// freshly allocated copy.
462    ///
463    /// This function may not allocate memory, though, and attempts to reuse any
464    /// previously existing resources.
465    ///
466    /// The argument provided here indicates whether the reset state will
467    /// attempt to decode a zlib header first or not.
468    pub fn reset(&mut self, zlib_header: bool) {
469        self.inner.reset(zlib_header);
470    }
471}
472
// Marker impl: relies on the `Debug` derive and the `Display` impl in this
// file, and keeps every default `Error` method.
impl Error for DecompressError {}
474
475impl From<DecompressError> for io::Error {
476    fn from(data: DecompressError) -> io::Error {
477        io::Error::new(io::ErrorKind::Other, data)
478    }
479}
480
481impl fmt::Display for DecompressError {
482    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
483        write!(f, "deflate decompression error")
484    }
485}
486
// Marker impl: relies on the `Debug` derive and the `Display` impl in this
// file, and keeps every default `Error` method.
impl Error for CompressError {}
488
489impl From<CompressError> for io::Error {
490    fn from(data: CompressError) -> io::Error {
491        io::Error::new(io::ErrorKind::Other, data)
492    }
493}
494
495impl fmt::Display for CompressError {
496    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
497        write!(f, "deflate decompression error")
498    }
499}
500
#[cfg(test)]
mod tests {
    use std::io::Write;

    use crate::write;
    use crate::{Compression, Decompress, FlushDecompress};

    #[cfg(feature = "any_zlib")]
    use crate::{Compress, FlushCompress};

    /// Regression test: feeding more bytes to a decompressor after it has
    /// already handled a whole stream must not panic.
    #[test]
    fn issue51() {
        let data = vec![
            0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0xb3, 0xc9, 0x28, 0xc9,
            0xcd, 0xb1, 0xe3, 0xe5, 0xb2, 0xc9, 0x48, 0x4d, 0x4c, 0xb1, 0xb3, 0x29, 0xc9, 0x2c,
            0xc9, 0x49, 0xb5, 0x33, 0x31, 0x30, 0x51, 0xf0, 0xcb, 0x2f, 0x51, 0x70, 0xcb, 0x2f,
            0xcd, 0x4b, 0xb1, 0xd1, 0x87, 0x08, 0xda, 0xe8, 0x83, 0x95, 0x00, 0x95, 0x26, 0xe5,
            0xa7, 0x54, 0x2a, 0x24, 0xa5, 0x27, 0xe7, 0xe7, 0xe4, 0x17, 0xd9, 0x2a, 0x95, 0x67,
            0x64, 0x96, 0xa4, 0x2a, 0x81, 0x8c, 0x48, 0x4e, 0xcd, 0x2b, 0x49, 0x2d, 0xb2, 0xb3,
            0xc9, 0x30, 0x44, 0x37, 0x01, 0x28, 0x62, 0xa3, 0x0f, 0x95, 0x06, 0xd9, 0x05, 0x54,
            0x04, 0xe5, 0xe5, 0xa5, 0x67, 0xe6, 0x55, 0xe8, 0x1b, 0xea, 0x99, 0xe9, 0x19, 0x21,
            0xab, 0xd0, 0x07, 0xd9, 0x01, 0x32, 0x53, 0x1f, 0xea, 0x3e, 0x00, 0x94, 0x85, 0xeb,
            0xe4, 0xa8, 0x00, 0x00, 0x00,
        ];

        let mut decoded = Vec::with_capacity(data.len() * 2);
        let mut d = Decompress::new(false);

        // Decompress the whole deflate stream; the first 10 bytes of `data`
        // are skipped (presumably the gzip header).
        let first = d.decompress_vec(&data[10..], &mut decoded, FlushDecompress::Finish);
        assert!(first.is_ok());

        // Feed data that has nothing to do with the deflate stream (this used
        // to panic); the outcome itself is deliberately ignored.
        let _ = d.decompress_vec(&[0], &mut decoded, FlushDecompress::None);
    }

    /// One `Decompress` can decode a zlib stream and, after `reset(false)`, a
    /// raw deflate stream, with `total_out` starting over.
    #[test]
    fn reset() {
        let payload: &[u8] = b"hello world";
        let level = Compression::default();

        let mut zlib = Vec::new();
        {
            // The encoder is dropped at the end of this block, before `zlib`
            // is read.
            let mut encoder = write::ZlibEncoder::new(&mut zlib, level);
            encoder.write_all(payload).unwrap();
        }

        let mut deflate = Vec::new();
        {
            let mut encoder = write::DeflateEncoder::new(&mut deflate, level);
            encoder.write_all(payload).unwrap();
        }

        let mut dst = [0u8; 1024];
        let mut decoder = Decompress::new(true);

        let zlib_status = decoder.decompress(&zlib, &mut dst, FlushDecompress::Finish);
        zlib_status.unwrap();
        assert_eq!(decoder.total_out(), payload.len() as u64);
        assert!(dst.starts_with(payload));

        decoder.reset(false);

        let raw_status = decoder.decompress(&deflate, &mut dst, FlushDecompress::Finish);
        raw_status.unwrap();
        assert_eq!(decoder.total_out(), payload.len() as u64);
        assert!(dst.starts_with(payload));
    }

    /// With a zlib header, the decoder first fails with the checksum of the
    /// dictionary it needs and succeeds once that dictionary has been set.
    #[cfg(feature = "any_zlib")]
    #[test]
    fn set_dictionary_with_zlib_header() {
        let payload: &[u8] = b"hello, hello!";
        let dictionary: &[u8] = b"hello";

        let mut encoder = Compress::new(Compression::default(), true);
        let dictionary_adler = encoder.set_dictionary(dictionary).unwrap();

        let mut encoded = Vec::with_capacity(1024);
        encoder
            .compress_vec(payload, &mut encoded, FlushCompress::Finish)
            .unwrap();

        assert_eq!(encoder.total_in(), payload.len() as u64);
        assert_eq!(encoder.total_out(), encoded.len() as u64);

        let mut decoded = [0u8; 1024];
        let mut decoder = Decompress::new(true);

        let first_attempt = decoder.decompress(&encoded, &mut decoded, FlushDecompress::Finish);
        let decompress_error = first_attempt
            .expect_err("decompression should fail due to requiring a dictionary");

        let required_adler = decompress_error.needs_dictionary()
            .expect("the first call to decompress should indicate a dictionary is required along with the required Adler-32 checksum");

        assert_eq!(required_adler, dictionary_adler,
            "the Adler-32 checksum should match the value when the dictionary was set on the compressor");

        let actual_adler = decoder.set_dictionary(dictionary).unwrap();
        assert_eq!(required_adler, actual_adler);

        // Resume from where the failed attempt stopped: the remaining input
        // goes into the remainder of the output buffer.
        let consumed = decoder.total_in() as usize;
        let produced = decoder.total_out() as usize;

        let decompress_result = decoder.decompress(
            &encoded[consumed..],
            &mut decoded[produced..],
            FlushDecompress::Finish,
        );
        assert!(decompress_result.is_ok());

        let total = decoder.total_out() as usize;
        assert_eq!(&decoded[..total], payload);
    }

    /// Without a zlib header, the dictionary is set on the decoder before any
    /// input is fed, and decoding succeeds in one call.
    #[cfg(feature = "any_zlib")]
    #[test]
    fn set_dictionary_raw() {
        let payload: &[u8] = b"hello, hello!";
        let dictionary: &[u8] = b"hello";

        let mut encoder = Compress::new(Compression::default(), false);
        encoder.set_dictionary(dictionary).unwrap();

        let mut encoded = Vec::with_capacity(1024);
        encoder
            .compress_vec(payload, &mut encoded, FlushCompress::Finish)
            .unwrap();

        assert_eq!(encoder.total_in(), payload.len() as u64);
        assert_eq!(encoder.total_out(), encoded.len() as u64);

        let mut decoder = Decompress::new(false);
        decoder.set_dictionary(dictionary).unwrap();

        let mut decoded = [0u8; 1024];
        let decompress_result = decoder.decompress(&encoded, &mut decoded, FlushDecompress::Finish);
        assert!(decompress_result.is_ok());

        let total = decoder.total_out() as usize;
        assert_eq!(&decoded[..total], payload);
    }
}