deflate/
writer.rs

1use std::io::Write;
2use std::{thread, io};
3
4use byteorder::{WriteBytesExt, BigEndian};
5
6use checksum::{Adler32Checksum, RollingChecksum};
7use compress::compress_data_dynamic_n;
8use compress::Flush;
9use deflate_state::DeflateState;
10use compression_options::CompressionOptions;
11use zlib::{write_zlib_header, CompressionLevel};
12
/// Panic message used when the wrapped writer is unexpectedly absent from the encoder state.
// The space before the trailing `\` is significant: a line continuation swallows the newline
// and all leading whitespace of the following line, so without it the two sentences would be
// glued together ("missing.This is a bug").
const ERR_STR: &'static str = "Error! The wrapped writer is missing. \
                               This is a bug, please file an issue.";
15
16/// Keep compressing until all the input has been compressed and output or the writer returns `Err`.
17pub fn compress_until_done<W: Write>(
18    mut input: &[u8],
19    deflate_state: &mut DeflateState<W>,
20    flush_mode: Flush,
21) -> io::Result<()> {
22    // This should only be used for flushing.
23    assert!(flush_mode != Flush::None);
24    loop {
25        match compress_data_dynamic_n(input, deflate_state, flush_mode) {
26            Ok(0) => {
27                if deflate_state.output_buf().is_empty() {
28                    break;
29                } else {
30                    // If the output buffer isn't empty, keep going until it is, as there is still
31                    // data to be flushed.
32                    input = &[];
33                }
34            }
35            Ok(n) => {
36                if n < input.len() {
37                    input = &input[n..]
38                } else {
39                    input = &[];
40                }
41            }
42            Err(e) => {
43                match e.kind() {
44                    // This error means that there may still be data to flush.
45                    // This could possibly get stuck if the underlying writer keeps returning this
46                    // error.
47                    io::ErrorKind::Interrupted => (),
48                    _ => return Err(e),
49                }
50            }
51        }
52    }
53
54    debug_assert_eq!(
55        deflate_state.bytes_written,
56        deflate_state.bytes_written_control.get()
57    );
58
59    Ok(())
60}
61
/// A DEFLATE encoder/compressor.
///
/// A struct implementing a [`Write`] interface that takes unencoded data and compresses it to
/// the provided writer using DEFLATE compression.
///
/// Call [`finish()`](#method.finish) to output the remaining data and get the wrapped writer
/// back. Dropping the encoder also tries to output the remaining data, but any error that
/// occurs while doing so is discarded.
///
/// # Examples
///
/// ```rust
/// # use std::io;
/// #
/// # fn try_main() -> io::Result<Vec<u8>> {
/// #
/// use std::io::Write;
///
/// use deflate::Compression;
/// use deflate::write::DeflateEncoder;
///
/// let data = b"This is some test data";
/// let mut encoder = DeflateEncoder::new(Vec::new(), Compression::Default);
/// encoder.write_all(data)?;
/// let compressed_data = encoder.finish()?;
/// # Ok(compressed_data)
/// #
/// # }
/// # fn main() {
/// #     try_main().unwrap();
/// # }
/// ```
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
pub struct DeflateEncoder<W: Write> {
    // Compressor state; it also holds the wrapped writer (as an `Option`, so that `finish()`
    // can take it out before the encoder is dropped).
    deflate_state: DeflateState<W>,
}
94
95impl<W: Write> DeflateEncoder<W> {
96    /// Creates a new encoder using the provided compression options.
97    pub fn new<O: Into<CompressionOptions>>(writer: W, options: O) -> DeflateEncoder<W> {
98        DeflateEncoder {
99            deflate_state: DeflateState::new(options.into(), writer),
100        }
101    }
102
103    /// Encode all pending data to the contained writer, consume this `DeflateEncoder`,
104    /// and return the contained writer if writing succeeds.
105    pub fn finish(mut self) -> io::Result<W> {
106        self.output_all()?;
107        // We have to move the inner writer out of the encoder, and replace it with `None`
108        // to let the `DeflateEncoder` drop safely.
109        Ok(self.deflate_state.inner.take().expect(ERR_STR))
110    }
111
112    /// Resets the encoder (except the compression options), replacing the current writer
113    /// with a new one, returning the old one.
114    pub fn reset(&mut self, w: W) -> io::Result<W> {
115        self.output_all()?;
116        self.deflate_state.reset(w)
117    }
118
119    /// Output all pending data as if encoding is done, but without resetting anything
120    fn output_all(&mut self) -> io::Result<()> {
121        compress_until_done(&[], &mut self.deflate_state, Flush::Finish)
122    }
123}
124
125impl<W: Write> io::Write for DeflateEncoder<W> {
126    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
127        let flush_mode = self.deflate_state.flush_mode;
128        compress_data_dynamic_n(buf, &mut self.deflate_state, flush_mode)
129    }
130
131    /// Flush the encoder.
132    ///
133    /// This will flush the encoder, emulating the Sync flush method from Zlib.
134    /// This essentially finishes the current block, and sends an additional empty stored block to
135    /// the writer.
136    fn flush(&mut self) -> io::Result<()> {
137        compress_until_done(&[], &mut self.deflate_state, Flush::Sync)
138    }
139}
140
141impl<W: Write> Drop for DeflateEncoder<W> {
142    /// When the encoder is dropped, output the rest of the data.
143    ///
144    /// WARNING: This may silently fail if writing fails, so using this to finish encoding
145    /// for writers where writing might fail is not recommended, for that call
146    /// [`finish()`](#method.finish) instead.
147    fn drop(&mut self) {
148        // Not sure if implementing drop is a good idea or not, but we follow flate2 for now.
149        // We only do this if we are not panicking, to avoid a double panic.
150        if self.deflate_state.inner.is_some() && !thread::panicking() {
151            let _ = self.output_all();
152        }
153    }
154}
155
156
/// A Zlib encoder/compressor.
///
/// A struct implementing a [`Write`] interface that takes unencoded data and compresses it to
/// the provided writer using DEFLATE compression with Zlib headers and trailers.
///
/// Call [`finish()`](#method.finish) to output the remaining data and the trailer and get the
/// wrapped writer back. Dropping the encoder also tries to do this, but any error that occurs
/// is discarded.
///
/// # Examples
///
/// ```rust
/// # use std::io;
/// #
/// # fn try_main() -> io::Result<Vec<u8>> {
/// #
/// use std::io::Write;
///
/// use deflate::Compression;
/// use deflate::write::ZlibEncoder;
///
/// let data = b"This is some test data";
/// let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
/// encoder.write_all(data)?;
/// let compressed_data = encoder.finish()?;
/// # Ok(compressed_data)
/// #
/// # }
/// # fn main() {
/// #     try_main().unwrap();
/// # }
/// ```
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
pub struct ZlibEncoder<W: Write> {
    // Compressor state; it also holds the wrapped writer (as an `Option`, so that `finish()`
    // can take it out before the encoder is dropped).
    deflate_state: DeflateState<W>,
    // Adler32 checksum of the input consumed so far; written out as the trailer.
    checksum: Adler32Checksum,
    // Whether the zlib header has already been put into the output buffer.
    header_written: bool,
}
191
192impl<W: Write> ZlibEncoder<W> {
193    /// Create a new `ZlibEncoder` using the provided compression options.
194    pub fn new<O: Into<CompressionOptions>>(writer: W, options: O) -> ZlibEncoder<W> {
195        ZlibEncoder {
196            deflate_state: DeflateState::new(options.into(), writer),
197            checksum: Adler32Checksum::new(),
198            header_written: false,
199        }
200    }
201
202    /// Output all pending data ,including the trailer(checksum) as if encoding is done,
203    /// but without resetting anything.
204    fn output_all(&mut self) -> io::Result<()> {
205        self.check_write_header()?;
206        compress_until_done(&[], &mut self.deflate_state, Flush::Finish)?;
207        self.write_trailer()
208    }
209
210    /// Encode all pending data to the contained writer, consume this `ZlibEncoder`,
211    /// and return the contained writer if writing succeeds.
212    pub fn finish(mut self) -> io::Result<W> {
213        self.output_all()?;
214        // We have to move the inner writer out of the encoder, and replace it with `None`
215        // to let the `DeflateEncoder` drop safely.
216        Ok(self.deflate_state.inner.take().expect(ERR_STR))
217    }
218
219    /// Resets the encoder (except the compression options), replacing the current writer
220    /// with a new one, returning the old one.
221    pub fn reset(&mut self, writer: W) -> io::Result<W> {
222        self.output_all()?;
223        self.header_written = false;
224        self.checksum = Adler32Checksum::new();
225        self.deflate_state.reset(writer)
226    }
227
228    /// Check if a zlib header should be written.
229    fn check_write_header(&mut self) -> io::Result<()> {
230        if !self.header_written {
231            write_zlib_header(self.deflate_state.output_buf(), CompressionLevel::Default)?;
232            self.header_written = true;
233        }
234        Ok(())
235    }
236
237    /// Write the trailer, which for zlib is the Adler32 checksum.
238    fn write_trailer(&mut self) -> io::Result<()> {
239        let hash = self.checksum.current_hash();
240
241        self.deflate_state
242            .inner
243            .as_mut()
244            .expect(ERR_STR)
245            .write_u32::<BigEndian>(hash)
246    }
247
248    /// Return the adler32 checksum of the currently consumed data.
249    pub fn checksum(&self) -> u32 {
250        self.checksum.current_hash()
251    }
252}
253
254impl<W: Write> io::Write for ZlibEncoder<W> {
255    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
256        self.check_write_header()?;
257        let flush_mode = self.deflate_state.flush_mode;
258        let res = compress_data_dynamic_n(buf, &mut self.deflate_state, flush_mode);
259        match res {
260            // If this is returned, the whole buffer was consumed
261            Ok(0) => self.checksum.update_from_slice(buf),
262            // Otherwise, only part of it was consumed, so only that part
263            // added to the checksum.
264            Ok(n) => self.checksum.update_from_slice(&buf[0..n]),
265            _ => (),
266        };
267        res
268    }
269
270    /// Flush the encoder.
271    ///
272    /// This will flush the encoder, emulating the Sync flush method from Zlib.
273    /// This essentially finishes the current block, and sends an additional empty stored block to
274    /// the writer.
275    fn flush(&mut self) -> io::Result<()> {
276        compress_until_done(&[], &mut self.deflate_state, Flush::Sync)
277    }
278}
279
280impl<W: Write> Drop for ZlibEncoder<W> {
281    /// When the encoder is dropped, output the rest of the data.
282    ///
283    /// WARNING: This may silently fail if writing fails, so using this to finish encoding
284    /// for writers where writing might fail is not recommended, for that call
285    /// [`finish()`](#method.finish) instead.
286    fn drop(&mut self) {
287        if self.deflate_state.inner.is_some() && !thread::panicking() {
288            let _ = self.output_all();
289        }
290    }
291}
292
293#[cfg(feature = "gzip")]
294pub mod gzip {
295
296    use std::io::{Write, Cursor};
297    use std::{thread, io};
298
299    use super::*;
300
301    use byteorder::{WriteBytesExt, LittleEndian};
302    use gzip_header::{Crc, GzBuilder};
303
    /// A Gzip encoder/compressor.
    ///
    /// A struct implementing a [`Write`] interface that takes unencoded data and compresses it to
    /// the provided writer using DEFLATE compression with Gzip headers and trailers.
    ///
    /// Call [`finish()`](#method.finish) to output the remaining data and the trailer and get
    /// the wrapped writer back. Dropping the encoder also tries to do this, but any error that
    /// occurs is discarded.
    ///
    /// # Examples
    ///
    /// ```rust
    /// # use std::io;
    /// #
    /// # fn try_main() -> io::Result<Vec<u8>> {
    /// #
    /// use std::io::Write;
    ///
    /// use deflate::Compression;
    /// use deflate::write::GzEncoder;
    ///
    /// let data = b"This is some test data";
    /// let mut encoder = GzEncoder::new(Vec::new(), Compression::Default);
    /// encoder.write_all(data)?;
    /// let compressed_data = encoder.finish()?;
    /// # Ok(compressed_data)
    /// #
    /// # }
    /// # fn main() {
    /// #     try_main().unwrap();
    /// # }
    /// ```
    /// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
    pub struct GzEncoder<W: Write> {
        // The raw DEFLATE encoder doing the actual compression; it owns the wrapped writer.
        inner: DeflateEncoder<W>,
        // Running checksum (and byte count) of the consumed input, used for the trailer.
        checksum: Crc,
        // Serialized gzip header that has not been output yet; emptied once it has been
        // copied to the output buffer.
        header: Vec<u8>,
    }
338
339    impl<W: Write> GzEncoder<W> {
340        /// Create a new `GzEncoder` writing deflate-compressed data to the underlying writer when
341        /// written to, wrapped in a gzip header and trailer. The header details will be blank.
342        pub fn new<O: Into<CompressionOptions>>(writer: W, options: O) -> GzEncoder<W> {
343            GzEncoder::from_builder(GzBuilder::new(), writer, options)
344        }
345
346        /// Create a new GzEncoder from the provided `GzBuilder`. This allows customising
347        /// the detalis of the header, such as the filename and comment fields.
348        pub fn from_builder<O: Into<CompressionOptions>>(
349            builder: GzBuilder,
350            writer: W,
351            options: O,
352        ) -> GzEncoder<W> {
353            GzEncoder {
354                inner: DeflateEncoder::new(writer, options),
355                checksum: Crc::new(),
356                header: builder.into_header(),
357            }
358        }
359
360        /// Write header to the output buffer if it hasn't been done yet.
361        fn check_write_header(&mut self) {
362            if !self.header.is_empty() {
363                self.inner
364                    .deflate_state
365                    .output_buf()
366                    .extend_from_slice(&self.header);
367                self.header.clear();
368            }
369        }
370
371        /// Output all pending data ,including the trailer(checksum + count) as if encoding is done.
372        /// but without resetting anything.
373        fn output_all(&mut self) -> io::Result<()> {
374            self.check_write_header();
375            self.inner.output_all()?;
376            self.write_trailer()
377        }
378
379        /// Encode all pending data to the contained writer, consume this `GzEncoder`,
380        /// and return the contained writer if writing succeeds.
381        pub fn finish(mut self) -> io::Result<W> {
382            self.output_all()?;
383            // We have to move the inner writer out of the encoder, and replace it with `None`
384            // to let the `DeflateEncoder` drop safely.
385            Ok(self.inner.deflate_state.inner.take().expect(ERR_STR))
386        }
387
388        fn reset_no_header(&mut self, writer: W) -> io::Result<W> {
389            self.output_all()?;
390            self.checksum = Crc::new();
391            self.inner.deflate_state.reset(writer)
392        }
393
394        /// Resets the encoder (except the compression options), replacing the current writer
395        /// with a new one, returning the old one. (Using a blank header).
396        pub fn reset(&mut self, writer: W) -> io::Result<W> {
397            let w = self.reset_no_header(writer);
398            self.header = GzBuilder::new().into_header();
399            w
400        }
401
402        /// Resets the encoder (excelt the compression options), replacing the current writer
403        /// with a new one, returning the old one, and using the provided `GzBuilder` to
404        /// create the header.
405        pub fn reset_with_builder(&mut self, writer: W, builder: GzBuilder) -> io::Result<W> {
406            let w = self.reset_no_header(writer);
407            self.header = builder.into_header();
408            w
409        }
410
411        /// Write the checksum and number of bytes mod 2^32 to the output writer.
412        fn write_trailer(&mut self) -> io::Result<()> {
413            let crc = self.checksum.sum();
414            let amount = self.checksum.amt_as_u32();
415
416            // We use a buffer here to make sure we don't end up writing only half the header if
417            // writing fails.
418            let mut buf = [0u8; 8];
419            let mut temp = Cursor::new(&mut buf[..]);
420            temp.write_u32::<LittleEndian>(crc).unwrap();
421            temp.write_u32::<LittleEndian>(amount).unwrap();
422            self.inner
423                .deflate_state
424                .inner
425                .as_mut()
426                .expect(ERR_STR)
427                .write_all(temp.into_inner())
428        }
429
430        /// Get the crc32 checksum of the data comsumed so far.
431        pub fn checksum(&self) -> u32 {
432            self.checksum.sum()
433        }
434    }
435
436    impl<W: Write> io::Write for GzEncoder<W> {
437        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
438            self.check_write_header();
439            let res = self.inner.write(buf);
440            match res {
441                Ok(0) => self.checksum.update(buf),
442                Ok(n) => self.checksum.update(&buf[0..n]),
443                _ => (),
444            };
445            res
446        }
447
448        /// Flush the encoder.
449        ///
450        /// This will flush the encoder, emulating the Sync flush method from Zlib.
451        /// This essentially finishes the current block, and sends an additional empty stored
452        /// block to the writer.
453        fn flush(&mut self) -> io::Result<()> {
454            self.inner.flush()
455        }
456    }
457
458    impl<W: Write> Drop for GzEncoder<W> {
459        /// When the encoder is dropped, output the rest of the data.
460        ///
461        /// WARNING: This may silently fail if writing fails, so using this to finish encoding
462        /// for writers where writing might fail is not recommended, for that call
463        /// [`finish()`](#method.finish) instead.
464        fn drop(&mut self) {
465            if self.inner.deflate_state.inner.is_some() && !thread::panicking() {
466                let _ = self.output_all();
467            }
468        }
469    }
470
471    #[cfg(test)]
472    mod test {
473        use super::*;
474        use test_utils::{get_test_data, decompress_gzip};
475        #[test]
476        fn gzip_writer() {
477            let data = get_test_data();
478            let comment = b"Comment";
479            let compressed = {
480                let mut compressor = GzEncoder::from_builder(
481                    GzBuilder::new().comment(&comment[..]),
482                    Vec::with_capacity(data.len() / 3),
483                    CompressionOptions::default(),
484                );
485                compressor.write_all(&data[0..data.len() / 2]).unwrap();
486                compressor.write_all(&data[data.len() / 2..]).unwrap();
487                compressor.finish().unwrap()
488            };
489
490            let (dec, res) = decompress_gzip(&compressed);
491            assert_eq!(dec.header().comment().unwrap(), comment);
492            assert!(res == data);
493        }
494    }
495}
496
497#[cfg(test)]
498mod test {
499    use super::*;
500    use test_utils::{get_test_data, decompress_to_end, decompress_zlib};
501    use compression_options::CompressionOptions;
502    use std::io::Write;
503
504    #[test]
505    fn deflate_writer() {
506        let data = get_test_data();
507        let compressed = {
508            let mut compressor = DeflateEncoder::new(
509                Vec::with_capacity(data.len() / 3),
510                CompressionOptions::high(),
511            );
512            // Write in multiple steps to see if this works as it's supposed to.
513            compressor.write_all(&data[0..data.len() / 2]).unwrap();
514            compressor.write_all(&data[data.len() / 2..]).unwrap();
515            compressor.finish().unwrap()
516        };
517
518        let res = decompress_to_end(&compressed);
519        assert!(res == data);
520    }
521
522    #[test]
523    fn zlib_writer() {
524        let data = get_test_data();
525        let compressed = {
526            let mut compressor = ZlibEncoder::new(
527                Vec::with_capacity(data.len() / 3),
528                CompressionOptions::high(),
529            );
530            compressor.write_all(&data[0..data.len() / 2]).unwrap();
531            compressor.write_all(&data[data.len() / 2..]).unwrap();
532            compressor.finish().unwrap()
533        };
534
535        let res = decompress_zlib(&compressed);
536        assert!(res == data);
537    }
538
539    #[test]
540    /// Check if the the result of compressing after resetting is the same as before.
541    fn writer_reset() {
542        let data = get_test_data();
543        let mut compressor = DeflateEncoder::new(
544            Vec::with_capacity(data.len() / 3),
545            CompressionOptions::default(),
546        );
547        compressor.write_all(&data).unwrap();
548        let res1 = compressor
549            .reset(Vec::with_capacity(data.len() / 3))
550            .unwrap();
551        compressor.write_all(&data).unwrap();
552        let res2 = compressor.finish().unwrap();
553        assert!(res1 == res2);
554    }
555
556    #[test]
557    fn writer_reset_zlib() {
558        let data = get_test_data();
559        let mut compressor = ZlibEncoder::new(
560            Vec::with_capacity(data.len() / 3),
561            CompressionOptions::default(),
562        );
563        compressor.write_all(&data).unwrap();
564        let res1 = compressor
565            .reset(Vec::with_capacity(data.len() / 3))
566            .unwrap();
567        compressor.write_all(&data).unwrap();
568        let res2 = compressor.finish().unwrap();
569        assert!(res1 == res2);
570    }
571
572    #[test]
573    fn writer_sync() {
574        let data = get_test_data();
575        let compressed = {
576            let mut compressor = DeflateEncoder::new(
577                Vec::with_capacity(data.len() / 3),
578                CompressionOptions::default(),
579            );
580            let split = data.len() / 2;
581            compressor.write_all(&data[..split]).unwrap();
582            compressor.flush().unwrap();
583            {
584                let buf = &mut compressor.deflate_state.inner.as_mut().unwrap();
585                let buf_len = buf.len();
586                // Check for the sync marker. (excluding the header as it might not line
587                // up with the byte boundary.)
588                assert_eq!(buf[buf_len - 4..], [0, 0, 255, 255]);
589            }
590            compressor.write_all(&data[split..]).unwrap();
591            compressor.finish().unwrap()
592        };
593
594        let decompressed = decompress_to_end(&compressed);
595
596        assert!(decompressed == data);
597    }
598
599    #[test]
600    /// Make sure compression works with the writer when the input is between 1 and 2 window sizes.
601    fn issue_18() {
602        use compression_options::Compression;
603        let data = vec![0; 61000];
604        let compressed = {
605            let mut compressor = ZlibEncoder::new(Vec::new(), Compression::Default);
606            compressor.write_all(&data[..]).unwrap();
607            compressor.finish().unwrap()
608        };
609        let decompressed = decompress_zlib(&compressed);
610        assert!(decompressed == data);
611    }
612
613    #[test]
614    fn writer_sync_multiple() {
615        use std::cmp;
616        let data = get_test_data();
617        let compressed = {
618            let mut compressor = DeflateEncoder::new(
619                Vec::with_capacity(data.len() / 3),
620                CompressionOptions::default(),
621            );
622            let split = data.len() / 2;
623            compressor.write_all(&data[..split]).unwrap();
624            compressor.flush().unwrap();
625            compressor.flush().unwrap();
626            {
627                let buf = &mut compressor.deflate_state.inner.as_mut().unwrap();
628                let buf_len = buf.len();
629                // Check for the sync marker. (excluding the header as it might not line
630                // up with the byte boundary.)
631                assert_eq!(buf[buf_len - 4..], [0, 0, 255, 255]);
632            }
633            compressor
634                .write_all(&data[split..cmp::min(split + 2, data.len())])
635                .unwrap();
636            compressor.flush().unwrap();
637            compressor
638                .write_all(&data[cmp::min(split + 2, data.len())..])
639                .unwrap();
640            compressor.finish().unwrap()
641        };
642
643        let decompressed = decompress_to_end(&compressed);
644
645        assert!(decompressed == data);
646
647        let mut compressor = DeflateEncoder::new(
648            Vec::with_capacity(data.len() / 3),
649            CompressionOptions::default(),
650        );
651
652        compressor.flush().unwrap();
653        compressor.write_all(&[1, 2]).unwrap();
654        compressor.flush().unwrap();
655        compressor.write_all(&[3]).unwrap();
656        compressor.flush().unwrap();
657        let compressed = compressor.finish().unwrap();
658
659        let decompressed = decompress_to_end(&compressed);
660
661        assert_eq!(decompressed, [1, 2, 3]);
662    }
663}