deflate/
lib.rs

//! An implementation of an encoder using the [DEFLATE](http://www.gzip.org/zlib/rfc-deflate.html)
//! compression algorithm in pure Rust.
3//!
4//! This library provides functions to compress data using the DEFLATE algorithm,
5//! optionally wrapped using the [zlib](https://tools.ietf.org/html/rfc1950) or
6//! [gzip](http://www.gzip.org/zlib/rfc-gzip.html) formats.
7//! The current implementation is still a bit lacking speed-wise compared to C-libraries
8//! like zlib and miniz.
9//!
10//! The deflate algorithm is an older compression algorithm that is still widely used today,
//! by e.g. HTTP headers, the `.png` image format, the unix `gzip` program and commonly in `.zip`
12//! files. The `zlib` and `gzip` formats are wrappers around DEFLATE-compressed data, containing
13//! some extra metadata and a checksum to validate the integrity of the raw data.
14//!
//! The deflate algorithm does not perform as well as newer algorithms used in file formats such as
16//! `.7z`, `.rar`, `.xz` and `.bz2`, and is thus not the ideal choice for applications where
17//! the `DEFLATE` format (with or without wrappers) is not required.
18//!
19//! Support for the gzip wrapper (the wrapper that is used in `.gz` files) is disabled by default,
20//! but can be enabled with the `gzip` feature.
21//!
22//! As this library is still in development, the compression output may change slightly
23//! between versions.
24//!
25//!
26//! # Examples:
27//! ## Simple compression function:
28//! ``` rust
29//! use deflate::deflate_bytes;
30//!
31//! let data = b"Some data";
32//! let compressed = deflate_bytes(data);
33//! # let _ = compressed;
34//! ```
35//!
36//! ## Using a writer:
37//! ``` rust
38//! use std::io::Write;
39//!
40//! use deflate::Compression;
41//! use deflate::write::ZlibEncoder;
42//!
43//! let data = b"This is some test data";
44//! let mut encoder = ZlibEncoder::new(Vec::new(), Compression::Default);
45//! encoder.write_all(data).expect("Write error!");
46//! let compressed_data = encoder.finish().expect("Failed to finish compression!");
47//! # let _ = compressed_data;
48//! ```
49
50#![cfg_attr(all(feature = "benchmarks", test), feature(test))]
51
52#[cfg(all(test, feature = "benchmarks"))]
53extern crate test as test_std;
54
55#[cfg(test)]
56extern crate flate2;
57// #[cfg(test)]
58// extern crate inflate;
59
60extern crate adler32;
61extern crate byteorder;
62#[cfg(feature = "gzip")]
63extern crate gzip_header;
64
65mod compression_options;
66mod huffman_table;
67mod lz77;
68mod lzvalue;
69mod chained_hash_table;
70mod length_encode;
71mod output_writer;
72mod stored_block;
73mod huffman_lengths;
74mod zlib;
75mod checksum;
76mod bit_reverse;
77mod bitstream;
78mod encoder_state;
79mod matching;
80mod input_buffer;
81mod deflate_state;
82mod compress;
83mod rle;
84mod writer;
85#[cfg(test)]
86mod test_utils;
87
88use std::io::Write;
89use std::io;
90
91use byteorder::BigEndian;
92#[cfg(feature = "gzip")]
93use gzip_header::GzBuilder;
94#[cfg(feature = "gzip")]
95use gzip_header::Crc;
96#[cfg(feature = "gzip")]
97use byteorder::LittleEndian;
98
99use checksum::RollingChecksum;
100use deflate_state::DeflateState;
101
102pub use compression_options::{CompressionOptions, SpecialOptions, Compression};
103use compress::Flush;
104pub use lz77::MatchingType;
105
106use writer::compress_until_done;
107
108/// Encoders implementing a `Write` interface.
109pub mod write {
110    pub use writer::{DeflateEncoder, ZlibEncoder};
111    #[cfg(feature = "gzip")]
112    pub use writer::gzip::GzEncoder;
113}
114
115
116fn compress_data_dynamic<RC: RollingChecksum, W: Write>(
117    input: &[u8],
118    writer: &mut W,
119    mut checksum: RC,
120    compression_options: CompressionOptions,
121) -> io::Result<()> {
122    checksum.update_from_slice(input);
123    // We use a box here to avoid putting the buffers on the stack
124    // It's done here rather than in the structs themselves for now to
125    // keep the data close in memory.
126    let mut deflate_state = Box::new(DeflateState::new(compression_options, writer));
127    compress_until_done(input, &mut deflate_state, Flush::Finish)
128}
129
130/// Compress the given slice of bytes with DEFLATE compression.
131///
132/// Returns a `Vec<u8>` of the compressed data.
133///
134/// # Examples
135///
136/// ```
137/// use deflate::{deflate_bytes_conf, Compression};
138///
139/// let data = b"This is some test data";
140/// let compressed_data = deflate_bytes_conf(data, Compression::Best);
141/// # let _ = compressed_data;
142/// ```
143pub fn deflate_bytes_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
144    let mut writer = Vec::with_capacity(input.len() / 3);
145    compress_data_dynamic(
146        input,
147        &mut writer,
148        checksum::NoChecksum::new(),
149        options.into(),
150    ).expect("Write error!");
151    writer
152}
153
154/// Compress the given slice of bytes with DEFLATE compression using the default compression
155/// level.
156///
157/// Returns a `Vec<u8>` of the compressed data.
158///
159/// # Examples
160///
161/// ```
162/// use deflate::deflate_bytes;
163///
164/// let data = b"This is some test data";
165/// let compressed_data = deflate_bytes(data);
166/// # let _ = compressed_data;
167/// ```
168pub fn deflate_bytes(input: &[u8]) -> Vec<u8> {
169    deflate_bytes_conf(input, Compression::Default)
170}
171
172/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer.
173///
174/// Returns a `Vec<u8>` of the compressed data.
175///
176/// Zlib dictionaries are not yet suppored.
177///
178/// # Examples
179///
180/// ```
181/// use deflate::{deflate_bytes_zlib_conf, Compression};
182///
183/// let data = b"This is some test data";
184/// let compressed_data = deflate_bytes_zlib_conf(data, Compression::Best);
185/// # let _ = compressed_data;
186/// ```
187pub fn deflate_bytes_zlib_conf<O: Into<CompressionOptions>>(input: &[u8], options: O) -> Vec<u8> {
188    use byteorder::WriteBytesExt;
189    let mut writer = Vec::with_capacity(input.len() / 3);
190    // Write header
191    zlib::write_zlib_header(&mut writer, zlib::CompressionLevel::Default)
192        .expect("Write error when writing zlib header!");
193
194    let mut checksum = checksum::Adler32Checksum::new();
195    compress_data_dynamic(input, &mut writer, &mut checksum, options.into())
196        .expect("Write error when writing compressed data!");
197
198    let hash = checksum.current_hash();
199
200    writer
201        .write_u32::<BigEndian>(hash)
202        .expect("Write error when writing checksum!");
203    writer
204}
205
206/// Compress the given slice of bytes with DEFLATE compression, including a zlib header and trailer,
207/// using the default compression level.
208///
209/// Returns a Vec<u8> of the compressed data.
210///
211/// Zlib dictionaries are not yet suppored.
212///
213/// # Examples
214///
215/// ```
216/// use deflate::deflate_bytes_zlib;
217///
218/// let data = b"This is some test data";
219/// let compressed_data = deflate_bytes_zlib(data);
220/// # let _ = compressed_data;
221/// ```
222pub fn deflate_bytes_zlib(input: &[u8]) -> Vec<u8> {
223    deflate_bytes_zlib_conf(input, Compression::Default)
224}
225
/// Compress the given slice of bytes with DEFLATE compression, wrapped in a gzip header and
/// trailer, using the given gzip header builder and compression options.
///
/// The output consists of the header produced by `gzip_header`, the compressed data, and
/// then two little-endian `u32` values taken from a `Crc` computed over `input`: its sum
/// followed by its amount.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// # Panics
///
/// Panics if writing the header, the compressed data or the trailer to the output buffer
/// reports an error.
///
/// # Examples
///
/// ```
/// extern crate gzip_header;
/// extern crate deflate;
///
/// # fn main() {
/// use deflate::{deflate_bytes_gzip_conf, Compression};
/// use gzip_header::GzBuilder;
///
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip_conf(data, Compression::Best, GzBuilder::new());
/// # let _ = compressed_data;
/// # }
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip_conf<O>(input: &[u8], options: O, gzip_header: GzBuilder) -> Vec<u8>
where
    O: Into<CompressionOptions>,
{
    use byteorder::WriteBytesExt;

    let mut output = Vec::with_capacity(input.len() / 3);

    // Header.
    let header_bytes = gzip_header.into_header();
    output
        .write_all(&header_bytes)
        .expect("Write error when writing header!");

    // Compressed data. No rolling checksum is tracked during compression; the CRC for the
    // trailer is computed separately below.
    let mut no_checksum = checksum::NoChecksum::new();
    compress_data_dynamic(input, &mut output, &mut no_checksum, options.into())
        .expect("Write error when writing compressed data!");

    // Trailer: CRC sum, then the amount, both little-endian.
    let mut crc = Crc::new();
    crc.update(input);
    let sum = crc.sum();
    let amount = crc.amt_as_u32();

    output
        .write_u32::<LittleEndian>(sum)
        .expect("Write error when writing checksum!");
    output
        .write_u32::<LittleEndian>(amount)
        .expect("Write error when writing amt!");

    output
}
275
/// Compress the given slice of bytes with DEFLATE compression, wrapped in a gzip header and
/// trailer, at the default compression level and with a default-valued gzip header.
///
/// This is a shorthand for calling `deflate_bytes_gzip_conf` with `Compression::Default`
/// and `GzBuilder::new()`.
///
/// Returns a `Vec<u8>` of the compressed data.
///
/// # Examples
///
/// ```
/// use deflate::deflate_bytes_gzip;
/// let data = b"This is some test data";
/// let compressed_data = deflate_bytes_gzip(data);
/// # let _ = compressed_data;
/// ```
#[cfg(feature = "gzip")]
pub fn deflate_bytes_gzip(input: &[u8]) -> Vec<u8> {
    let header = GzBuilder::new();
    deflate_bytes_gzip_conf(input, Compression::Default, header)
}
294
#[cfg(test)]
mod test {
    use super::*;
    use std::cmp::min;
    use std::io::Write;

    use test_utils::{get_test_data, decompress_to_end, decompress_zlib};
    #[cfg(feature = "gzip")]
    use test_utils::decompress_gzip;

    type CO = CompressionOptions;

    /// Write `data` to `writer` in chunks of at most `chunk_size` bytes.
    fn chunked_write<W: Write>(mut writer: W, data: &[u8], chunk_size: usize) {
        for chunk in data.chunks(chunk_size) {
            writer.write_all(chunk).unwrap();
        }
    }

    /// Round-trip a short in-memory string and check that it actually got smaller.
    #[test]
    fn dynamic_string_mem() {
        let test_data = String::from("                    GNU GENERAL PUBLIC LICENSE").into_bytes();
        let compressed = deflate_bytes(&test_data);

        assert!(compressed.len() < test_data.len());

        let result = decompress_to_end(&compressed);
        assert_eq!(test_data, result);
    }

    /// Round-trip the shared test data with the default settings.
    #[test]
    fn dynamic_string_file() {
        let input = get_test_data();
        let compressed = deflate_bytes(&input);

        let result = decompress_to_end(&compressed);
        for (n, (&a, &b)) in input.iter().zip(result.iter()).enumerate() {
            if a != b {
                println!("First difference at {}, input: {}, output: {}", n, a, b);
                // Clamp the context window to both slices so that a mismatch close to the
                // start or the end prints the diagnostic instead of panicking on an
                // out-of-range index.
                let start = n.saturating_sub(3);
                let end = min(n + 3, min(input.len(), result.len()));
                println!(
                    "input: {:?}, output: {:?}",
                    &input[start..end],
                    &result[start..end]
                );
                break;
            }
        }
        // Not using assert_eq here deliberately to avoid massive amounts of output spam
        assert!(input == result);
        // Check that we actually managed to compress the input
        assert!(compressed.len() < input.len());
    }

    /// Round-trip the shared test data using the RLE options.
    #[test]
    fn file_rle() {
        let input = get_test_data();
        let compressed = deflate_bytes_conf(&input, CO::rle());

        let result = decompress_to_end(&compressed);
        assert!(input == result);
    }

    /// Round-trip the shared test data through the zlib wrapper.
    #[test]
    fn file_zlib() {
        let test_data = get_test_data();

        let compressed = deflate_bytes_zlib(&test_data);

        println!("file_zlib compressed(default) length: {}", compressed.len());

        let result = decompress_zlib(&compressed);

        assert!(test_data == result);
        assert!(compressed.len() < test_data.len());
    }

    #[test]
    fn zlib_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        roundtrip_zlib(&test_data, CO::default());
    }

    /// Round-trip 32768 identical bytes followed by a few trailing bytes.
    #[test]
    fn zlib_last_block() {
        let mut test_data = vec![22; 32768];
        test_data.extend(&[5, 2, 55, 11, 12]);
        roundtrip_zlib(&test_data, CO::default());
    }

    #[test]
    fn deflate_short() {
        let test_data = [10, 10, 10, 10, 10, 55];
        let compressed = deflate_bytes(&test_data);

        let result = decompress_to_end(&compressed);
        assert_eq!(&test_data, result.as_slice());
        // If block type and compression is selected correctly, this should only take 5 bytes.
        assert_eq!(compressed.len(), 5);
    }

    /// Round-trip through the gzip wrapper and check that the header comment survives.
    #[cfg(feature = "gzip")]
    #[test]
    fn gzip() {
        let data = get_test_data();
        let comment = b"Test";
        let compressed = deflate_bytes_gzip_conf(
            &data,
            Compression::Default,
            GzBuilder::new().comment(&comment[..]),
        );
        let (dec, decompressed) = decompress_gzip(&compressed);
        assert_eq!(dec.header().comment().unwrap(), comment);
        assert!(data == decompressed);
    }

    /// Compress the test data through `ZlibEncoder` in `chunk_size` pieces and check that the
    /// result decompresses correctly and is identical to the one-shot function's output.
    fn chunk_test(chunk_size: usize, level: CompressionOptions) {
        let mut compressed = Vec::with_capacity(32000);
        let data = get_test_data();
        {
            let mut compressor = write::ZlibEncoder::new(&mut compressed, level);
            chunked_write(&mut compressor, &data, chunk_size);
            compressor.finish().unwrap();
        }
        let compressed2 = deflate_bytes_zlib_conf(&data, level);
        let res = decompress_zlib(&compressed);
        assert!(res == data);
        assert_eq!(compressed.len(), compressed2.len());
        assert!(compressed == compressed2);
    }

    /// Run `chunk_test` at the given level over a range of chunk sizes.
    fn writer_chunks_level(level: CompressionOptions) {
        use input_buffer::BUFFER_SIZE;
        let ct = |n| chunk_test(n, level);
        ct(1);
        ct(50);
        ct(400);
        ct(32768);
        ct(BUFFER_SIZE);
        ct(50000);
        ct((32768 * 2) + 258);
    }

    /// Test the writer by inputting data one chunk at a time.
    #[ignore]
    #[test]
    fn zlib_writer_chunks() {
        writer_chunks_level(CompressionOptions::default());
        writer_chunks_level(CompressionOptions::fast());
        writer_chunks_level(CompressionOptions::rle());
    }

    /// Check that the frequency values don't overflow.
    #[test]
    fn frequency_overflow() {
        let _ = deflate_bytes_conf(
            &vec![5; 100000],
            compression_options::CompressionOptions::default(),
        );
    }

    /// Compress `data` with the zlib wrapper at `level` and check that it decompresses back
    /// to the same bytes.
    fn roundtrip_zlib(data: &[u8], level: CompressionOptions) {
        let compressed = deflate_bytes_zlib_conf(data, level);
        let res = decompress_zlib(&compressed);
        // Only print the full contents on failure when the input is short.
        if data.len() <= 32 {
            assert_eq!(res, data, "Failed with level: {:?}", level);
        } else {
            assert!(res == data, "Failed with level: {:?}", level);
        }
    }

    fn check_zero(level: CompressionOptions) {
        roundtrip_zlib(&[], level);
    }

    /// Compress with an empty slice.
    #[test]
    fn empty_input() {
        check_zero(CompressionOptions::default());
        check_zero(CompressionOptions::fast());
        check_zero(CompressionOptions::rle());
    }

    /// Round-trip very short inputs at each of the option presets.
    #[test]
    fn one_and_two_values() {
        let one = &[1][..];
        roundtrip_zlib(one, CO::rle());
        roundtrip_zlib(one, CO::fast());
        roundtrip_zlib(one, CO::default());
        // NOTE(review): despite the name, this slice holds four values, not two.
        let two = &[5, 6, 7, 8][..];
        roundtrip_zlib(two, CO::rle());
        roundtrip_zlib(two, CO::fast());
        roundtrip_zlib(two, CO::default());
    }
}
493
494
495}