deflate/
output_writer.rs

1use std::u16;
2
3use lzvalue::LZValue;
4use huffman_table::{NUM_LITERALS_AND_LENGTHS, NUM_DISTANCE_CODES, END_OF_BLOCK_POSITION,
5                    get_distance_code, get_length_code};
6
7/// The type used for representing how many times a literal, length or distance code has been ouput
8/// to the current buffer.
9/// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using
10/// 16-bit values.
11pub type FrequencyType = u16;
12
13/// The maximum number of literals/lengths in the buffer, which in practice also means the maximum
14/// number of literals/lengths output before a new block is started.
15/// This should not be larger than the maximum value `FrequencyType` can represent to prevent
16/// overflowing (which would degrade, or in the worst case break compression).
17pub const MAX_BUFFER_LENGTH: usize = 1024 * 31;
18
19#[derive(Debug, PartialEq)]
20pub enum BufferStatus {
21    NotFull,
22    Full,
23}
24
25/// Struct that buffers lz77 data and keeps track of the usage of different codes
26pub struct DynamicWriter {
27    buffer: Vec<LZValue>,
28    // The two last length codes are not actually used, but only participates in code construction
29    // Therefore, we ignore them to get the correct number of lengths
30    frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS],
31    distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES],
32}
33
34impl DynamicWriter {
35    #[inline]
36    pub fn check_buffer_length(&self) -> BufferStatus {
37        if self.buffer.len() >= MAX_BUFFER_LENGTH {
38            BufferStatus::Full
39        } else {
40            BufferStatus::NotFull
41        }
42    }
43
44    #[inline]
45    pub fn write_literal(&mut self, literal: u8) -> BufferStatus {
46        debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
47        self.buffer.push(LZValue::literal(literal));
48        self.frequencies[usize::from(literal)] += 1;
49        self.check_buffer_length()
50    }
51
52    #[inline]
53    pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
54        self.buffer.push(LZValue::length_distance(length, distance));
55        let l_code_num = get_length_code(length);
56        // As we limit the buffer to 2^16 values, this should be safe from overflowing.
57        if cfg!(debug_assertions) {
58            self.frequencies[l_code_num] += 1;
59        } else {
60            // #Safety
61            // None of the values in the table of length code numbers will give a value
62            // that is out of bounds.
63            // There is a test to ensure that these functions can not produce too large values.
64            unsafe {
65                *self.frequencies.get_unchecked_mut(l_code_num) += 1;
66            }
67        }
68        let d_code_num = get_distance_code(distance);
69        // The compiler seems to be able to evade the bounds check here somehow.
70        self.distance_frequencies[usize::from(d_code_num)] += 1;
71        self.check_buffer_length()
72    }
73
74    pub fn buffer_length(&self) -> usize {
75        self.buffer.len()
76    }
77
78    pub fn get_buffer(&self) -> &[LZValue] {
79        &self.buffer
80    }
81
82    pub fn new() -> DynamicWriter {
83        let mut w = DynamicWriter {
84            buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
85            frequencies: [0; NUM_LITERALS_AND_LENGTHS],
86            distance_frequencies: [0; NUM_DISTANCE_CODES],
87        };
88        // This will always be 1,
89        // since there will always only be one end of block marker in each block
90        w.frequencies[END_OF_BLOCK_POSITION] = 1;
91        w
92    }
93
94    /// Special output function used with RLE compression
95    /// that avoids bothering to lookup a distance code.
96    #[inline]
97    pub fn write_length_rle(&mut self, length: u16) -> BufferStatus {
98        self.buffer.push(LZValue::length_distance(length, 1));
99        let l_code_num = get_length_code(length);
100        // As we limit the buffer to 2^16 values, this should be safe from overflowing.
101        if cfg!(debug_assertions) {
102            self.frequencies[l_code_num] += 1;
103        } else {
104            // #Safety
105            // None of the values in the table of length code numbers will give a value
106            // that is out of bounds.
107            // There is a test to ensure that these functions won't produce too large values.
108            unsafe {
109                *self.frequencies.get_unchecked_mut(l_code_num) += 1;
110            }
111        }
112        self.distance_frequencies[0] += 1;
113        self.check_buffer_length()
114    }
115
116    pub fn get_frequencies(&self) -> (&[u16], &[u16]) {
117        (&self.frequencies, &self.distance_frequencies)
118    }
119
120    pub fn clear_frequencies(&mut self) {
121        self.frequencies = [0; NUM_LITERALS_AND_LENGTHS];
122        self.distance_frequencies = [0; NUM_DISTANCE_CODES];
123        self.frequencies[END_OF_BLOCK_POSITION] = 1;
124    }
125
126    pub fn clear_data(&mut self) {
127        self.buffer.clear()
128    }
129
130    pub fn clear(&mut self) {
131        self.clear_frequencies();
132        self.clear_data();
133    }
134}
135
136#[cfg(test)]
137mod test {
138    use super::*;
139    use huffman_table::{get_length_code, get_distance_code};
140    #[test]
141    /// Ensure that these function won't produce values that would overflow the output_writer
142    /// tables since we use some unsafe indexing.
143    fn array_bounds() {
144        let w = DynamicWriter::new();
145
146        for i in 0..u16::max_value() {
147            get_length_code(i) < w.frequencies.len();
148        }
149
150        for i in 0..u16::max_value() {
151            get_distance_code(i) < w.distance_frequencies.len() as u8;
152        }
153    }
154}