1use std::u16;
23use lzvalue::LZValue;
4use huffman_table::{NUM_LITERALS_AND_LENGTHS, NUM_DISTANCE_CODES, END_OF_BLOCK_POSITION,
5 get_distance_code, get_length_code};
67/// The type used for representing how many times a literal, length or distance code has been ouput
8/// to the current buffer.
9/// As we are limiting the blocks to be at most 2^16 bytes long, we can represent frequencies using
10/// 16-bit values.
11pub type FrequencyType = u16;
1213/// The maximum number of literals/lengths in the buffer, which in practice also means the maximum
14/// number of literals/lengths output before a new block is started.
15/// This should not be larger than the maximum value `FrequencyType` can represent to prevent
16/// overflowing (which would degrade, or in the worst case break compression).
17pub const MAX_BUFFER_LENGTH: usize = 1024 * 31;
1819#[derive(Debug, PartialEq)]
20pub enum BufferStatus {
21 NotFull,
22 Full,
23}
2425/// Struct that buffers lz77 data and keeps track of the usage of different codes
26pub struct DynamicWriter {
27 buffer: Vec<LZValue>,
28// The two last length codes are not actually used, but only participates in code construction
29 // Therefore, we ignore them to get the correct number of lengths
30frequencies: [FrequencyType; NUM_LITERALS_AND_LENGTHS],
31 distance_frequencies: [FrequencyType; NUM_DISTANCE_CODES],
32}
3334impl DynamicWriter {
35#[inline]
36pub fn check_buffer_length(&self) -> BufferStatus {
37if self.buffer.len() >= MAX_BUFFER_LENGTH {
38 BufferStatus::Full
39 } else {
40 BufferStatus::NotFull
41 }
42 }
4344#[inline]
45pub fn write_literal(&mut self, literal: u8) -> BufferStatus {
46debug_assert!(self.buffer.len() < MAX_BUFFER_LENGTH);
47self.buffer.push(LZValue::literal(literal));
48self.frequencies[usize::from(literal)] += 1;
49self.check_buffer_length()
50 }
5152#[inline]
53pub fn write_length_distance(&mut self, length: u16, distance: u16) -> BufferStatus {
54self.buffer.push(LZValue::length_distance(length, distance));
55let l_code_num = get_length_code(length);
56// As we limit the buffer to 2^16 values, this should be safe from overflowing.
57if cfg!(debug_assertions) {
58self.frequencies[l_code_num] += 1;
59 } else {
60// #Safety
61 // None of the values in the table of length code numbers will give a value
62 // that is out of bounds.
63 // There is a test to ensure that these functions can not produce too large values.
64unsafe {
65*self.frequencies.get_unchecked_mut(l_code_num) += 1;
66 }
67 }
68let d_code_num = get_distance_code(distance);
69// The compiler seems to be able to evade the bounds check here somehow.
70self.distance_frequencies[usize::from(d_code_num)] += 1;
71self.check_buffer_length()
72 }
7374pub fn buffer_length(&self) -> usize {
75self.buffer.len()
76 }
7778pub fn get_buffer(&self) -> &[LZValue] {
79&self.buffer
80 }
8182pub fn new() -> DynamicWriter {
83let mut w = DynamicWriter {
84 buffer: Vec::with_capacity(MAX_BUFFER_LENGTH),
85 frequencies: [0; NUM_LITERALS_AND_LENGTHS],
86 distance_frequencies: [0; NUM_DISTANCE_CODES],
87 };
88// This will always be 1,
89 // since there will always only be one end of block marker in each block
90w.frequencies[END_OF_BLOCK_POSITION] = 1;
91 w
92 }
9394/// Special output function used with RLE compression
95 /// that avoids bothering to lookup a distance code.
96#[inline]
97pub fn write_length_rle(&mut self, length: u16) -> BufferStatus {
98self.buffer.push(LZValue::length_distance(length, 1));
99let l_code_num = get_length_code(length);
100// As we limit the buffer to 2^16 values, this should be safe from overflowing.
101if cfg!(debug_assertions) {
102self.frequencies[l_code_num] += 1;
103 } else {
104// #Safety
105 // None of the values in the table of length code numbers will give a value
106 // that is out of bounds.
107 // There is a test to ensure that these functions won't produce too large values.
108unsafe {
109*self.frequencies.get_unchecked_mut(l_code_num) += 1;
110 }
111 }
112self.distance_frequencies[0] += 1;
113self.check_buffer_length()
114 }
115116pub fn get_frequencies(&self) -> (&[u16], &[u16]) {
117 (&self.frequencies, &self.distance_frequencies)
118 }
119120pub fn clear_frequencies(&mut self) {
121self.frequencies = [0; NUM_LITERALS_AND_LENGTHS];
122self.distance_frequencies = [0; NUM_DISTANCE_CODES];
123self.frequencies[END_OF_BLOCK_POSITION] = 1;
124 }
125126pub fn clear_data(&mut self) {
127self.buffer.clear()
128 }
129130pub fn clear(&mut self) {
131self.clear_frequencies();
132self.clear_data();
133 }
134}
135136#[cfg(test)]
137mod test {
138use super::*;
139use huffman_table::{get_length_code, get_distance_code};
140#[test]
141/// Ensure that these function won't produce values that would overflow the output_writer
142 /// tables since we use some unsafe indexing.
143fn array_bounds() {
144let w = DynamicWriter::new();
145146for i in 0..u16::max_value() {
147 get_length_code(i) < w.frequencies.len();
148 }
149150for i in 0..u16::max_value() {
151 get_distance_code(i) < w.distance_frequencies.len() as u8;
152 }
153 }
154}