#![macro_use]

use std::cmp;
use std::ops::{Range, RangeFrom};
use std::iter::{self, Iterator};
use std::slice::Iter;
use std::fmt;

use input_buffer::InputBuffer;
use matching::longest_match;
#[cfg(test)]
use lzvalue::{LZValue, LZType};
use huffman_table;
use chained_hash_table::{ChainedHashTable, update_hash};
#[cfg(test)]
use compression_options::{HIGH_MAX_HASH_CHECKS, HIGH_LAZY_IF_LESS_THAN};
use output_writer::{BufferStatus, DynamicWriter};
use compress::Flush;
use rle::process_chunk_greedy_rle;

const MAX_MATCH: usize = huffman_table::MAX_MATCH as usize;
const MIN_MATCH: usize = huffman_table::MIN_MATCH as usize;

// Sentinel value for `lazy_if_less_than` used in tests to force the non-RLE code path.
const NO_RLE: u16 = 43212;

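/// The matching strategy to use when searching for matches during compression.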
#[derive(Clone, Copy, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum MatchingType {
    /// Output the match found at each position immediately, if it is long enough.
    Greedy,
    /// Before outputting a match, check whether the next position gives a longer one.
    Lazy,
}

impl fmt::Display for MatchingType {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            MatchingType::Greedy => write!(f, "Greedy matching"),
            MatchingType::Lazy => write!(f, "Lazy matching"),
        }
    }
}

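/// Compression state that is kept between calls to `lz77_compress_block`.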
pub struct LZ77State {
    /// Hash table of previous byte positions, used to find matches.
    hash_table: ChainedHashTable,
    /// True while the first window of input is still being filled and processed.
    is_first_window: bool,
    /// Whether the current block has been marked as the last one.
    is_last_block: bool,
    /// Number of bytes the previous chunk overlapped into the next window.
    overlap: usize,
    /// Number of input bytes that have gone into the current block so far.
    current_block_input_bytes: u64,
    /// Maximum number of hash chain entries to check when searching for a match.
    max_hash_checks: u16,
    /// Only check the next position for a better match if the current one is shorter than this.
    lazy_if_less_than: u16,
    /// Whether to use greedy or lazy matching.
    matching_type: MatchingType,
    /// Matcher state carried over between chunks.
    match_state: ChunkState,
    /// Number of overlapping bytes at the start of the next window that still need hashing.
    bytes_to_hash: usize,
    /// Whether the previous call ended with a sync flush.
    was_synced: bool,
}

impl LZ77State {
    /// Create a new LZ77 state with the given settings.
    pub fn new(
        max_hash_checks: u16,
        lazy_if_less_than: u16,
        matching_type: MatchingType,
    ) -> LZ77State {
        LZ77State {
            hash_table: ChainedHashTable::new(),
            is_first_window: true,
            is_last_block: false,
            overlap: 0,
            current_block_input_bytes: 0,
            max_hash_checks: max_hash_checks,
            lazy_if_less_than: lazy_if_less_than,
            matching_type: matching_type,
            match_state: ChunkState::new(),
            bytes_to_hash: 0,
            was_synced: false,
        }
    }

    /// Reset the state so a new stream of data can be compressed.
    pub fn reset(&mut self) {
        self.hash_table.reset();
        self.is_first_window = true;
        self.is_last_block = false;
        self.overlap = 0;
        self.current_block_input_bytes = 0;
        self.match_state = ChunkState::new();
        self.bytes_to_hash = 0;
    }

    /// Mark the current block as the last one.
    pub fn set_last(&mut self) {
        self.is_last_block = true;
    }

    /// Whether the current block has been marked as the last one.
    pub fn is_last_block(&self) -> bool {
        self.is_last_block
    }

    /// Number of input bytes that have gone into the current block so far.
    pub fn current_block_input_bytes(&self) -> u64 {
        self.current_block_input_bytes
    }

    /// Reset the counter of input bytes for the current block.
    pub fn reset_input_bytes(&mut self) {
        self.current_block_input_bytes = 0;
    }

    /// Whether there is a byte that has been read but not yet output.
    pub fn pending_byte(&self) -> bool {
        self.match_state.add
    }

    /// The number of pending bytes (0 or 1) that have not yet been output.
    pub fn pending_byte_as_num(&self) -> usize {
        if self.match_state.add {
            1
        } else {
            0
        }
    }
}

const DEFAULT_WINDOW_SIZE: usize = 32768;

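/// The status returned after processing a chunk of input.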
#[derive(Debug)]
pub enum ProcessStatus {
    /// All the input in the given range was processed.
    Ok,
    /// The output buffer filled up; the contained value is the input position at which
    /// processing stopped.
    BufferFull(usize),
}

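/// Matcher state that has to be saved between chunks, since a chunk may end while a match
/// or a literal is still pending.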
#[derive(Debug)]
pub struct ChunkState {
    /// Length of the match found at the last evaluated position, if any.
    current_length: u16,
    /// Distance of the match found at the last evaluated position, if any.
    current_distance: u16,
    /// The byte at the position before the one currently being evaluated.
    prev_byte: u8,
    /// The byte at the position currently being evaluated.
    cur_byte: u8,
    /// Whether `prev_byte` still needs to be output.
    add: bool,
}

impl ChunkState {
    pub fn new() -> ChunkState {
        ChunkState {
            current_length: 0,
            current_distance: 0,
            prev_byte: 0,
            cur_byte: 0,
            add: false,
        }
    }
}

pub fn buffer_full(position: usize) -> ProcessStatus {
    ProcessStatus::BufferFull(position)
}

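/// Process a chunk of input, dispatching to greedy, lazy or RLE-style matching depending on
/// the compression settings.
///
/// Returns the number of bytes overlapping into the next window and a status indicating
/// whether the output buffer filled up.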
fn process_chunk(
    data: &[u8],
    iterated_data: &Range<usize>,
    mut match_state: &mut ChunkState,
    hash_table: &mut ChainedHashTable,
    writer: &mut DynamicWriter,
    max_hash_checks: u16,
    lazy_if_less_than: usize,
    matching_type: MatchingType,
) -> (usize, ProcessStatus) {
    // In tests, passing `NO_RLE` as `lazy_if_less_than` forces the non-RLE code path.
    let avoid_rle = if cfg!(test) {
        lazy_if_less_than == NO_RLE as usize
    } else {
        false
    };
    match matching_type {
        MatchingType::Greedy => {
            process_chunk_greedy(data, iterated_data, hash_table, writer, max_hash_checks)
        }
        MatchingType::Lazy => {
            if max_hash_checks > 0 || avoid_rle {
                process_chunk_lazy(
                    data,
                    iterated_data,
                    &mut match_state,
                    hash_table,
                    writer,
                    max_hash_checks,
                    lazy_if_less_than,
                )
            } else {
                // With zero hash checks the hash chains can't be searched, so fall back to
                // the simpler RLE-style matcher.
                process_chunk_greedy_rle(data, iterated_data, writer)
            }
        }
    }
}

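/// Add the bytes covered by a match that was just output to the hash table, advancing the
/// iterators past them.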
fn add_to_hash_table(
    bytes_to_add: usize,
    insert_it: &mut iter::Zip<RangeFrom<usize>, Iter<u8>>,
    hash_it: &mut Iter<u8>,
    hash_table: &mut ChainedHashTable,
) {
    let taker = insert_it.by_ref().take(bytes_to_add);
    let mut hash_taker = hash_it.by_ref().take(bytes_to_add);
    let mut hash = hash_table.current_hash();
    for (ipos, _) in taker {
        if let Some(&i_hash_byte) = hash_taker.next() {
            hash = update_hash(hash, i_hash_byte);
            hash_table.add_with_hash(ipos, hash);
        }
    }
    hash_table.set_hash(hash);
}

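// Write a literal to the writer, returning early from the enclosing function with a
// `BufferFull` status if the output buffer is full afterwards.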
macro_rules! write_literal {
    ($w:ident, $byte:expr, $pos:expr) => {
        let b_status = $w.write_literal($byte);

        if let BufferStatus::Full = b_status {
            return (0, buffer_full($pos));
        }
    };
}

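/// Check whether a minimum-length match is too far back to be worth outputting, as very
/// distant short matches tend to cost more to encode than plain literals.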
#[inline]
fn match_too_far(match_len: usize, match_dist: usize) -> bool {
    const TOO_FAR: usize = 8 * 1024;
    match_len == MIN_MATCH && match_dist > TOO_FAR
}

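/// Create the iterators used when processing a chunk: the end position of the chunk, an
/// iterator over (position, byte) pairs to insert, and an iterator over the bytes used to
/// update the hash, which starts two bytes ahead of the insert positions.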
fn create_iterators<'a>(
    data: &'a [u8],
    iterated_data: &Range<usize>,
) -> (
    usize,
    iter::Zip<RangeFrom<usize>, Iter<'a, u8>>,
    Iter<'a, u8>,
) {
    let end = cmp::min(data.len(), iterated_data.end);
    let start = iterated_data.start;
    let current_chunk = &data[start..end];

    let insert_it = (start..).zip(current_chunk.iter());
    let hash_it = {
        let hash_start = if data.len() - start > 2 {
            start + 2
        } else {
            data.len()
        };
        (&data[hash_start..]).iter()
    };
    (end, insert_it, hash_it)
}

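/// Process a chunk of data using lazy matching: when a match is found, the next position is
/// also checked to see whether it yields a longer match before deciding what to output.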
fn process_chunk_lazy(
    data: &[u8],
    iterated_data: &Range<usize>,
    state: &mut ChunkState,
    mut hash_table: &mut ChainedHashTable,
    writer: &mut DynamicWriter,
    max_hash_checks: u16,
    lazy_if_less_than: usize,
) -> (usize, ProcessStatus) {
    let (end, mut insert_it, mut hash_it) = create_iterators(data, iterated_data);

    const NO_LENGTH: u16 = 0;

    // The match state carried over from the previous chunk, if any.
    let mut prev_length = state.current_length;
    let mut prev_distance = state.current_distance;

    state.current_length = 0;
    state.current_distance = 0;

    // The number of bytes past the end of the chunk that the last match extends into.
    let mut overlap = 0;

    // If the match carried over from the previous chunk was already long enough, don't look
    // for a better one at this position.
    let mut ignore_next = prev_length as usize >= lazy_if_less_than;

    state.prev_byte = state.cur_byte;

    while let Some((position, &b)) = insert_it.next() {
        state.cur_byte = b;
        if let Some(&hash_byte) = hash_it.next() {
            hash_table.add_hash_value(position, hash_byte);

            if !ignore_next {
                let (mut match_len, match_dist) = {
                    // If the previous match was already fairly long, spend less effort
                    // looking for an even longer one.
                    let max_hash_checks = if prev_length >= 32 {
                        max_hash_checks >> 2
                    } else {
                        max_hash_checks
                    };

                    longest_match(
                        data,
                        hash_table,
                        position,
                        prev_length as usize,
                        max_hash_checks,
                    )
                };

                // Discard minimum-length matches that are too far back to be worthwhile.
                if match_too_far(match_len, match_dist) {
                    match_len = NO_LENGTH as usize;
                };

                // If the match is long enough, don't try to improve on it at the next
                // position.
                if match_len >= lazy_if_less_than {
                    ignore_next = true;
                }
                state.current_length = match_len as u16;
                state.current_distance = match_dist as u16;
            } else {
                state.current_length = NO_LENGTH;
                state.current_distance = 0;
                ignore_next = false;
            };

            // If the match at the previous position was at least as long as the one found
            // here, and long enough to output, emit it now.
            if prev_length >= state.current_length && prev_length >= MIN_MATCH as u16 {
                let b_status =
                    writer.write_length_distance(prev_length as u16, prev_distance as u16);

                // The previous and the current position are already hashed, so only the
                // remaining bytes covered by the match need to be added to the hash table.
                let bytes_to_add = prev_length - 2;

                add_to_hash_table(
                    bytes_to_add as usize,
                    &mut insert_it,
                    &mut hash_it,
                    &mut hash_table,
                );

                // If the match extends past the end of the chunk, record how far so the
                // next chunk can start at the right position.
                if position + prev_length as usize > end {
                    overlap = position + prev_length as usize - end - 1;
                };

                state.add = false;

                state.current_length = 0;
                state.current_distance = 0;

                if let BufferStatus::Full = b_status {
                    return (overlap, buffer_full(position + prev_length as usize - 1));
                }

                ignore_next = false;
            } else if state.add {
                // There was no match long enough to output, so output the previous byte as
                // a literal instead.
                write_literal!(writer, state.prev_byte, position + 1);
            } else {
                state.add = true;
            }

            prev_length = state.current_length;
            prev_distance = state.current_distance;
            state.prev_byte = b;
        } else {
            // There are no more bytes left to hash (we are at the last bytes of the data),
            // so output any pending match or bytes as literals.
            if prev_length >= MIN_MATCH as u16 {
                let b_status =
                    writer.write_length_distance(prev_length as u16, prev_distance as u16);

                state.current_length = 0;
                state.current_distance = 0;
                state.add = false;

                if let BufferStatus::Full = b_status {
                    return (0, buffer_full(end));
                } else {
                    return (0, ProcessStatus::Ok);
                }
            };

            if state.add {
                state.add = false;

                write_literal!(writer, state.prev_byte, position + 1);
            };

            write_literal!(writer, b, position + 1);
        }
    }
    (overlap, ProcessStatus::Ok)
}

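/// Process a chunk of data using greedy matching: a match is output as soon as one of
/// sufficient length is found, without checking the next position for a better one.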
fn process_chunk_greedy(
    data: &[u8],
    iterated_data: &Range<usize>,
    mut hash_table: &mut ChainedHashTable,
    writer: &mut DynamicWriter,
    max_hash_checks: u16,
) -> (usize, ProcessStatus) {
    let (end, mut insert_it, mut hash_it) = create_iterators(data, iterated_data);

    const NO_LENGTH: usize = 0;

    // The number of bytes past the end of the chunk that the last match extends into.
    let mut overlap = 0;

    while let Some((position, &b)) = insert_it.next() {
        if let Some(&hash_byte) = hash_it.next() {
            hash_table.add_hash_value(position, hash_byte);

            let (match_len, match_dist) =
                { longest_match(data, hash_table, position, NO_LENGTH, max_hash_checks) };

            if match_len >= MIN_MATCH as usize && !match_too_far(match_len, match_dist) {
                let b_status = writer.write_length_distance(match_len as u16, match_dist as u16);

                // The current position is already hashed, so only the remaining bytes
                // covered by the match need to be added to the hash table.
                let bytes_to_add = match_len - 1;
                add_to_hash_table(
                    bytes_to_add,
                    &mut insert_it,
                    &mut hash_it,
                    &mut hash_table,
                );

                if position + match_len > end {
                    overlap = position + match_len - end;
                };

                if let BufferStatus::Full = b_status {
                    return (overlap, buffer_full(position + match_len));
                }
            } else {
                write_literal!(writer, b, position + 1);
            }
        } else {
            // No more bytes left to hash, so output the remaining bytes as literals.
            write_literal!(writer, b, position + 1);
        }
    }
    (overlap, ProcessStatus::Ok)
}

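/// The status returned from a call to `lz77_compress_block`.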
#[derive(Eq, PartialEq, Clone, Copy, Debug)]
pub enum LZ77Status {
    /// More input is needed before compression can continue.
    NeedInput,
    /// The output buffer is full, so the current block should be ended.
    EndBlock,
    /// All the provided input (up to the finish or sync flush) has been compressed.
    Finished,
}

#[cfg(test)]
pub fn lz77_compress_block_finish(
    data: &[u8],
    state: &mut LZ77State,
    buffer: &mut InputBuffer,
    mut writer: &mut DynamicWriter,
) -> (usize, LZ77Status) {
    let (consumed, status, _) =
        lz77_compress_block(data, state, buffer, &mut writer, Flush::Finish);
    (consumed, status)
}

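/// Compress `data` using the provided state, input buffer and writer.
///
/// Returns the number of bytes of `data` that were consumed, the status of the compression,
/// and the position in the input buffer up to which the data has been processed.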
pub fn lz77_compress_block(
    data: &[u8],
    state: &mut LZ77State,
    buffer: &mut InputBuffer,
    mut writer: &mut DynamicWriter,
    flush: Flush,
) -> (usize, LZ77Status, usize) {
    let window_size = DEFAULT_WINDOW_SIZE;

    // Both a finish and a sync flush mean that all the data provided so far has to be
    // processed before returning.
    let finish = flush == Flush::Finish || flush == Flush::Sync;
    let sync = flush == Flush::Sync;

    let mut current_position = 0;

    let mut status = LZ77Status::EndBlock;

    // Whether the initial hash values for the first two bytes still need to be added.
    let mut add_initial = true;

    // After a sync flush, the last two positions of the previous data could not be hashed
    // since the bytes following them were not available yet, so hash them now using the
    // first bytes of the new data.
    if state.was_synced {
        if buffer.current_end() > 2 {
            let pos_add = buffer.current_end() - 2;
            for (n, &b) in data.iter().take(2).enumerate() {
                state.hash_table.add_hash_value(n + pos_add, b);
            }
            add_initial = false;
        }
        state.was_synced = false;
    }

    // Add the new data to the input buffer; anything that doesn't fit is returned so it can
    // be consumed by a later call.
    let mut remaining_data = buffer.add_data(data);

    loop {
        // Whether a byte was carried over from the previous iteration without being output.
        let pending_previous = state.pending_byte_as_num();

        assert!(writer.buffer_length() <= (window_size * 2));
        if state.is_first_window {
            if buffer.current_end() >= (window_size * 2) + MAX_MATCH || finish {
                // Initialise the hash table with the first two bytes if that hasn't
                // happened yet.
                if buffer.get_buffer().len() >= 2 && add_initial &&
                    state.current_block_input_bytes == 0
                {
                    let b = buffer.get_buffer();
                    state.hash_table.add_initial_hash_values(b[0], b[1]);
                    add_initial = false;
                }

                let first_chunk_end = cmp::min(window_size, buffer.current_end());

                let start = state.overlap;

                let (overlap, p_status) = process_chunk(
                    buffer.get_buffer(),
                    &(start..first_chunk_end),
                    &mut state.match_state,
                    &mut state.hash_table,
                    &mut writer,
                    state.max_hash_checks,
                    state.lazy_if_less_than as usize,
                    state.matching_type,
                );

                state.overlap = overlap;
                state.bytes_to_hash = overlap;

                if let ProcessStatus::BufferFull(written) = p_status {
                    state.overlap = if overlap > 0 { overlap } else { written };
                    status = LZ77Status::EndBlock;
                    current_position = written - state.pending_byte_as_num();
                    state.current_block_input_bytes +=
                        (written - start + overlap + pending_previous -
                             state.pending_byte_as_num()) as u64;
                    break;
                }

                state.current_block_input_bytes +=
                    (first_chunk_end - start + overlap + pending_previous -
                         state.pending_byte_as_num()) as u64;

                // If all the data provided so far has been processed and this is a finish or
                // sync flush, we are done for now.
                if first_chunk_end >= buffer.current_end() && finish {
                    current_position = first_chunk_end - state.pending_byte_as_num();
                    if !sync {
                        state.set_last();
                        state.is_first_window = false;
                    } else {
                        state.overlap = first_chunk_end;
                        state.was_synced = true;
                    }
                    debug_assert!(
                        !state.pending_byte(),
                        "Bug! Ended compression with a pending byte!"
                    );
                    status = LZ77Status::Finished;
                    break;
                }
                state.is_first_window = false;
            } else {
                status = LZ77Status::NeedInput;
                break;
            }
        } else if buffer.current_end() >= (window_size * 2) + MAX_MATCH || finish {
            // Hash any overlapping bytes at the start of this window that were not hashed
            // by the previous chunk.
            if buffer.current_end() >= window_size + 2 {
                for (n, &h) in buffer.get_buffer()[window_size + 2..]
                    .iter()
                    .enumerate()
                    .take(state.bytes_to_hash)
                {
                    state.hash_table.add_hash_value(window_size + n, h);
                }
                state.bytes_to_hash = 0;
            }
            let start = window_size + state.overlap;

            let end = cmp::min(window_size * 2, buffer.current_end());

            let (overlap, p_status) = process_chunk(
                buffer.get_buffer(),
                &(start..end),
                &mut state.match_state,
                &mut state.hash_table,
                &mut writer,
                state.max_hash_checks,
                state.lazy_if_less_than as usize,
                state.matching_type,
            );

            state.bytes_to_hash = overlap;

            if let ProcessStatus::BufferFull(written) = p_status {
                state.current_block_input_bytes +=
                    (written - start + pending_previous - state.pending_byte_as_num()) as u64;

                // If the last match overlapped into the next window, slide the hash table
                // and input buffer now so the next call can pick up where we left off.
                state.overlap = if overlap > 0 {
                    if state.max_hash_checks > 0 {
                        state.hash_table.slide(window_size);
                    }
                    remaining_data = buffer.slide(remaining_data.unwrap_or(&[]));
                    overlap
                } else {
                    written - window_size
                };

                current_position = written - state.pending_byte_as_num();

                break;
            }

            state.current_block_input_bytes +=
                (end - start + overlap + pending_previous - state.pending_byte_as_num()) as u64;

            state.overlap = overlap;

            if remaining_data.is_none() && finish && end == buffer.current_end() {
                // All the provided data has been processed.
                current_position = buffer.current_end();
                debug_assert!(
                    !state.pending_byte(),
                    "Bug! Ended compression with a pending byte!"
                );

                if !sync {
                    state.set_last();
                } else {
                    state.overlap = buffer.current_end() - window_size;
                    state.was_synced = true;
                }
                status = LZ77Status::Finished;
                break;
            } else {
                // Slide the hash table and input buffer forward by one window so the next
                // chunk of data can be processed.
                if state.max_hash_checks > 0 {
                    state.hash_table.slide(window_size);
                }

                remaining_data = buffer.slide(remaining_data.unwrap_or(&[]));
            }
        } else {
            status = LZ77Status::NeedInput;
            break;
        }
    }

    (
        data.len() - remaining_data.unwrap_or(&[]).len(),
        status,
        current_position,
    )
}

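/// Decompress a slice of LZ values into a byte vector (helper used by the tests).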
#[cfg(test)]
pub fn decompress_lz77(input: &[LZValue]) -> Vec<u8> {
    decompress_lz77_with_backbuffer(input, &[])
}

/// Decompress a slice of LZ values, allowing matches to refer back into `back_buffer` as if
/// it preceded the decompressed data (helper used by the tests).
#[cfg(test)]
pub fn decompress_lz77_with_backbuffer(input: &[LZValue], back_buffer: &[u8]) -> Vec<u8> {
    let mut output = Vec::new();
    for p in input {
        match p.value() {
            LZType::Literal(l) => output.push(l),
            LZType::StoredLengthDistance(l, d) => {
                let d = d as usize;
                let prev_output_len = output.len();
                // If the distance reaches back past the start of the output, the first part
                // of the match has to be copied from the back buffer.
                let consumed = if d > output.len() {
                    let into_back_buffer = d - output.len();

                    assert!(
                        into_back_buffer <= back_buffer.len(),
                        "ERROR: Attempted to refer to a match in non-existing data!\
                         into_back_buffer: {}, back_buffer len {}, d {}, l {:?}",
                        into_back_buffer,
                        back_buffer.len(),
                        d,
                        l
                    );
                    let start = back_buffer.len() - into_back_buffer;
                    let end = cmp::min(back_buffer.len(), start + l.actual_length() as usize);
                    output.extend_from_slice(&back_buffer[start..end]);

                    end - start
                } else {
                    0
                };

                // Copy the rest of the match byte by byte, since the match may overlap with
                // the bytes currently being written.
                let start = prev_output_len.saturating_sub(d);
                let mut n = 0;
                while n < (l.actual_length() as usize).saturating_sub(consumed) {
                    let b = output[start + n];
                    output.push(b);
                    n += 1;
                }
            }
        }
    }
    output
}

#[cfg(test)]
pub struct TestStruct {
    state: LZ77State,
    buffer: InputBuffer,
    writer: DynamicWriter,
}

#[cfg(test)]
impl TestStruct {
    fn new() -> TestStruct {
        TestStruct::with_config(
            HIGH_MAX_HASH_CHECKS,
            HIGH_LAZY_IF_LESS_THAN,
            MatchingType::Lazy,
        )
    }

    fn with_config(
        max_hash_checks: u16,
        lazy_if_less_than: u16,
        matching_type: MatchingType,
    ) -> TestStruct {
        TestStruct {
            state: LZ77State::new(max_hash_checks, lazy_if_less_than, matching_type),
            buffer: InputBuffer::empty(),
            writer: DynamicWriter::new(),
        }
    }

    fn compress_block(&mut self, data: &[u8], flush: bool) -> (usize, LZ77Status, usize) {
        lz77_compress_block(
            data,
            &mut self.state,
            &mut self.buffer,
            &mut self.writer,
            if flush { Flush::Finish } else { Flush::None },
        )
    }
}

#[cfg(test)]
pub fn lz77_compress(data: &[u8]) -> Option<Vec<LZValue>> {
    lz77_compress_conf(
        data,
        HIGH_MAX_HASH_CHECKS,
        HIGH_LAZY_IF_LESS_THAN,
        MatchingType::Lazy,
    )
}

#[cfg(test)]
pub fn lz77_compress_conf(
    data: &[u8],
    max_hash_checks: u16,
    lazy_if_less_than: u16,
    matching_type: MatchingType,
) -> Option<Vec<LZValue>> {
    let mut test_boxed = Box::new(TestStruct::with_config(
        max_hash_checks,
        lazy_if_less_than,
        matching_type,
    ));
    let mut out = Vec::<LZValue>::with_capacity(data.len() / 3);
    {
        let test = test_boxed.as_mut();
        let mut slice = data;

        while !test.state.is_last_block {
            let bytes_written = lz77_compress_block_finish(
                slice,
                &mut test.state,
                &mut test.buffer,
                &mut test.writer,
            ).0;
            slice = &slice[bytes_written..];
            out.extend(test.writer.get_buffer());
            test.writer.clear();
        }
    }

    Some(out)
}

#[cfg(test)]
mod test {
    use super::*;

    use lzvalue::{LZValue, LZType, lit, ld};
    use chained_hash_table::WINDOW_SIZE;
    use compression_options::DEFAULT_LAZY_IF_LESS_THAN;
    use test_utils::get_test_data;
    use output_writer::MAX_BUFFER_LENGTH;

    fn print_output(input: &[LZValue]) {
        let mut output = vec![];
        for l in input {
            match l.value() {
                LZType::Literal(l) => output.push(l),
                LZType::StoredLengthDistance(l, d) => {
                    output.extend(format!("<L {}>", l.actual_length()).into_bytes());
                    output.extend(format!("<D {}>", d).into_bytes())
                }
            }
        }

        println!("\"{}\"", String::from_utf8(output).unwrap());
    }

    #[test]
    fn compress_short() {
        let test_bytes = String::from("Deflate late").into_bytes();
        let res = lz77_compress(&test_bytes).unwrap();

        let decompressed = decompress_lz77(&res);

        assert_eq!(test_bytes, decompressed);
        assert_eq!(*res.last().unwrap(), LZValue::length_distance(4, 5));
    }

    #[test]
    fn compress_long() {
        let input = get_test_data();
        let compressed = lz77_compress(&input).unwrap();
        assert!(compressed.len() < input.len());

        let decompressed = decompress_lz77(&compressed);
        println!("compress_long length: {}", input.len());

        for (n, (&a, &b)) in input.iter().zip(decompressed.iter()).enumerate() {
            if a != b {
                println!("First difference at {}, input: {}, output: {}", n, a, b);
                break;
            }
        }
        assert_eq!(input.len(), decompressed.len());
        assert!(&decompressed == &input);
    }

    #[test]
    fn lazy() {
        let data = b"nba badger nbadger";
        let compressed = lz77_compress(data).unwrap();
        let test = compressed[compressed.len() - 1];
        if let LZType::StoredLengthDistance(l, _) = test.value() {
            assert_eq!(l.actual_length(), 6);
        } else {
            print_output(&compressed);
            panic!();
        }
    }

    fn roundtrip(data: &[u8]) {
        let compressed = super::lz77_compress(&data).unwrap();
        let decompressed = decompress_lz77(&compressed);
        assert!(decompressed == data);
    }

    #[test]
    #[allow(unused)]
    fn exact_window_size() {
        use std::io::Write;
        let mut data = vec![0; WINDOW_SIZE];
        roundtrip(&data);
        {
            data.write(&[22; WINDOW_SIZE]);
        }
        roundtrip(&data);
        {
            data.write(&[55; WINDOW_SIZE]);
        }
        roundtrip(&data);
    }

    #[test]
    fn border() {
        use chained_hash_table::WINDOW_SIZE;
        let mut data = vec![35; WINDOW_SIZE];
        data.extend(b"Test");
        let compressed = super::lz77_compress(&data).unwrap();
        assert!(compressed.len() < data.len());
        let decompressed = decompress_lz77(&compressed);

        assert_eq!(decompressed.len(), data.len());
        assert!(decompressed == data);
    }

    #[test]
    fn border_multiple_blocks() {
        use chained_hash_table::WINDOW_SIZE;
        let mut data = vec![0; (WINDOW_SIZE * 2) + 50];
        data.push(1);
        let compressed = super::lz77_compress(&data).unwrap();
        assert!(compressed.len() < data.len());
        let decompressed = decompress_lz77(&compressed);
        assert_eq!(decompressed.len(), data.len());
        assert!(decompressed == data);
    }

    #[test]
    fn compress_block_status() {
        use input_buffer::InputBuffer;

        let data = b"Test data data";
        let mut writer = DynamicWriter::new();

        let mut buffer = InputBuffer::empty();
        let mut state = LZ77State::new(4096, DEFAULT_LAZY_IF_LESS_THAN, MatchingType::Lazy);
        let status = lz77_compress_block_finish(data, &mut state, &mut buffer, &mut writer);
        assert_eq!(status.1, LZ77Status::Finished);
        assert!(&buffer.get_buffer()[..data.len()] == data);
        assert_eq!(buffer.current_end(), data.len());
    }

    #[test]
    fn compress_block_multiple_windows() {
        use input_buffer::InputBuffer;
        use output_writer::DynamicWriter;

        let data = get_test_data();
        assert!(data.len() > (WINDOW_SIZE * 2) + super::MAX_MATCH);
        let mut writer = DynamicWriter::new();

        let mut buffer = InputBuffer::empty();
        let mut state = LZ77State::new(0, DEFAULT_LAZY_IF_LESS_THAN, MatchingType::Lazy);
        let (bytes_consumed, status) =
            lz77_compress_block_finish(&data, &mut state, &mut buffer, &mut writer);
        assert_eq!(
            buffer.get_buffer().len(),
            (WINDOW_SIZE * 2) + super::MAX_MATCH
        );
        assert_eq!(status, LZ77Status::EndBlock);
        let buf_len = buffer.get_buffer().len();
        assert!(buffer.get_buffer()[..] == data[..buf_len]);

        writer.clear();
        let (_, status) = lz77_compress_block_finish(
            &data[bytes_consumed..],
            &mut state,
            &mut buffer,
            &mut writer,
        );
        assert_eq!(status, LZ77Status::EndBlock);
    }

    #[test]
    fn multiple_inputs() {
        let data = b"Badger badger bababa test data 25 asfgestghresjkgh";
        let comp1 = lz77_compress(data).unwrap();
        let comp2 = {
            const SPLIT: usize = 25;
            let first_part = &data[..SPLIT];
            let second_part = &data[SPLIT..];
            let mut state = TestStruct::new();
            let (bytes_written, status, _) = state.compress_block(first_part, false);
            assert_eq!(bytes_written, first_part.len());
            assert_eq!(status, LZ77Status::NeedInput);
            let (bytes_written, status, _) = state.compress_block(second_part, true);
            assert_eq!(bytes_written, second_part.len());
            assert_eq!(status, LZ77Status::Finished);
            Vec::from(state.writer.get_buffer())
        };
        assert!(comp1 == comp2);
    }

    #[test]
    fn buffer_fill() {
        let data = get_test_data();
        buffer_test_literals(&data);
        buffer_test_last_bytes(MatchingType::Greedy, &data);
        buffer_test_last_bytes(MatchingType::Lazy, &data);

        buffer_test_match(MatchingType::Greedy);
        buffer_test_match(MatchingType::Lazy);

        buffer_test_add_end(&data);
    }

    fn buffer_test_literals(data: &[u8]) {
        let mut state = TestStruct::with_config(0, NO_RLE, MatchingType::Lazy);
        let (bytes_consumed, status, position) = state.compress_block(&data, false);

        assert_eq!(status, LZ77Status::EndBlock);
        assert!(bytes_consumed <= (WINDOW_SIZE * 2) + MAX_MATCH);

        assert_eq!(state.writer.get_buffer().len(), MAX_BUFFER_LENGTH);
        assert_eq!(position, state.writer.get_buffer().len());
        assert_eq!(
            state.state.current_block_input_bytes() as usize,
            MAX_BUFFER_LENGTH
        );
        state.state.reset_input_bytes();

        let mut out = decompress_lz77(state.writer.get_buffer());

        state.writer.clear();
        assert_eq!(state.writer.get_buffer().len(), 0);

        assert!(data[..out.len()] == out[..]);

        let _ = state.compress_block(&data[bytes_consumed..], false);
        assert!(state.writer.get_buffer().len() > 0);
        assert_eq!(
            state.state.current_block_input_bytes() as usize,
            MAX_BUFFER_LENGTH
        );

        out.extend_from_slice(&decompress_lz77(state.writer.get_buffer()));
        assert!(data[..out.len()] == out[..]);
    }

    fn buffer_test_last_bytes(matching_type: MatchingType, data: &[u8]) {
        const BYTES_USED: usize = MAX_BUFFER_LENGTH;
        assert!(
            &data[..BYTES_USED] ==
                &decompress_lz77(&lz77_compress_conf(
                    &data[..BYTES_USED],
                    0,
                    NO_RLE,
                    matching_type,
                ).unwrap())
                    [..]
        );
        assert!(
            &data[..BYTES_USED + 1] ==
                &decompress_lz77(&lz77_compress_conf(
                    &data[..BYTES_USED + 1],
                    0,
                    NO_RLE,
                    matching_type,
                ).unwrap())
                    [..]
        );
    }

    fn buffer_test_match(matching_type: MatchingType) {
        let mut state = TestStruct::with_config(1, 0, matching_type);
        for _ in 0..MAX_BUFFER_LENGTH - 4 {
            assert!(state.writer.write_literal(0) == BufferStatus::NotFull);
        }
        state.compress_block(&[1, 2, 3, 1, 2, 3, 4], true);
        assert!(*state.writer.get_buffer().last().unwrap() == LZValue::length_distance(3, 3));
    }

    fn buffer_test_add_end(_data: &[u8]) {}

    #[test]
    fn test_decompress_with_backbuffer() {
        let bb = vec![5; WINDOW_SIZE];
        let lz_compressed = [lit(2), lit(4), ld(4, 20), lit(1), lit(1), ld(5, 10)];
        let dec = decompress_lz77_with_backbuffer(&lz_compressed, &bb);

        assert!(dec == [2, 4, 5, 5, 5, 5, 1, 1, 5, 5, 2, 4, 5]);
    }
}

#[cfg(all(test, feature = "benchmarks"))]
mod bench {
    use test_std::Bencher;
    use test_utils::get_test_data;
    use super::lz77_compress;
    #[bench]
    fn test_file_zlib_lz77_only(b: &mut Bencher) {
        let test_data = get_test_data();
        b.iter(|| lz77_compress(&test_data));
    }
}