deflate/length_encode.rs
1use std::iter::Iterator;
2use std::clone::Clone;
3
4/// An enum representing the different types in the run-length encoded data used to encode
5/// huffman table lengths
6#[derive(Debug, PartialEq, Eq)]
7pub enum EncodedLength {
8 // An actual length value
9 Length(u8),
10 // Copy the previous value n times
11 CopyPrevious(u8),
12 // Repeat zero n times (with n represented by 3 bits)
13 RepeatZero3Bits(u8),
14 // Repeat zero n times (with n represented by 7 bits)
15 RepeatZero7Bits(u8),
16}
17
18impl EncodedLength {
19 fn from_prev_and_repeat(prev: u8, repeat: u8) -> EncodedLength {
20 match prev {
21 0 => {
22 if repeat <= 10 {
23 EncodedLength::RepeatZero3Bits(repeat)
24 } else {
25 EncodedLength::RepeatZero7Bits(repeat)
26 }
27 }
28 1...15 => EncodedLength::CopyPrevious(repeat),
29 _ => panic!(),
30 }
31 }
32}
33
34pub const COPY_PREVIOUS: usize = 16;
35pub const REPEAT_ZERO_3_BITS: usize = 17;
36pub const REPEAT_ZERO_7_BITS: usize = 18;
37
38const MIN_REPEAT: u8 = 3;
39
40/// Push an `EncodedLength` to the vector and update the frequency table.
41fn update_out_and_freq(
42 encoded: EncodedLength,
43 output: &mut Vec<EncodedLength>,
44 frequencies: &mut [u16; 19],
45) {
46 let index = match encoded {
47 EncodedLength::Length(l) => usize::from(l),
48 EncodedLength::CopyPrevious(_) => COPY_PREVIOUS,
49 EncodedLength::RepeatZero3Bits(_) => REPEAT_ZERO_3_BITS,
50 EncodedLength::RepeatZero7Bits(_) => REPEAT_ZERO_7_BITS,
51 };
52
53 frequencies[index] += 1;
54
55 output.push(encoded);
56}
57
58/// Convenience function to check if the repeat counter should be incremented further
59fn not_max_repetitions(length_value: u8, repeats: u8) -> bool {
60 (length_value == 0 && repeats < 138) || repeats < 6
61}
62
63///Convenience version for unit tests.
64#[cfg(test)]
65pub fn encode_lengths<'a, I>(lengths: I) -> (Vec<EncodedLength>, [u16; 19])
66where
67 I: Iterator<Item = &'a u8> + Clone,
68{
69 let mut freqs = [0u16; 19];
70 let mut encoded: Vec<EncodedLength> = Vec::new();
71 encode_lengths_m(lengths, &mut encoded, &mut freqs);
72 (encoded, freqs)
73}
74
75/// Run-length encodes the lengths of the values in `lengths` according to the deflate
76/// specification. This is used for writing the code lengths for the huffman tables for
77/// the deflate stream.
78///
79/// Returns a tuple containing a vec of the encoded lengths
80/// Populates the supplied array with the frequency of the different encoded length values
81/// The frequency array is taken as a parameter rather than returned to avoid
82/// excessive memcpying.
83pub fn encode_lengths_m<'a, I>(
84 lengths: I,
85 mut out: &mut Vec<EncodedLength>,
86 mut frequencies: &mut [u16; 19],
87) where
88 I: Iterator<Item = &'a u8> + Clone,
89{
90 out.clear();
91 // Number of repetitions of the current value
92 let mut repeat = 0;
93 let mut iter = lengths.clone().enumerate().peekable();
94 // Previous value
95 // We set it to the compliment of the first falue to simplify the code.
96 let mut prev = !iter.peek().expect("No length values!").1;
97
98 while let Some((n, &l)) = iter.next() {
99 if l == prev && not_max_repetitions(l, repeat) {
100 repeat += 1;
101 }
102 if l != prev || iter.peek().is_none() || !not_max_repetitions(l, repeat) {
103 if repeat >= MIN_REPEAT {
104 // The previous value has been repeated enough times to write out a repeat code.
105
106 let val = EncodedLength::from_prev_and_repeat(prev, repeat);
107 update_out_and_freq(val, &mut out, &mut frequencies);
108 repeat = 0;
109 // If we have a new length value, output l unless the last value is 0 or l is the
110 // last byte.
111 if l != prev {
112 if l != 0 || iter.peek().is_none() {
113 update_out_and_freq(EncodedLength::Length(l), &mut out, &mut frequencies);
114 repeat = 0;
115 } else {
116 // If we have a zero, we start repeat at one instead of outputting, as
117 // there are separate codes for repeats of zero so we don't need a literal
118 // to define what byte to repeat.
119 repeat = 1;
120 }
121 }
122 } else {
123 // There haven't been enough repetitions of the previous value,
124 // so just we output the lengths directly.
125
126 // If we are at the end, and we have a value that is repeated, we need to
127 // skip a byte and output the last one.
128 let extra_skip = if iter.peek().is_none() && l == prev {
129 1
130 } else {
131 0
132 };
133
134 // Get to the position of the next byte to output by starting at zero and skipping.
135 let b_iter = lengths.clone().skip(n + extra_skip - repeat as usize);
136
137 // As repeats of zeroes have separate codes, we don't need to output a literal here
138 // if we have a zero (unless we are at the end).
139 let extra = if l != 0 || iter.peek().is_none() {
140 1
141 } else {
142 0
143 };
144
145 for &i in b_iter.take(repeat as usize + extra) {
146 update_out_and_freq(EncodedLength::Length(i), &mut out, &mut frequencies);
147 }
148
149 // If the current byte is zero we start repeat at 1 as we didn't output the literal
150 // directly.
151 repeat = 1 - extra as u8;
152 }
153 }
154 prev = l;
155 }
156}
157
158#[cfg(test)]
159pub fn huffman_lengths_from_frequency(frequencies: &[u16], max_len: usize) -> Vec<u8> {
160 in_place::gen_lengths(frequencies, max_len)
161}
162
163pub type LeafVec = Vec<in_place::Node>;
164
165/// Generate a set of canonical huffman lengths from the given frequencies, with a maximum length
166/// of `max_len`. The lengths are put in the lens slice parameter. Unused lengths are set to 0.
167///
168/// The leaf buffer is passed in to avoid allocating it every time this function is called.
169/// The existing data contained in it is not preserved.
170pub fn huffman_lengths_from_frequency_m(
171 frequencies: &[u16],
172 max_len: usize,
173 leaf_buffer: &mut LeafVec,
174 lens: &mut [u8],
175) {
176 in_place::in_place_lengths(frequencies, max_len, leaf_buffer, lens);
177}
178
179mod in_place {
180 type WeightType = u32;
181
182 pub fn validate_lengths(lengths: &[u8]) -> bool {
183 // Avoid issue with floating point on mips: https://github.com/oyvindln/deflate-rs/issues/23
184 if cfg!(any(target_arch = "mips", target_arch = "mipsel",
185 target_arch = "mips64", target_arch = "mipsel64")) {
186 true
187 } else {
188 let v = lengths.iter().fold(0f64, |acc, &n| {
189 acc + if n != 0 { 2f64.powi(-(n as i32)) } else { 0f64 }
190 });
191 !(v > 1.0)
192 }
193 }
194
195 #[derive(Eq, PartialEq, Debug)]
196 pub struct Node {
197 value: WeightType,
198 symbol: u16,
199 }
200
201 fn step_1(leaves: &mut [Node]) {
202 // If there are less than 2 non-zero frequencies, this function should not have been
203 // called and we should not have gotten to this point.
204 debug_assert!(leaves.len() >= 2);
205 let mut root = 0;
206 let mut leaf = 2;
207
208 leaves[0].value += leaves[1].value;
209
210 for next in 1..leaves.len() - 1 {
211 if (leaf >= leaves.len()) || (leaves[root].value < leaves[leaf].value) {
212 leaves[next].value = leaves[root].value;
213 leaves[root].value = next as WeightType;
214 root += 1;
215 } else {
216 leaves[next].value = leaves[leaf].value;
217 leaf += 1;
218 }
219
220 if (leaf >= leaves.len()) ||
221 (root < next && (leaves[root].value < leaves[leaf].value))
222 {
223 leaves[next].value += leaves[root].value;
224 leaves[root].value = next as WeightType;
225 root += 1;
226 } else {
227 leaves[next].value += leaves[leaf].value;
228 leaf += 1;
229 }
230 }
231 }
232
233 fn step_2(leaves: &mut [Node]) {
234 debug_assert!(leaves.len() >= 2);
235 let n = leaves.len();
236
237 leaves[n - 2].value = 0;
238 for t in (0..(n + 1 - 3)).rev() {
239 leaves[t].value = leaves[leaves[t].value as usize].value + 1;
240 }
241
242 let mut available = 1 as usize;
243 let mut used = 0;
244 let mut depth = 0;
245 let mut root = n as isize - 2;
246 let mut next = n as isize - 1;
247
248 while available > 0 {
249 while root >= 0 && leaves[root as usize].value == depth {
250 used += 1;
251 root -= 1;
252 }
253 while available > used {
254 leaves[next as usize].value = depth;
255 next -= 1;
256 available -= 1;
257 }
258 available = 2 * used;
259 depth += 1;
260 used = 0;
261 }
262 }
263
264 const MAX_NUMBER_OF_CODES: usize = 32;
265 const NUM_CODES_LENGTH: usize = MAX_NUMBER_OF_CODES + 1;
266
267 /// Checks if any of the lengths exceed `max_len`, and if that is the case, alters the length
268 /// table so that no codes exceed `max_len`.
269 /// This is ported from miniz (which is released as public domain by Rich Geldreich
270 /// https://github.com/richgel999/miniz/blob/master/miniz.c)
271 ///
272 /// This will not generate optimal (minimim-redundancy) codes, however in most cases
273 /// this won't make a large difference.
274 pub fn enforce_max_code_lengths(
275 num_codes: &mut [u16; NUM_CODES_LENGTH],
276 num_used: usize,
277 max_len: usize,
278 ) {
279 debug_assert!(max_len <= 15);
280
281 if num_used <= 1 {
282 return;
283 } else {
284 let mut num_above_max = 0u16;
285 for &l in num_codes[(max_len as usize + 1)..].iter() {
286 num_above_max += l;
287 }
288
289 num_codes[max_len] += num_above_max;
290
291 let mut total = 0u32;
292 for i in (1..max_len + 1).rev() {
293 // This should be safe as max_len won't be higher than 15, and num_codes[i] can't
294 // be higher than 288,
295 // and 288 << 15 will not be anywhere close to overflowing 32 bits
296 total += (num_codes[i] as u32) << (max_len - i);
297 }
298
299 // miniz uses unsigned long here. 32-bits should be sufficient though,
300 // as max_len won't be longer than 15 anyhow.
301 while total != 1u32 << max_len {
302 num_codes[max_len] -= 1;
303 for i in (1..max_len).rev() {
304 if num_codes[i] != 0 {
305 num_codes[i] -= 1;
306 num_codes[i + 1] += 2;
307 break;
308 }
309 }
310 total -= 1;
311 }
312 }
313 }
314
315
316 #[cfg(test)]
317 /// Convenience wrapper for tests.
318 pub fn gen_lengths(frequencies: &[u16], max_len: usize) -> Vec<u8> {
319 let mut lens = vec![0u8; frequencies.len()];
320 let mut leaves = Vec::new();
321 in_place_lengths(frequencies, max_len, &mut leaves, lens.as_mut_slice());
322 lens
323 }
324
325 /// Generate huffman code lengths, using the algorithm described by
326 /// Moffat and Katajainen in In-Place Calculation of Minimum-Redundancy Codes
327 /// http://people.eng.unimelb.edu.au/ammoffat/abstracts/mk95wads.html
328 /// and it's implementation.
329 ///
330 /// This is significantly faster, and seems to generally create lengths that result in length
331 /// tables that are better compressible than the algorithm used previously. The downside of this
332 /// algorithm is that it's not length-limited, so if too long code lengths are generated,
333 /// it might result in a sub-optimal tables as the length-restricting function isn't optimal.
334 pub fn in_place_lengths(
335 frequencies: &[u16],
336 max_len: usize,
337 mut leaves: &mut Vec<Node>,
338 lengths: &mut [u8],
339 ) {
340
341 debug_assert!(lengths.len() >= frequencies.len());
342
343 for l in lengths.iter_mut() {
344 *l = 0;
345 }
346
347 // Clear any previous leaves in the leaf buffer.
348 leaves.clear();
349
350 // Discard zero length nodes as they won't be given a code and thus don't need to
351 // participate in code length generation and create a new vec of the remaining
352 // symbols and weights.
353 leaves.extend(frequencies.iter().enumerate().filter_map(
354 |(n, f)| if *f > 0 {
355 Some(Node {
356 value: *f as WeightType,
357 symbol: n as u16,
358 })
359 } else {
360 None
361 },
362 ));
363
364 // Special cases with zero or 1 value having a non-zero frequency
365 if leaves.len() == 1 {
366 lengths[leaves[0].symbol as usize] = 1;
367 return;
368 } else if leaves.is_empty() {
369 return;
370 }
371
372 // Sort the leaves by value. As the sort in the standard library is stable, we don't
373 // have to worry about the symbol code here.
374 leaves.sort_by(|a, b| a.value.cmp(&b.value));
375
376 step_1(&mut leaves);
377 step_2(&mut leaves);
378
379 // Count how many codes of each length used, for usage in the next section.
380 let mut num_codes = [0u16; NUM_CODES_LENGTH];
381 for l in leaves.iter() {
382 num_codes[l.value as usize] += 1;
383 }
384
385 // As the algorithm used here doesn't limit the maximum length that can be generated
386 // we need to make sure none of the lengths exceed `max_len`
387 enforce_max_code_lengths(&mut num_codes, leaves.len(), max_len);
388
389 // Output the actual lengths
390 let mut leaf_it = leaves.iter().rev();
391 // Start at 1 since the length table is already filled with zeroes.
392 for (&n_codes, i) in num_codes[1..max_len + 1].iter().zip(1..(max_len as u8) + 1) {
393 for _ in 0..n_codes {
394 lengths[leaf_it.next().unwrap().symbol as usize] = i;
395 }
396 }
397
398 debug_assert_eq!(leaf_it.next(), None);
399 debug_assert!(
400 validate_lengths(lengths),
401 "The generated length codes were not valid!"
402 );
403 }
404
405
406}
407
408#[cfg(test)]
409mod test {
410 use super::*;
411 use std::u16;
412 use huffman_table::NUM_LITERALS_AND_LENGTHS;
413
414 fn lit(value: u8) -> EncodedLength {
415 EncodedLength::Length(value)
416 }
417
418 fn zero(repeats: u8) -> EncodedLength {
419 match repeats {
420 0...1 => EncodedLength::Length(0),
421 2...10 => EncodedLength::RepeatZero3Bits(repeats),
422 _ => EncodedLength::RepeatZero7Bits(repeats),
423 }
424 }
425
426 fn copy(copies: u8) -> EncodedLength {
427 EncodedLength::CopyPrevious(copies)
428 }
429
430 #[test]
431 fn test_encode_lengths() {
432 use huffman_table::FIXED_CODE_LENGTHS;
433 let enc = encode_lengths(FIXED_CODE_LENGTHS.iter());
434 // There are no lengths lower than 6 in the fixed table
435 assert_eq!(enc.1[0..7], [0, 0, 0, 0, 0, 0, 0]);
436 // Neither are there any lengths above 9
437 assert_eq!(enc.1[10..16], [0, 0, 0, 0, 0, 0]);
438 // Also there are no zero-length codes so there shouldn't be any repetitions of zero
439 assert_eq!(enc.1[17..19], [0, 0]);
440
441 let test_lengths = [0, 0, 5, 0, 15, 1, 0, 0, 0, 2, 4, 4, 4, 4, 3, 5, 5, 5, 5];
442 let enc = encode_lengths(test_lengths.iter()).0;
443 assert_eq!(
444 enc,
445 vec![
446 lit(0),
447 lit(0),
448 lit(5),
449 lit(0),
450 lit(15),
451 lit(1),
452 zero(3),
453 lit(2),
454 lit(4),
455 copy(3),
456 lit(3),
457 lit(5),
458 copy(3),
459 ]
460 );
461 let test_lengths = [0, 0, 0, 5, 2, 3, 0, 0, 0];
462 let enc = encode_lengths(test_lengths.iter()).0;
463 assert_eq!(enc, vec![zero(3), lit(5), lit(2), lit(3), zero(3)]);
464
465 let test_lengths = [0, 0, 0, 3, 3, 3, 5, 4, 4, 4, 4, 0, 0];
466 let enc = encode_lengths(test_lengths.iter()).0;
467 assert_eq!(
468 enc,
469 vec![
470 zero(3),
471 lit(3),
472 lit(3),
473 lit(3),
474 lit(5),
475 lit(4),
476 copy(3),
477 lit(0),
478 lit(0),
479 ]
480 );
481
482
483 let lens = [
484 0,
485 0,
486 4,
487 0,
488 0,
489 4,
490 0,
491 0,
492 0,
493 0,
494 0,
495 4,
496 4,
497 0,
498 0,
499 0,
500 0,
501 0,
502 0,
503 0,
504 0,
505 0,
506 3,
507 0,
508 0,
509 0,
510 0,
511 0,
512 0,
513 0,
514 0,
515 0,
516 0,
517 0,
518 0,
519 0,
520 0,
521 0,
522 0,
523 0,
524 0,
525 0,
526 0,
527 0,
528 0,
529 0,
530 0,
531 0,
532 0,
533 0,
534 0,
535 0,
536 0,
537 0,
538 0,
539 4,
540 0,
541 0,
542 0,
543 0,
544 0,
545 0,
546 0,
547 0,
548 0,
549 0,
550 0,
551 0,
552 0,
553 0,
554 0,
555 0,
556 0,
557 0,
558 0,
559 0,
560 0,
561 0,
562 0,
563 0,
564 0,
565 0,
566 0,
567 0,
568 0,
569 0,
570 0,
571 0,
572 0,
573 0,
574 0,
575 0,
576 0,
577 0,
578 0,
579 0,
580 0,
581 0,
582 0,
583 0,
584 0,
585 0,
586 0,
587 0,
588 0,
589 0,
590 0,
591 0,
592 0,
593 0,
594 0,
595 0,
596 0,
597 0,
598 0,
599 0,
600 0,
601 0,
602 0,
603 0,
604 0,
605 0,
606 0,
607 0,
608 0,
609 0,
610 0,
611 0,
612 0,
613 0,
614 0,
615 0,
616 0,
617 0,
618 0,
619 0,
620 0,
621 0,
622 0,
623 0,
624 0,
625 0,
626 0,
627 0,
628 0,
629 0,
630 0,
631 0,
632 0,
633 0,
634 0,
635 0,
636 0,
637 0,
638 0,
639 0,
640 0,
641 0,
642 0,
643 0,
644 0,
645 0,
646 0,
647 0,
648 0,
649 0,
650 0,
651 0,
652 0,
653 0,
654 0,
655 0,
656 0,
657 0,
658 0,
659 0,
660 0,
661 0,
662 0,
663 0,
664 0,
665 0,
666 0,
667 0,
668 0,
669 0,
670 0,
671 0,
672 0,
673 0,
674 0,
675 0,
676 0,
677 0,
678 0,
679 0,
680 0,
681 0,
682 0,
683 0,
684 0,
685 0,
686 0,
687 0,
688 0,
689 0,
690 0,
691 0,
692 0,
693 0,
694 0,
695 0,
696 0,
697 0,
698 0,
699 0,
700 0,
701 0,
702 0,
703 0,
704 0,
705 0,
706 0,
707 0,
708 0,
709 0,
710 0,
711 0,
712 0,
713 0,
714 0,
715 0,
716 0,
717 0,
718 0,
719 0,
720 0,
721 0,
722 0,
723 0,
724 0,
725 0,
726 0,
727 0,
728 0,
729 0,
730 0,
731 0,
732 0,
733 0,
734 0,
735 0,
736 0,
737 0,
738 0,
739 0,
740 4,
741 0,
742 0,
743 0,
744 0,
745 0,
746 0,
747 0,
748 0,
749 0,
750 0,
751 0,
752 0,
753 0,
754 0,
755 0,
756 0,
757 0,
758 0,
759 0,
760 0,
761 0,
762 0,
763 0,
764 0,
765 0,
766 0,
767 0,
768 0,
769 1,
770 1,
771 ];
772
773 let _ = encode_lengths(lens.iter()).0;
774
775 let lens = [
776 0,
777 0,
778 0,
779 0,
780 0,
781 0,
782 0,
783 0,
784 0,
785 0,
786 9,
787 0,
788 0,
789 9,
790 0,
791 0,
792 0,
793 0,
794 0,
795 0,
796 0,
797 0,
798 0,
799 0,
800 0,
801 0,
802 0,
803 0,
804 0,
805 0,
806 0,
807 0,
808 6,
809 0,
810 0,
811 0,
812 8,
813 0,
814 0,
815 0,
816 0,
817 8,
818 0,
819 0,
820 7,
821 8,
822 7,
823 8,
824 6,
825 6,
826 8,
827 0,
828 7,
829 6,
830 7,
831 8,
832 7,
833 7,
834 8,
835 0,
836 0,
837 0,
838 0,
839 0,
840 8,
841 8,
842 0,
843 8,
844 7,
845 0,
846 10,
847 8,
848 0,
849 8,
850 0,
851 10,
852 10,
853 8,
854 8,
855 10,
856 8,
857 0,
858 8,
859 7,
860 0,
861 10,
862 0,
863 7,
864 0,
865 0,
866 0,
867 0,
868 0,
869 0,
870 0,
871 0,
872 0,
873 6,
874 7,
875 7,
876 7,
877 6,
878 7,
879 8,
880 8,
881 6,
882 0,
883 0,
884 8,
885 8,
886 7,
887 8,
888 8,
889 0,
890 7,
891 6,
892 6,
893 8,
894 8,
895 8,
896 10,
897 10,
898 0,
899 0,
900 0,
901 0,
902 0,
903 0,
904 0,
905 0,
906 0,
907 0,
908 0,
909 0,
910 0,
911 0,
912 0,
913 0,
914 0,
915 0,
916 0,
917 0,
918 0,
919 0,
920 0,
921 0,
922 0,
923 0,
924 0,
925 0,
926 0,
927 0,
928 0,
929 0,
930 0,
931 0,
932 0,
933 0,
934 0,
935 0,
936 0,
937 0,
938 0,
939 0,
940 0,
941 0,
942 0,
943 0,
944 0,
945 0,
946 0,
947 0,
948 0,
949 0,
950 0,
951 0,
952 0,
953 0,
954 0,
955 0,
956 0,
957 0,
958 0,
959 0,
960 0,
961 0,
962 0,
963 0,
964 0,
965 0,
966 0,
967 0,
968 0,
969 0,
970 0,
971 0,
972 0,
973 0,
974 0,
975 0,
976 0,
977 0,
978 0,
979 0,
980 0,
981 0,
982 0,
983 0,
984 0,
985 0,
986 0,
987 0,
988 0,
989 0,
990 0,
991 0,
992 0,
993 0,
994 0,
995 0,
996 0,
997 0,
998 0,
999 0,
1000 0,
1001 0,
1002 0,
1003 0,
1004 0,
1005 0,
1006 0,
1007 0,
1008 0,
1009 0,
1010 0,
1011 0,
1012 0,
1013 0,
1014 0,
1015 0,
1016 0,
1017 0,
1018 0,
1019 0,
1020 0,
1021 0,
1022 0,
1023 0,
1024 0,
1025 0,
1026 0,
1027 0,
1028 0,
1029 0,
1030 0,
1031 0,
1032 10,
1033 4,
1034 3,
1035 3,
1036 4,
1037 4,
1038 5,
1039 5,
1040 5,
1041 5,
1042 5,
1043 8,
1044 8,
1045 6,
1046 7,
1047 8,
1048 10,
1049 10,
1050 0,
1051 9, /* litlen */
1052 0,
1053 0,
1054 0,
1055 0,
1056 0,
1057 0,
1058 0,
1059 8,
1060 8,
1061 8,
1062 8,
1063 6,
1064 6,
1065 5,
1066 5,
1067 5,
1068 5,
1069 6,
1070 5,
1071 5,
1072 4,
1073 4,
1074 4,
1075 4,
1076 4,
1077 4,
1078 3,
1079 4,
1080 3,
1081 4,
1082 ];
1083
1084 let enc = encode_lengths(lens.iter()).0;
1085
1086 assert_eq!(
1087 &enc[..10],
1088 &[
1089 zero(10),
1090 lit(9),
1091 lit(0),
1092 lit(0),
1093 lit(9),
1094 zero(18),
1095 lit(6),
1096 zero(3),
1097 lit(8),
1098 zero(4)
1099 ]
1100 );
1101 assert_eq!(
1102 &enc[10..20],
1103 &[
1104 lit(8),
1105 lit(0),
1106 lit(0),
1107 lit(7),
1108 lit(8),
1109 lit(7),
1110 lit(8),
1111 lit(6),
1112 lit(6),
1113 lit(8)
1114 ]
1115 );
1116
1117 let enc = encode_lengths([1, 1, 1, 2].iter()).0;
1118 assert_eq!(enc, vec![lit(1), lit(1), lit(1), lit(2)]);
1119 let enc = encode_lengths([0, 0, 3].iter()).0;
1120 assert_eq!(enc, vec![lit(0), lit(0), lit(3)]);
1121 let enc = encode_lengths([0, 0, 0, 5, 2].iter()).0;
1122 assert_eq!(enc, vec![zero(3), lit(5), lit(2)]);
1123
1124 let enc = encode_lengths([0, 0, 0, 5, 0].iter()).0;
1125 assert!(*enc.last().unwrap() != lit(5));
1126
1127 let enc = encode_lengths([0, 4, 4, 4, 4, 0].iter()).0;
1128 assert_eq!(*enc.last().unwrap(), zero(0));
1129 }
1130
1131 #[test]
1132 fn test_lengths_from_frequencies() {
1133 let frequencies = [1, 1, 5, 7, 10, 14];
1134
1135 let expected = [4, 4, 3, 2, 2, 2];
1136 let res = huffman_lengths_from_frequency(&frequencies, 4);
1137
1138 assert_eq!(expected, res.as_slice());
1139
1140 let frequencies = [1, 5, 1, 7, 10, 14];
1141 let expected = [4, 3, 4, 2, 2, 2];
1142
1143 let res = huffman_lengths_from_frequency(&frequencies, 4);
1144
1145 assert_eq!(expected, res.as_slice());
1146
1147 let frequencies = [0, 25, 0, 10, 2, 4];
1148
1149 let res = huffman_lengths_from_frequency(&frequencies, 4);
1150 assert_eq!(res[0], 0);
1151 assert_eq!(res[2], 0);
1152 assert!(res[1] < 4);
1153
1154 // Only one value
1155 let frequencies = [0, 0, 0, 0, 0, 0, 0, 0, 55, 0, 0, 0];
1156 let res = huffman_lengths_from_frequency(&frequencies, 5);
1157 let expected = [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0];
1158 assert_eq!(expected, res.as_slice());
1159
1160 // No values
1161 let frequencies = [0; 30];
1162 let res = huffman_lengths_from_frequency(&frequencies, 5);
1163 for (a, b) in frequencies.iter().zip(res.iter()) {
1164 assert_eq!(*a, (*b).into());
1165 }
1166 // assert_eq!(frequencies, res.as_slice());
1167
1168 let mut frequencies = vec![3; NUM_LITERALS_AND_LENGTHS];
1169 frequencies[55] = u16::MAX / 3;
1170 frequencies[125] = u16::MAX / 3;
1171
1172 let res = huffman_lengths_from_frequency(&frequencies, 15);
1173 assert_eq!(res.len(), NUM_LITERALS_AND_LENGTHS);
1174 assert!(res[55] < 3);
1175 assert!(res[125] < 3);
1176 }
1177
1178 #[test]
1179 /// Test if the bit lengths for a set of frequencies are optimal (give the best compression
1180 /// give the provided frequencies).
1181 fn optimal_lengths() {
1182 let freqs = [
1183 0,
1184 0,
1185 0,
1186 0,
1187 0,
1188 0,
1189 0,
1190 0,
1191 0,
1192 0,
1193 44,
1194 0,
1195 0,
1196 0,
1197 0,
1198 0,
1199 0,
1200 0,
1201 0,
1202 0,
1203 0,
1204 0,
1205 0,
1206 0,
1207 0,
1208 0,
1209 0,
1210 0,
1211 0,
1212 0,
1213 0,
1214 0,
1215 68,
1216 0,
1217 14,
1218 0,
1219 0,
1220 0,
1221 0,
1222 3,
1223 7,
1224 6,
1225 1,
1226 0,
1227 12,
1228 14,
1229 9,
1230 2,
1231 6,
1232 9,
1233 4,
1234 1,
1235 1,
1236 4,
1237 1,
1238 1,
1239 0,
1240 0,
1241 1,
1242 3,
1243 0,
1244 6,
1245 0,
1246 0,
1247 0,
1248 4,
1249 4,
1250 1,
1251 2,
1252 5,
1253 3,
1254 2,
1255 2,
1256 9,
1257 0,
1258 0,
1259 3,
1260 1,
1261 5,
1262 5,
1263 8,
1264 0,
1265 6,
1266 10,
1267 5,
1268 2,
1269 0,
1270 0,
1271 1,
1272 2,
1273 0,
1274 8,
1275 11,
1276 4,
1277 0,
1278 1,
1279 3,
1280 31,
1281 13,
1282 23,
1283 22,
1284 56,
1285 22,
1286 8,
1287 11,
1288 43,
1289 0,
1290 7,
1291 33,
1292 15,
1293 45,
1294 40,
1295 16,
1296 1,
1297 28,
1298 37,
1299 35,
1300 26,
1301 3,
1302 7,
1303 11,
1304 9,
1305 1,
1306 1,
1307 0,
1308 1,
1309 0,
1310 0,
1311 0,
1312 0,
1313 0,
1314 0,
1315 0,
1316 0,
1317 0,
1318 0,
1319 0,
1320 0,
1321 0,
1322 0,
1323 0,
1324 0,
1325 0,
1326 0,
1327 0,
1328 0,
1329 0,
1330 0,
1331 0,
1332 0,
1333 0,
1334 0,
1335 0,
1336 0,
1337 0,
1338 0,
1339 0,
1340 0,
1341 0,
1342 0,
1343 0,
1344 0,
1345 0,
1346 0,
1347 0,
1348 0,
1349 0,
1350 0,
1351 0,
1352 0,
1353 0,
1354 0,
1355 0,
1356 0,
1357 0,
1358 0,
1359 0,
1360 0,
1361 0,
1362 0,
1363 0,
1364 0,
1365 0,
1366 0,
1367 0,
1368 0,
1369 0,
1370 0,
1371 0,
1372 0,
1373 0,
1374 0,
1375 0,
1376 0,
1377 0,
1378 0,
1379 0,
1380 0,
1381 0,
1382 0,
1383 0,
1384 0,
1385 0,
1386 0,
1387 0,
1388 0,
1389 0,
1390 0,
1391 0,
1392 0,
1393 0,
1394 0,
1395 0,
1396 0,
1397 0,
1398 0,
1399 0,
1400 0,
1401 0,
1402 0,
1403 0,
1404 0,
1405 0,
1406 0,
1407 0,
1408 0,
1409 0,
1410 0,
1411 0,
1412 0,
1413 0,
1414 0,
1415 0,
1416 0,
1417 0,
1418 0,
1419 0,
1420 0,
1421 0,
1422 0,
1423 0,
1424 0,
1425 0,
1426 0,
1427 0,
1428 0,
1429 0,
1430 0,
1431 0,
1432 0,
1433 0,
1434 0,
1435 0,
1436 0,
1437 0,
1438 0,
1439 1,
1440 126,
1441 114,
1442 66,
1443 31,
1444 41,
1445 25,
1446 15,
1447 21,
1448 20,
1449 16,
1450 15,
1451 10,
1452 7,
1453 5,
1454 1,
1455 1,
1456 ];
1457
1458
1459 let lens = huffman_lengths_from_frequency(&freqs, 15);
1460
1461 // Lengths produced by miniz for this frequency table for comparison
1462 // the number of total bits encoded with these huffman codes is 7701
1463 // NOTE: There can be more than one set of optimal lengths for a set of frequencies,
1464 // (though there may be a difference in how well the table itself can be represented)
1465 // so testing for a specific length table is not ideal since different algorithms
1466 // may produce different length tables.
1467 // let lens3 = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1468 // 0, 0, 0, 0, 0,
1469 // 0, 0, 0, 0, 0, 0, 4, 0, 7, 0, 0, 0, 0, 9, 8, 8, 10, 0, 7, 7, 7, 10, 8, 7, 8,
1470 // 10, 10, 8, 10, 10, 0, 0, 10, 9, 0, 8, 0, 0, 0, 8, 8, 10, 9, 8, 9, 9, 9, 7, 0,
1471 // 0, 9, 10, 8, 8, 7, 0, 8, 7, 8, 9, 0, 0, 10, 9, 0, 7, 7, 8, 0, 10, 9, 6, 7, 6,
1472 // 6, 5, 6, 7, 7, 5, 0, 8, 5, 7, 5, 5, 6, 10, 6, 5, 5, 6, 9, 8, 7, 7, 10, 10, 0,
1473 // 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1474 // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1475 // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1476 // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1477 // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1478 // 0, 0, 10, 4, 4, 4, 5, 5, 6, 7, 6, 6, 6, 6, 7, 8, 8, 10, 10];
1479 //
1480
1481
1482 let num_bits = lens.iter()
1483 .zip(freqs.iter())
1484 .fold(0, |a, (&f, &l)| a + (f as u16 * l));
1485 assert_eq!(num_bits, 7701);
1486 }
1487}