deflate/
rle.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
use lz77::{ProcessStatus, buffer_full};
use output_writer::{BufferStatus, DynamicWriter};
use huffman_table;

use std::ops::Range;
use std::cmp;

const MIN_MATCH: usize = huffman_table::MIN_MATCH as usize;
const MAX_MATCH: usize = huffman_table::MAX_MATCH as usize;


/// Simple match function for run-length encoding.
///
/// Checks how many of the next bytes from the start of the slice `data` matches prev.
fn get_match_length_rle(data: &[u8], prev: u8) -> usize {
    data.iter()
        .take(MAX_MATCH)
        .take_while(|&&b| b == prev)
        .count()
}

/// L77-Compress data using the RLE(Run-length encoding) strategy
///
/// This function simply looks for runs of data of at least length 3.
pub fn process_chunk_greedy_rle(
    data: &[u8],
    iterated_data: &Range<usize>,
    writer: &mut DynamicWriter,
) -> (usize, ProcessStatus) {
    if data.is_empty() {
        return (0, ProcessStatus::Ok);
    };

    let end = cmp::min(data.len(), iterated_data.end);
    // Start on at least byte 1.
    let start = cmp::max(iterated_data.start, 1);
    // The previous byte.
    let mut prev = data[start - 1];
    // Iterate through the requested range, but avoid going off the end.
    let current_chunk = &data[cmp::min(start, end)..end];
    let mut insert_it = current_chunk.iter().enumerate();
    let mut overlap = 0;
    // Make sure to output the first byte
    if iterated_data.start == 0 && !data.is_empty() {
        write_literal!(writer, data[0], 1);
    }

    while let Some((n, &b)) = insert_it.next() {
        let position = n + start;
        let match_len = if prev == b {
            //TODO: Avoid comparing with self here.
            // Would use as_slice() but that doesn't work on an enumerated iterator.
            get_match_length_rle(&data[position..], prev)
        } else {
            0
        };
        if match_len >= MIN_MATCH {
            if position + match_len > end {
                overlap = position + match_len - end;
            };
            let b_status = writer.write_length_rle(match_len as u16);
            if b_status == BufferStatus::Full {
                return (overlap, buffer_full(position + match_len));
            }
            insert_it.nth(match_len - 2);
        } else {
            write_literal!(writer, b, position + 1);
        }
        prev = b;
    }

    (overlap, ProcessStatus::Ok)
}

#[cfg(test)]
mod test {
    use super::*;
    use lzvalue::{LZValue, lit, ld};

    fn l(c: char) -> LZValue {
        lit(c as u8)
    }

    #[test]
    fn rle_compress() {
        let input = b"textaaaaaaaaatext";
        let mut w = DynamicWriter::new();
        let r = 0..input.len();
        let (overlap, _) = process_chunk_greedy_rle(input, &r, &mut w);
        let expected = [
            l('t'),
            l('e'),
            l('x'),
            l('t'),
            l('a'),
            ld(8, 1),
            l('t'),
            l('e'),
            l('x'),
            l('t'),
        ];
        //println!("expected: {:?}", expected);
        //println!("actual: {:?}", w.get_buffer());
        assert!(w.get_buffer() == expected);
        assert_eq!(overlap, 0);
    }
}