adler32/
lib.rs

1//! A minimal implementation of Adler32 for Rust.
2//!
3//! This provides the simple method adler32(), that exhausts a Read and
4//! computes the Adler32 hash, as well as the RollingAdler32 struct, that can
5//! build a hash byte-by-byte, allowing to 'forget' past bytes in a rolling
6//! fashion.
7//!
8//! The adler32 code has been translated (as accurately as I could manage) from
9//! the zlib implementation.
10
11#[cfg(test)]
12extern crate rand;
13
14use std::io;
15
16// adler32 algorithm and implementation taken from zlib; http://www.zlib.net/
17// It was translated into Rust as accurately as I could manage
18// The (slow) reference was taken from Wikipedia; https://en.wikipedia.org/
19
20/* zlib.h -- interface of the 'zlib' general purpose compression library
21  version 1.2.8, April 28th, 2013
22
23  Copyright (C) 1995-2013 Jean-loup Gailly and Mark Adler
24
25  This software is provided 'as-is', without any express or implied
26  warranty.  In no event will the authors be held liable for any damages
27  arising from the use of this software.
28
29  Permission is granted to anyone to use this software for any purpose,
30  including commercial applications, and to alter it and redistribute it
31  freely, subject to the following restrictions:
32
33  1. The origin of this software must not be misrepresented; you must not
34     claim that you wrote the original software. If you use this software
35     in a product, an acknowledgment in the product documentation would be
36     appreciated but is not required.
37  2. Altered source versions must be plainly marked as such, and must not be
38     misrepresented as being the original software.
39  3. This notice may not be removed or altered from any source distribution.
40
41  Jean-loup Gailly        Mark Adler
42  jloup@gzip.org          madler@alumni.caltech.edu
43
44*/
45
46// largest prime smaller than 65536
47const BASE: u32 = 65521;
48
49// NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
50const NMAX: usize = 5552;
51
52#[inline(always)]
53fn do1(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
54    *adler += u32::from(buf[0]);
55    *sum2 += *adler;
56}
57
58#[inline(always)]
59fn do2(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
60    do1(adler, sum2, &buf[0..1]);
61    do1(adler, sum2, &buf[1..2]);
62}
63
64#[inline(always)]
65fn do4(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
66    do2(adler, sum2, &buf[0..2]);
67    do2(adler, sum2, &buf[2..4]);
68}
69
70#[inline(always)]
71fn do8(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
72    do4(adler, sum2, &buf[0..4]);
73    do4(adler, sum2, &buf[4..8]);
74}
75
76#[inline(always)]
77fn do16(adler: &mut u32, sum2: &mut u32, buf: &[u8]) {
78    do8(adler, sum2, &buf[0..8]);
79    do8(adler, sum2, &buf[8..16]);
80}
81
82/// A rolling version of the Adler32 hash, which can 'forget' past bytes.
83///
84/// Calling remove() will update the hash to the value it would have if that
85/// past byte had never been fed to the algorithm. This allows you to get the
86/// hash of a rolling window very efficiently.
87pub struct RollingAdler32 {
88    a: u32,
89    b: u32,
90}
91
92impl Default for RollingAdler32 {
93    fn default() -> RollingAdler32 {
94        RollingAdler32::new()
95    }
96}
97
98impl RollingAdler32 {
99    /// Creates an empty Adler32 context (with hash 1).
100    pub fn new() -> RollingAdler32 {
101        Self::from_value(1)
102    }
103
104    /// Creates an Adler32 context with the given initial value.
105    pub fn from_value(adler32: u32) -> RollingAdler32 {
106        let a = adler32 & 0xFFFF;
107        let b = adler32 >> 16;
108        RollingAdler32 { a, b }
109    }
110
111    /// Convenience function initializing a context from the hash of a buffer.
112    pub fn from_buffer(buffer: &[u8]) -> RollingAdler32 {
113        let mut hash = RollingAdler32::new();
114        hash.update_buffer(buffer);
115        hash
116    }
117
118    /// Returns the current hash.
119    pub fn hash(&self) -> u32 {
120        (self.b << 16) | self.a
121    }
122
123    /// Removes the given `byte` that was fed to the algorithm `size` bytes ago.
124    pub fn remove(&mut self, size: usize, byte: u8) {
125        let byte = u32::from(byte);
126        self.a = (self.a + BASE - byte) % BASE;
127        self.b = ((self.b + BASE - 1)
128                      .wrapping_add(BASE.wrapping_sub(size as u32)
129                                        .wrapping_mul(byte))) % BASE;
130    }
131
132    /// Feeds a new `byte` to the algorithm to update the hash.
133    pub fn update(&mut self, byte: u8) {
134        let byte = u32::from(byte);
135        self.a = (self.a + byte) % BASE;
136        self.b = (self.b + self.a) % BASE;
137    }
138
139    /// Feeds a vector of bytes to the algorithm to update the hash.
140    pub fn update_buffer(&mut self, buffer: &[u8]) {
141        let len = buffer.len();
142
143        // in case user likes doing a byte at a time, keep it fast
144        if len == 1 {
145            self.update(buffer[0]);
146            return;
147        }
148
149        // in case short lengths are provided, keep it somewhat fast
150        if len < 16 {
151            for byte in buffer.iter().take(len) {
152                self.a += u32::from(*byte);
153                self.b += self.a;
154            }
155            if self.a >= BASE {
156                self.a -= BASE;
157            }
158            self.b %= BASE;
159            return;
160        }
161
162        let mut pos = 0;
163
164        // do length NMAX blocks -- requires just one modulo operation;
165        while pos + NMAX <= len {
166            let end = pos + NMAX;
167            while pos < end {
168                // 16 sums unrolled
169                do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
170                pos += 16;
171            }
172            self.a %= BASE;
173            self.b %= BASE;
174        }
175
176        // do remaining bytes (less than NMAX, still just one modulo)
177        if pos < len { // avoid modulos if none remaining
178            while len - pos >= 16 {
179                do16(&mut self.a, &mut self.b, &buffer[pos..pos + 16]);
180                pos += 16;
181            }
182            while len - pos > 0 {
183                self.a += u32::from(buffer[pos]);
184                self.b += self.a;
185                pos += 1;
186            }
187            self.a %= BASE;
188            self.b %= BASE;
189        }
190    }
191}
192
193/// Consume a Read object and returns the Adler32 hash.
194pub fn adler32<R: io::Read>(mut reader: R) -> io::Result<u32> {
195    let mut hash = RollingAdler32::new();
196    let mut buffer = [0u8; NMAX];
197    let mut read = try!(reader.read(&mut buffer));
198    while read > 0 {
199        hash.update_buffer(&buffer[..read]);
200        read = try!(reader.read(&mut buffer));
201    }
202    Ok(hash.hash())
203}
204
205#[cfg(test)]
206mod test {
207    use rand;
208    use rand::Rng;
209    use std::io;
210
211    use super::{BASE, adler32, RollingAdler32};
212
213    fn adler32_slow<R: io::Read>(reader: R) -> io::Result<u32> {
214        let mut a: u32 = 1;
215        let mut b: u32 = 0;
216
217        for byte in reader.bytes() {
218            let byte = try!(byte) as u32;
219            a = (a + byte) % BASE;
220            b = (b + a) % BASE;
221        }
222
223        Ok((b << 16) | a)
224    }
225
226    #[test]
227    fn testvectors() {
228        fn do_test(v: u32, bytes: &[u8]) {
229            let mut hash = RollingAdler32::new();
230            hash.update_buffer(&bytes);
231            assert_eq!(hash.hash(), v);
232
233            let r = io::Cursor::new(bytes);
234            assert_eq!(adler32(r).unwrap(), v);
235        }
236        do_test(0x00000001, b"");
237        do_test(0x00620062, b"a");
238        do_test(0x024d0127, b"abc");
239        do_test(0x29750586, b"message digest");
240        do_test(0x90860b20, b"abcdefghijklmnopqrstuvwxyz");
241        do_test(0x8adb150c, b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\
242                              abcdefghijklmnopqrstuvwxyz\
243                              0123456789");
244        do_test(0x97b61069, b"1234567890123456789012345678901234567890\
245                              1234567890123456789012345678901234567890");
246        do_test(0xD6251498, &[255; 64000]);
247    }
248
249    #[test]
250    fn compare() {
251        let mut rng = rand::thread_rng();
252        let mut data = vec![0u8; 5589];
253        for size in [0, 1, 3, 4, 5, 31, 32, 33, 67,
254                     5550, 5552, 5553, 5568, 5584, 5589].iter().cloned() {
255            rng.fill_bytes(&mut data[..size]);
256            let r1 = io::Cursor::new(&data[..size]);
257            let r2 = r1.clone();
258            if adler32_slow(r1).unwrap() != adler32(r2).unwrap() {
259                panic!("Comparison failed, size={}", size);
260            }
261        }
262    }
263
264    #[test]
265    fn rolling() {
266        assert_eq!(RollingAdler32::from_value(0x01020304).hash(), 0x01020304);
267
268        fn do_test(a: &[u8], b: &[u8]) {
269            let mut total = Vec::with_capacity(a.len() + b.len());
270            total.extend(a);
271            total.extend(b);
272            let mut h = RollingAdler32::from_buffer(&total[..(b.len())]);
273            for i in 0..(a.len()) {
274                h.remove(b.len(), a[i]);
275                h.update(total[b.len() + i]);
276            }
277            assert_eq!(h.hash(), adler32(b).unwrap());
278        }
279        do_test(b"a", b"b");
280        do_test(b"", b"this a test");
281        do_test(b"th", b"is a test");
282        do_test(b"this a ", b"test");
283    }
284
285    #[test]
286    fn long_window_remove() {
287        let mut hash = RollingAdler32::new();
288        let w = 65536;
289        assert!(w as u32 > BASE);
290
291        let mut bytes = vec![0; w*3];
292        for (i, b) in bytes.iter_mut().enumerate() {
293            *b = i as u8;
294        }
295
296        for (i, b) in bytes.iter().enumerate() {
297            if i >= w {
298                hash.remove(w, bytes[i - w]);
299            }
300            hash.update(*b);
301            if i > 0 && i % w == 0 {
302                assert_eq!(hash.hash(), 0x433a8772);
303            }
304        }
305        assert_eq!(hash.hash(), 0xbbba8772);
306    }
307}