siphasher/
sip.rs

1// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
2// file at the top-level directory of this distribution and at
3// http://rust-lang.org/COPYRIGHT.
4//
5// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
6// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
7// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
8// option. This file may not be copied, modified, or distributed
9// except according to those terms.
10
11//! An implementation of SipHash.
12
13use core::cmp;
14use core::hash;
15use core::marker::PhantomData;
16use core::mem;
17use core::ptr;
18use core::u64;
19
20/// An implementation of SipHash 1-3.
21///
22/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
23#[derive(Debug, Clone, Copy, Default)]
24#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
25pub struct SipHasher13 {
26    hasher: Hasher<Sip13Rounds>,
27}
28
29/// An implementation of SipHash 2-4.
30///
31/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
32#[derive(Debug, Clone, Copy, Default)]
33#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
34pub struct SipHasher24 {
35    hasher: Hasher<Sip24Rounds>,
36}
37
38/// An implementation of SipHash 2-4.
39///
40/// See: <https://www.aumasson.jp/siphash/siphash.pdf>
41///
42/// SipHash is a general-purpose hashing function: it runs at a good
43/// speed (competitive with Spooky and City) and permits strong _keyed_
44/// hashing. This lets you key your hashtables from a strong RNG, such as
45/// [`rand::os::OsRng`](https://doc.rust-lang.org/rand/rand/os/struct.OsRng.html).
46///
47/// Although the SipHash algorithm is considered to be generally strong,
48/// it is not intended for cryptographic purposes. As such, all
49/// cryptographic uses of this implementation are _strongly discouraged_.
50#[derive(Debug, Clone, Copy, Default)]
51#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
52pub struct SipHasher(SipHasher24);
53
54#[derive(Debug, Clone, Copy)]
55#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
56struct Hasher<S: Sip> {
57    k0: u64,
58    k1: u64,
59    length: usize, // how many bytes we've processed
60    state: State,  // hash State
61    tail: u64,     // unprocessed bytes le
62    ntail: usize,  // how many bytes in tail are valid
63    _marker: PhantomData<S>,
64}
65
66#[derive(Debug, Clone, Copy)]
67#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
68struct State {
69    // v0, v2 and v1, v3 show up in pairs in the algorithm,
70    // and simd implementations of SipHash will use vectors
71    // of v02 and v13. By placing them in this order in the struct,
72    // the compiler can pick up on just a few simd optimizations by itself.
73    v0: u64,
74    v2: u64,
75    v1: u64,
76    v3: u64,
77}
78
79macro_rules! compress {
80    ($state:expr) => {{
81        compress!($state.v0, $state.v1, $state.v2, $state.v3)
82    }};
83    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {{
84        $v0 = $v0.wrapping_add($v1);
85        $v1 = $v1.rotate_left(13);
86        $v1 ^= $v0;
87        $v0 = $v0.rotate_left(32);
88        $v2 = $v2.wrapping_add($v3);
89        $v3 = $v3.rotate_left(16);
90        $v3 ^= $v2;
91        $v0 = $v0.wrapping_add($v3);
92        $v3 = $v3.rotate_left(21);
93        $v3 ^= $v0;
94        $v2 = $v2.wrapping_add($v1);
95        $v1 = $v1.rotate_left(17);
96        $v1 ^= $v2;
97        $v2 = $v2.rotate_left(32);
98    }};
99}
100
101/// Loads an integer of the desired type from a byte stream, in LE order. Uses
102/// `copy_nonoverlapping` to let the compiler generate the most efficient way
103/// to load it from a possibly unaligned address.
104///
105/// Unsafe because: unchecked indexing at `i..i+size_of(int_ty)`
106macro_rules! load_int_le {
107    ($buf:expr, $i:expr, $int_ty:ident) => {{
108        debug_assert!($i + mem::size_of::<$int_ty>() <= $buf.len());
109        let mut data = 0 as $int_ty;
110        ptr::copy_nonoverlapping(
111            $buf.as_ptr().add($i),
112            &mut data as *mut _ as *mut u8,
113            mem::size_of::<$int_ty>(),
114        );
115        data.to_le()
116    }};
117}
118
119/// Loads a u64 using up to 7 bytes of a byte slice. It looks clumsy but the
120/// `copy_nonoverlapping` calls that occur (via `load_int_le!`) all have fixed
121/// sizes and avoid calling `memcpy`, which is good for speed.
122///
123/// Unsafe because: unchecked indexing at start..start+len
124#[inline]
125unsafe fn u8to64_le(buf: &[u8], start: usize, len: usize) -> u64 {
126    debug_assert!(len < 8);
127    let mut i = 0; // current byte index (from LSB) in the output u64
128    let mut out = 0;
129    if i + 3 < len {
130        out = load_int_le!(buf, start + i, u32) as u64;
131        i += 4;
132    }
133    if i + 1 < len {
134        out |= (load_int_le!(buf, start + i, u16) as u64) << (i * 8);
135        i += 2
136    }
137    if i < len {
138        out |= (*buf.get_unchecked(start + i) as u64) << (i * 8);
139        i += 1;
140    }
141    debug_assert_eq!(i, len);
142    out
143}
144
145impl SipHasher {
146    /// Creates a new `SipHasher` with the two initial keys set to 0.
147    #[inline]
148    pub fn new() -> SipHasher {
149        SipHasher::new_with_keys(0, 0)
150    }
151
152    /// Creates a `SipHasher` that is keyed off the provided keys.
153    #[inline]
154    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher {
155        SipHasher(SipHasher24::new_with_keys(key0, key1))
156    }
157
158    /// Creates a `SipHasher` from a 16 byte key.
159    pub fn new_with_key(key: &[u8; 16]) -> SipHasher {
160        let mut b0 = [0u8; 8];
161        let mut b1 = [0u8; 8];
162        b0.copy_from_slice(&key[0..8]);
163        b1.copy_from_slice(&key[8..16]);
164        let key0 = u64::from_le_bytes(b0);
165        let key1 = u64::from_le_bytes(b1);
166        Self::new_with_keys(key0, key1)
167    }
168
169    /// Get the keys used by this hasher
170    pub fn keys(&self) -> (u64, u64) {
171        (self.0.hasher.k0, self.0.hasher.k1)
172    }
173
174    /// Get the key used by this hasher as a 16 byte vector
175    pub fn key(&self) -> [u8; 16] {
176        let mut bytes = [0u8; 16];
177        bytes[0..8].copy_from_slice(&self.0.hasher.k0.to_le_bytes());
178        bytes[8..16].copy_from_slice(&self.0.hasher.k1.to_le_bytes());
179        bytes
180    }
181}
182
183impl SipHasher13 {
184    /// Creates a new `SipHasher13` with the two initial keys set to 0.
185    #[inline]
186    pub fn new() -> SipHasher13 {
187        SipHasher13::new_with_keys(0, 0)
188    }
189
190    /// Creates a `SipHasher13` that is keyed off the provided keys.
191    #[inline]
192    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher13 {
193        SipHasher13 {
194            hasher: Hasher::new_with_keys(key0, key1),
195        }
196    }
197
198    /// Creates a `SipHasher13` from a 16 byte key.
199    pub fn new_with_key(key: &[u8; 16]) -> SipHasher13 {
200        let mut b0 = [0u8; 8];
201        let mut b1 = [0u8; 8];
202        b0.copy_from_slice(&key[0..8]);
203        b1.copy_from_slice(&key[8..16]);
204        let key0 = u64::from_le_bytes(b0);
205        let key1 = u64::from_le_bytes(b1);
206        Self::new_with_keys(key0, key1)
207    }
208
209    /// Get the keys used by this hasher
210    pub fn keys(&self) -> (u64, u64) {
211        (self.hasher.k0, self.hasher.k1)
212    }
213
214    /// Get the key used by this hasher as a 16 byte vector
215    pub fn key(&self) -> [u8; 16] {
216        let mut bytes = [0u8; 16];
217        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
218        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
219        bytes
220    }
221}
222
223impl SipHasher24 {
224    /// Creates a new `SipHasher24` with the two initial keys set to 0.
225    #[inline]
226    pub fn new() -> SipHasher24 {
227        SipHasher24::new_with_keys(0, 0)
228    }
229
230    /// Creates a `SipHasher24` that is keyed off the provided keys.
231    #[inline]
232    pub fn new_with_keys(key0: u64, key1: u64) -> SipHasher24 {
233        SipHasher24 {
234            hasher: Hasher::new_with_keys(key0, key1),
235        }
236    }
237
238    /// Creates a `SipHasher24` from a 16 byte key.
239    pub fn new_with_key(key: &[u8; 16]) -> SipHasher24 {
240        let mut b0 = [0u8; 8];
241        let mut b1 = [0u8; 8];
242        b0.copy_from_slice(&key[0..8]);
243        b1.copy_from_slice(&key[8..16]);
244        let key0 = u64::from_le_bytes(b0);
245        let key1 = u64::from_le_bytes(b1);
246        Self::new_with_keys(key0, key1)
247    }
248
249    /// Get the keys used by this hasher
250    pub fn keys(&self) -> (u64, u64) {
251        (self.hasher.k0, self.hasher.k1)
252    }
253
254    /// Get the key used by this hasher as a 16 byte vector
255    pub fn key(&self) -> [u8; 16] {
256        let mut bytes = [0u8; 16];
257        bytes[0..8].copy_from_slice(&self.hasher.k0.to_le_bytes());
258        bytes[8..16].copy_from_slice(&self.hasher.k1.to_le_bytes());
259        bytes
260    }
261}
262
263impl<S: Sip> Hasher<S> {
264    #[inline]
265    fn new_with_keys(key0: u64, key1: u64) -> Hasher<S> {
266        let mut state = Hasher {
267            k0: key0,
268            k1: key1,
269            length: 0,
270            state: State {
271                v0: 0,
272                v1: 0,
273                v2: 0,
274                v3: 0,
275            },
276            tail: 0,
277            ntail: 0,
278            _marker: PhantomData,
279        };
280        state.reset();
281        state
282    }
283
284    #[inline]
285    fn reset(&mut self) {
286        self.length = 0;
287        self.state.v0 = self.k0 ^ 0x736f6d6570736575;
288        self.state.v1 = self.k1 ^ 0x646f72616e646f6d;
289        self.state.v2 = self.k0 ^ 0x6c7967656e657261;
290        self.state.v3 = self.k1 ^ 0x7465646279746573;
291        self.ntail = 0;
292    }
293
294    // A specialized write function for values with size <= 8.
295    //
296    // The hashing of multi-byte integers depends on endianness. E.g.:
297    // - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
298    // - big-endian:    `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
299    //
300    // This function does the right thing for little-endian hardware. On
301    // big-endian hardware `x` must be byte-swapped first to give the right
302    // behaviour. After any byte-swapping, the input must be zero-extended to
303    // 64-bits. The caller is responsible for the byte-swapping and
304    // zero-extension.
305    #[inline]
306    fn short_write<T>(&mut self, _x: T, x: u64) {
307        let size = mem::size_of::<T>();
308        self.length += size;
309
310        // The original number must be zero-extended, not sign-extended.
311        debug_assert!(if size < 8 { x >> (8 * size) == 0 } else { true });
312
313        // The number of bytes needed to fill `self.tail`.
314        let needed = 8 - self.ntail;
315
316        self.tail |= x << (8 * self.ntail);
317        if size < needed {
318            self.ntail += size;
319            return;
320        }
321
322        // `self.tail` is full, process it.
323        self.state.v3 ^= self.tail;
324        S::c_rounds(&mut self.state);
325        self.state.v0 ^= self.tail;
326
327        self.ntail = size - needed;
328        self.tail = if needed < 8 { x >> (8 * needed) } else { 0 };
329    }
330}
331
332impl hash::Hasher for SipHasher {
333    #[inline]
334    fn write(&mut self, msg: &[u8]) {
335        self.0.write(msg)
336    }
337
338    #[inline]
339    fn finish(&self) -> u64 {
340        self.0.finish()
341    }
342
343    #[inline]
344    fn write_usize(&mut self, i: usize) {
345        self.0.write_usize(i);
346    }
347
348    #[inline]
349    fn write_u8(&mut self, i: u8) {
350        self.0.write_u8(i);
351    }
352
353    #[inline]
354    fn write_u16(&mut self, i: u16) {
355        self.0.write_u16(i);
356    }
357
358    #[inline]
359    fn write_u32(&mut self, i: u32) {
360        self.0.write_u32(i);
361    }
362
363    #[inline]
364    fn write_u64(&mut self, i: u64) {
365        self.0.write_u64(i);
366    }
367}
368
369impl hash::Hasher for SipHasher13 {
370    #[inline]
371    fn write(&mut self, msg: &[u8]) {
372        self.hasher.write(msg)
373    }
374
375    #[inline]
376    fn finish(&self) -> u64 {
377        self.hasher.finish()
378    }
379
380    #[inline]
381    fn write_usize(&mut self, i: usize) {
382        self.hasher.write_usize(i);
383    }
384
385    #[inline]
386    fn write_u8(&mut self, i: u8) {
387        self.hasher.write_u8(i);
388    }
389
390    #[inline]
391    fn write_u16(&mut self, i: u16) {
392        self.hasher.write_u16(i);
393    }
394
395    #[inline]
396    fn write_u32(&mut self, i: u32) {
397        self.hasher.write_u32(i);
398    }
399
400    #[inline]
401    fn write_u64(&mut self, i: u64) {
402        self.hasher.write_u64(i);
403    }
404}
405
406impl hash::Hasher for SipHasher24 {
407    #[inline]
408    fn write(&mut self, msg: &[u8]) {
409        self.hasher.write(msg)
410    }
411
412    #[inline]
413    fn finish(&self) -> u64 {
414        self.hasher.finish()
415    }
416
417    #[inline]
418    fn write_usize(&mut self, i: usize) {
419        self.hasher.write_usize(i);
420    }
421
422    #[inline]
423    fn write_u8(&mut self, i: u8) {
424        self.hasher.write_u8(i);
425    }
426
427    #[inline]
428    fn write_u16(&mut self, i: u16) {
429        self.hasher.write_u16(i);
430    }
431
432    #[inline]
433    fn write_u32(&mut self, i: u32) {
434        self.hasher.write_u32(i);
435    }
436
437    #[inline]
438    fn write_u64(&mut self, i: u64) {
439        self.hasher.write_u64(i);
440    }
441}
442
443impl<S: Sip> hash::Hasher for Hasher<S> {
444    #[inline]
445    fn write_usize(&mut self, i: usize) {
446        self.short_write(i, i.to_le() as u64);
447    }
448
449    #[inline]
450    fn write_u8(&mut self, i: u8) {
451        self.short_write(i, i as u64);
452    }
453
454    #[inline]
455    fn write_u32(&mut self, i: u32) {
456        self.short_write(i, i.to_le() as u64);
457    }
458
459    #[inline]
460    fn write_u64(&mut self, i: u64) {
461        self.short_write(i, i.to_le() as u64);
462    }
463
464    #[inline]
465    fn write(&mut self, msg: &[u8]) {
466        let length = msg.len();
467        self.length += length;
468
469        let mut needed = 0;
470
471        if self.ntail != 0 {
472            needed = 8 - self.ntail;
473            self.tail |= unsafe { u8to64_le(msg, 0, cmp::min(length, needed)) } << (8 * self.ntail);
474            if length < needed {
475                self.ntail += length;
476                return;
477            } else {
478                self.state.v3 ^= self.tail;
479                S::c_rounds(&mut self.state);
480                self.state.v0 ^= self.tail;
481                self.ntail = 0;
482            }
483        }
484
485        // Buffered tail is now flushed, process new input.
486        let len = length - needed;
487        let left = len & 0x7;
488
489        let mut i = needed;
490        while i < len - left {
491            let mi = unsafe { load_int_le!(msg, i, u64) };
492
493            self.state.v3 ^= mi;
494            S::c_rounds(&mut self.state);
495            self.state.v0 ^= mi;
496
497            i += 8;
498        }
499
500        self.tail = unsafe { u8to64_le(msg, i, left) };
501        self.ntail = left;
502    }
503
504    #[inline]
505    fn finish(&self) -> u64 {
506        let mut state = self.state;
507
508        let b: u64 = ((self.length as u64 & 0xff) << 56) | self.tail;
509
510        state.v3 ^= b;
511        S::c_rounds(&mut state);
512        state.v0 ^= b;
513
514        state.v2 ^= 0xff;
515        S::d_rounds(&mut state);
516
517        state.v0 ^ state.v1 ^ state.v2 ^ state.v3
518    }
519}
520
521impl<S: Sip> Default for Hasher<S> {
522    /// Creates a `Hasher<S>` with the two initial keys set to 0.
523    #[inline]
524    fn default() -> Hasher<S> {
525        Hasher::new_with_keys(0, 0)
526    }
527}
528
529#[doc(hidden)]
530trait Sip {
531    fn c_rounds(_: &mut State);
532    fn d_rounds(_: &mut State);
533}
534
535#[derive(Debug, Clone, Copy, Default)]
536struct Sip13Rounds;
537
538impl Sip for Sip13Rounds {
539    #[inline]
540    fn c_rounds(state: &mut State) {
541        compress!(state);
542    }
543
544    #[inline]
545    fn d_rounds(state: &mut State) {
546        compress!(state);
547        compress!(state);
548        compress!(state);
549    }
550}
551
552#[derive(Debug, Clone, Copy, Default)]
553struct Sip24Rounds;
554
555impl Sip for Sip24Rounds {
556    #[inline]
557    fn c_rounds(state: &mut State) {
558        compress!(state);
559        compress!(state);
560    }
561
562    #[inline]
563    fn d_rounds(state: &mut State) {
564        compress!(state);
565        compress!(state);
566        compress!(state);
567        compress!(state);
568    }
569}