fxfs/lsm_tree/
bloom_filter.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::lsm_tree::types::FuzzyHash;
6use anyhow::{Error, Result};
7use bit_vec::BitVec;
8use rustc_hash::FxHasher;
9use std::hash::{Hash as _, Hasher as _};
10use std::marker::PhantomData;
11
12/// A bloom filter provides a probabilistic means of determining if a layer file *might* contain
13/// records related to a given key.
14///
15/// A bloom filter is a bitmap with two associated constants `m` and `k`.  The bitmap is `m` bits
16/// wide, and there are `k` associated hash functions with the filter.
17///
18/// There are two operations for a bloom filter:
19/// - Adding a new entry.  Each of the `k` hash functions is computed for the entry, and this hash
20///   corresponds to a single bit in the bitmap.  All `k` of these bits are set to 1.
21/// - Querying for an entry.  Each of the `k` hash functions is computed for the entry, and if any
22///   of the corresponding bits in the bitmap are set to 0, the bloom filter returns false, which
23///   means the layer file definitely does not contain records for that key.  Note that false
24///   positives are possible by design; all bits may have been set by unrelated entries, in which
25///   case the query returns true.
26///
27/// # Efficiency & Tuning
28///
29/// As mentioned above, there are two parameters `k` and `m` which we can tune to optimize the
30/// BloomFilter.  There are three factors to consider:
31/// - How much space we use for the bloom filter (i.e. `m`).
32/// - How efficient the operations are (i.e. `k`, since we have to compute `k` hashes for each
33///   insert/query).
34/// - Minimizing false positive rate.  Increasing `k` or `m` will both reduce the false positive
35///   rate.
36///
37/// For simplicity we fix a target false-positive rate and derive `k` and `m`.  See estimate_params.
38
39// To avoid the need for storing the nonces, we generate them pseudo-randomly.
40// Changing the generation of these values will require a version update (which is guarded against
41// in a test below).
42fn generate_hash_nonces(seed: u64, num_nonces: usize) -> Vec<u64> {
43    use rand::rngs::SmallRng;
44    use rand::{Rng as _, SeedableRng as _};
45    let mut output = Vec::with_capacity(num_nonces);
46    let mut rng = SmallRng::seed_from_u64(seed);
47    for _ in 0..num_nonces {
48        let nonce: u64 = rng.gen();
49        output.push(nonce);
50    }
51    output
52}
53
// Returns a tuple (bloom_filter_size_bits, num_hashes).  The bloom filter will always be sized up
// to the nearest power-of-two, which makes its usage later a bit more efficient (since we take
// indexes modulo the size of the filter).
// We target a false positive rate of 0.1%, from which we can derive the constants via relationships
// described in https://en.wikipedia.org/wiki/Bloom_filter#Optimal_number_of_hash_functions.
fn estimate_params(num_items: usize) -> (usize, usize) {
    if num_items == 0 {
        return (0, 0);
    }
    const TARGET_FP_RATE: f64 = 0.001;
    use std::f64::consts::LN_2;

    let n = num_items as f64;
    // Optimal bit count for the target rate: m = -n * ln(p) / (ln 2)^2, then rounded up to a
    // power-of-two number of bytes (never smaller than one byte).
    let ideal_bits = ((n * TARGET_FP_RATE.ln()) / (-LN_2 * LN_2)).ceil() as u64;
    let bits = std::cmp::max(ideal_bits, 8).next_power_of_two();
    let m = bits as f64;

    // Then, solve for a minimal `k` such that the probability of FP is <= TARGET_FP_RATE, using
    // the standard approximation fp ~= (1 - e^(-k / (m / n)))^k.
    let k = (1..)
        .map(|k| k as f64)
        .find(|&k| (1f64 - (-k / (m / n)).exp()).powf(k) <= TARGET_FP_RATE)
        .unwrap();
    (bits as usize, k.round() as usize)
}
81
/// A read-only handle to a bloom filter.  To create a bloom filter, use `BloomFilterWriter`.
/// Note that the bloom filter is *not* versioned; this must be managed by the caller.
pub struct BloomFilterReader<V> {
    // The raw bitmap (`m` bits wide; see module comments).
    data: BitVec,
    // One nonce per hash function (`k` of them; see module comments).  Regenerated
    // deterministically from the seed rather than persisted.
    hash_nonces: Vec<u64>,
    // The filter is logically keyed by `V`, but no `V` is stored.
    _type: PhantomData<V>,
}
89
/// Summary statistics for a bloom filter, as reported by `BloomFilterReader::stats`.
pub struct BloomFilterStats {
    /// Size of the bitmap, in bytes.
    pub size: usize,
    /// Number of hash nonces (i.e. the number of hash functions `k`) used by the filter.
    pub num_nonces: usize,
    /// The percentage (rounded up to the nearest whole) of the bits which are set.
    pub fill_percentage: usize,
}
96
97impl<V: FuzzyHash> BloomFilterReader<V> {
98    /// Creates a BloomFilterReader by reading the serialized contents from `buf`.
99    /// `seed` and `num_nonces` must match the values passed into BloomFilterWriter.
100    pub fn read(buf: &[u8], seed: u64, num_nonces: usize) -> Result<Self, Error> {
101        Ok(Self {
102            data: BitVec::from_bytes(buf),
103            hash_nonces: generate_hash_nonces(seed, num_nonces),
104            _type: PhantomData::default(),
105        })
106    }
107
108    /// Returns whether the bloom filter *might* contain the given value (or any part of it, for
109    /// range-based keys.
110    pub fn maybe_contains(&self, value: &V) -> bool {
111        let mut num = 0;
112        for hash in value.fuzzy_hash() {
113            if self.maybe_contains_inner(hash) {
114                return true;
115            }
116            num += 1;
117            debug_assert!(num < 4, "Too many hash partitions");
118        }
119        false
120    }
121
122    fn maybe_contains_inner(&self, initial_hash: u64) -> bool {
123        let mut hasher = FxHasher::default();
124        initial_hash.hash(&mut hasher);
125        for nonce in &self.hash_nonces {
126            hasher.write_u64(*nonce);
127            let idx = hasher.finish() as usize % self.data.len();
128            if !self.data.get(idx).unwrap() {
129                return false;
130            }
131        }
132        true
133    }
134
135    /// Call sparingly; this is expensive to compute.
136    /// Note that the return value can be trivially cached since the reader is immutable.
137    pub fn stats(&self) -> BloomFilterStats {
138        BloomFilterStats {
139            size: self.data.len().div_ceil(8),
140            num_nonces: self.hash_nonces.len(),
141            fill_percentage: self.compute_fill_percentage(),
142        }
143    }
144
145    // Use sparingly; this is expensive to compute.
146    fn compute_fill_percentage(&self) -> usize {
147        if self.data.is_empty() {
148            return 0;
149        }
150        (100 * self.data.iter().filter(|x| *x).count()).div_ceil(self.data.len())
151    }
152
153    #[cfg(test)]
154    fn new_empty(num_items: usize) -> Self {
155        let (bits, num_nonces) = estimate_params(num_items);
156        Self {
157            data: BitVec::from_elem(bits, false),
158            hash_nonces: generate_hash_nonces(0, num_nonces),
159            _type: PhantomData::default(),
160        }
161    }
162
163    #[cfg(test)]
164    fn new_full(num_items: usize) -> Self {
165        let (bits, num_nonces) = estimate_params(num_items);
166        Self {
167            data: BitVec::from_elem(bits, true),
168            hash_nonces: generate_hash_nonces(0, num_nonces),
169            _type: PhantomData::default(),
170        }
171    }
172}
173
/// A helper to build a bloom filter in-memory before serializing it.
/// Note that the bloom filter is *not* versioned; this must be managed by the caller.
pub struct BloomFilterWriter<V> {
    // The raw bitmap (`m` bits wide; see module comments).
    data: BitVec,
    // Seed mixed into the hashes; retained so it can be queried via `seed()` — the reader must be
    // constructed with the same seed to regenerate identical nonces.
    seed: u64,
    // One nonce per hash function, derived deterministically from `seed`.
    hash_nonces: Vec<u64>,
    // The filter is logically keyed by `V`, but no `V` is stored.
    _type: PhantomData<V>,
}
182
183impl<V: FuzzyHash> BloomFilterWriter<V> {
184    /// Creates a new bloom filter suitable for the given input size.  See module comments for the
185    /// heuristics used.
186    /// `seed` is a value which is mixed into hashes in the bloom filter.  Note that it is not used
187    /// in a secure manner so should not contain any secrets.  It should be unpredictable to prevent
188    /// timing attacks.
189    pub fn new(seed: u64, num_items: usize) -> Self {
190        let (bits, num_nonces) = estimate_params(num_items);
191        Self {
192            data: BitVec::from_elem(bits, false),
193            seed,
194            hash_nonces: generate_hash_nonces(seed, num_nonces),
195            _type: PhantomData::default(),
196        }
197    }
198
199    /// Returns the size the bloom filter will occupy when serialized.
200    pub fn serialized_size(&self) -> usize {
201        self.data.len() / 8
202    }
203
204    pub fn seed(&self) -> u64 {
205        self.seed
206    }
207
208    pub fn num_nonces(&self) -> usize {
209        self.hash_nonces.len()
210    }
211
212    pub fn write<W>(&self, writer: &mut W) -> Result<(), Error>
213    where
214        W: std::io::Write,
215    {
216        Ok(writer.write_all(&self.data.to_bytes()[..])?)
217    }
218
219    pub fn insert(&mut self, value: &V) {
220        for hash in value.fuzzy_hash() {
221            self.insert_inner(hash);
222        }
223    }
224
225    fn insert_inner(&mut self, initial_hash: u64) {
226        let mut hasher = FxHasher::default();
227        initial_hash.hash(&mut hasher);
228        for nonce in &self.hash_nonces {
229            hasher.write_u64(*nonce);
230            let idx = hasher.finish() as usize % self.data.len();
231            self.data.set(idx, true);
232        }
233    }
234}
235
#[cfg(test)]
impl<T> From<BloomFilterWriter<T>> for BloomFilterReader<T> {
    // Test-only conversion: reuse the writer's bitmap and nonces directly, skipping the
    // serialize/deserialize round trip.
    fn from(writer: BloomFilterWriter<T>) -> Self {
        let BloomFilterWriter { data, hash_nonces, .. } = writer;
        Self { data, hash_nonces, _type: PhantomData }
    }
}
242
#[cfg(test)]
mod tests {
    use crate::lsm_tree::bloom_filter::{
        estimate_params, generate_hash_nonces, BloomFilterReader, BloomFilterWriter,
    };
    use crate::object_store::allocator::AllocatorKey;

    #[test]
    fn estimated_params() {
        // Compare to https://hur.st/bloomfilter (keeping in mind the rounding-up of the size of the
        // bloom filter).
        assert_eq!(estimate_params(0), (0, 0));
        assert_eq!(estimate_params(1), (16, 6));
        assert_eq!(estimate_params(5000), (131072, 4));
        assert_eq!(estimate_params(50000), (1048576, 4));
        assert_eq!(estimate_params(5_000_000), (134217728, 4));
    }

    // Guards the on-disk format: changing nonce generation would silently invalidate existing
    // bloom filters, so any change here requires a version update (see `generate_hash_nonces`).
    #[test]
    fn hash_nonces_are_stable() {
        let expected: [u64; 16] = [
            15601892068231251798,
            4249550343631144082,
            11170505403239506035,
            3141899402926357684,
            10455158105512296971,
            1544954577306875038,
            7546141422416683882,
            10809374736430664972,
            14270886949990153586,
            12890306703619732195,
            10531031577317334640,
            2369458071968706433,
            8554654157920588268,
            4945022529079265586,
            4849687277068177250,
            11824207122193630909,
        ];
        // The sequence must also be prefix-stable: requesting fewer nonces yields a prefix of the
        // same values.
        for i in 1..=expected.len() {
            let generated = generate_hash_nonces(0xfeedbad, i);
            assert_eq!(&generated[..], &expected[..i]);
        }
    }

    const TEST_KEYS: [i32; 4] = [0, 65535, i32::MAX, i32::MIN];

    #[test]
    fn test_empty() {
        // An all-zeroes filter must report a definite miss for everything.
        let filter = BloomFilterReader::new_empty(TEST_KEYS.len());
        for key in &TEST_KEYS {
            assert!(!filter.maybe_contains(key));
        }
    }

    #[test]
    fn test_full() {
        // An all-ones filter must report a (possible) hit for everything.
        let filter = BloomFilterReader::new_full(TEST_KEYS.len());
        for key in &TEST_KEYS {
            assert!(filter.maybe_contains(key));
        }
    }

    #[test]
    fn test_insert() {
        for key in &TEST_KEYS {
            // Use a new filter each time so we don't get false positives.
            let mut filter = BloomFilterWriter::new(0, TEST_KEYS.len());
            filter.insert(key);
            let filter = BloomFilterReader::from(filter);
            assert!(filter.maybe_contains(key));
        }
    }

    // Range-based keys (AllocatorKey) should match on any overlapping range, not just an exact
    // match, via FuzzyHash's partitioning.
    #[test]
    fn test_range_key() {
        let mut filter = BloomFilterWriter::new(0, 2);
        filter.insert(&AllocatorKey { device_range: 0..2097152 });
        filter.insert(&AllocatorKey { device_range: 4194304..4194305 });
        let filter = BloomFilterReader::from(filter);

        // Sub-ranges of the inserted ranges hit; ranges in the gap between them miss.
        assert!(filter.maybe_contains(&AllocatorKey { device_range: 0..1 }));
        assert!(filter.maybe_contains(&AllocatorKey { device_range: 2097151..2097152 }));
        assert!(!filter.maybe_contains(&AllocatorKey { device_range: 2097152..2097153 }));
        assert!(!filter.maybe_contains(&AllocatorKey { device_range: 3145727..3145728 }));
        assert!(filter.maybe_contains(&AllocatorKey { device_range: 4193404..4194305 }));

        // Queries that merely overlap an inserted range also hit; a far-away range misses.
        assert!(filter.maybe_contains(&AllocatorKey { device_range: 0..2097153 }));
        assert!(filter.maybe_contains(&AllocatorKey { device_range: 2097152..4194305 }));
        assert!(!filter.maybe_contains(&AllocatorKey { device_range: 104857600..104857601 }));
    }

    // Round-trips a filter through serialization and verifies the deserialized copy still hits.
    #[test]
    fn test_serde() {
        for key in &TEST_KEYS {
            // Use a new filter each time so we don't get false positives.
            let mut filter = BloomFilterWriter::new(0, 1);
            filter.insert(key);
            let num_nonces = filter.num_nonces();
            let mut buf = vec![];
            {
                let mut cursor = std::io::Cursor::new(&mut buf);
                filter.write(&mut cursor).expect("write failed");
            }
            let filter = BloomFilterReader::read(&buf[..], 0, num_nonces).expect("read failed");
            assert!(filter.maybe_contains(key));
        }
    }
}