fxfs/lsm_tree/
bloom_filter.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::lsm_tree::types::FuzzyHash;
6use anyhow::{Error, Result};
7use bit_vec::BitVec;
8use std::marker::PhantomData;
9
10/// A bloom filter provides a probabilistic means of determining if a layer file *might* contain
11/// records related to a given key.
12///
13/// A bloom filter is a bitmap with two associated constants `m` and `k`.  The bitmap is `m` bits
14/// wide, and there are `k` associated hash functions with the filter.
15///
16/// There are two operations for a bloom filter:
17/// - Adding a new entry.  Each of the `k` hash functions is computed for the entry, and this hash
18///   corresponds to a single bit in the bitmap.  All `k` of these bits are set to 1.
19/// - Querying for an entry.  Each of the `k` hash functions is computed for the entry, and if any
20///   of the corresponding bits in the bitmap are set to 0, the bloom filter returns false, which
21///   means the layer file definitely does not contain records for that key.  Note that false
22///   positives are possible by design; all bits may have been set by unrelated entries, in which
23///   case the query returns true.
24///
25/// # Efficiency & Tuning
26///
27/// As mentioned above, there are two parameters `k` and `m` which we can tune to optimize the
28/// BloomFilter.  There are three factors to consider:
29/// - How much space we use for the bloom filter (i.e. `m`).
30/// - How efficient the operations are (i.e. `k`, since we have to compute `k` hashes for each
31///   insert/query).
32/// - Minimizing false positive rate.  Increasing `k` or `m` will both reduce the false positive
33///   rate.
34///
35/// For simplicity we fix a target false-positive rate and derive `k` and `m`.  See estimate_params.
36
37// Returns a tuple (bloom_filter_size_bits, num_hashes).  The bloom filter will always be sized up
38// to the nearest power-of-two, which makes its usage later a bit more efficient (since we take
39// indexes modulo the size of the filter).
40// We target a false positive rate of 0.1%, from which we can derive the constants via relationships
41// described in https://en.wikipedia.org/wiki/Bloom_filter#Optimal_number_of_hash_functions.
42fn estimate_params(num_items: usize) -> (usize, usize) {
43    if num_items == 0 {
44        return (0, 0);
45    }
46    const TARGET_FP_RATE: f64 = 0.001;
47    use std::f64::consts::LN_2;
48
49    let n = num_items as f64;
50    let bits = ((n * TARGET_FP_RATE.ln()) / (-LN_2 * LN_2)).ceil() as u64;
51    // Round up to a power-of-two number of bytes.
52    let bits = std::cmp::max(bits, 8).next_power_of_two();
53    let m = bits as f64;
54
55    // Then, solve for a minimal `k` such that the probability of FP is <= TARGET_FP_RATE.
56    let mut k: f64 = 0.0;
57    let mut fp_rate = 1.0;
58    while fp_rate > TARGET_FP_RATE {
59        k += 1.0;
60        fp_rate = (1f64 - (-k / (m / n)).exp()).powf(k);
61    }
62    (bits as usize, k.round() as usize)
63}
64
65/// A read-only handle to a bloom filter.  To create a bloom filter, use `BloomFilterWriter`.
66/// Note that the bloom filter is *not* versioned; this must be managed by the caller.
67pub struct BloomFilterReader<V> {
68    data: BitVec,
69    seed: u64,
70    num_hashes: usize,
71    _type: PhantomData<V>,
72}
73
74pub struct BloomFilterStats {
75    pub size: usize,
76    pub num_hashes: usize,
77    // The percentage (rounded up to the nearest whole) of the bits which are set.
78    pub fill_percentage: usize,
79}
80
81impl<V: FuzzyHash> BloomFilterReader<V> {
82    /// Creates a BloomFilterReader by reading the serialized contents from `buf`.
83    /// `seed` and `num_hashes` must match the values passed into BloomFilterWriter.
84    pub fn read(buf: &[u8], seed: u64, num_hashes: usize) -> Result<Self, Error> {
85        Ok(Self { data: BitVec::from_bytes(buf), seed, num_hashes, _type: PhantomData::default() })
86    }
87
88    /// Returns whether the bloom filter *might* contain the given value (or any part of it, for
89    /// range-based keys.
90    pub fn maybe_contains(&self, value: &V) -> bool {
91        let mut num = 0;
92        for hash in value.fuzzy_hash() {
93            if self.maybe_contains_inner(hash) {
94                return true;
95            }
96            num += 1;
97            debug_assert!(num < 4, "Too many hash partitions");
98        }
99        false
100    }
101
102    fn maybe_contains_inner(&self, initial_hash: u64) -> bool {
103        for i in 0..self.num_hashes as u64 {
104            let seed = self.seed.wrapping_add(i);
105            let hash = crate::stable_hash::stable_hash_seeded(initial_hash, seed);
106            let idx = hash as usize % self.data.len();
107            if !self.data.get(idx).unwrap() {
108                return false;
109            }
110        }
111        true
112    }
113
114    /// Call sparingly; this is expensive to compute.
115    /// Note that the return value can be trivially cached since the reader is immutable.
116    pub fn stats(&self) -> BloomFilterStats {
117        BloomFilterStats {
118            size: self.data.len().div_ceil(8),
119            num_hashes: self.num_hashes,
120            fill_percentage: self.compute_fill_percentage(),
121        }
122    }
123
124    // Use sparingly; this is expensive to compute.
125    fn compute_fill_percentage(&self) -> usize {
126        if self.data.is_empty() {
127            return 0;
128        }
129        (100 * self.data.iter().filter(|x| *x).count()).div_ceil(self.data.len())
130    }
131
132    #[cfg(test)]
133    fn new_empty(num_items: usize) -> Self {
134        let (bits, num_hashes) = estimate_params(num_items);
135        Self {
136            data: BitVec::from_elem(bits, false),
137            seed: 0,
138            num_hashes,
139            _type: PhantomData::default(),
140        }
141    }
142
143    #[cfg(test)]
144    fn new_full(num_items: usize) -> Self {
145        let (bits, num_hashes) = estimate_params(num_items);
146        Self {
147            data: BitVec::from_elem(bits, true),
148            seed: 0,
149            num_hashes,
150            _type: PhantomData::default(),
151        }
152    }
153}
154
155/// A helper to build a bloom filter in-memory before serializing it.
156/// Note that the bloom filter is *not* versioned; this must be managed by the caller.
157pub struct BloomFilterWriter<V> {
158    data: BitVec,
159    seed: u64,
160    num_hashes: usize,
161    _type: PhantomData<V>,
162}
163
164impl<V: FuzzyHash> BloomFilterWriter<V> {
165    /// Creates a new bloom filter suitable for the given input size.  See module comments for the
166    /// heuristics used.
167    /// `seed` is a value which is mixed into hashes in the bloom filter.  Note that it is not used
168    /// in a secure manner so should not contain any secrets.  It should be unpredictable to prevent
169    /// timing attacks.
170    pub fn new(seed: u64, num_items: usize) -> Self {
171        let (bits, num_hashes) = estimate_params(num_items);
172        Self {
173            data: BitVec::from_elem(bits, false),
174            seed,
175            num_hashes,
176            _type: PhantomData::default(),
177        }
178    }
179
180    /// Returns the size the bloom filter will occupy when serialized.
181    pub fn serialized_size(&self) -> usize {
182        self.data.len() / 8
183    }
184
185    pub fn seed(&self) -> u64 {
186        self.seed
187    }
188
189    pub fn num_hashes(&self) -> usize {
190        self.num_hashes
191    }
192
193    pub fn write<W>(&self, writer: &mut W) -> Result<(), Error>
194    where
195        W: std::io::Write,
196    {
197        Ok(writer.write_all(&self.data.to_bytes()[..])?)
198    }
199
200    pub fn insert(&mut self, value: &V) {
201        for hash in value.fuzzy_hash() {
202            self.insert_inner(hash);
203        }
204    }
205
206    fn insert_inner(&mut self, initial_hash: u64) {
207        for i in 0..self.num_hashes as u64 {
208            let seed = self.seed.wrapping_add(i);
209            let hash = crate::stable_hash::stable_hash_seeded(initial_hash, seed);
210            let idx = hash as usize % self.data.len();
211            self.data.set(idx, true);
212        }
213    }
214
215    // Clear the bits in the bloom filter. Intended to be used for testing, e.g., to create an
216    // invalid bloom filter.
217    #[cfg(test)]
218    pub(crate) fn clear(&mut self) {
219        self.data.clear();
220    }
221}
222
223#[cfg(test)]
224impl<T> From<BloomFilterWriter<T>> for BloomFilterReader<T> {
225    fn from(writer: BloomFilterWriter<T>) -> Self {
226        Self {
227            data: writer.data,
228            seed: writer.seed,
229            num_hashes: writer.num_hashes,
230            _type: PhantomData::default(),
231        }
232    }
233}
234
235#[cfg(test)]
236mod tests {
237    use crate::lsm_tree::bloom_filter::{BloomFilterReader, BloomFilterWriter, estimate_params};
238    use crate::object_store::allocator::AllocatorKey;
239
240    #[test]
241    fn estimated_params() {
242        // Compare to https://hur.st/bloomfilter (keeping in mind the rounding-up of the size of the
243        // bloom filter).
244        assert_eq!(estimate_params(0), (0, 0));
245        assert_eq!(estimate_params(1), (16, 6));
246        assert_eq!(estimate_params(5000), (131072, 4));
247        assert_eq!(estimate_params(50000), (1048576, 4));
248        assert_eq!(estimate_params(5_000_000), (134217728, 4));
249    }
250
251    const TEST_KEYS: [i32; 4] = [0, 65535, i32::MAX, i32::MIN];
252
253    #[test]
254    fn test_empty() {
255        let filter = BloomFilterReader::new_empty(TEST_KEYS.len());
256        for key in &TEST_KEYS {
257            assert!(!filter.maybe_contains(key));
258        }
259    }
260
261    #[test]
262    fn test_full() {
263        let filter = BloomFilterReader::new_full(TEST_KEYS.len());
264        for key in &TEST_KEYS {
265            assert!(filter.maybe_contains(key));
266        }
267    }
268
269    #[test]
270    fn test_insert() {
271        for key in &TEST_KEYS {
272            // Use a new filter each time so we don't get false positives.
273            let mut filter = BloomFilterWriter::new(0, TEST_KEYS.len());
274            filter.insert(key);
275            let filter = BloomFilterReader::from(filter);
276            assert!(filter.maybe_contains(key));
277        }
278    }
279
280    #[test]
281    fn test_range_key() {
282        let mut filter = BloomFilterWriter::new(0, 2);
283        filter.insert(&AllocatorKey { device_range: 0..2097152 });
284        filter.insert(&AllocatorKey { device_range: 4194304..4194305 });
285        let filter = BloomFilterReader::from(filter);
286
287        assert!(filter.maybe_contains(&AllocatorKey { device_range: 0..1 }));
288        assert!(filter.maybe_contains(&AllocatorKey { device_range: 2097151..2097152 }));
289        assert!(!filter.maybe_contains(&AllocatorKey { device_range: 2097152..2097153 }));
290        assert!(!filter.maybe_contains(&AllocatorKey { device_range: 3145727..3145728 }));
291        assert!(filter.maybe_contains(&AllocatorKey { device_range: 4193404..4194305 }));
292
293        assert!(filter.maybe_contains(&AllocatorKey { device_range: 0..2097153 }));
294        assert!(filter.maybe_contains(&AllocatorKey { device_range: 2097152..4194305 }));
295        assert!(!filter.maybe_contains(&AllocatorKey { device_range: 104857600..104857601 }));
296    }
297
298    #[test]
299    fn test_serde() {
300        for key in &TEST_KEYS {
301            // Use a new filter each time so we don't get false positives.
302            let mut filter = BloomFilterWriter::new(0, 1);
303            filter.insert(key);
304            let num_hashes = filter.num_hashes();
305            let mut buf = vec![];
306            {
307                let mut cursor = std::io::Cursor::new(&mut buf);
308                filter.write(&mut cursor).expect("write failed");
309            }
310            let filter = BloomFilterReader::read(&buf[..], 0, num_hashes).expect("read failed");
311            assert!(filter.maybe_contains(key));
312        }
313    }
314}