fxfs/object_store/
extent_record.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// TODO(https://fxbug.dev/42178223): need validation after deserialization.
6
7use crate::checksum::{Checksums, ChecksumsV38};
8use crate::lsm_tree::types::{OrdLowerBound, OrdUpperBound};
9use crate::round::{round_down, round_up};
10use bit_vec::BitVec;
11use fprint::TypeFingerprint;
12use serde::{Deserialize, Serialize};
13use std::cmp::{max, min};
14use std::hash::Hash;
15use std::ops::Range;
16
17/// The common case for extents which cover the data payload of some object.
18pub const DEFAULT_DATA_ATTRIBUTE_ID: u64 = 0;
19
20/// For Blobs in Fxfs, we store the merkle tree at a well-known attribute.
21/// TODO(https://fxbug.dev/42073113): Is this the best place to store the merkle tree?  What about inline with
22/// data?
23pub const BLOB_MERKLE_ATTRIBUTE_ID: u64 = 1;
24
25/// For fsverity files in Fxfs, we store the merkle tree of the verified file at a well-known
26/// attribute.
27pub const FSVERITY_MERKLE_ATTRIBUTE_ID: u64 = 2;
28
29/// ExtentKey is a child of ObjectKey for Object attributes that have attached extents
30/// (at time of writing this was only the used for file contents).
31pub type ExtentKey = ExtentKeyV32;
32
33#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize, TypeFingerprint)]
34#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
35pub struct ExtentKeyV32 {
36    pub range: Range<u64>,
37}
38
39const EXTENT_HASH_BUCKET_SIZE: u64 = 1 * 1024 * 1024;
40
41pub struct ExtentKeyPartitionIterator {
42    range: Range<u64>,
43}
44
45impl Iterator for ExtentKeyPartitionIterator {
46    type Item = Range<u64>;
47
48    fn next(&mut self) -> Option<Self::Item> {
49        if self.range.start >= self.range.end {
50            None
51        } else {
52            let start = self.range.start;
53            self.range.start = start.saturating_add(EXTENT_HASH_BUCKET_SIZE);
54            let end = std::cmp::min(self.range.start, self.range.end);
55            Some(start..end)
56        }
57    }
58}
59
60impl ExtentKey {
61    /// Creates an ExtentKey.
62    pub fn new(range: std::ops::Range<u64>) -> Self {
63        Self { range }
64    }
65
66    /// Returns the range of bytes common between this extent and |other|.
67    pub fn overlap(&self, other: &ExtentKey) -> Option<Range<u64>> {
68        if self.range.end <= other.range.start || self.range.start >= other.range.end {
69            None
70        } else {
71            Some(max(self.range.start, other.range.start)..min(self.range.end, other.range.end))
72        }
73    }
74
75    /// Returns the search key for this extent; that is, a key which is <= this key under Ord and
76    /// OrdLowerBound.
77    /// This would be used when searching for an extent with |find| (when we want to find any
78    /// overlapping extent, which could include extents that start earlier).
79    /// For example, if the tree has extents 50..150 and 150..200 and we wish to read 100..200,
80    /// we'd search for 0..101 which would set the iterator to 50..150.
81    pub fn search_key(&self) -> Self {
82        assert_ne!(self.range.start, self.range.end);
83        ExtentKey::search_key_from_offset(self.range.start)
84    }
85
86    /// Similar to previous, but from an offset.  Returns a search key that will find the first
87    /// extent that touches offset..
88    pub fn search_key_from_offset(offset: u64) -> Self {
89        Self { range: 0..offset + 1 }
90    }
91
92    /// Returns the merge key for this extent; that is, a key which is <= this extent and any other
93    /// possibly overlapping extent, under Ord. This would be used to set the hint for |merge_into|.
94    ///
95    /// For example, if the tree has extents 0..50, 50..150 and 150..200 and we wish to insert
96    /// 100..150, we'd use a merge hint of 0..100 which would set the iterator to 50..150 (the first
97    /// element > 100..150 under Ord).
98    pub fn key_for_merge_into(&self) -> Self {
99        Self { range: 0..self.range.start }
100    }
101
102    /// Returns an iterator over the ExtentKey partitions which overlap this key (see `FuzzyHash`).
103    pub fn fuzzy_hash_partition(&self) -> ExtentKeyPartitionIterator {
104        ExtentKeyPartitionIterator {
105            range: round_down(self.range.start, EXTENT_HASH_BUCKET_SIZE)
106                ..round_up(self.range.end, EXTENT_HASH_BUCKET_SIZE).unwrap_or(u64::MAX),
107        }
108    }
109}
110
111// The normal comparison uses the end of the range before the start of the range. This makes
112// searching for records easier because it's easy to find K.. (where K is the key you are searching
113// for), which is what we want since our search routines find items with keys >= a search key.
114// OrdLowerBound orders by the start of an extent.
115impl OrdUpperBound for ExtentKey {
116    fn cmp_upper_bound(&self, other: &ExtentKey) -> std::cmp::Ordering {
117        // The comparison uses the end of the range so that we can more easily do queries.  Whilst
118        // it might be tempting to break ties by comparing the range start, next_key currently
119        // relies on keys with the same end being equal, and since we do not support overlapping
120        // keys within the same layer, ties can always be broken using layer index.  Insertions into
121        // the mutable layer should always be done using merge_into, which will ensure keys don't
122        // end up overlapping.
123        self.range.end.cmp(&other.range.end)
124    }
125}
126
127impl OrdLowerBound for ExtentKey {
128    // Orders by the start of the range rather than the end, and doesn't include the end in the
129    // comparison. This is used when merging, where we want to merge keys in lower-bound order.
130    fn cmp_lower_bound(&self, other: &ExtentKey) -> std::cmp::Ordering {
131        self.range.start.cmp(&other.range.start)
132    }
133}
134
135impl Ord for ExtentKey {
136    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
137        // We expect cmp_upper_bound and cmp_lower_bound to be used mostly, but ObjectKey needs an
138        // Ord method in order to compare other enum variants, and Transaction requires an ObjectKey
139        // to implement Ord.
140        self.range.start.cmp(&other.range.start).then(self.range.end.cmp(&other.range.end))
141    }
142}
143
144impl PartialOrd for ExtentKey {
145    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
146        Some(self.cmp(other))
147    }
148}
149
150/// The mode the extent is operating in. This changes how writes work to this region of the file.
151pub type ExtentMode = ExtentModeV38;
152
153#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TypeFingerprint)]
154pub enum ExtentModeV38 {
155    /// This extent doesn't have defined write semantics. The writer chooses how to handle the data
156    /// here. Notable uses of this are things which have their own separate checksum mechanism,
157    /// like the journal file and blobs.
158    Raw,
159    /// This extent uses copy-on-write semantics. We store the post-encryption checksums for data
160    /// validation. New writes to this logical range are written to new extents.
161    Cow(ChecksumsV38),
162    /// This extent uses overwrite semantics. The bitmap keeps track of blocks which have been
163    /// written to at least once. Blocks which haven't been written to at least once are logically
164    /// zero, so the bitmap needs to be accounted for while reading. While this extent exists, new
165    /// writes to this logical range will go to the same on-disk location.
166    OverwritePartial(BitVec),
167    /// This extent uses overwrite semantics. Every block in this extent has been written to at
168    /// least once, so we don't store the block bitmap anymore. While this extent exists, new
169    /// writes to this logical range will go to the same on-disk location.
170    Overwrite,
171}
172
173#[cfg(fuzz)]
174impl<'a> arbitrary::Arbitrary<'a> for ExtentMode {
175    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
176        Ok(match u.int_in_range(0..=3)? {
177            0 => ExtentMode::Raw,
178            1 => ExtentMode::Cow(u.arbitrary()?),
179            2 => ExtentMode::OverwritePartial(BitVec::from_bytes(u.arbitrary()?)),
180            3 => ExtentMode::Overwrite,
181            _ => unreachable!(),
182        })
183    }
184}
185
186/// ExtentValue is the payload for an extent in the object store, which describes where the extent
187/// is physically located.
188pub type ExtentValue = ExtentValueV38;
189
190#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TypeFingerprint)]
191#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
192pub enum ExtentValueV38 {
193    /// Indicates a deleted extent; that is, the logical range described by the extent key is
194    /// considered to be deleted.
195    None,
196    /// The location of the extent and other related information.  `key_id` identifies which of the
197    /// object's keys should be used.  Unencrypted files should use 0 (which can also be used for
198    /// encrypted files).  `mode` describes the write pattern for this extent.
199    Some { device_offset: u64, mode: ExtentModeV38, key_id: u64 },
200}
201
202impl ExtentValue {
203    pub fn new(device_offset: u64, mode: ExtentMode, key_id: u64) -> ExtentValue {
204        ExtentValue::Some { device_offset, mode, key_id }
205    }
206
207    pub fn new_raw(device_offset: u64, key_id: u64) -> ExtentValue {
208        Self::new(device_offset, ExtentMode::Raw, key_id)
209    }
210
211    /// Creates an ExtentValue with a checksum
212    pub fn with_checksum(device_offset: u64, checksums: Checksums, key_id: u64) -> ExtentValue {
213        Self::new(device_offset, ExtentMode::Cow(checksums), key_id)
214    }
215
216    /// Creates an ExtentValue for an overwrite range with no blocks written to yet.
217    pub fn blank_overwrite_extent(
218        device_offset: u64,
219        num_blocks: usize,
220        key_id: u64,
221    ) -> ExtentValue {
222        Self::new(
223            device_offset,
224            ExtentMode::OverwritePartial(BitVec::from_elem(num_blocks, false)),
225            key_id,
226        )
227    }
228
229    /// Creates an ExtentValue for an overwrite range with all the blocks initialized.
230    pub fn initialized_overwrite_extent(device_offset: u64, key_id: u64) -> ExtentValue {
231        Self::new(device_offset, ExtentMode::Overwrite, key_id)
232    }
233
234    /// Creates an ObjectValue for a deletion of an object extent.
235    pub fn deleted_extent() -> ExtentValue {
236        ExtentValue::None
237    }
238
239    pub fn is_deleted(&self) -> bool {
240        if let ExtentValue::None = self { true } else { false }
241    }
242
243    /// Returns a new ExtentValue offset by `amount`.  Both `amount` and `extent_len` must be
244    /// multiples of the underlying block size.
245    pub fn offset_by(&self, amount: u64, extent_len: u64) -> Self {
246        match self {
247            ExtentValue::None => Self::deleted_extent(),
248            ExtentValue::Some { device_offset, mode, key_id } => {
249                let mode = match mode {
250                    ExtentMode::Raw => ExtentMode::Raw,
251                    ExtentMode::Cow(checksums) => {
252                        if checksums.len() > 0 {
253                            let index = (amount / (extent_len / checksums.len() as u64)) as usize;
254                            ExtentMode::Cow(checksums.offset_by(index))
255                        } else {
256                            ExtentMode::Cow(Checksums::fletcher(Vec::new()))
257                        }
258                    }
259                    ExtentMode::Overwrite => ExtentMode::Overwrite,
260                    ExtentMode::OverwritePartial(bitmap) => {
261                        debug_assert!(bitmap.len() > 0);
262                        let index = (amount / (extent_len / bitmap.len() as u64)) as usize;
263                        ExtentMode::OverwritePartial(bitmap.clone().split_off(index))
264                    }
265                };
266                ExtentValue::Some { device_offset: device_offset + amount, mode, key_id: *key_id }
267            }
268        }
269    }
270
271    /// Returns a new ExtentValue after shrinking the extent from |original_len| to |new_len|.
272    pub fn shrunk(&self, original_len: u64, new_len: u64) -> Self {
273        match self {
274            ExtentValue::None => Self::deleted_extent(),
275            ExtentValue::Some { device_offset, mode, key_id } => {
276                let mode = match mode {
277                    ExtentMode::Raw => ExtentMode::Raw,
278                    ExtentMode::Cow(checksums) => {
279                        if checksums.len() > 0 {
280                            let len = (new_len / (original_len / checksums.len() as u64)) as usize;
281                            ExtentMode::Cow(checksums.shrunk(len))
282                        } else {
283                            ExtentMode::Cow(Checksums::fletcher(Vec::new()))
284                        }
285                    }
286                    ExtentMode::Overwrite => ExtentMode::Overwrite,
287                    ExtentMode::OverwritePartial(bitmap) => {
288                        debug_assert!(bitmap.len() > 0);
289                        let len = (new_len / (original_len / bitmap.len() as u64)) as usize;
290                        let mut new_bitmap = bitmap.clone();
291                        new_bitmap.truncate(len);
292                        ExtentMode::OverwritePartial(new_bitmap)
293                    }
294                };
295                ExtentValue::Some { device_offset: *device_offset, mode, key_id: *key_id }
296            }
297        }
298    }
299}
300
301#[cfg(test)]
302mod tests {
303    use super::{ExtentKey, ExtentMode, ExtentValue};
304    use crate::checksum::Checksums;
305    use crate::lsm_tree::types::{OrdLowerBound, OrdUpperBound};
306    use crate::object_store::VOLUME_DATA_KEY_ID;
307    use bit_vec::BitVec;
308    use std::cmp::Ordering;
309
310    #[test]
311    fn test_extent_cmp() {
312        let extent = ExtentKey::new(100..150);
313        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(0..100)), Ordering::Greater);
314        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(0..110)), Ordering::Greater);
315        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(0..150)), Ordering::Equal);
316        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(99..150)), Ordering::Equal);
317        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(100..150)), Ordering::Equal);
318        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(0..151)), Ordering::Less);
319        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(100..151)), Ordering::Less);
320        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(150..1000)), Ordering::Less);
321    }
322
323    #[test]
324    fn test_extent_cmp_lower_bound() {
325        let extent = ExtentKey::new(100..150);
326        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(0..100)), Ordering::Greater);
327        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(0..110)), Ordering::Greater);
328        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(0..150)), Ordering::Greater);
329        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(0..1000)), Ordering::Greater);
330        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(99..1000)), Ordering::Greater);
331        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(100..150)), Ordering::Equal);
332        // cmp_lower_bound does not check the upper bound of the range
333        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(100..1000)), Ordering::Equal);
334        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(101..102)), Ordering::Less);
335    }
336
337    #[test]
338    fn test_extent_search_and_insertion_key() {
339        let extent = ExtentKey::new(100..150);
340        assert_eq!(extent.search_key(), ExtentKey::new(0..101));
341        assert_eq!(extent.cmp_lower_bound(&extent.search_key()), Ordering::Greater);
342        assert_eq!(extent.cmp_upper_bound(&extent.search_key()), Ordering::Greater);
343        assert_eq!(extent.key_for_merge_into(), ExtentKey::new(0..100));
344        assert_eq!(extent.cmp_lower_bound(&extent.key_for_merge_into()), Ordering::Greater);
345    }
346
347    #[test]
348    fn extent_value_offset_by() {
349        assert_eq!(ExtentValue::None.offset_by(1024, 2048), ExtentValue::None);
350        assert_eq!(
351            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).offset_by(0, 2048),
352            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID)
353        );
354        assert_eq!(
355            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).offset_by(1024, 2048),
356            ExtentValue::new_raw(2048, VOLUME_DATA_KEY_ID)
357        );
358        assert_eq!(
359            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).offset_by(2048, 2048),
360            ExtentValue::new_raw(3072, VOLUME_DATA_KEY_ID)
361        );
362
363        let make_checksums = |range: std::ops::Range<u64>| Checksums::fletcher(range.collect());
364
365        // In these tests we are making block size 256.
366        assert_eq!(
367            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
368                .offset_by(0, 2048),
369            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
370        );
371        assert_eq!(
372            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
373                .offset_by(1024, 2048),
374            ExtentValue::with_checksum(2048, make_checksums(4..8), VOLUME_DATA_KEY_ID)
375        );
376        assert_eq!(
377            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
378                .offset_by(2048, 2048),
379            ExtentValue::with_checksum(3072, Checksums::fletcher(Vec::new()), VOLUME_DATA_KEY_ID)
380        );
381
382        // Takes a place to switch from zeros to ones. The goal is to make sure there is exactly
383        // one zero and then only ones where we expect offset to slice.
384        let make_bitmap = |cut, length| {
385            let mut begin_bitmap = BitVec::from_elem(cut, false);
386            let mut end_bitmap = BitVec::from_elem(length - cut, true);
387            begin_bitmap.append(&mut end_bitmap);
388            ExtentMode::OverwritePartial(begin_bitmap)
389        };
390        let make_extent =
391            |device_offset, mode| ExtentValue::Some { device_offset, mode, key_id: 0 };
392
393        assert_eq!(
394            make_extent(1024, make_bitmap(1, 8)).offset_by(0, 2048),
395            make_extent(1024, make_bitmap(1, 8))
396        );
397        assert_eq!(
398            make_extent(1024, make_bitmap(5, 8)).offset_by(1024, 2048),
399            make_extent(2048, make_bitmap(1, 4))
400        );
401        assert_eq!(
402            make_extent(1024, make_bitmap(0, 8)).offset_by(2048, 2048),
403            make_extent(3072, ExtentMode::OverwritePartial(BitVec::new()))
404        );
405
406        assert_eq!(
407            make_extent(1024, ExtentMode::Overwrite).offset_by(0, 2048),
408            make_extent(1024, ExtentMode::Overwrite)
409        );
410        assert_eq!(
411            make_extent(1024, ExtentMode::Overwrite).offset_by(1024, 2048),
412            make_extent(2048, ExtentMode::Overwrite)
413        );
414        assert_eq!(
415            make_extent(1024, ExtentMode::Overwrite).offset_by(2048, 2048),
416            make_extent(3072, ExtentMode::Overwrite)
417        );
418    }
419
420    #[test]
421    fn extent_value_shrunk() {
422        assert_eq!(ExtentValue::None.shrunk(2048, 1024), ExtentValue::None);
423        assert_eq!(
424            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).shrunk(2048, 2048),
425            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID)
426        );
427        assert_eq!(
428            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).shrunk(2048, 1024),
429            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID)
430        );
431        assert_eq!(
432            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).shrunk(2048, 0),
433            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID)
434        );
435
436        let make_checksums = |range: std::ops::Range<u64>| Checksums::fletcher(range.collect());
437
438        // In these tests we are making block size 256.
439        assert_eq!(
440            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
441                .shrunk(2048, 2048),
442            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
443        );
444        assert_eq!(
445            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
446                .shrunk(2048, 1024),
447            ExtentValue::with_checksum(1024, make_checksums(0..4), VOLUME_DATA_KEY_ID)
448        );
449        assert_eq!(
450            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
451                .shrunk(2048, 0),
452            ExtentValue::with_checksum(1024, Checksums::fletcher(Vec::new()), VOLUME_DATA_KEY_ID)
453        );
454
455        // Takes a place to switch from zeros to ones. The goal is to make sure there is exactly
456        // one zero and then only ones where we expect offset to slice.
457        let make_bitmap = |cut, length| {
458            let mut begin_bitmap = BitVec::from_elem(cut, false);
459            let mut end_bitmap = BitVec::from_elem(length - cut, true);
460            begin_bitmap.append(&mut end_bitmap);
461            ExtentMode::OverwritePartial(begin_bitmap)
462        };
463        let make_extent =
464            |device_offset, mode| ExtentValue::Some { device_offset, mode, key_id: 0 };
465
466        assert_eq!(
467            make_extent(1024, make_bitmap(1, 8)).shrunk(2048, 2048),
468            make_extent(1024, make_bitmap(1, 8))
469        );
470        assert_eq!(
471            make_extent(1024, make_bitmap(3, 8)).shrunk(2048, 1024),
472            make_extent(1024, make_bitmap(3, 4))
473        );
474        assert_eq!(
475            make_extent(1024, make_bitmap(0, 8)).shrunk(2048, 0),
476            make_extent(1024, ExtentMode::OverwritePartial(BitVec::new()))
477        );
478
479        assert_eq!(
480            make_extent(1024, ExtentMode::Overwrite).shrunk(2048, 2048),
481            make_extent(1024, ExtentMode::Overwrite)
482        );
483        assert_eq!(
484            make_extent(1024, ExtentMode::Overwrite).shrunk(2048, 1024),
485            make_extent(1024, ExtentMode::Overwrite)
486        );
487        assert_eq!(
488            make_extent(1024, ExtentMode::Overwrite).shrunk(2048, 0),
489            make_extent(1024, ExtentMode::Overwrite)
490        );
491    }
492}