Skip to main content

fxfs/object_store/
extent_record.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// TODO(https://fxbug.dev/42178223): need validation after deserialization.
6
7use crate::checksum::{Checksums, ChecksumsV38};
8use crate::lsm_tree::types::{OrdLowerBound, OrdUpperBound};
9use crate::round::{round_down, round_up};
10use bit_vec::BitVec;
11use fprint::TypeFingerprint;
12use serde::{Deserialize, Serialize};
13use std::cmp::{max, min};
14use std::hash::Hash;
15use std::ops::Range;
16
17/// The common case for extents which cover the data payload of some object.
18pub const DEFAULT_DATA_ATTRIBUTE_ID: u64 = 0;
19
20/// Contains a serialized `BlobMetadataUnversioned` struct. This is attribute may still exist on
21/// blobs but should no longer be written. Use `BLOB_METADATA_ATTRIBUTE_ID` instead.
22pub const BLOB_MERKLE_ATTRIBUTE_ID: u64 = 1;
23/// Contains a serialized and versioned `BlobMetadata` struct. Use `BlobMetadata::read_from` and
24/// `BlobMetadata::write_to` to access this attribute.
25pub const BLOB_METADATA_ATTRIBUTE_ID: u64 = 3;
26
27/// For fsverity files in Fxfs, we store the merkle tree of the verified file at a well-known
28/// attribute.
29pub const FSVERITY_MERKLE_ATTRIBUTE_ID: u64 = 2;
30
31/// ExtentKey is a child of ObjectKey for Object attributes that have attached extents
32/// (at time of writing this was only the used for file contents).
33pub type ExtentKey = ExtentKeyV32;
34
35#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize, TypeFingerprint)]
36#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
37pub struct ExtentKeyV32 {
38    pub range: Range<u64>,
39}
40
41const EXTENT_HASH_BUCKET_SIZE: u64 = 1 * 1024 * 1024;
42
43pub struct ExtentKeyPartitionIterator {
44    range: Range<u64>,
45}
46
47impl Iterator for ExtentKeyPartitionIterator {
48    type Item = Range<u64>;
49
50    fn next(&mut self) -> Option<Self::Item> {
51        if self.range.start >= self.range.end {
52            None
53        } else {
54            let start = self.range.start;
55            self.range.start = start.saturating_add(EXTENT_HASH_BUCKET_SIZE);
56            let end = std::cmp::min(self.range.start, self.range.end);
57            Some(start..end)
58        }
59    }
60}
61
62impl ExtentKey {
63    /// Creates an ExtentKey.
64    pub fn new(range: std::ops::Range<u64>) -> Self {
65        Self { range }
66    }
67
68    /// Returns the range of bytes common between this extent and |other|.
69    pub fn overlap(&self, other: &ExtentKey) -> Option<Range<u64>> {
70        if self.range.end <= other.range.start || self.range.start >= other.range.end {
71            None
72        } else {
73            Some(max(self.range.start, other.range.start)..min(self.range.end, other.range.end))
74        }
75    }
76
77    /// Returns the search key for this extent; that is, a key which is <= this key under Ord and
78    /// OrdLowerBound.
79    /// This would be used when searching for an extent with |find| (when we want to find any
80    /// overlapping extent, which could include extents that start earlier).
81    /// For example, if the tree has extents 50..150 and 150..200 and we wish to read 100..200,
82    /// we'd search for 0..101 which would set the iterator to 50..150.
83    pub fn search_key(&self) -> Self {
84        assert_ne!(self.range.start, self.range.end);
85        ExtentKey::search_key_from_offset(self.range.start)
86    }
87
88    /// Similar to previous, but from an offset.  Returns a search key that will find the first
89    /// extent that touches offset..
90    pub fn search_key_from_offset(offset: u64) -> Self {
91        Self { range: 0..offset + 1 }
92    }
93
94    /// Returns the merge key for this extent; that is, a key which is <= this extent and any other
95    /// possibly overlapping extent, under Ord. This would be used to set the hint for |merge_into|.
96    ///
97    /// For example, if the tree has extents 0..50, 50..150 and 150..200 and we wish to insert
98    /// 100..150, we'd use a merge hint of 0..100 which would set the iterator to 50..150 (the first
99    /// element > 100..150 under Ord).
100    pub fn key_for_merge_into(&self) -> Self {
101        Self { range: 0..self.range.start }
102    }
103
104    /// Returns an iterator over the ExtentKey partitions which overlap this key (see `FuzzyHash`).
105    pub fn fuzzy_hash_partition(&self) -> ExtentKeyPartitionIterator {
106        ExtentKeyPartitionIterator {
107            range: round_down(self.range.start, EXTENT_HASH_BUCKET_SIZE)
108                ..round_up(self.range.end, EXTENT_HASH_BUCKET_SIZE).unwrap_or(u64::MAX),
109        }
110    }
111}
112
113// The normal comparison uses the end of the range before the start of the range. This makes
114// searching for records easier because it's easy to find K.. (where K is the key you are searching
115// for), which is what we want since our search routines find items with keys >= a search key.
116// OrdLowerBound orders by the start of an extent.
117impl OrdUpperBound for ExtentKey {
118    fn cmp_upper_bound(&self, other: &ExtentKey) -> std::cmp::Ordering {
119        // The comparison uses the end of the range so that we can more easily do queries.  Whilst
120        // it might be tempting to break ties by comparing the range start, next_key currently
121        // relies on keys with the same end being equal, and since we do not support overlapping
122        // keys within the same layer, ties can always be broken using layer index.  Insertions into
123        // the mutable layer should always be done using merge_into, which will ensure keys don't
124        // end up overlapping.
125        self.range.end.cmp(&other.range.end)
126    }
127}
128
129impl OrdLowerBound for ExtentKey {
130    // Orders by the start of the range rather than the end, and doesn't include the end in the
131    // comparison. This is used when merging, where we want to merge keys in lower-bound order.
132    fn cmp_lower_bound(&self, other: &ExtentKey) -> std::cmp::Ordering {
133        self.range.start.cmp(&other.range.start)
134    }
135}
136
137impl Ord for ExtentKey {
138    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
139        // We expect cmp_upper_bound and cmp_lower_bound to be used mostly, but ObjectKey needs an
140        // Ord method in order to compare other enum variants, and Transaction requires an ObjectKey
141        // to implement Ord.
142        self.range.start.cmp(&other.range.start).then(self.range.end.cmp(&other.range.end))
143    }
144}
145
146impl PartialOrd for ExtentKey {
147    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
148        Some(self.cmp(other))
149    }
150}
151
152/// The mode the extent is operating in. This changes how writes work to this region of the file.
153pub type ExtentMode = ExtentModeV38;
154
155#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TypeFingerprint)]
156pub enum ExtentModeV38 {
157    /// This extent doesn't have defined write semantics. The writer chooses how to handle the data
158    /// here. Notable uses of this are things which have their own separate checksum mechanism,
159    /// like the journal file and blobs.
160    Raw,
161    /// This extent uses copy-on-write semantics. We store the post-encryption checksums for data
162    /// validation. New writes to this logical range are written to new extents.
163    Cow(ChecksumsV38),
164    /// This extent uses overwrite semantics. The bitmap keeps track of blocks which have been
165    /// written to at least once. Blocks which haven't been written to at least once are logically
166    /// zero, so the bitmap needs to be accounted for while reading. While this extent exists, new
167    /// writes to this logical range will go to the same on-disk location.
168    OverwritePartial(BitVec),
169    /// This extent uses overwrite semantics. Every block in this extent has been written to at
170    /// least once, so we don't store the block bitmap anymore. While this extent exists, new
171    /// writes to this logical range will go to the same on-disk location.
172    Overwrite,
173}
174
175#[cfg(fuzz)]
176impl<'a> arbitrary::Arbitrary<'a> for ExtentMode {
177    fn arbitrary(u: &mut arbitrary::Unstructured<'a>) -> arbitrary::Result<Self> {
178        Ok(match u.int_in_range(0..=3)? {
179            0 => ExtentMode::Raw,
180            1 => ExtentMode::Cow(u.arbitrary()?),
181            2 => ExtentMode::OverwritePartial(BitVec::from_bytes(u.arbitrary()?)),
182            3 => ExtentMode::Overwrite,
183            _ => unreachable!(),
184        })
185    }
186}
187
188/// ExtentValue is the payload for an extent in the object store, which describes where the extent
189/// is physically located.
190pub type ExtentValue = ExtentValueV38;
191
192#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, TypeFingerprint)]
193#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
194pub enum ExtentValueV38 {
195    /// Indicates a deleted extent; that is, the logical range described by the extent key is
196    /// considered to be deleted.
197    None,
198    /// The location of the extent and other related information.  `key_id` identifies which of the
199    /// object's keys should be used.  Unencrypted files should use 0 (which can also be used for
200    /// encrypted files).  `mode` describes the write pattern for this extent.
201    Some { device_offset: u64, mode: ExtentModeV38, key_id: u64 },
202}
203
204impl ExtentValue {
205    pub fn new(device_offset: u64, mode: ExtentMode, key_id: u64) -> ExtentValue {
206        ExtentValue::Some { device_offset, mode, key_id }
207    }
208
209    pub fn new_raw(device_offset: u64, key_id: u64) -> ExtentValue {
210        Self::new(device_offset, ExtentMode::Raw, key_id)
211    }
212
213    /// Creates an ExtentValue with a checksum
214    pub fn with_checksum(device_offset: u64, checksums: Checksums, key_id: u64) -> ExtentValue {
215        Self::new(device_offset, ExtentMode::Cow(checksums), key_id)
216    }
217
218    /// Creates an ExtentValue for an overwrite range with no blocks written to yet.
219    pub fn blank_overwrite_extent(
220        device_offset: u64,
221        num_blocks: usize,
222        key_id: u64,
223    ) -> ExtentValue {
224        Self::new(
225            device_offset,
226            ExtentMode::OverwritePartial(BitVec::from_elem(num_blocks, false)),
227            key_id,
228        )
229    }
230
231    /// Creates an ExtentValue for an overwrite range with all the blocks initialized.
232    pub fn initialized_overwrite_extent(device_offset: u64, key_id: u64) -> ExtentValue {
233        Self::new(device_offset, ExtentMode::Overwrite, key_id)
234    }
235
236    /// Creates an ObjectValue for a deletion of an object extent.
237    pub fn deleted_extent() -> ExtentValue {
238        ExtentValue::None
239    }
240
241    pub fn is_deleted(&self) -> bool {
242        if let ExtentValue::None = self { true } else { false }
243    }
244
245    /// Returns a new ExtentValue offset by `amount`.  Both `amount` and `extent_len` must be
246    /// multiples of the underlying block size.
247    pub fn offset_by(&self, amount: u64, extent_len: u64) -> Self {
248        match self {
249            ExtentValue::None => Self::deleted_extent(),
250            ExtentValue::Some { device_offset, mode, key_id } => {
251                let mode = match mode {
252                    ExtentMode::Raw => ExtentMode::Raw,
253                    ExtentMode::Cow(checksums) => {
254                        if checksums.len() > 0 {
255                            let index = (amount / (extent_len / checksums.len() as u64)) as usize;
256                            ExtentMode::Cow(checksums.offset_by(index))
257                        } else {
258                            ExtentMode::Cow(Checksums::fletcher(Vec::new()))
259                        }
260                    }
261                    ExtentMode::Overwrite => ExtentMode::Overwrite,
262                    ExtentMode::OverwritePartial(bitmap) => {
263                        debug_assert!(bitmap.len() > 0);
264                        let index = (amount / (extent_len / bitmap.len() as u64)) as usize;
265                        ExtentMode::OverwritePartial(bitmap.clone().split_off(index))
266                    }
267                };
268                ExtentValue::Some { device_offset: device_offset + amount, mode, key_id: *key_id }
269            }
270        }
271    }
272
273    /// Returns a new ExtentValue after shrinking the extent from |original_len| to |new_len|.
274    pub fn shrunk(&self, original_len: u64, new_len: u64) -> Self {
275        match self {
276            ExtentValue::None => Self::deleted_extent(),
277            ExtentValue::Some { device_offset, mode, key_id } => {
278                let mode = match mode {
279                    ExtentMode::Raw => ExtentMode::Raw,
280                    ExtentMode::Cow(checksums) => {
281                        if checksums.len() > 0 {
282                            let len = (new_len / (original_len / checksums.len() as u64)) as usize;
283                            ExtentMode::Cow(checksums.shrunk(len))
284                        } else {
285                            ExtentMode::Cow(Checksums::fletcher(Vec::new()))
286                        }
287                    }
288                    ExtentMode::Overwrite => ExtentMode::Overwrite,
289                    ExtentMode::OverwritePartial(bitmap) => {
290                        debug_assert!(bitmap.len() > 0);
291                        let len = (new_len / (original_len / bitmap.len() as u64)) as usize;
292                        let mut new_bitmap = bitmap.clone();
293                        new_bitmap.truncate(len);
294                        ExtentMode::OverwritePartial(new_bitmap)
295                    }
296                };
297                ExtentValue::Some { device_offset: *device_offset, mode, key_id: *key_id }
298            }
299        }
300    }
301}
302
303#[cfg(test)]
304mod tests {
305    use super::{ExtentKey, ExtentMode, ExtentValue};
306    use crate::checksum::Checksums;
307    use crate::lsm_tree::types::{OrdLowerBound, OrdUpperBound};
308    use crate::object_store::VOLUME_DATA_KEY_ID;
309    use bit_vec::BitVec;
310    use std::cmp::Ordering;
311
312    #[test]
313    fn test_extent_cmp() {
314        let extent = ExtentKey::new(100..150);
315        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(0..100)), Ordering::Greater);
316        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(0..110)), Ordering::Greater);
317        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(0..150)), Ordering::Equal);
318        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(99..150)), Ordering::Equal);
319        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(100..150)), Ordering::Equal);
320        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(0..151)), Ordering::Less);
321        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(100..151)), Ordering::Less);
322        assert_eq!(extent.cmp_upper_bound(&ExtentKey::new(150..1000)), Ordering::Less);
323    }
324
325    #[test]
326    fn test_extent_cmp_lower_bound() {
327        let extent = ExtentKey::new(100..150);
328        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(0..100)), Ordering::Greater);
329        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(0..110)), Ordering::Greater);
330        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(0..150)), Ordering::Greater);
331        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(0..1000)), Ordering::Greater);
332        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(99..1000)), Ordering::Greater);
333        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(100..150)), Ordering::Equal);
334        // cmp_lower_bound does not check the upper bound of the range
335        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(100..1000)), Ordering::Equal);
336        assert_eq!(extent.cmp_lower_bound(&ExtentKey::new(101..102)), Ordering::Less);
337    }
338
339    #[test]
340    fn test_extent_search_and_insertion_key() {
341        let extent = ExtentKey::new(100..150);
342        assert_eq!(extent.search_key(), ExtentKey::new(0..101));
343        assert_eq!(extent.cmp_lower_bound(&extent.search_key()), Ordering::Greater);
344        assert_eq!(extent.cmp_upper_bound(&extent.search_key()), Ordering::Greater);
345        assert_eq!(extent.key_for_merge_into(), ExtentKey::new(0..100));
346        assert_eq!(extent.cmp_lower_bound(&extent.key_for_merge_into()), Ordering::Greater);
347    }
348
349    #[test]
350    fn extent_value_offset_by() {
351        assert_eq!(ExtentValue::None.offset_by(1024, 2048), ExtentValue::None);
352        assert_eq!(
353            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).offset_by(0, 2048),
354            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID)
355        );
356        assert_eq!(
357            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).offset_by(1024, 2048),
358            ExtentValue::new_raw(2048, VOLUME_DATA_KEY_ID)
359        );
360        assert_eq!(
361            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).offset_by(2048, 2048),
362            ExtentValue::new_raw(3072, VOLUME_DATA_KEY_ID)
363        );
364
365        let make_checksums = |range: std::ops::Range<u64>| Checksums::fletcher(range.collect());
366
367        // In these tests we are making block size 256.
368        assert_eq!(
369            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
370                .offset_by(0, 2048),
371            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
372        );
373        assert_eq!(
374            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
375                .offset_by(1024, 2048),
376            ExtentValue::with_checksum(2048, make_checksums(4..8), VOLUME_DATA_KEY_ID)
377        );
378        assert_eq!(
379            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
380                .offset_by(2048, 2048),
381            ExtentValue::with_checksum(3072, Checksums::fletcher(Vec::new()), VOLUME_DATA_KEY_ID)
382        );
383
384        // Takes a place to switch from zeros to ones. The goal is to make sure there is exactly
385        // one zero and then only ones where we expect offset to slice.
386        let make_bitmap = |cut, length| {
387            let mut begin_bitmap = BitVec::from_elem(cut, false);
388            let mut end_bitmap = BitVec::from_elem(length - cut, true);
389            begin_bitmap.append(&mut end_bitmap);
390            ExtentMode::OverwritePartial(begin_bitmap)
391        };
392        let make_extent =
393            |device_offset, mode| ExtentValue::Some { device_offset, mode, key_id: 0 };
394
395        assert_eq!(
396            make_extent(1024, make_bitmap(1, 8)).offset_by(0, 2048),
397            make_extent(1024, make_bitmap(1, 8))
398        );
399        assert_eq!(
400            make_extent(1024, make_bitmap(5, 8)).offset_by(1024, 2048),
401            make_extent(2048, make_bitmap(1, 4))
402        );
403        assert_eq!(
404            make_extent(1024, make_bitmap(0, 8)).offset_by(2048, 2048),
405            make_extent(3072, ExtentMode::OverwritePartial(BitVec::new()))
406        );
407
408        assert_eq!(
409            make_extent(1024, ExtentMode::Overwrite).offset_by(0, 2048),
410            make_extent(1024, ExtentMode::Overwrite)
411        );
412        assert_eq!(
413            make_extent(1024, ExtentMode::Overwrite).offset_by(1024, 2048),
414            make_extent(2048, ExtentMode::Overwrite)
415        );
416        assert_eq!(
417            make_extent(1024, ExtentMode::Overwrite).offset_by(2048, 2048),
418            make_extent(3072, ExtentMode::Overwrite)
419        );
420    }
421
422    #[test]
423    fn extent_value_shrunk() {
424        assert_eq!(ExtentValue::None.shrunk(2048, 1024), ExtentValue::None);
425        assert_eq!(
426            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).shrunk(2048, 2048),
427            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID)
428        );
429        assert_eq!(
430            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).shrunk(2048, 1024),
431            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID)
432        );
433        assert_eq!(
434            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID).shrunk(2048, 0),
435            ExtentValue::new_raw(1024, VOLUME_DATA_KEY_ID)
436        );
437
438        let make_checksums = |range: std::ops::Range<u64>| Checksums::fletcher(range.collect());
439
440        // In these tests we are making block size 256.
441        assert_eq!(
442            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
443                .shrunk(2048, 2048),
444            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
445        );
446        assert_eq!(
447            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
448                .shrunk(2048, 1024),
449            ExtentValue::with_checksum(1024, make_checksums(0..4), VOLUME_DATA_KEY_ID)
450        );
451        assert_eq!(
452            ExtentValue::with_checksum(1024, make_checksums(0..8), VOLUME_DATA_KEY_ID)
453                .shrunk(2048, 0),
454            ExtentValue::with_checksum(1024, Checksums::fletcher(Vec::new()), VOLUME_DATA_KEY_ID)
455        );
456
457        // Takes a place to switch from zeros to ones. The goal is to make sure there is exactly
458        // one zero and then only ones where we expect offset to slice.
459        let make_bitmap = |cut, length| {
460            let mut begin_bitmap = BitVec::from_elem(cut, false);
461            let mut end_bitmap = BitVec::from_elem(length - cut, true);
462            begin_bitmap.append(&mut end_bitmap);
463            ExtentMode::OverwritePartial(begin_bitmap)
464        };
465        let make_extent =
466            |device_offset, mode| ExtentValue::Some { device_offset, mode, key_id: 0 };
467
468        assert_eq!(
469            make_extent(1024, make_bitmap(1, 8)).shrunk(2048, 2048),
470            make_extent(1024, make_bitmap(1, 8))
471        );
472        assert_eq!(
473            make_extent(1024, make_bitmap(3, 8)).shrunk(2048, 1024),
474            make_extent(1024, make_bitmap(3, 4))
475        );
476        assert_eq!(
477            make_extent(1024, make_bitmap(0, 8)).shrunk(2048, 0),
478            make_extent(1024, ExtentMode::OverwritePartial(BitVec::new()))
479        );
480
481        assert_eq!(
482            make_extent(1024, ExtentMode::Overwrite).shrunk(2048, 2048),
483            make_extent(1024, ExtentMode::Overwrite)
484        );
485        assert_eq!(
486            make_extent(1024, ExtentMode::Overwrite).shrunk(2048, 1024),
487            make_extent(1024, ExtentMode::Overwrite)
488        );
489        assert_eq!(
490            make_extent(1024, ExtentMode::Overwrite).shrunk(2048, 0),
491            make_extent(1024, ExtentMode::Overwrite)
492        );
493    }
494}