Skip to main content

fxfs/object_store/
extent.rs

1// Copyright 2026 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::lsm_tree::types::{OrdLowerBound, OrdUpperBound};
6use crate::round::{round_down, round_up};
7use crate::serialized_types::serialized_key::{KeyDeserializer, KeySerializer, SerializeKey};
8use crate::serialized_types::varint::Buffer;
9use fprint::TypeFingerprint;
10use serde::{Deserialize, Serialize};
11use std::cmp::{max, min};
12use std::hash::Hash;
13use std::ops::Range;
14
15/// Extent represents a physical or logical range of bytes, aligned to a 512-byte boundary.
16#[derive(Clone, Debug, Eq, Hash, PartialEq, Serialize, Deserialize, TypeFingerprint)]
17#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
18pub struct Extent(pub Range<u64>);
19
20impl Extent {
21    /// Returns the range of bytes common between this extent and |other|.
22    pub fn overlap(&self, other: &Extent) -> Option<Range<u64>> {
23        if self.end <= other.start || self.start >= other.end {
24            None
25        } else {
26            Some(max(self.start, other.start)..min(self.end, other.end))
27        }
28    }
29
30    /// Returns the search key for this extent; that is, a key which is <= this key under Ord and
31    /// OrdLowerBound.
32    /// This would be used when searching for an extent with |find| (when we want to find any
33    /// overlapping extent, which could include extents that start earlier).
34    /// For example, if the tree has extents 50..150 and 150..200 and we wish to read 100..200,
35    /// we'd search for 0..101 which would set the iterator to 50..150.
36    pub fn search_key(&self) -> Self {
37        assert_ne!(self.start, self.end);
38        Extent::search_key_from_offset(self.start)
39    }
40
41    /// Similar to previous, but from an offset.  Returns a search key that will find the first
42    /// extent that touches offset..
43    pub fn search_key_from_offset(offset: u64) -> Self {
44        Self(0..offset + 1)
45    }
46
47    /// Returns the merge key for this extent; that is, a key which is <= this extent and any other
48    /// possibly overlapping extent, under Ord. This would be used to set the hint for |merge_into|.
49    ///
50    /// For example, if the tree has extents 0..50, 50..150 and 150..200 and we wish to insert
51    /// 100..150, we'd use a merge hint of 0..100 which would set the iterator to 50..150 (the first
52    /// element > 100..150 under Ord).
53    pub fn key_for_merge_into(&self) -> Self {
54        Self(0..self.start)
55    }
56
57    /// Returns an iterator over the Extent partitions which overlap this key (see `FuzzyHash`).
58    pub fn fuzzy_hash_partition(&self) -> ExtentPartitionIterator {
59        ExtentPartitionIterator {
60            range: round_down(self.start, EXTENT_HASH_BUCKET_SIZE)
61                ..round_up(self.end, EXTENT_HASH_BUCKET_SIZE).unwrap_or(u64::MAX),
62        }
63    }
64}
65
66impl SerializeKey for Extent {
67    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
68        assert_eq!(self.0.end % 512, 0, "Extent end must be 512-byte aligned");
69        assert_eq!(self.0.start % 512, 0, "Extent start must be 512-byte aligned");
70        serializer.write_u64(self.0.end / 512);
71        serializer.write_u64(self.0.start / 512);
72    }
73
74    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, anyhow::Error> {
75        let end =
76            deserializer.read_u64()?.checked_mul(512).ok_or_else(|| anyhow::anyhow!("Overflow"))?;
77        let start =
78            deserializer.read_u64()?.checked_mul(512).ok_or_else(|| anyhow::anyhow!("Overflow"))?;
79        Ok(Self(start..end))
80    }
81}
82
83impl std::ops::Deref for Extent {
84    type Target = Range<u64>;
85    fn deref(&self) -> &Self::Target {
86        &self.0
87    }
88}
89
90impl std::ops::DerefMut for Extent {
91    fn deref_mut(&mut self) -> &mut Self::Target {
92        &mut self.0
93    }
94}
95
96impl From<Range<u64>> for Extent {
97    fn from(range: Range<u64>) -> Self {
98        Self(range)
99    }
100}
101
102impl From<Extent> for Range<u64> {
103    fn from(key: Extent) -> Self {
104        key.0
105    }
106}
107
108const EXTENT_HASH_BUCKET_SIZE: u64 = 1 * 1024 * 1024;
109
110pub struct ExtentPartitionIterator {
111    range: Range<u64>,
112}
113
114impl Iterator for ExtentPartitionIterator {
115    type Item = Range<u64>;
116
117    fn next(&mut self) -> Option<Self::Item> {
118        if self.range.start >= self.range.end {
119            None
120        } else {
121            let start = self.range.start;
122            self.range.start = start.saturating_add(EXTENT_HASH_BUCKET_SIZE);
123            let end = std::cmp::min(self.range.start, self.range.end);
124            Some(start..end)
125        }
126    }
127}
128
129// The normal comparison uses the end of the range before the start of the range. This makes
130// searching for records easier because it's easy to find K.. (where K is the key you are searching
131// for), which is what we want since our search routines find items with keys >= a search key.
132// OrdLowerBound orders by the start of an extent.
133impl OrdUpperBound for Extent {
134    fn cmp_upper_bound(&self, other: &Extent) -> std::cmp::Ordering {
135        // The comparison uses the end of the range so that we can more easily do queries. Ties
136        // are broken by comparing the range start to provide a total ordering consistent with
137        // serialization. Since we do not support overlapping keys within the same layer, ties can
138        // always be broken using layer index. Insertions into the mutable layer should always be
139        // done using merge_into, which will ensure keys don't end up overlapping.
140        self.end.cmp(&other.end).then(self.start.cmp(&other.start))
141    }
142}
143
144impl OrdLowerBound for Extent {
145    // Orders by the start of the range rather than the end, and doesn't include the end in the
146    // comparison. This is used when merging, where we want to merge keys in lower-bound order.
147    fn cmp_lower_bound(&self, other: &Extent) -> std::cmp::Ordering {
148        self.start.cmp(&other.start)
149    }
150}
151
152impl Ord for Extent {
153    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
154        // We expect cmp_upper_bound and cmp_lower_bound to be used mostly, but ObjectKey needs an
155        // Ord method in order to compare other enum variants, and Transaction requires an ObjectKey
156        // to implement Ord.
157        self.start.cmp(&other.start).then(self.end.cmp(&other.end))
158    }
159}
160
161impl PartialOrd for Extent {
162    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
163        Some(self.cmp(other))
164    }
165}
166
167#[cfg(test)]
168mod tests {
169    use super::Extent;
170    use crate::lsm_tree::types::{OrdLowerBound, OrdUpperBound};
171    use crate::serialized_types::serialized_key::{KeyDeserializer, SerializeKey};
172    use std::cmp::Ordering;
173
174    #[test]
175    fn test_extent_key_serialization() {
176        let key = Extent(1024..2048);
177        let mut buf = Vec::new();
178
179        // Serialize
180        {
181            let mut ser =
182                crate::serialized_types::serialized_key::KeySerializer::new(&mut buf, Some(0));
183            key.serialize_key_to(&mut ser);
184            ser.finalize();
185        }
186
187        let (mut deser, length) = KeyDeserializer::new(&buf, Some(0)).unwrap();
188        assert_eq!(length, buf.len());
189        let decoded_key = Extent::deserialize_key_from(&mut deser).unwrap();
190
191        assert_eq!(key, decoded_key);
192
193        // Verify bytes:
194        // 2048 / 512 = 4.
195        // 1024 / 512 = 2.
196        // Delta encoding applies to first field (end = 4). Base is 0. 4 - 0 = 4.
197        // Second field is start = 2. Base is None (taken). So writes 2.
198        // Buffer should be [0, 2, 4, 2].
199        assert_eq!(buf, vec![0, 2, 4, 2]);
200    }
201
202    #[test]
203    fn test_extent_key_deserialization_overflow() {
204        let mut buf = Vec::new();
205        {
206            let mut ser =
207                crate::serialized_types::serialized_key::KeySerializer::new(&mut buf, None);
208            ser.write_u64(u64::MAX);
209            ser.write_u64(u64::MAX);
210            ser.finalize();
211        }
212        let (mut deser, length) = KeyDeserializer::new(&buf, None).unwrap();
213        assert_eq!(length, buf.len());
214        let result = Extent::deserialize_key_from(&mut deser);
215        assert!(result.is_err());
216        assert_eq!(result.unwrap_err().to_string(), "Overflow");
217    }
218
219    #[test]
220    #[should_panic(expected = "Extent end must be 512-byte aligned")]
221    fn test_extent_key_serialization_unaligned_end_panics() {
222        let key = Extent(1024..2049);
223        let mut buf = Vec::new();
224        let mut ser = crate::serialized_types::serialized_key::KeySerializer::new(&mut buf, None);
225        key.serialize_key_to(&mut ser);
226    }
227
228    #[test]
229    #[should_panic(expected = "Extent start must be 512-byte aligned")]
230    fn test_extent_key_serialization_unaligned_start_panics() {
231        let key = Extent(1025..2048);
232        let mut buf = Vec::new();
233        let mut ser = crate::serialized_types::serialized_key::KeySerializer::new(&mut buf, None);
234        key.serialize_key_to(&mut ser);
235    }
236
237    #[test]
238    fn test_extent_cmp() {
239        let extent = Extent(100..150);
240        assert_eq!(extent.cmp_upper_bound(&Extent(0..100)), Ordering::Greater);
241        assert_eq!(extent.cmp_upper_bound(&Extent(0..110)), Ordering::Greater);
242        assert_eq!(extent.cmp_upper_bound(&Extent(0..150)), Ordering::Greater);
243        assert_eq!(extent.cmp_upper_bound(&Extent(99..150)), Ordering::Greater);
244        assert_eq!(extent.cmp_upper_bound(&Extent(100..150)), Ordering::Equal);
245        assert_eq!(extent.cmp_upper_bound(&Extent(0..151)), Ordering::Less);
246        assert_eq!(extent.cmp_upper_bound(&Extent(100..151)), Ordering::Less);
247        assert_eq!(extent.cmp_upper_bound(&Extent(150..1000)), Ordering::Less);
248        assert_eq!(extent.cmp_upper_bound(&Extent(101..150)), Ordering::Less);
249    }
250
251    #[test]
252    fn test_extent_cmp_lower_bound() {
253        let extent = Extent(100..150);
254        assert_eq!(extent.cmp_lower_bound(&Extent(0..100)), Ordering::Greater);
255        assert_eq!(extent.cmp_lower_bound(&Extent(0..110)), Ordering::Greater);
256        assert_eq!(extent.cmp_lower_bound(&Extent(0..150)), Ordering::Greater);
257        assert_eq!(extent.cmp_lower_bound(&Extent(0..1000)), Ordering::Greater);
258        assert_eq!(extent.cmp_lower_bound(&Extent(99..1000)), Ordering::Greater);
259        assert_eq!(extent.cmp_lower_bound(&Extent(100..150)), Ordering::Equal);
260        // cmp_lower_bound does not check the upper bound of the range
261        assert_eq!(extent.cmp_lower_bound(&Extent(100..1000)), Ordering::Equal);
262        assert_eq!(extent.cmp_lower_bound(&Extent(101..102)), Ordering::Less);
263    }
264
265    #[test]
266    fn test_extent_search_and_insertion_key() {
267        let extent = Extent(100..150);
268        assert_eq!(extent.search_key(), Extent(0..101));
269        assert_eq!(extent.cmp_lower_bound(&extent.search_key()), Ordering::Greater);
270        assert_eq!(extent.cmp_upper_bound(&extent.search_key()), Ordering::Greater);
271        assert_eq!(extent.key_for_merge_into(), Extent(0..100));
272        assert_eq!(extent.cmp_lower_bound(&extent.key_for_merge_into()), Ordering::Greater);
273    }
274}