Skip to main content

fxfs/object_store/
object_record.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod legacy;
6
7pub use legacy::*;
8
9// TODO(https://fxbug.dev/42178223): need validation after deserialization.
10use crate::checksum::Checksums;
11use crate::log::error;
12use crate::lsm_tree::types::{
13    FuzzyHash, Item, ItemRef, LayerKey, MergeType, OrdLowerBound, OrdUpperBound, RangeKey,
14    SortByU64, Value,
15};
16use crate::object_store::extent_record::{
17    ExtentKey, ExtentKeyPartitionIterator, ExtentKeyV32, ExtentValue, ExtentValueV38,
18};
19use crate::serialized_types::{Migrate, Versioned, migrate_nodefault, migrate_to_version};
20use fprint::TypeFingerprint;
21use fxfs_crypto::{WrappedKey, WrappingKeyId};
22use fxfs_unicode::CasefoldString;
23use serde::{Deserialize, Serialize};
24use std::collections::BTreeMap;
25use std::default::Default;
26use std::hash::Hash;
27use std::time::{Duration, SystemTime, UNIX_EPOCH};
28
29/// ObjectDescriptor is the set of possible records in the object store.
30pub type ObjectDescriptor = ObjectDescriptorV32;
31
32#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
33#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
34pub enum ObjectDescriptorV32 {
35    /// A file (in the generic sense; i.e. an object with some attributes).
36    File,
37    /// A directory (in the generic sense; i.e. an object with children).
38    Directory,
39    /// A volume, which is the root of a distinct object store containing Files and Directories.
40    Volume,
41    /// A symbolic link.
42    Symlink,
43}
44
45/// For specifying what property of the project is being addressed.
46pub type ProjectProperty = ProjectPropertyV32;
47
48#[derive(
49    Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize, TypeFingerprint,
50)]
51#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
52pub enum ProjectPropertyV32 {
53    /// The configured limit for the project.
54    Limit,
55    /// The currently tracked usage for the project.
56    Usage,
57}
58
59pub type ObjectKeyData = ObjectKeyDataV54;
60
61#[derive(
62    Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize, Deserialize, TypeFingerprint,
63)]
64#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
65pub enum ObjectKeyDataV54 {
66    /// A generic, untyped object.  This must come first and sort before all other keys for a given
67    /// object because it's also used as a tombstone and it needs to merge with all following keys.
68    Object,
69    /// Encryption keys for an object.
70    Keys,
71    /// An attribute associated with an object.  It has a 64-bit ID.
72    Attribute(u64, AttributeKeyV32),
73    /// A child of a directory.
74    Child { name: String },
75    /// A graveyard entry for an entire object.
76    GraveyardEntry { object_id: u64 },
77    /// Project ID info. This should only be attached to the volume's root node. Used to address the
78    /// configured limit and the usage tracking which are ordered after the `project_id` to provide
79    /// locality of the two related values.
80    Project { project_id: u64, property: ProjectPropertyV32 },
81    /// An extended attribute associated with an object. It stores the name used for the extended
82    /// attribute, which has a maximum size of 255 bytes enforced by fuchsia.io.
83    ExtendedAttribute {
84        #[serde(with = "crate::zerocopy_serialization")]
85        name: Vec<u8>,
86    },
87    /// A graveyard entry for an attribute.
88    GraveyardAttributeEntry { object_id: u64, attribute_id: u64 },
89    /// A child of an encrypted directory.  We store the filename in its encrypted form.  hash_code
90    /// is the hash of the casefolded human-readable name if a directory is also casefolded.  In
91    /// some legacy cases, this is also used in non-casefolded cases, and in some of those cases the
92    /// hash code can be 0.  Going forward, these cases are covered by `EncryptedChild` below.
93    EncryptedCasefoldChild(EncryptedCasefoldChild),
94    /// Case-insensitive child (legacy).
95    LegacyCasefoldChild(CasefoldString),
96    /// An encrypted child that does not use case folding.
97    EncryptedChild(EncryptedChild),
98    /// A child of a directory that uses the casefold feature.
99    /// (i.e. case insensitive, case preserving names)
100    CasefoldChild { hash_code: u32, name: String },
101}
102
103#[derive(
104    Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize, Deserialize, TypeFingerprint,
105)]
106#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
107pub struct EncryptedCasefoldChild {
108    pub hash_code: u32,
109    #[serde(with = "crate::zerocopy_serialization")]
110    pub name: Vec<u8>,
111}
112
113#[derive(
114    Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize, Deserialize, TypeFingerprint,
115)]
116#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
117pub struct EncryptedChild(#[serde(with = "crate::zerocopy_serialization")] pub Vec<u8>);
118
119pub type AttributeKey = AttributeKeyV32;
120
121#[derive(
122    Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize, TypeFingerprint,
123)]
124#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
125pub enum AttributeKeyV32 {
126    // Order here is important: code expects Attribute to precede Extent.
127    Attribute,
128    Extent(ExtentKeyV32),
129}
130
131/// ObjectKey is a key in the object store.
132pub type ObjectKey = ObjectKeyV54;
133
134#[derive(
135    Clone,
136    Debug,
137    Eq,
138    Ord,
139    Hash,
140    PartialEq,
141    PartialOrd,
142    Serialize,
143    Deserialize,
144    TypeFingerprint,
145    Versioned,
146)]
147#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
148pub struct ObjectKeyV54 {
149    /// The ID of the object referred to.
150    pub object_id: u64,
151    /// The type and data of the key.
152    pub data: ObjectKeyDataV54,
153}
154
155impl SortByU64 for ObjectKey {
156    fn get_leading_u64(&self) -> u64 {
157        self.object_id
158    }
159}
160
161impl ObjectKey {
162    /// Creates a generic ObjectKey.
163    pub fn object(object_id: u64) -> Self {
164        Self { object_id: object_id, data: ObjectKeyData::Object }
165    }
166
167    /// Creates an ObjectKey for encryption keys.
168    pub fn keys(object_id: u64) -> Self {
169        Self { object_id, data: ObjectKeyData::Keys }
170    }
171
172    /// Creates an ObjectKey for an attribute.
173    pub fn attribute(object_id: u64, attribute_id: u64, key: AttributeKey) -> Self {
174        Self { object_id, data: ObjectKeyData::Attribute(attribute_id, key) }
175    }
176
177    /// Creates an ObjectKey for an extent.
178    pub fn extent(object_id: u64, attribute_id: u64, range: std::ops::Range<u64>) -> Self {
179        Self {
180            object_id,
181            data: ObjectKeyData::Attribute(
182                attribute_id,
183                AttributeKey::Extent(ExtentKey::new(range)),
184            ),
185        }
186    }
187
188    /// Creates an ObjectKey from an extent.
189    pub fn from_extent(object_id: u64, attribute_id: u64, extent: ExtentKey) -> Self {
190        Self {
191            object_id,
192            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(extent)),
193        }
194    }
195
196    /// Creates an ObjectKey for a child.
197    pub fn child(object_id: u64, name: &str, dir_type: DirType) -> Self {
198        match dir_type {
199            DirType::Casefold => {
200                let casefolded_name: String = fxfs_unicode::casefold(name.chars()).collect();
201                let hash_code = fscrypt::direntry::tea_hash_filename(casefolded_name.as_bytes());
202                Self {
203                    object_id,
204                    data: ObjectKeyData::CasefoldChild { hash_code, name: name.into() },
205                }
206            }
207            DirType::LegacyCasefold => Self {
208                object_id,
209                data: ObjectKeyData::LegacyCasefoldChild(CasefoldString::new(name.into())),
210            },
211            DirType::Normal => Self { object_id, data: ObjectKeyData::Child { name: name.into() } },
212            DirType::Encrypted(_) | DirType::EncryptedCasefold(_) => {
213                // These shouldn't be used directly; encrypted_child should be used instead.
214                panic!("Encrypted modes require an encrypted name");
215            }
216        }
217    }
218
219    /// Creates an ObjectKey for an encrypted child.
220    ///
221    /// The hash_code is important here -- especially for fscrypt as it affects the
222    /// name of locked files.
223    ///
224    /// For case-insensitive lookups in large encrypted directories, we lose the ability to binary
225    /// search for an entry of interest because encryption breaks our sort order. In these cases
226    /// we prefix records with a 32-bit hash based on the stable *casefolded* name. Hash collisions
227    /// aside, this lets us jump straight to the entry of interest, if it exists.
228    pub fn encrypted_child(object_id: u64, name: Vec<u8>, hash_code: Option<u32>) -> Self {
229        if let Some(hash_code) = hash_code {
230            Self {
231                object_id,
232                data: ObjectKeyData::EncryptedCasefoldChild(EncryptedCasefoldChild {
233                    hash_code,
234                    name,
235                }),
236            }
237        } else {
238            Self { object_id, data: ObjectKeyData::EncryptedChild(EncryptedChild(name)) }
239        }
240    }
241
242    /// Creates a graveyard entry for an object.
243    pub fn graveyard_entry(graveyard_object_id: u64, object_id: u64) -> Self {
244        Self { object_id: graveyard_object_id, data: ObjectKeyData::GraveyardEntry { object_id } }
245    }
246
247    /// Creates a graveyard entry for an attribute.
248    pub fn graveyard_attribute_entry(
249        graveyard_object_id: u64,
250        object_id: u64,
251        attribute_id: u64,
252    ) -> Self {
253        Self {
254            object_id: graveyard_object_id,
255            data: ObjectKeyData::GraveyardAttributeEntry { object_id, attribute_id },
256        }
257    }
258
259    /// Creates an ObjectKey for a ProjectLimit entry.
260    pub fn project_limit(object_id: u64, project_id: u64) -> Self {
261        Self {
262            object_id,
263            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Limit },
264        }
265    }
266
267    /// Creates an ObjectKey for a ProjectUsage entry.
268    pub fn project_usage(object_id: u64, project_id: u64) -> Self {
269        Self {
270            object_id,
271            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Usage },
272        }
273    }
274
275    pub fn extended_attribute(object_id: u64, name: Vec<u8>) -> Self {
276        Self { object_id, data: ObjectKeyData::ExtendedAttribute { name } }
277    }
278
279    /// Returns the merge key for this key; that is, a key which is <= this key and any
280    /// other possibly overlapping key, under Ord. This would be used for the hint in |merge_into|.
281    pub fn key_for_merge_into(&self) -> Self {
282        if let Self {
283            object_id,
284            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
285        } = self
286        {
287            Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.key_for_merge_into()))
288        } else {
289            self.clone()
290        }
291    }
292}
293
294impl OrdUpperBound for ObjectKey {
295    fn cmp_upper_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
296        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
297            (
298                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
299                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
300            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_upper_bound(right_extent)),
301            _ => self.data.cmp(&other.data),
302        })
303    }
304}
305
306impl OrdLowerBound for ObjectKey {
307    fn cmp_lower_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
308        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
309            (
310                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
311                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
312            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_lower_bound(right_extent)),
313            _ => self.data.cmp(&other.data),
314        })
315    }
316}
317
318impl LayerKey for ObjectKey {
319    fn merge_type(&self) -> MergeType {
320        // This listing is intentionally exhaustive to force folks to think about how certain
321        // subsets of the keyspace are merged.
322        match self.data {
323            ObjectKeyData::Object
324            | ObjectKeyData::Keys
325            | ObjectKeyData::Attribute(..)
326            | ObjectKeyData::Child { .. }
327            | ObjectKeyData::EncryptedChild(_)
328            | ObjectKeyData::EncryptedCasefoldChild(_)
329            | ObjectKeyData::CasefoldChild { .. }
330            | ObjectKeyData::LegacyCasefoldChild(_)
331            | ObjectKeyData::GraveyardEntry { .. }
332            | ObjectKeyData::GraveyardAttributeEntry { .. }
333            | ObjectKeyData::Project { property: ProjectProperty::Limit, .. }
334            | ObjectKeyData::ExtendedAttribute { .. } => MergeType::OptimizedMerge,
335            ObjectKeyData::Project { property: ProjectProperty::Usage, .. } => MergeType::FullMerge,
336        }
337    }
338
339    fn next_key(&self) -> Option<Self> {
340        match self.data {
341            ObjectKeyData::Attribute(_, AttributeKey::Extent(_)) => {
342                let mut key = self.clone();
343                if let ObjectKey {
344                    data: ObjectKeyData::Attribute(_, AttributeKey::Extent(ExtentKey { range })),
345                    ..
346                } = &mut key
347                {
348                    // We want a key such that cmp_lower_bound returns Greater for any key which
349                    // starts after end, and a key such that if you search for it, you'll get an
350                    // extent whose end > range.end.
351                    *range = range.end..range.end + 1;
352                }
353                Some(key)
354            }
355            _ => None,
356        }
357    }
358
359    fn search_key(&self) -> Self {
360        if let Self {
361            object_id,
362            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
363        } = self
364        {
365            Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.search_key()))
366        } else {
367            self.clone()
368        }
369    }
370}
371
372impl RangeKey for ObjectKey {
373    fn overlaps(&self, other: &Self) -> bool {
374        if self.object_id != other.object_id {
375            return false;
376        }
377        match (&self.data, &other.data) {
378            (
379                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_key)),
380                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_key)),
381            ) if *left_attr_id == *right_attr_id => {
382                left_key.range.end > right_key.range.start
383                    && left_key.range.start < right_key.range.end
384            }
385            (a, b) => a == b,
386        }
387    }
388}
389
390pub enum ObjectKeyFuzzyHashIterator {
391    ExtentKey(/* object_id */ u64, /* attribute_id */ u64, ExtentKeyPartitionIterator),
392    NotExtentKey(/* hash */ Option<u64>),
393}
394
395impl Iterator for ObjectKeyFuzzyHashIterator {
396    type Item = u64;
397
398    fn next(&mut self) -> Option<Self::Item> {
399        match self {
400            Self::ExtentKey(oid, attr_id, extent_keys) => extent_keys.next().map(|range| {
401                let key = ObjectKey::extent(*oid, *attr_id, range);
402                crate::stable_hash::stable_hash(key)
403            }),
404            Self::NotExtentKey(hash) => hash.take(),
405        }
406    }
407}
408
409impl FuzzyHash for ObjectKey {
410    fn fuzzy_hash(&self) -> impl Iterator<Item = u64> {
411        match &self.data {
412            ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent)) => {
413                ObjectKeyFuzzyHashIterator::ExtentKey(
414                    self.object_id,
415                    *attr_id,
416                    extent.fuzzy_hash_partition(),
417                )
418            }
419            _ => {
420                let hash = crate::stable_hash::stable_hash(self);
421                ObjectKeyFuzzyHashIterator::NotExtentKey(Some(hash))
422            }
423        }
424    }
425
426    fn is_range_key(&self) -> bool {
427        match &self.data {
428            ObjectKeyData::Attribute(_, AttributeKey::Extent(_)) => true,
429            _ => false,
430        }
431    }
432}
433
434/// UNIX epoch based timestamp in the UTC timezone.
435pub type Timestamp = TimestampV49;
436
437#[derive(
438    Copy,
439    Clone,
440    Debug,
441    Default,
442    Eq,
443    PartialEq,
444    Ord,
445    PartialOrd,
446    Serialize,
447    Deserialize,
448    TypeFingerprint,
449)]
450#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
451pub struct TimestampV49 {
452    nanos: u64,
453}
454
455impl Timestamp {
456    const NSEC_PER_SEC: u64 = 1_000_000_000;
457
458    pub fn now() -> Self {
459        SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO).into()
460    }
461
462    pub const fn zero() -> Self {
463        Self { nanos: 0 }
464    }
465
466    pub const fn from_nanos(nanos: u64) -> Self {
467        Self { nanos }
468    }
469
470    pub fn from_secs_and_nanos(secs: u64, nanos: u32) -> Self {
471        let Some(secs_in_nanos) = secs.checked_mul(Self::NSEC_PER_SEC) else {
472            error!("Fxfs doesn't support dates past 2554-07-21");
473            return Self { nanos: u64::MAX };
474        };
475        let Some(nanos) = secs_in_nanos.checked_add(nanos as u64) else {
476            error!("Fxfs doesn't support dates past 2554-07-21");
477            return Self { nanos: u64::MAX };
478        };
479        Self { nanos }
480    }
481
482    /// Returns the total number of nanoseconds represented by this `Timestamp` since the Unix
483    /// epoch.
484    pub fn as_nanos(&self) -> u64 {
485        self.nanos
486    }
487
488    /// Returns the fractional nanoseconds represented by this `Timestamp`.
489    pub fn subsec_nanos(&self) -> u32 {
490        (self.nanos % Self::NSEC_PER_SEC) as u32
491    }
492
493    /// Returns the total number of whole seconds represented by this `Timestamp` since the Unix
494    /// epoch.
495    pub fn as_secs(&self) -> u64 {
496        self.nanos / Self::NSEC_PER_SEC
497    }
498}
499
500impl From<std::time::Duration> for Timestamp {
501    fn from(duration: std::time::Duration) -> Self {
502        Self::from_secs_and_nanos(duration.as_secs(), duration.subsec_nanos())
503    }
504}
505
506impl From<Timestamp> for std::time::Duration {
507    fn from(timestamp: Timestamp) -> std::time::Duration {
508        Duration::from_nanos(timestamp.nanos)
509    }
510}
511
512pub type ObjectKind = ObjectKindV54;
513
514#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, TypeFingerprint)]
515#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
516pub enum DirType {
517    Normal,
518    Encrypted(WrappingKeyId),
519    /// Legacy casefolded mode.
520    LegacyCasefold,
521    Casefold,
522    EncryptedCasefold(WrappingKeyId),
523}
524
525impl DirType {
526    pub fn is_casefold(&self) -> bool {
527        matches!(self, DirType::LegacyCasefold | DirType::Casefold | DirType::EncryptedCasefold(_))
528    }
529
530    pub fn is_encrypted(&self) -> bool {
531        matches!(self, DirType::Encrypted(_) | DirType::EncryptedCasefold(_))
532    }
533
534    pub fn with_encryption(self, id: WrappingKeyId) -> Self {
535        match self {
536            DirType::Normal => DirType::Encrypted(id),
537            DirType::Casefold => DirType::EncryptedCasefold(id),
538            _ => self,
539        }
540    }
541
542    pub fn with_casefold(self, val: bool) -> Self {
543        match (val, self) {
544            (true, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
545                DirType::EncryptedCasefold(id)
546            }
547            (true, _) => DirType::Casefold,
548            (false, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
549                DirType::Encrypted(id)
550            }
551            (false, _) => DirType::Normal,
552        }
553    }
554
555    pub fn wrapping_key_id(&self) -> Option<WrappingKeyId> {
556        match self {
557            DirType::Encrypted(id) | DirType::EncryptedCasefold(id) => Some(*id),
558            _ => None,
559        }
560    }
561}
562
563impl Default for DirType {
564    fn default() -> Self {
565        DirType::Normal
566    }
567}
568
569#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
570#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
571pub enum ObjectKindV54 {
572    File {
573        /// The number of references to this file.
574        refs: u64,
575    },
576    Directory {
577        /// The number of sub-directories in this directory.
578        sub_dirs: u64,
579        /// The type of directory (encryption, casefolding, etc.)
580        dir_type: DirType,
581    },
582    Graveyard,
583    Symlink {
584        /// The number of references to this symbolic link.
585        refs: u64,
586        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
587        /// interpret it however they like.
588        #[serde(with = "crate::zerocopy_serialization")]
589        link: Box<[u8]>,
590    },
591    EncryptedSymlink {
592        /// The number of references to this symbolic link.
593        refs: u64,
594        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
595        /// interpret it however they like.
596        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
597        /// volume's data key.
598        #[serde(with = "crate::zerocopy_serialization")]
599        link: Box<[u8]>,
600    },
601}
602
603#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
604#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
605pub enum ObjectKindV49 {
606    File {
607        /// The number of references to this file.
608        refs: u64,
609    },
610    Directory {
611        /// The number of sub-directories in this directory.
612        sub_dirs: u64,
613        /// If set, contains the wrapping key id used to encrypt the file contents and filenames in
614        /// this directory.
615        wrapping_key_id: Option<WrappingKeyId>,
616        /// If true, all files and sub-directories created in this directory will support case
617        /// insensitive (but case-preserving) file naming.
618        casefold: bool,
619    },
620    Graveyard,
621    Symlink {
622        /// The number of references to this symbolic link.
623        refs: u64,
624        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
625        /// interpret it however they like.
626        #[serde(with = "crate::zerocopy_serialization")]
627        link: Box<[u8]>,
628    },
629    EncryptedSymlink {
630        /// The number of references to this symbolic link.
631        refs: u64,
632        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
633        /// interpret it however they like.
634        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
635        /// same encryption scheme as the one used to encrypt filenames.
636        #[serde(with = "crate::zerocopy_serialization")]
637        link: Box<[u8]>,
638    },
639}
640
641/// This consists of POSIX attributes that are not used in Fxfs but it may be meaningful to some
642/// clients to have the ability to to set and retrieve these values.
643pub type PosixAttributes = PosixAttributesV32;
644
645#[derive(Clone, Debug, Copy, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
646#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
647pub struct PosixAttributesV32 {
648    /// The mode bits associated with this object
649    pub mode: u32,
650    /// User ID of owner
651    pub uid: u32,
652    /// Group ID of owner
653    pub gid: u32,
654    /// Device ID
655    pub rdev: u64,
656}
657
658/// Object-level attributes.  Note that these are not the same as "attributes" in the
659/// ObjectValue::Attribute sense, which refers to an arbitrary data payload associated with an
660/// object.  This naming collision is unfortunate.
661pub type ObjectAttributes = ObjectAttributesV49;
662
663#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
664#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
665pub struct ObjectAttributesV49 {
666    /// The timestamp at which the object was created (i.e. crtime).
667    pub creation_time: TimestampV49,
668    /// The timestamp at which the object's data was last modified (i.e. mtime).
669    pub modification_time: TimestampV49,
670    /// The project id to associate this object's resource usage with. Zero means none.
671    pub project_id: u64,
672    /// Mode, uid, gid, and rdev
673    pub posix_attributes: Option<PosixAttributesV32>,
674    /// The number of bytes allocated to all extents across all attributes for this object.
675    pub allocated_size: u64,
676    /// The timestamp at which the object was last read (i.e. atime).
677    pub access_time: TimestampV49,
678    /// The timestamp at which the object's status was last modified (i.e. ctime).
679    pub change_time: TimestampV49,
680}
681
682pub type ExtendedAttributeValue = ExtendedAttributeValueV32;
683
684#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
685#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
686pub enum ExtendedAttributeValueV32 {
687    /// The extended attribute value is stored directly in this object. If the value is above a
688    /// certain size, it should be stored as an attribute with extents instead.
689    Inline(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
690    /// The extended attribute value is stored as an attribute with extents. The attribute id
691    /// should be chosen to be within the range of 64-512.
692    AttributeId(u64),
693}
694
695/// Id and descriptor for a child entry.
696pub type ChildValue = ChildValueV32;
697
698#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
699#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
700pub struct ChildValueV32 {
701    /// The ID of the child object.
702    pub object_id: u64,
703    /// Describes the type of the child.
704    pub object_descriptor: ObjectDescriptorV32,
705}
706
707pub type RootDigest = RootDigestV33;
708
709#[derive(
710    Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize, TypeFingerprint,
711)]
712#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
713pub enum RootDigestV33 {
714    Sha256([u8; 32]),
715    Sha512(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
716}
717
718pub type FsverityMetadata = FsverityMetadataV50;
719
720#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, TypeFingerprint, Versioned)]
721#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
722pub enum FsverityMetadataV50 {
723    /// The root hash and salt.
724    Internal(RootDigestV33, #[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
725    /// The root hash and salt are in a descriptor inside the merkle attribute.
726    F2fs(std::ops::Range<u64>),
727}
728
729pub type EncryptionKey = EncryptionKeyV49;
730pub type EncryptionKeyV49 = fxfs_crypto::EncryptionKey;
731
732pub type EncryptionKeys = EncryptionKeysV49;
733
734#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize, TypeFingerprint)]
735#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
736pub struct EncryptionKeysV49(Vec<(u64, EncryptionKeyV49)>);
737
738impl EncryptionKeys {
739    pub fn get(&self, id: u64) -> Option<&EncryptionKey> {
740        self.0.iter().find_map(|(i, key)| (*i == id).then_some(key))
741    }
742
743    pub fn insert(&mut self, id: u64, key: EncryptionKey) {
744        self.0.push((id, key))
745    }
746
747    pub fn remove(&mut self, id: u64) -> Option<EncryptionKey> {
748        if let Some(ix) = self.0.iter().position(|(k, _)| *k == id) {
749            Some(self.0.remove(ix).1)
750        } else {
751            None
752        }
753    }
754}
755
756impl From<EncryptionKeys> for BTreeMap<u64, WrappedKey> {
757    fn from(keys: EncryptionKeys) -> Self {
758        keys.0.into_iter().map(|(id, key)| (id, key.into())).collect()
759    }
760}
761
762impl From<Vec<(u64, EncryptionKey)>> for EncryptionKeys {
763    fn from(value: Vec<(u64, EncryptionKey)>) -> Self {
764        Self(value)
765    }
766}
767
768impl std::ops::Deref for EncryptionKeys {
769    type Target = Vec<(u64, EncryptionKey)>;
770    fn deref(&self) -> &Self::Target {
771        &self.0
772    }
773}
774
775/// ObjectValue is the value of an item in the object store.
776/// Note that the tree stores deltas on objects, so these values describe deltas. Unless specified
777/// otherwise, a value indicates an insert/replace mutation.
778pub type ObjectValue = ObjectValueV54;
779impl Value for ObjectValue {
780    const DELETED_MARKER: Self = Self::None;
781}
782
783#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
784#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
785pub enum ObjectValueV54 {
786    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
787    /// value are always filtered when a major compaction is performed, so the meaning must be the
788    /// same as if the item was not present.
789    None,
790    /// Some keys have no value but need to differentiate between a present value and no value
791    /// (None) i.e. their value is really a boolean: None => false, Some => true.
792    Some,
793    /// The value for an ObjectKey::Object record.
794    Object { kind: ObjectKindV54, attributes: ObjectAttributesV49 },
795    /// Specifies encryption keys to use for an object.
796    Keys(EncryptionKeysV49),
797    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
798    Attribute { size: u64, has_overwrite_extents: bool },
799    /// An extent associated with an object.
800    Extent(ExtentValueV38),
801    /// A child of an object.
802    Child(ChildValue),
803    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
804    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
805    /// the bounds of a single transaction.
806    Trim,
807    /// Added to support tracking Project ID usage and limits.
808    BytesAndNodes { bytes: i64, nodes: i64 },
809    /// A value for an extended attribute. Either inline or a redirection to an attribute with
810    /// extents.
811    ExtendedAttribute(ExtendedAttributeValueV32),
812    /// An attribute associated with a verified file object. |size| is the size of the attribute
813    /// in bytes.
814    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
815}
816
/// Version 50 of the value half of an object store record.  The `migrate_to_version` attribute
/// names `ObjectValueV54` as the version that the derived `Migrate` implementation converts to.
///
/// The variants mirror `ObjectValueV54`, except that `Object` holds an `ObjectKindV49` and `Child`
/// is declared with `ChildValueV32`.
#[derive(Migrate, Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[migrate_to_version(ObjectValueV54)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectValueV50 {
    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
    /// value are always filtered when a major compaction is performed, so the meaning must be the
    /// same as if the item was not present.
    None,
    /// Some keys have no value but need to differentiate between a present value and no value
    /// (None), i.e. their value is really a boolean: None => false, Some => true.
    Some,
    /// The value for an ObjectKey::Object record: the kind of object together with the attributes
    /// stored for it.
    Object { kind: ObjectKindV49, attributes: ObjectAttributesV49 },
    /// Specifies encryption keys to use for an object.
    Keys(EncryptionKeysV49),
    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
    /// NOTE(review): |has_overwrite_extents| presumably records whether any of the attribute's
    /// extents are overwrite extents -- confirm against the extent record definitions.
    Attribute { size: u64, has_overwrite_extents: bool },
    /// An extent associated with an object.
    Extent(ExtentValueV38),
    /// A child of an object.
    Child(ChildValueV32),
    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
    /// the bounds of a single transaction.
    Trim,
    /// A pair of signed byte and node counts, added to support tracking Project ID usage and
    /// limits.
    BytesAndNodes { bytes: i64, nodes: i64 },
    /// A value for an extended attribute. Either inline or a redirection to an attribute with
    /// extents.
    ExtendedAttribute(ExtendedAttributeValueV32),
    /// An attribute associated with a verified file object. |size| is the size of the attribute
    /// in bytes and |fsverity_metadata| is the fsverity metadata stored alongside it.
    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
}
851
852impl ObjectValue {
853    /// Creates an ObjectValue for a file object.
854    pub fn file(
855        refs: u64,
856        allocated_size: u64,
857        creation_time: Timestamp,
858        modification_time: Timestamp,
859        access_time: Timestamp,
860        change_time: Timestamp,
861        project_id: u64,
862        posix_attributes: Option<PosixAttributes>,
863    ) -> ObjectValue {
864        ObjectValue::Object {
865            kind: ObjectKind::File { refs },
866            attributes: ObjectAttributes {
867                creation_time,
868                modification_time,
869                project_id,
870                posix_attributes,
871                allocated_size,
872                access_time,
873                change_time,
874            },
875        }
876    }
877    pub fn keys(encryption_keys: EncryptionKeys) -> ObjectValue {
878        ObjectValue::Keys(encryption_keys)
879    }
880    /// Creates an ObjectValue for an object attribute.
881    pub fn attribute(size: u64, has_overwrite_extents: bool) -> ObjectValue {
882        ObjectValue::Attribute { size, has_overwrite_extents }
883    }
884    /// Creates an ObjectValue for an object attribute of a verified file.
885    pub fn verified_attribute(size: u64, fsverity_metadata: FsverityMetadata) -> ObjectValue {
886        ObjectValue::VerifiedAttribute { size, fsverity_metadata }
887    }
888    /// Creates an ObjectValue for an insertion/replacement of an object extent.
889    pub fn extent(device_offset: u64, key_id: u64) -> ObjectValue {
890        ObjectValue::Extent(ExtentValue::new_raw(device_offset, key_id))
891    }
892    /// Creates an ObjectValue for an insertion/replacement of an object extent.
893    pub fn extent_with_checksum(
894        device_offset: u64,
895        checksum: Checksums,
896        key_id: u64,
897    ) -> ObjectValue {
898        ObjectValue::Extent(ExtentValue::with_checksum(device_offset, checksum, key_id))
899    }
900    /// Creates an ObjectValue for a deletion of an object extent.
901    pub fn deleted_extent() -> ObjectValue {
902        ObjectValue::Extent(ExtentValue::deleted_extent())
903    }
904    /// Creates an ObjectValue for an object child.
905    pub fn child(object_id: u64, object_descriptor: ObjectDescriptor) -> ObjectValue {
906        ObjectValue::Child(ChildValue { object_id, object_descriptor })
907    }
908    /// Creates an ObjectValue for an object symlink.
909    pub fn symlink(
910        link: impl Into<Box<[u8]>>,
911        creation_time: Timestamp,
912        modification_time: Timestamp,
913        project_id: u64,
914    ) -> ObjectValue {
915        ObjectValue::Object {
916            kind: ObjectKind::Symlink { refs: 1, link: link.into() },
917            attributes: ObjectAttributes {
918                creation_time,
919                modification_time,
920                project_id,
921                ..Default::default()
922            },
923        }
924    }
925    /// Creates an ObjectValue for an encrypted symlink object.
926    pub fn encrypted_symlink(
927        link: impl Into<Box<[u8]>>,
928        creation_time: Timestamp,
929        modification_time: Timestamp,
930        project_id: u64,
931    ) -> ObjectValue {
932        ObjectValue::Object {
933            kind: ObjectKind::EncryptedSymlink { refs: 1, link: link.into() },
934            attributes: ObjectAttributes {
935                creation_time,
936                modification_time,
937                project_id,
938                ..Default::default()
939            },
940        }
941    }
942    pub fn inline_extended_attribute(value: impl Into<Vec<u8>>) -> ObjectValue {
943        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::Inline(value.into()))
944    }
945    pub fn extended_attribute(attribute_id: u64) -> ObjectValue {
946        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::AttributeId(attribute_id))
947    }
948}
949
/// Unversioned name for the object store item type; currently an alias for `ObjectItemV54`.
pub type ObjectItem = ObjectItemV54;

/// An LSM tree `Item` pairing an `ObjectKeyV54` key with an `ObjectValueV54` value.
pub type ObjectItemV54 = Item<ObjectKeyV54, ObjectValueV54>;
/// An LSM tree `Item` pairing an `ObjectKeyV43` key with an `ObjectValueV50` value.
pub type ObjectItemV50 = Item<ObjectKeyV43, ObjectValueV50>;
954
955impl ObjectItem {
956    pub fn is_tombstone(&self) -> bool {
957        matches!(
958            self,
959            Item {
960                key: ObjectKey { data: ObjectKeyData::Object, .. },
961                value: ObjectValue::None,
962                ..
963            }
964        )
965    }
966}
967
968// If the given item describes an extent, unwraps it and returns the extent key/value.
969impl<'a> From<ItemRef<'a, ObjectKey, ObjectValue>>
970    for Option<(/*object-id*/ u64, /*attribute-id*/ u64, &'a ExtentKey, &'a ExtentValue)>
971{
972    fn from(item: ItemRef<'a, ObjectKey, ObjectValue>) -> Self {
973        match item {
974            ItemRef {
975                key:
976                    ObjectKey {
977                        object_id,
978                        data:
979                            ObjectKeyData::Attribute(
980                                attribute_id, //
981                                AttributeKey::Extent(extent_key),
982                            ),
983                    },
984                value: ObjectValue::Extent(extent_value),
985                ..
986            } => Some((*object_id, *attribute_id, extent_key, extent_value)),
987            _ => None,
988        }
989    }
990}
991
/// Unversioned name for the Fxfs key type; currently an alias for `FxfsKeyV49`.
pub type FxfsKey = FxfsKeyV49;
/// Version-suffixed alias for `fxfs_crypto::FxfsKey`.
pub type FxfsKeyV49 = fxfs_crypto::FxfsKey;
994
#[cfg(test)]
mod tests {
    use super::{ObjectKey, ObjectKeyV54, TimestampV49};
    use crate::lsm_tree::types::{
        FuzzyHash as _, LayerKey, OrdLowerBound, OrdUpperBound, RangeKey,
    };
    use std::cmp::Ordering;
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    // Smoke test to ensure hash stability for Fxfs objects.
    // If this test fails, the hash algorithm changed, and that won't do -- Fxfs relies on stable
    // hash values, and existing images will appear to be corrupt if they change (see
    // https://fxbug.dev/419133532).
    #[test]
    fn test_hash_stability() {
        // Target a specific version of ObjectKey.  If you want to delete ObjectKeyV54, simply
        // update this test with a later key version, which will also require re-generating the
        // hashes.
        let object_hashes: Vec<_> = ObjectKeyV54::object(100).fuzzy_hash().collect();
        assert_eq!(&object_hashes[..], &[11885326717398844384]);

        let extent_hashes: Vec<_> =
            ObjectKeyV54::extent(1, 0, 0..2 * 1024 * 1024).fuzzy_hash().collect();
        assert_eq!(&extent_hashes[..], &[11090579907097549012, 2814892992701560424]);
    }

    // Checks how extent keys for object 1, attribute 0 compare against the key that follows the
    // extent 0..100, under both the lower-bound and the upper-bound orderings.
    #[test]
    fn test_next_key() {
        let next_key = ObjectKey::extent(1, 0, 0..100).next_key().unwrap();
        let lower = |range| ObjectKey::extent(1, 0, range).cmp_lower_bound(&next_key);
        let upper = |range| ObjectKey::extent(1, 0, range).cmp_upper_bound(&next_key);

        assert_eq!(lower(101..200), Ordering::Greater);
        assert_eq!(lower(100..200), Ordering::Equal);
        assert_eq!(lower(100..101), Ordering::Equal);
        assert_eq!(lower(99..100), Ordering::Less);

        assert_eq!(upper(0..100), Ordering::Less);
        assert_eq!(upper(99..100), Ordering::Less);
        assert_eq!(upper(100..101), Ordering::Equal);
        assert_eq!(upper(100..200), Ordering::Greater);
        assert_eq!(upper(50..101), Ordering::Equal);
        assert_eq!(upper(50..200), Ordering::Greater);
    }

    #[test]
    fn test_range_key() {
        // Make sure we disallow using extent keys with point queries. Other object keys should
        // still be allowed with point queries.
        assert!(ObjectKey::extent(1, 0, 0..2 * 1024 * 1024).is_range_key());
        assert!(!ObjectKey::object(100).is_range_key());

        // Object keys only overlap an identical object key, never an extent key.
        assert!(ObjectKey::object(1).overlaps(&ObjectKey::object(1)));
        assert!(!ObjectKey::object(1).overlaps(&ObjectKey::object(2)));
        assert!(!ObjectKey::extent(1, 0, 0..100).overlaps(&ObjectKey::object(1)));
        assert!(!ObjectKey::object(1).overlaps(&ObjectKey::extent(1, 0, 0..100)));

        // Extent keys must agree on both object id and attribute id to overlap.
        assert!(!ObjectKey::extent(1, 0, 0..100).overlaps(&ObjectKey::extent(2, 0, 0..100)));
        assert!(!ObjectKey::extent(1, 0, 0..100).overlaps(&ObjectKey::extent(1, 1, 0..100)));
        assert!(ObjectKey::extent(1, 0, 0..100).overlaps(&ObjectKey::extent(1, 0, 0..100)));

        // Ranges sharing at least one byte overlap, in either argument order.
        assert!(ObjectKey::extent(1, 0, 0..50).overlaps(&ObjectKey::extent(1, 0, 49..100)));
        assert!(ObjectKey::extent(1, 0, 49..100).overlaps(&ObjectKey::extent(1, 0, 0..50)));

        // Ranges that merely touch (end == start) do not overlap.
        assert!(!ObjectKey::extent(1, 0, 0..50).overlaps(&ObjectKey::extent(1, 0, 50..100)));
        assert!(!ObjectKey::extent(1, 0, 50..100).overlaps(&ObjectKey::extent(1, 0, 0..50)));
    }

    #[test]
    fn test_timestamp() {
        // Round-trips `std_time` through TimestampV49 and checks that every accessor agrees with
        // the equivalent Duration accessor.
        fn compare_time(std_time: Duration) {
            let ts_time: TimestampV49 = std_time.into();
            let round_tripped: Duration = ts_time.into();
            assert_eq!(round_tripped, std_time);
            assert_eq!(ts_time.subsec_nanos(), std_time.subsec_nanos());
            assert_eq!(ts_time.as_secs(), std_time.as_secs());
            assert_eq!(ts_time.as_nanos() as u128, std_time.as_nanos());
        }
        compare_time(Duration::from_nanos(0));
        compare_time(Duration::from_nanos(u64::MAX));
        compare_time(SystemTime::now().duration_since(UNIX_EPOCH).unwrap());

        // Durations beyond u64::MAX nanoseconds are expected to clamp to u64::MAX.
        let ts: TimestampV49 = Duration::from_secs(u64::MAX - 1).into();
        assert_eq!(ts.nanos, u64::MAX);

        let ts: TimestampV49 = (Duration::from_nanos(u64::MAX) + Duration::from_nanos(1)).into();
        assert_eq!(ts.nanos, u64::MAX);
    }
}