Skip to main content

fxfs/object_store/
object_record.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod legacy;
6
7pub use legacy::*;
8
9// TODO(https://fxbug.dev/42178223): need validation after deserialization.
10use crate::checksum::Checksums;
11use crate::log::error;
12use crate::lsm_tree::types::{
13    FuzzyHash, Item, ItemRef, LayerKey, LegacyItem, MergeType, OrdLowerBound, OrdUpperBound,
14    SortByU64, Value,
15};
16use crate::object_store::ProjectId;
17use crate::object_store::extent::{Extent, ExtentPartitionIterator};
18use crate::object_store::extent_record::{ExtentValue, ExtentValueV38};
19use crate::serialized_types::{Migrate, Versioned, migrate_nodefault, migrate_to_version};
20use fprint::TypeFingerprint;
21use fxfs_crypto::{WrappedKey, WrappingKeyId};
22use fxfs_macros::SerializeKey;
23use fxfs_unicode::CasefoldString;
24use serde::{Deserialize, Serialize};
25use std::collections::BTreeMap;
26use std::default::Default;
27use std::hash::Hash;
28use std::time::{Duration, SystemTime, UNIX_EPOCH};
29
/// ObjectDescriptor is the set of possible records in the object store.
///
/// Unversioned name for `ObjectDescriptorV32`.
pub type ObjectDescriptor = ObjectDescriptorV32;

/// Identifies what kind of object a record refers to; `ChildValueV32` stores one of these next to
/// the child's object ID.
// NOTE(review): the type derives `Serialize`/`Deserialize`, so presumably the variant order is
// part of the persisted encoding and new variants should be appended — confirm.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectDescriptorV32 {
    /// A file (in the generic sense; i.e. an object with some attributes).
    File,
    /// A directory (in the generic sense; i.e. an object with children).
    Directory,
    /// A volume, which is the root of a distinct object store containing Files and Directories.
    Volume,
    /// A symbolic link.
    Symlink,
}
45
/// For specifying what property of the project is being addressed.
///
/// Unversioned name for `ProjectPropertyV32`.
pub type ProjectProperty = ProjectPropertyV32;

/// Selects which per-project value an `ObjectKeyData::Project` key refers to.
// `Ord` is derived, so `Limit` sorts before `Usage`; inside `ObjectKeyData::Project` the property
// is compared after `project_id`, which keeps both records of one project adjacent.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    Ord,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ProjectPropertyV32 {
    /// The configured limit for the project.
    Limit,
    /// The currently tracked usage for the project.
    Usage,
}
69
/// Unversioned name for `ObjectKeyDataV54`.
pub type ObjectKeyData = ObjectKeyDataV54;

/// The type-specific part of an `ObjectKeyV54`; it is stored in the key's `data` field.
// `Ord` is derived, so keys that share an object ID sort by the declaration order of the variants
// below (and then by the variant's payload).  Do not reorder variants: `Object` relies on being
// first, as its own documentation explains.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    PartialEq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKeyDataV54 {
    /// A generic, untyped object.  This must come first and sort before all other keys for a given
    /// object because it's also used as a tombstone and it needs to merge with all following keys.
    Object,
    /// Encryption keys for an object.
    Keys,
    /// An attribute associated with an object.  It has a 64-bit ID.
    Attribute(AttributeId, AttributeKeyV32),
    /// A child of a directory.
    Child { name: String },
    /// A graveyard entry for an entire object.
    GraveyardEntry { object_id: u64 },
    /// Project ID info. This should only be attached to the volume's root node. Used to address the
    /// configured limit and the usage tracking which are ordered after the `project_id` to provide
    /// locality of the two related values.
    Project { project_id: ProjectId, property: ProjectPropertyV32 },
    /// An extended attribute associated with an object. It stores the name used for the extended
    /// attribute, which has a maximum size of 255 bytes enforced by fuchsia.io.
    ExtendedAttribute {
        #[serde(with = "crate::zerocopy_serialization")]
        name: Vec<u8>,
    },
    /// A graveyard entry for an attribute.
    GraveyardAttributeEntry { object_id: u64, attribute_id: AttributeId },
    /// A child of an encrypted directory.  We store the filename in its encrypted form.  hash_code
    /// is the hash of the casefolded human-readable name if a directory is also casefolded.  In
    /// some legacy cases, this is also used in non-casefolded cases, and in some of those cases the
    /// hash code can be 0.  Going forward, these cases are covered by `EncryptedChild` below.
    EncryptedCasefoldChild(EncryptedCasefoldChild),
    /// Case-insensitive child (legacy).
    LegacyCasefoldChild(CasefoldString),
    /// An encrypted child that does not use case folding.
    EncryptedChild(EncryptedChild),
    /// A child of a directory that uses the casefold feature.
    /// (i.e. case insensitive, case preserving names)
    CasefoldChild { hash_code: u32, name: String },
}
123
/// Payload of `ObjectKeyDataV54::EncryptedCasefoldChild`: a hash code paired with a child name
/// held as raw bytes.
// `Ord` is derived with `hash_code` declared first, so entries sort by hash code before name.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    PartialEq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptedCasefoldChild {
    /// Hash code stored ahead of the name (see `ObjectKey::encrypted_child` for how it is used).
    pub hash_code: u32,
    /// The child's name as bytes; `ObjectKeyDataV54` documents it as being kept in encrypted form.
    #[serde(with = "crate::zerocopy_serialization")]
    pub name: Vec<u8>,
}
143
/// Payload of `ObjectKeyDataV54::EncryptedChild`: the child's name as raw bytes, with no hash code
/// prefix.  `ObjectKey::encrypted_child` builds this when it is given no hash code.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    PartialEq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptedChild(#[serde(with = "crate::zerocopy_serialization")] pub Vec<u8>);
159
/// Unversioned name for `AttributeKeyV32`.
pub type AttributeKey = AttributeKeyV32;

/// Second component of `ObjectKeyDataV54::Attribute`: selects either the attribute record itself
/// or one of its extents.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    Ord,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum AttributeKeyV32 {
    // Order here is important: code expects Attribute to precede Extent.  (The derived `Ord`
    // follows declaration order, which is what provides that guarantee.)
    /// The attribute record itself.
    Attribute,
    /// An extent belonging to the attribute.
    Extent(Extent),
}
181
/// ObjectKey is a key in the object store.
///
/// Unversioned name for `ObjectKeyV54`.
pub type ObjectKey = ObjectKeyV54;

/// A key made of an object ID and a type-specific payload.
// `Ord` is derived with `object_id` declared first, so keys sort by object ID and then by `data`.
#[derive(
    Clone,
    Debug,
    Eq,
    Ord,
    Hash,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    SerializeKey,
    TypeFingerprint,
    Versioned,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ObjectKeyV54 {
    /// The ID of the object referred to.
    pub object_id: u64,
    /// The type and data of the key.
    pub data: ObjectKeyDataV54,
}
206
207impl SortByU64 for ObjectKey {
208    fn get_leading_u64(&self) -> u64 {
209        self.object_id
210    }
211}
212
213impl ObjectKey {
214    /// Creates a generic ObjectKey.
215    pub fn object(object_id: u64) -> Self {
216        Self { object_id: object_id, data: ObjectKeyData::Object }
217    }
218
219    /// Creates an ObjectKey for encryption keys.
220    pub fn keys(object_id: u64) -> Self {
221        Self { object_id, data: ObjectKeyData::Keys }
222    }
223
224    /// Creates an ObjectKey for an attribute.
225    pub fn attribute(object_id: u64, attribute_id: AttributeId, key: AttributeKey) -> Self {
226        Self { object_id, data: ObjectKeyData::Attribute(attribute_id, key) }
227    }
228
229    /// Creates an ObjectKey for an extent.
230    pub fn extent(object_id: u64, attribute_id: AttributeId, range: std::ops::Range<u64>) -> Self {
231        Self {
232            object_id,
233            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(Extent(range))),
234        }
235    }
236
237    /// Creates an ObjectKey from an extent.
238    pub fn from_extent(object_id: u64, attribute_id: AttributeId, extent: Extent) -> Self {
239        Self {
240            object_id,
241            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(extent)),
242        }
243    }
244
245    /// Creates an ObjectKey for a child.
246    pub fn child(object_id: u64, name: &str, dir_type: DirType) -> Self {
247        match dir_type {
248            DirType::Casefold => {
249                let casefolded =
250                    fxfs_unicode::casefold(name.chars()).flat_map(fxfs_unicode::utf8_bytes);
251                let hash_code = fscrypt::direntry::tea_hash_filename(casefolded);
252                Self {
253                    object_id,
254                    data: ObjectKeyData::CasefoldChild { hash_code, name: name.into() },
255                }
256            }
257            DirType::LegacyCasefold => Self {
258                object_id,
259                data: ObjectKeyData::LegacyCasefoldChild(CasefoldString::new(name.into())),
260            },
261            DirType::Normal => Self { object_id, data: ObjectKeyData::Child { name: name.into() } },
262            DirType::Encrypted(_) | DirType::EncryptedCasefold(_) => {
263                // These shouldn't be used directly; encrypted_child should be used instead.
264                panic!("Encrypted modes require an encrypted name");
265            }
266        }
267    }
268
269    /// Creates an ObjectKey for an encrypted child.
270    ///
271    /// The hash_code is important here -- especially for fscrypt as it affects the
272    /// name of locked files.
273    ///
274    /// For case-insensitive lookups in large encrypted directories, we lose the ability to binary
275    /// search for an entry of interest because encryption breaks our sort order. In these cases
276    /// we prefix records with a 32-bit hash based on the stable *casefolded* name. Hash collisions
277    /// aside, this lets us jump straight to the entry of interest, if it exists.
278    pub fn encrypted_child(object_id: u64, name: Vec<u8>, hash_code: Option<u32>) -> Self {
279        if let Some(hash_code) = hash_code {
280            Self {
281                object_id,
282                data: ObjectKeyData::EncryptedCasefoldChild(EncryptedCasefoldChild {
283                    hash_code,
284                    name,
285                }),
286            }
287        } else {
288            Self { object_id, data: ObjectKeyData::EncryptedChild(EncryptedChild(name)) }
289        }
290    }
291
292    /// Creates a graveyard entry for an object.
293    pub fn graveyard_entry(graveyard_object_id: u64, object_id: u64) -> Self {
294        Self { object_id: graveyard_object_id, data: ObjectKeyData::GraveyardEntry { object_id } }
295    }
296
297    /// Creates a graveyard entry for an attribute.
298    pub fn graveyard_attribute_entry(
299        graveyard_object_id: u64,
300        object_id: u64,
301        attribute_id: AttributeId,
302    ) -> Self {
303        Self {
304            object_id: graveyard_object_id,
305            data: ObjectKeyData::GraveyardAttributeEntry { object_id, attribute_id },
306        }
307    }
308
309    /// Creates an ObjectKey for a ProjectLimit entry.
310    pub fn project_limit(object_id: u64, project_id: ProjectId) -> Self {
311        Self {
312            object_id,
313            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Limit },
314        }
315    }
316
317    /// Creates an ObjectKey for a ProjectUsage entry.
318    pub fn project_usage(object_id: u64, project_id: ProjectId) -> Self {
319        Self {
320            object_id,
321            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Usage },
322        }
323    }
324
325    pub fn extended_attribute(object_id: u64, name: Vec<u8>) -> Self {
326        Self { object_id, data: ObjectKeyData::ExtendedAttribute { name } }
327    }
328
329    /// Returns the merge key for this key; that is, a key which is <= this key and any
330    /// other possibly overlapping key, under Ord. This would be used for the hint in |merge_into|.
331    pub fn key_for_merge_into(&self) -> Self {
332        if let Self {
333            object_id,
334            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
335        } = self
336        {
337            Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.key_for_merge_into()))
338        } else {
339            self.clone()
340        }
341    }
342}
343
344impl OrdUpperBound for ObjectKey {
345    fn cmp_upper_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
346        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
347            (
348                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
349                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
350            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_upper_bound(right_extent)),
351            _ => self.data.cmp(&other.data),
352        })
353    }
354}
355
356impl OrdLowerBound for ObjectKey {
357    fn cmp_lower_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
358        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
359            (
360                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
361                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
362            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_lower_bound(right_extent)),
363            _ => self.data.cmp(&other.data),
364        })
365    }
366}
367
368impl LayerKey for ObjectKey {
369    fn merge_type(&self) -> MergeType {
370        // This listing is intentionally exhaustive to force folks to think about how certain
371        // subsets of the keyspace are merged.
372        match self.data {
373            ObjectKeyData::Object
374            | ObjectKeyData::Keys
375            | ObjectKeyData::Attribute(..)
376            | ObjectKeyData::Child { .. }
377            | ObjectKeyData::EncryptedChild(_)
378            | ObjectKeyData::EncryptedCasefoldChild(_)
379            | ObjectKeyData::CasefoldChild { .. }
380            | ObjectKeyData::LegacyCasefoldChild(_)
381            | ObjectKeyData::GraveyardEntry { .. }
382            | ObjectKeyData::GraveyardAttributeEntry { .. }
383            | ObjectKeyData::Project { property: ProjectProperty::Limit, .. }
384            | ObjectKeyData::ExtendedAttribute { .. } => MergeType::OptimizedMerge,
385            ObjectKeyData::Project { property: ProjectProperty::Usage, .. } => MergeType::FullMerge,
386        }
387    }
388
389    fn next_key(&self) -> Option<Self> {
390        match &self.data {
391            ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent)) => {
392                // This key comes before (or is equal to) any extent starting at or after the
393                // end of `self`. Searching for its `search_key` finds extents that end after
394                // the end of `self`.
395                Some(ObjectKey {
396                    object_id: self.object_id,
397                    data: ObjectKeyData::Attribute(
398                        *attr_id,
399                        AttributeKey::Extent(Extent(0..extent.end + 1)),
400                    ),
401                })
402            }
403            _ => None,
404        }
405    }
406
407    fn search_key(&self) -> Option<Self> {
408        if let Self {
409            object_id,
410            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
411        } = self
412        {
413            Some(Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.search_key())))
414        } else {
415            None
416        }
417    }
418
419    fn is_search_key(&self) -> bool {
420        match self {
421            Self { data: ObjectKeyData::Attribute(_, AttributeKey::Extent(e)), .. } => e.start == 0,
422            _ => true,
423        }
424    }
425
426    fn overlaps(&self, other: &Self) -> bool {
427        if self.object_id != other.object_id {
428            return false;
429        }
430        match (&self.data, &other.data) {
431            (
432                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_key)),
433                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_key)),
434            ) if *left_attr_id == *right_attr_id => {
435                left_key.end > right_key.start && left_key.start < right_key.end
436            }
437            (a, b) => a == b,
438        }
439    }
440}
441
/// Iterator returned by `ObjectKey::fuzzy_hash`, yielding the `u64` hashes for one key.
pub enum ObjectKeyFuzzyHashIterator {
    /// For extent keys: yields one hash per range produced by the `ExtentPartitionIterator`, each
    /// computed over an extent key built from the stored object ID, attribute ID and that range.
    Extent(/* object_id */ u64, AttributeId, ExtentPartitionIterator),
    /// For all other keys: yields the stored hash once, after which the slot is `None`.
    NotExtent(/* hash */ Option<u64>),
}
446
447impl Iterator for ObjectKeyFuzzyHashIterator {
448    type Item = u64;
449
450    fn next(&mut self) -> Option<Self::Item> {
451        match self {
452            Self::Extent(oid, attr_id, extent_keys) => extent_keys.next().map(|range| {
453                let key = ObjectKey::extent(*oid, *attr_id, range);
454                crate::stable_hash::stable_hash(key)
455            }),
456            Self::NotExtent(hash) => hash.take(),
457        }
458    }
459}
460
461impl FuzzyHash for ObjectKey {
462    fn fuzzy_hash(&self) -> impl Iterator<Item = u64> {
463        match &self.data {
464            ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent)) => {
465                ObjectKeyFuzzyHashIterator::Extent(
466                    self.object_id,
467                    *attr_id,
468                    extent.fuzzy_hash_partition(),
469                )
470            }
471            _ => {
472                let hash = crate::stable_hash::stable_hash(self);
473                ObjectKeyFuzzyHashIterator::NotExtent(Some(hash))
474            }
475        }
476    }
477
478    fn is_range_key(&self) -> bool {
479        match &self.data {
480            ObjectKeyData::Attribute(_, AttributeKey::Extent(_)) => true,
481            _ => false,
482        }
483    }
484}
485
/// UNIX epoch based timestamp in the UTC timezone.
///
/// Unversioned name for `TimestampV49`.
pub type Timestamp = TimestampV49;

/// A point in time stored as a single count of nanoseconds since the Unix epoch.
// `Default` is derived, so the default timestamp has `nanos == 0`.
#[derive(
    Copy,
    Clone,
    Debug,
    Default,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct TimestampV49 {
    /// Nanoseconds since the Unix epoch.
    nanos: u64,
}
506
507impl Timestamp {
508    const NSEC_PER_SEC: u64 = 1_000_000_000;
509
510    pub fn now() -> Self {
511        SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO).into()
512    }
513
514    pub const fn zero() -> Self {
515        Self { nanos: 0 }
516    }
517
518    pub const fn from_nanos(nanos: u64) -> Self {
519        Self { nanos }
520    }
521
522    pub fn from_secs_and_nanos(secs: u64, nanos: u32) -> Self {
523        let Some(secs_in_nanos) = secs.checked_mul(Self::NSEC_PER_SEC) else {
524            error!("Fxfs doesn't support dates past 2554-07-21");
525            return Self { nanos: u64::MAX };
526        };
527        let Some(nanos) = secs_in_nanos.checked_add(nanos as u64) else {
528            error!("Fxfs doesn't support dates past 2554-07-21");
529            return Self { nanos: u64::MAX };
530        };
531        Self { nanos }
532    }
533
534    /// Returns the total number of nanoseconds represented by this `Timestamp` since the Unix
535    /// epoch.
536    pub fn as_nanos(&self) -> u64 {
537        self.nanos
538    }
539
540    /// Returns the fractional nanoseconds represented by this `Timestamp`.
541    pub fn subsec_nanos(&self) -> u32 {
542        (self.nanos % Self::NSEC_PER_SEC) as u32
543    }
544
545    /// Returns the total number of whole seconds represented by this `Timestamp` since the Unix
546    /// epoch.
547    pub fn as_secs(&self) -> u64 {
548        self.nanos / Self::NSEC_PER_SEC
549    }
550}
551
552impl From<std::time::Duration> for Timestamp {
553    fn from(duration: std::time::Duration) -> Self {
554        Self::from_secs_and_nanos(duration.as_secs(), duration.subsec_nanos())
555    }
556}
557
558impl From<Timestamp> for std::time::Duration {
559    fn from(timestamp: Timestamp) -> std::time::Duration {
560        Duration::from_nanos(timestamp.nanos)
561    }
562}
563
/// Unversioned name for `ObjectKindV54`.
pub type ObjectKind = ObjectKindV54;

/// The type of a directory: whether it is encrypted and/or casefolded.  Stored in
/// `ObjectKindV54::Directory`.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum DirType {
    /// Neither encrypted nor casefolded.  This is the `Default`.
    Normal,
    /// Encrypted (not casefolded); carries the wrapping key ID.
    Encrypted(WrappingKeyId),
    /// Legacy casefolded mode.
    LegacyCasefold,
    /// Casefolded, not encrypted.
    Casefold,
    /// Encrypted and casefolded; carries the wrapping key ID.
    EncryptedCasefold(WrappingKeyId),
}
576
577impl DirType {
578    pub fn is_casefold(&self) -> bool {
579        matches!(self, DirType::LegacyCasefold | DirType::Casefold | DirType::EncryptedCasefold(_))
580    }
581
582    pub fn is_encrypted(&self) -> bool {
583        matches!(self, DirType::Encrypted(_) | DirType::EncryptedCasefold(_))
584    }
585
586    pub fn with_encryption(self, id: WrappingKeyId) -> Self {
587        match self {
588            DirType::Normal => DirType::Encrypted(id),
589            DirType::Casefold => DirType::EncryptedCasefold(id),
590            _ => self,
591        }
592    }
593
594    pub fn with_casefold(self, val: bool) -> Self {
595        match (val, self) {
596            (true, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
597                DirType::EncryptedCasefold(id)
598            }
599            (true, _) => DirType::Casefold,
600            (false, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
601                DirType::Encrypted(id)
602            }
603            (false, _) => DirType::Normal,
604        }
605    }
606
607    pub fn wrapping_key_id(&self) -> Option<WrappingKeyId> {
608        match self {
609            DirType::Encrypted(id) | DirType::EncryptedCasefold(id) => Some(*id),
610            _ => None,
611        }
612    }
613}
614
615impl Default for DirType {
616    fn default() -> Self {
617        DirType::Normal
618    }
619}
620
/// The kind-specific part of an `ObjectValueV54::Object` record.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKindV54 {
    /// A file.
    File {
        /// The number of references to this file.
        refs: u64,
    },
    /// A directory.
    Directory {
        /// The number of sub-directories in this directory.
        sub_dirs: u64,
        /// The type of directory (encryption, casefolding, etc.)
        dir_type: DirType,
    },
    /// A graveyard; it carries no extra data.
    Graveyard,
    /// A symbolic link whose target is stored as is.
    Symlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
    /// A symbolic link whose target is stored in encrypted form.
    EncryptedSymlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
        /// volume's data key.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
}
654
/// Older layout of the object kind, still referenced by `ObjectValueV50::Object`.  It differs from
/// `ObjectKindV54` in `Directory`, which stores `wrapping_key_id` and `casefold` separately
/// instead of a single `dir_type`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKindV49 {
    /// A file.
    File {
        /// The number of references to this file.
        refs: u64,
    },
    /// A directory.
    Directory {
        /// The number of sub-directories in this directory.
        sub_dirs: u64,
        /// If set, contains the wrapping key id used to encrypt the file contents and filenames in
        /// this directory.
        wrapping_key_id: Option<WrappingKeyId>,
        /// If true, all files and sub-directories created in this directory will support case
        /// insensitive (but case-preserving) file naming.
        casefold: bool,
    },
    /// A graveyard; it carries no extra data.
    Graveyard,
    /// A symbolic link whose target is stored as is.
    Symlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
    /// A symbolic link whose target is stored in encrypted form.
    EncryptedSymlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
        /// same encryption scheme as the one used to encrypt filenames.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
}
692
/// This consists of POSIX attributes that are not used in Fxfs but it may be meaningful to some
/// clients to have the ability to set and retrieve these values.
pub type PosixAttributes = PosixAttributesV32;

/// Versioned definition behind `PosixAttributes`.  `Default` is derived, so every field defaults
/// to zero.
#[derive(Clone, Debug, Copy, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct PosixAttributesV32 {
    /// The mode bits associated with this object
    pub mode: u32,
    /// User ID of owner
    pub uid: u32,
    /// Group ID of owner
    pub gid: u32,
    /// Device ID
    pub rdev: u64,
}
709
/// Object-level attributes.  Note that these are not the same as "attributes" in the
/// ObjectValue::Attribute sense, which refers to an arbitrary data payload associated with an
/// object.  This naming collision is unfortunate.
pub type ObjectAttributes = ObjectAttributesV49;

/// Versioned definition behind `ObjectAttributes`; stored in the `attributes` field of
/// `ObjectValueV54::Object`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ObjectAttributesV49 {
    /// The timestamp at which the object was created (i.e. crtime).
    pub creation_time: TimestampV49,
    /// The timestamp at which the object's data was last modified (i.e. mtime).
    pub modification_time: TimestampV49,
    /// The project id to associate this object's resource usage with.
    // Serialized through a custom helper module rather than serde's default `Option` encoding.
    #[serde(with = "crate::object_store::project_id::optional_project_id")]
    pub project_id: Option<ProjectId>,
    /// Mode, uid, gid, and rdev
    pub posix_attributes: Option<PosixAttributesV32>,
    /// The number of bytes allocated to all extents across all attributes for this object.
    pub allocated_size: u64,
    /// The timestamp at which the object was last read (i.e. atime).
    pub access_time: TimestampV49,
    /// The timestamp at which the object's status was last modified (i.e. ctime).
    pub change_time: TimestampV49,
}
734
/// Unversioned name for `ExtendedAttributeValueV32`.
pub type ExtendedAttributeValue = ExtendedAttributeValueV32;

/// The value of an extended attribute: either the bytes themselves or a reference to the
/// attribute that holds them.  Wrapped by `ObjectValueV54::ExtendedAttribute`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ExtendedAttributeValueV32 {
    /// The extended attribute value is stored directly in this object. If the value is above a
    /// certain size, it should be stored as an attribute with extents instead.
    Inline(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
    /// The extended attribute value is stored as an attribute with extents. The attribute id
    /// should be chosen to be within the range of 64-512.
    AttributeId(AttributeId),
}
747
/// Id and descriptor for a child entry.
pub type ChildValue = ChildValueV32;

/// Versioned definition behind `ChildValue`; wrapped by `ObjectValueV54::Child`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ChildValueV32 {
    /// The ID of the child object.
    pub object_id: u64,
    /// Describes the type of the child.
    pub object_descriptor: ObjectDescriptorV32,
}
759
/// Unversioned name for `RootDigestV33`.
pub type RootDigest = RootDigestV33;

/// A root hash tagged with the hash algorithm; used as the first field of
/// `FsverityMetadataV50::Internal`.
#[derive(
    Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize, TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum RootDigestV33 {
    /// A 32-byte digest.
    Sha256([u8; 32]),
    /// A variable-length byte vector.
    // NOTE(review): presumably always 64 bytes for SHA-512; the type does not enforce it — confirm.
    Sha512(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
}
770
/// Unversioned name for `FsverityMetadataV50`.
pub type FsverityMetadata = FsverityMetadataV50;

/// Verification metadata carried by `ObjectValueV54::VerifiedAttribute`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum FsverityMetadataV50 {
    /// The root hash and salt.
    Internal(RootDigestV33, #[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
    /// The root hash and salt are in a descriptor inside the merkle attribute.
    // NOTE(review): the range presumably locates that descriptor within the attribute — confirm.
    F2fs(std::ops::Range<u64>),
}
781
/// Unversioned name for `EncryptionKeyV49`.
pub type EncryptionKey = EncryptionKeyV49;
/// The key type is defined by the `fxfs_crypto` crate.
pub type EncryptionKeyV49 = fxfs_crypto::EncryptionKey;

/// Unversioned name for `EncryptionKeysV49`.
pub type EncryptionKeys = EncryptionKeysV49;

/// A list of `(id, key)` pairs; wrapped by `ObjectValueV54::Keys`.
// The pairs are kept in a plain `Vec`; nothing in the type itself prevents two entries from
// sharing an id.
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptionKeysV49(Vec<(u64, EncryptionKeyV49)>);
790
791impl EncryptionKeys {
792    pub fn get(&self, id: u64) -> Option<&EncryptionKey> {
793        self.0.iter().find_map(|(i, key)| (*i == id).then_some(key))
794    }
795
796    pub fn insert(&mut self, id: u64, key: EncryptionKey) {
797        self.0.push((id, key))
798    }
799
800    pub fn remove(&mut self, id: u64) -> Option<EncryptionKey> {
801        if let Some(ix) = self.0.iter().position(|(k, _)| *k == id) {
802            Some(self.0.remove(ix).1)
803        } else {
804            None
805        }
806    }
807}
808
809impl From<EncryptionKeys> for BTreeMap<u64, WrappedKey> {
810    fn from(keys: EncryptionKeys) -> Self {
811        keys.0.into_iter().map(|(id, key)| (id, key.into())).collect()
812    }
813}
814
815impl From<Vec<(u64, EncryptionKey)>> for EncryptionKeys {
816    fn from(value: Vec<(u64, EncryptionKey)>) -> Self {
817        Self(value)
818    }
819}
820
821impl std::ops::Deref for EncryptionKeys {
822    type Target = Vec<(u64, EncryptionKey)>;
823    fn deref(&self) -> &Self::Target {
824        &self.0
825    }
826}
827
828/// ObjectValue is the value of an item in the object store.
829/// Note that the tree stores deltas on objects, so these values describe deltas. Unless specified
830/// otherwise, a value indicates an insert/replace mutation.
831pub type ObjectValue = ObjectValueV54;
832impl Value for ObjectValue {
833    const DELETED_MARKER: Self = Self::None;
834}
835
/// Versioned definition behind the `ObjectValue` alias.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectValueV54 {
    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
    /// value are always filtered when a major compaction is performed, so the meaning must be the
    /// same as if the item was not present.
    // This variant is also `Value::DELETED_MARKER` for `ObjectValue`.
    None,
    /// Some keys have no value but need to differentiate between a present value and no value
    /// (None) i.e. their value is really a boolean: None => false, Some => true.
    Some,
    /// The value for an ObjectKey::Object record.
    Object { kind: ObjectKindV54, attributes: ObjectAttributesV49 },
    /// Specifies encryption keys to use for an object.
    Keys(EncryptionKeysV49),
    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
    Attribute { size: u64, has_overwrite_extents: bool },
    /// An extent associated with an object.
    Extent(ExtentValueV38),
    /// A child of an object.
    Child(ChildValue),
    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
    /// the bounds of a single transaction.
    Trim,
    /// Added to support tracking Project ID usage and limits.
    BytesAndNodes { bytes: i64, nodes: i64 },
    /// A value for an extended attribute. Either inline or a redirection to an attribute with
    /// extents.
    ExtendedAttribute(ExtendedAttributeValueV32),
    /// An attribute associated with a verified file object. |size| is the size of the attribute
    /// in bytes.
    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
}
869
/// Version 50 of the object store value type.  The `migrate_to_version` attribute names
/// `ObjectValueV54` as the version this one migrates to.
///
/// The variant list mirrors `ObjectValueV54`; this version uses `ObjectKindV49` for the `Object`
/// kind where `ObjectValueV54` uses `ObjectKindV54`.
#[derive(Migrate, Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[migrate_to_version(ObjectValueV54)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectValueV50 {
    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
    /// value are always filtered out when a major compaction is performed, so the meaning must be
    /// the same as if the item was not present.
    None,
    /// Some keys have no payload but still need to distinguish "present" from "absent" (`None`),
    /// i.e. their value is really a boolean: `None` => false, `Some` => true.
    Some,
    /// The value for an ObjectKey::Object record: the kind of object plus its attributes.
    Object { kind: ObjectKindV49, attributes: ObjectAttributesV49 },
    /// Specifies the encryption keys to use for an object.
    Keys(EncryptionKeysV49),
    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
    Attribute { size: u64, has_overwrite_extents: bool },
    /// An extent associated with an object.
    Extent(ExtentValueV38),
    /// A child of an object.
    Child(ChildValueV32),
    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
    /// the bounds of a single transaction.
    Trim,
    /// Added to support tracking Project ID usage and limits.
    BytesAndNodes { bytes: i64, nodes: i64 },
    /// A value for an extended attribute: either stored inline or a redirection to an attribute
    /// with extents.
    ExtendedAttribute(ExtendedAttributeValueV32),
    /// An attribute associated with a verified file object. |size| is the size of the attribute
    /// in bytes; |fsverity_metadata| carries the accompanying fsverity metadata.
    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
}
904
905impl ObjectValue {
906    /// Creates an ObjectValue for a file object.
907    pub fn file(
908        refs: u64,
909        allocated_size: u64,
910        creation_time: Timestamp,
911        modification_time: Timestamp,
912        access_time: Timestamp,
913        change_time: Timestamp,
914        project_id: Option<ProjectId>,
915        posix_attributes: Option<PosixAttributes>,
916    ) -> ObjectValue {
917        ObjectValue::Object {
918            kind: ObjectKind::File { refs },
919            attributes: ObjectAttributes {
920                creation_time,
921                modification_time,
922                project_id,
923                posix_attributes,
924                allocated_size,
925                access_time,
926                change_time,
927            },
928        }
929    }
930    pub fn keys(encryption_keys: EncryptionKeys) -> ObjectValue {
931        ObjectValue::Keys(encryption_keys)
932    }
933    /// Creates an ObjectValue for an object attribute.
934    pub fn attribute(size: u64, has_overwrite_extents: bool) -> ObjectValue {
935        ObjectValue::Attribute { size, has_overwrite_extents }
936    }
937    /// Creates an ObjectValue for an object attribute of a verified file.
938    pub fn verified_attribute(size: u64, fsverity_metadata: FsverityMetadata) -> ObjectValue {
939        ObjectValue::VerifiedAttribute { size, fsverity_metadata }
940    }
941    /// Creates an ObjectValue for an insertion/replacement of an object extent.
942    pub fn extent(device_offset: u64, key_id: u64) -> ObjectValue {
943        ObjectValue::Extent(ExtentValue::new_raw(device_offset, key_id))
944    }
945    /// Creates an ObjectValue for an insertion/replacement of an object extent.
946    pub fn extent_with_checksum(
947        device_offset: u64,
948        checksum: Checksums,
949        key_id: u64,
950    ) -> ObjectValue {
951        ObjectValue::Extent(ExtentValue::with_checksum(device_offset, checksum, key_id))
952    }
953    /// Creates an ObjectValue for a deletion of an object extent.
954    pub fn deleted_extent() -> ObjectValue {
955        ObjectValue::Extent(ExtentValue::deleted_extent())
956    }
957    /// Creates an ObjectValue for an object child.
958    pub fn child(object_id: u64, object_descriptor: ObjectDescriptor) -> ObjectValue {
959        ObjectValue::Child(ChildValue { object_id, object_descriptor })
960    }
961    /// Creates an ObjectValue for an object symlink.
962    pub fn symlink(
963        link: impl Into<Box<[u8]>>,
964        creation_time: Timestamp,
965        modification_time: Timestamp,
966        project_id: Option<ProjectId>,
967    ) -> ObjectValue {
968        ObjectValue::Object {
969            kind: ObjectKind::Symlink { refs: 1, link: link.into() },
970            attributes: ObjectAttributes {
971                creation_time,
972                modification_time,
973                project_id,
974                ..Default::default()
975            },
976        }
977    }
978    /// Creates an ObjectValue for an encrypted symlink object.
979    pub fn encrypted_symlink(
980        link: impl Into<Box<[u8]>>,
981        creation_time: Timestamp,
982        modification_time: Timestamp,
983        project_id: Option<ProjectId>,
984    ) -> ObjectValue {
985        ObjectValue::Object {
986            kind: ObjectKind::EncryptedSymlink { refs: 1, link: link.into() },
987            attributes: ObjectAttributes {
988                creation_time,
989                modification_time,
990                project_id,
991                ..Default::default()
992            },
993        }
994    }
995    pub fn inline_extended_attribute(value: impl Into<Vec<u8>>) -> ObjectValue {
996        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::Inline(value.into()))
997    }
998    pub fn extended_attribute(attribute_id: AttributeId) -> ObjectValue {
999        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::AttributeId(attribute_id))
1000    }
1001}
1002
1003pub type ObjectItem = ObjectItemV55;
1004
1005pub type ObjectItemV54 = LegacyItem<ObjectKeyV54, ObjectValueV54>;
1006pub type ObjectItemV55 = Item<ObjectKeyV54, ObjectValueV54>;
1007
1008impl From<ObjectItemV54> for ObjectItemV55 {
1009    fn from(item: ObjectItemV54) -> Self {
1010        Self { key: item.key, value: item.value }
1011    }
1012}
1013
1014pub type ObjectItemV50 = LegacyItem<ObjectKeyV43, ObjectValueV50>;
1015
1016impl ObjectItem {
1017    pub fn is_tombstone(&self) -> bool {
1018        matches!(
1019            self,
1020            Item {
1021                key: ObjectKey { data: ObjectKeyData::Object, .. },
1022                value: ObjectValue::None,
1023                ..
1024            }
1025        )
1026    }
1027}
1028
1029// If the given item describes an extent, unwraps it and returns the extent key/value.
1030impl<'a> From<ItemRef<'a, ObjectKey, ObjectValue>>
1031    for Option<(/*object-id*/ u64, AttributeId, &'a Extent, &'a ExtentValue)>
1032{
1033    fn from(item: ItemRef<'a, ObjectKey, ObjectValue>) -> Self {
1034        match item {
1035            ItemRef {
1036                key:
1037                    ObjectKey {
1038                        object_id,
1039                        data:
1040                            ObjectKeyData::Attribute(
1041                                attribute_id, //
1042                                AttributeKey::Extent(extent_key),
1043                            ),
1044                    },
1045                value: ObjectValue::Extent(extent_value),
1046                ..
1047            } => Some((*object_id, *attribute_id, extent_key, extent_value)),
1048            _ => None,
1049        }
1050    }
1051}
1052
/// The current version of the Fxfs key type; an alias for `FxfsKeyV49`.
pub type FxfsKey = FxfsKeyV49;
/// Versioned name for `fxfs_crypto::FxfsKey`.
pub type FxfsKeyV49 = fxfs_crypto::FxfsKey;
1055
/// Identifier of an attribute, stored as a raw `u64`.
///
/// The type is a `#[repr(transparent)]` newtype, so it has the same layout as the `u64` it
/// wraps.  Well-known IDs are provided as associated constants on the type.
#[derive(
    Clone,
    Copy,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Debug,
    Serialize,
    Deserialize,
    Hash,
    SerializeKey,
    TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
#[repr(transparent)]
pub struct AttributeId(pub u64);
1073
1074impl AttributeId {
1075    /// The common case for extents which cover the data payload of an object.
1076    pub const DATA: Self = Self(0);
1077
1078    /// Contains a serialized and versioned `BlobMetadata` struct. Use [`BlobMetadata::read_from`]
1079    /// and [`BlobMetadata::write_to`] to access this attribute.
1080    pub const BLOB_METADATA: Self = Self(3);
1081
1082    /// Contains a serialized `BlobMetadataUnversioned` struct. This attribute may still exist on
1083    /// blobs but should no longer be written. Use `AttributeId::BLOB_METADATA` instead.
1084    pub const BLOB_MERKLE: Self = Self(1);
1085
1086    /// For fsverity files in Fxfs, we store the merkle tree of the verified file at a well-known
1087    /// attribute.
1088    pub const FSVERITY_MERKLE: Self = Self(2);
1089
1090    /// The range of fxfs attribute IDs which are reserved for extended attribute values. Whenever a
1091    /// new attribute is needed, the first unused ID will be chosen from this range. It's
1092    /// technically safe to change these values, but it has potential consequences - they are only
1093    /// used during ID selection, so any existing extended attributes keep their IDs, which means
1094    /// any past or present selected range here could potentially have used attributes unless they
1095    /// are explicitly migrated, which isn't currently done.
1096    pub const XATTR_RANGE_START: Self = Self(64);
1097    pub const XATTR_RANGE_END: Self = Self(512);
1098
1099    /// A semantic alias for the `0` attribute ID, indicating that it is being used as a starting
1100    /// point to iterate over all attributes rather than specifically looking up the primary data
1101    /// attribute [`AttributeId::DATA`].
1102    pub const SORTED_START: Self = Self(0);
1103
1104    /// An attribute ID to use in tests when no particular ID is necessary.
1105    #[cfg(test)]
1106    pub const TEST_ID: Self = Self(u64::MAX - 1000);
1107
1108    pub const fn raw(self) -> u64 {
1109        self.0
1110    }
1111
1112    /// Returns the current id + 1.
1113    pub const fn next(self) -> Self {
1114        Self(self.0 + 1)
1115    }
1116
1117    /// Returns true if the attribute ID is within the range of extended attributes.
1118    pub const fn is_xattr(self) -> bool {
1119        self.0 >= Self::XATTR_RANGE_START.0 && self.0 < Self::XATTR_RANGE_END.0
1120    }
1121}
1122
1123impl log::kv::ToValue for AttributeId {
1124    fn to_value(&self) -> log::kv::Value<'_> {
1125        log::kv::Value::from(self.0)
1126    }
1127}
1128
1129impl std::fmt::Display for AttributeId {
1130    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1131        std::fmt::Display::fmt(&self.0, f)
1132    }
1133}
1134
#[cfg(test)]
mod tests {
    use super::{AttributeId, ObjectKey, ObjectKeyV54, TimestampV49};
    use crate::lsm_tree::types::{FuzzyHash as _, LayerKey};
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    // Smoke test guarding the stability of the hashes produced for Fxfs object keys.
    // A failure here means the hash algorithm changed, which is not acceptable: Fxfs relies on
    // stable hash values, and existing images will appear to be corrupt if they change (see
    // https://fxbug.dev/419133532).
    #[test]
    fn test_hash_stability() {
        // This deliberately targets one specific version of ObjectKey.  If you want to delete
        // ObjectKeyV54, point this test at a later key version and re-generate the hashes.
        let object_hashes: Vec<_> = ObjectKeyV54::object(100).fuzzy_hash().collect();
        assert_eq!(object_hashes, [11885326717398844384]);

        let extent_hashes: Vec<_> =
            ObjectKeyV54::extent(1, AttributeId::DATA, 0..2 * 1024 * 1024).fuzzy_hash().collect();
        assert_eq!(extent_hashes, [11090579907097549012, 2814892992701560424]);
    }

    #[test]
    fn test_next_key() {
        let after_extent = ObjectKey::extent(1, AttributeId::TEST_ID, 25..100).next_key();
        assert_eq!(after_extent, Some(ObjectKey::extent(1, AttributeId::TEST_ID, 0..101)));

        assert!(ObjectKey::object(100).next_key().is_none());
    }

    #[test]
    fn test_range_key() {
        const ATTR_ID: AttributeId = AttributeId::TEST_ID;
        // Extent keys must be reported as range keys (so they can't be used with point queries),
        // while other object keys must remain usable with point queries.
        assert!(ObjectKey::extent(1, ATTR_ID, 0..2 * 1024 * 1024).is_range_key());
        assert!(!ObjectKey::object(100).is_range_key());

        // (left key, right key, whether `left.overlaps(&right)` is expected to hold)
        let cases = [
            (ObjectKey::object(1), ObjectKey::object(1), true),
            (ObjectKey::object(1), ObjectKey::object(2), false),
            (ObjectKey::extent(1, ATTR_ID, 0..100), ObjectKey::object(1), false),
            (ObjectKey::object(1), ObjectKey::extent(1, ATTR_ID, 0..100), false),
            // Same range, different object.
            (ObjectKey::extent(1, ATTR_ID, 0..100), ObjectKey::extent(2, ATTR_ID, 0..100), false),
            // Same range, different attribute.
            (
                ObjectKey::extent(1, ATTR_ID, 0..100),
                ObjectKey::extent(1, ATTR_ID.next(), 0..100),
                false,
            ),
            // Identical extents.
            (ObjectKey::extent(1, ATTR_ID, 0..100), ObjectKey::extent(1, ATTR_ID, 0..100), true),
            // Ranges sharing a single unit overlap, in either order.
            (ObjectKey::extent(1, ATTR_ID, 0..50), ObjectKey::extent(1, ATTR_ID, 49..100), true),
            (ObjectKey::extent(1, ATTR_ID, 49..100), ObjectKey::extent(1, ATTR_ID, 0..50), true),
            // Ranges that merely touch do not overlap, in either order.
            (ObjectKey::extent(1, ATTR_ID, 0..50), ObjectKey::extent(1, ATTR_ID, 50..100), false),
            (ObjectKey::extent(1, ATTR_ID, 50..100), ObjectKey::extent(1, ATTR_ID, 0..50), false),
        ];
        for (left, right, expected) in cases {
            assert_eq!(left.overlaps(&right), expected, "{left:?}.overlaps({right:?})");
        }
    }

    #[test]
    fn test_timestamp() {
        // Converts `std_time` to a timestamp and checks that every accessor agrees with the
        // original duration, including the conversion back.
        fn compare_time(std_time: Duration) {
            let ts_time: TimestampV49 = std_time.into();
            let round_trip: Duration = ts_time.into();
            assert_eq!(round_trip, std_time);
            assert_eq!(ts_time.subsec_nanos(), std_time.subsec_nanos());
            assert_eq!(ts_time.as_secs(), std_time.as_secs());
            assert_eq!(ts_time.as_nanos() as u128, std_time.as_nanos());
        }
        let now = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        for sample in [Duration::from_nanos(0), Duration::from_nanos(u64::MAX), now] {
            compare_time(sample);
        }

        // Durations that need more than u64::MAX nanoseconds must end up as u64::MAX.
        let oversized = [
            Duration::from_secs(u64::MAX - 1),
            Duration::from_nanos(u64::MAX) + Duration::from_nanos(1),
        ];
        for duration in oversized {
            let ts: TimestampV49 = duration.into();
            assert_eq!(ts.nanos, u64::MAX);
        }
    }
}