Skip to main content

fxfs/object_store/
object_record.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod legacy;
6
7pub use legacy::*;
8
9// TODO(https://fxbug.dev/42178223): need validation after deserialization.
10use crate::checksum::Checksums;
11use crate::log::error;
12use crate::lsm_tree::types::{
13    FuzzyHash, Item, ItemRef, LayerKey, LegacyItem, MergeType, OrdLowerBound, OrdUpperBound,
14    SortByU64, Value,
15};
16use crate::object_store::extent_record::{
17    ExtentKey, ExtentKeyPartitionIterator, ExtentKeyV32, ExtentValue, ExtentValueV38,
18};
19use crate::serialized_types::{Migrate, Versioned, migrate_nodefault, migrate_to_version};
20use fprint::TypeFingerprint;
21use fxfs_crypto::{WrappedKey, WrappingKeyId};
22use fxfs_macros::SerializeKey;
23use fxfs_unicode::CasefoldString;
24use serde::{Deserialize, Serialize};
25use std::collections::BTreeMap;
26use std::default::Default;
27use std::hash::Hash;
28use std::time::{Duration, SystemTime, UNIX_EPOCH};
29
/// ObjectDescriptor is the set of possible records in the object store.
pub type ObjectDescriptor = ObjectDescriptorV32;

/// Coarse classification of an object.  `ChildValueV32::object_descriptor` stores one of these to
/// describe the type of a directory child.
///
/// NOTE(review): this enum is serialized; presumably the variant order is part of the on-disk
/// format, so variants should not be reordered -- confirm against the versioning rules.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectDescriptorV32 {
    /// A file (in the generic sense; i.e. an object with some attributes).
    File,
    /// A directory (in the generic sense; i.e. an object with children).
    Directory,
    /// A volume, which is the root of a distinct object store containing Files and Directories.
    Volume,
    /// A symbolic link.
    Symlink,
}
45
/// For specifying what property of the project is being addressed.
pub type ProjectProperty = ProjectPropertyV32;

/// Selects which per-project record an `ObjectKeyData::Project` key addresses.
///
/// `Ord` is derived, so `Limit` sorts before `Usage`; within `ObjectKeyData::Project` the
/// `project_id` field is compared first, which keeps both records of a project adjacent.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    Ord,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ProjectPropertyV32 {
    /// The configured limit for the project.
    Limit,
    /// The currently tracked usage for the project.
    Usage,
}
69
/// ObjectKeyData is the type-specific part of an `ObjectKey` (see `ObjectKeyV54::data`).
pub type ObjectKeyData = ObjectKeyDataV54;

/// The kind of record an `ObjectKey` refers to, together with any kind-specific key fields.
///
/// `Ord` is derived, so keys of different kinds sort in variant declaration order and keys of
/// the same kind sort by their fields in declaration order.  The ordering is relied upon (see
/// the note on `Object`), so variants must not be reordered.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    PartialEq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKeyDataV54 {
    /// A generic, untyped object.  This must come first and sort before all other keys for a given
    /// object because it's also used as a tombstone and it needs to merge with all following keys.
    Object,
    /// Encryption keys for an object.
    Keys,
    /// An attribute associated with an object.  It has a 64-bit ID.  The second field selects
    /// between the attribute record itself and one of its extents.
    Attribute(u64, AttributeKeyV32),
    /// A child of a directory.
    Child { name: String },
    /// A graveyard entry for an entire object.
    GraveyardEntry { object_id: u64 },
    /// Project ID info. This should only be attached to the volume's root node. Used to address the
    /// configured limit and the usage tracking which are ordered after the `project_id` to provide
    /// locality of the two related values.
    Project { project_id: u64, property: ProjectPropertyV32 },
    /// An extended attribute associated with an object. It stores the name used for the extended
    /// attribute, which has a maximum size of 255 bytes enforced by fuchsia.io.
    ExtendedAttribute {
        #[serde(with = "crate::zerocopy_serialization")]
        name: Vec<u8>,
    },
    /// A graveyard entry for an attribute.
    GraveyardAttributeEntry { object_id: u64, attribute_id: u64 },
    /// A child of an encrypted directory.  We store the filename in its encrypted form.  hash_code
    /// is the hash of the casefolded human-readable name if a directory is also casefolded.  In
    /// some legacy cases, this is also used in non-casefolded cases, and in some of those cases the
    /// hash code can be 0.  Going forward, these cases are covered by `EncryptedChild` below.
    EncryptedCasefoldChild(EncryptedCasefoldChild),
    /// Case-insensitive child (legacy).
    LegacyCasefoldChild(CasefoldString),
    /// An encrypted child that does not use case folding.
    EncryptedChild(EncryptedChild),
    /// A child of a directory that uses the casefold feature.
    /// (i.e. case insensitive, case preserving names)
    /// `ObjectKey::child` computes `hash_code` from the casefolded name and stores `name` as
    /// given.
    CasefoldChild { hash_code: u32, name: String },
}
123
/// Payload of `ObjectKeyData::EncryptedCasefoldChild`: an encrypted child name prefixed with a
/// hash code.
///
/// `Ord` is derived with `hash_code` declared first, so entries sort by hash code and only then
/// by the encrypted name bytes.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    PartialEq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptedCasefoldChild {
    /// Hash code supplied by the caller of `ObjectKey::encrypted_child`.
    pub hash_code: u32,
    /// The child's name in its encrypted form.
    #[serde(with = "crate::zerocopy_serialization")]
    pub name: Vec<u8>,
}
143
/// Payload of `ObjectKeyData::EncryptedChild`: the encrypted name of a child that does not use
/// case folding.  `ObjectKey::encrypted_child` produces this when no hash code is supplied.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    PartialEq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptedChild(#[serde(with = "crate::zerocopy_serialization")] pub Vec<u8>);
159
/// AttributeKey distinguishes an attribute's own record from the records of its extents.
pub type AttributeKey = AttributeKeyV32;

/// Second component of `ObjectKeyData::Attribute`.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    Ord,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum AttributeKeyV32 {
    // Order here is important: code expects Attribute to precede Extent.
    /// The attribute record itself.
    Attribute,
    /// One extent of the attribute, identified by its extent key.
    Extent(ExtentKeyV32),
}
181
/// ObjectKey is a key in the object store.
pub type ObjectKey = ObjectKeyV54;

/// A key in the object store: the ID of the object plus the kind of record being addressed.
///
/// `Ord` is derived with `object_id` declared first, so all records of one object are contiguous
/// and are ordered among themselves by `ObjectKeyDataV54`'s ordering.
#[derive(
    Clone,
    Debug,
    Eq,
    Ord,
    Hash,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    SerializeKey,
    TypeFingerprint,
    Versioned,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ObjectKeyV54 {
    /// The ID of the object referred to.
    pub object_id: u64,
    /// The type and data of the key.
    pub data: ObjectKeyDataV54,
}
206
207impl SortByU64 for ObjectKey {
208    fn get_leading_u64(&self) -> u64 {
209        self.object_id
210    }
211}
212
213impl ObjectKey {
214    /// Creates a generic ObjectKey.
215    pub fn object(object_id: u64) -> Self {
216        Self { object_id: object_id, data: ObjectKeyData::Object }
217    }
218
219    /// Creates an ObjectKey for encryption keys.
220    pub fn keys(object_id: u64) -> Self {
221        Self { object_id, data: ObjectKeyData::Keys }
222    }
223
224    /// Creates an ObjectKey for an attribute.
225    pub fn attribute(object_id: u64, attribute_id: u64, key: AttributeKey) -> Self {
226        Self { object_id, data: ObjectKeyData::Attribute(attribute_id, key) }
227    }
228
229    /// Creates an ObjectKey for an extent.
230    pub fn extent(object_id: u64, attribute_id: u64, range: std::ops::Range<u64>) -> Self {
231        Self {
232            object_id,
233            data: ObjectKeyData::Attribute(
234                attribute_id,
235                AttributeKey::Extent(ExtentKey::new(range)),
236            ),
237        }
238    }
239
240    /// Creates an ObjectKey from an extent.
241    pub fn from_extent(object_id: u64, attribute_id: u64, extent: ExtentKey) -> Self {
242        Self {
243            object_id,
244            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(extent)),
245        }
246    }
247
248    /// Creates an ObjectKey for a child.
249    pub fn child(object_id: u64, name: &str, dir_type: DirType) -> Self {
250        match dir_type {
251            DirType::Casefold => {
252                let casefolded_name: String = fxfs_unicode::casefold(name.chars()).collect();
253                let hash_code = fscrypt::direntry::tea_hash_filename(casefolded_name.as_bytes());
254                Self {
255                    object_id,
256                    data: ObjectKeyData::CasefoldChild { hash_code, name: name.into() },
257                }
258            }
259            DirType::LegacyCasefold => Self {
260                object_id,
261                data: ObjectKeyData::LegacyCasefoldChild(CasefoldString::new(name.into())),
262            },
263            DirType::Normal => Self { object_id, data: ObjectKeyData::Child { name: name.into() } },
264            DirType::Encrypted(_) | DirType::EncryptedCasefold(_) => {
265                // These shouldn't be used directly; encrypted_child should be used instead.
266                panic!("Encrypted modes require an encrypted name");
267            }
268        }
269    }
270
271    /// Creates an ObjectKey for an encrypted child.
272    ///
273    /// The hash_code is important here -- especially for fscrypt as it affects the
274    /// name of locked files.
275    ///
276    /// For case-insensitive lookups in large encrypted directories, we lose the ability to binary
277    /// search for an entry of interest because encryption breaks our sort order. In these cases
278    /// we prefix records with a 32-bit hash based on the stable *casefolded* name. Hash collisions
279    /// aside, this lets us jump straight to the entry of interest, if it exists.
280    pub fn encrypted_child(object_id: u64, name: Vec<u8>, hash_code: Option<u32>) -> Self {
281        if let Some(hash_code) = hash_code {
282            Self {
283                object_id,
284                data: ObjectKeyData::EncryptedCasefoldChild(EncryptedCasefoldChild {
285                    hash_code,
286                    name,
287                }),
288            }
289        } else {
290            Self { object_id, data: ObjectKeyData::EncryptedChild(EncryptedChild(name)) }
291        }
292    }
293
294    /// Creates a graveyard entry for an object.
295    pub fn graveyard_entry(graveyard_object_id: u64, object_id: u64) -> Self {
296        Self { object_id: graveyard_object_id, data: ObjectKeyData::GraveyardEntry { object_id } }
297    }
298
299    /// Creates a graveyard entry for an attribute.
300    pub fn graveyard_attribute_entry(
301        graveyard_object_id: u64,
302        object_id: u64,
303        attribute_id: u64,
304    ) -> Self {
305        Self {
306            object_id: graveyard_object_id,
307            data: ObjectKeyData::GraveyardAttributeEntry { object_id, attribute_id },
308        }
309    }
310
311    /// Creates an ObjectKey for a ProjectLimit entry.
312    pub fn project_limit(object_id: u64, project_id: u64) -> Self {
313        Self {
314            object_id,
315            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Limit },
316        }
317    }
318
319    /// Creates an ObjectKey for a ProjectUsage entry.
320    pub fn project_usage(object_id: u64, project_id: u64) -> Self {
321        Self {
322            object_id,
323            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Usage },
324        }
325    }
326
327    pub fn extended_attribute(object_id: u64, name: Vec<u8>) -> Self {
328        Self { object_id, data: ObjectKeyData::ExtendedAttribute { name } }
329    }
330
331    /// Returns the merge key for this key; that is, a key which is <= this key and any
332    /// other possibly overlapping key, under Ord. This would be used for the hint in |merge_into|.
333    pub fn key_for_merge_into(&self) -> Self {
334        if let Self {
335            object_id,
336            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
337        } = self
338        {
339            Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.key_for_merge_into()))
340        } else {
341            self.clone()
342        }
343    }
344}
345
346impl OrdUpperBound for ObjectKey {
347    fn cmp_upper_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
348        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
349            (
350                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
351                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
352            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_upper_bound(right_extent)),
353            _ => self.data.cmp(&other.data),
354        })
355    }
356}
357
358impl OrdLowerBound for ObjectKey {
359    fn cmp_lower_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
360        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
361            (
362                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
363                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
364            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_lower_bound(right_extent)),
365            _ => self.data.cmp(&other.data),
366        })
367    }
368}
369
370impl LayerKey for ObjectKey {
371    fn merge_type(&self) -> MergeType {
372        // This listing is intentionally exhaustive to force folks to think about how certain
373        // subsets of the keyspace are merged.
374        match self.data {
375            ObjectKeyData::Object
376            | ObjectKeyData::Keys
377            | ObjectKeyData::Attribute(..)
378            | ObjectKeyData::Child { .. }
379            | ObjectKeyData::EncryptedChild(_)
380            | ObjectKeyData::EncryptedCasefoldChild(_)
381            | ObjectKeyData::CasefoldChild { .. }
382            | ObjectKeyData::LegacyCasefoldChild(_)
383            | ObjectKeyData::GraveyardEntry { .. }
384            | ObjectKeyData::GraveyardAttributeEntry { .. }
385            | ObjectKeyData::Project { property: ProjectProperty::Limit, .. }
386            | ObjectKeyData::ExtendedAttribute { .. } => MergeType::OptimizedMerge,
387            ObjectKeyData::Project { property: ProjectProperty::Usage, .. } => MergeType::FullMerge,
388        }
389    }
390
391    fn next_key(&self) -> Option<Self> {
392        match &self.data {
393            ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(ExtentKey { range })) => {
394                // This key comes before (or is equal to) any extent starting at or after the
395                // end of `self`. Searching for its `search_key` finds extents that end after
396                // the end of `self`.
397                Some(ObjectKey {
398                    object_id: self.object_id,
399                    data: ObjectKeyData::Attribute(
400                        *attr_id,
401                        AttributeKey::Extent(ExtentKey { range: 0..range.end + 1 }),
402                    ),
403                })
404            }
405            _ => None,
406        }
407    }
408
409    fn search_key(&self) -> Option<Self> {
410        if let Self {
411            object_id,
412            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
413        } = self
414        {
415            Some(Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.search_key())))
416        } else {
417            None
418        }
419    }
420
421    fn is_search_key(&self) -> bool {
422        match self {
423            Self { data: ObjectKeyData::Attribute(_, AttributeKey::Extent(e)), .. } => e.start == 0,
424            _ => true,
425        }
426    }
427
428    fn overlaps(&self, other: &Self) -> bool {
429        if self.object_id != other.object_id {
430            return false;
431        }
432        match (&self.data, &other.data) {
433            (
434                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_key)),
435                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_key)),
436            ) if *left_attr_id == *right_attr_id => {
437                left_key.range.end > right_key.range.start
438                    && left_key.range.start < right_key.range.end
439            }
440            (a, b) => a == b,
441        }
442    }
443}
444
/// Iterator over the fuzzy hashes of an `ObjectKey`, as returned by `ObjectKey::fuzzy_hash`.
pub enum ObjectKeyFuzzyHashIterator {
    /// For extent keys: yields one hash for every range produced by the partition iterator, each
    /// computed from an extent key built with the stored object and attribute IDs.
    ExtentKey(/* object_id */ u64, /* attribute_id */ u64, ExtentKeyPartitionIterator),
    /// For all other keys: yields the stored hash once, after which the slot is `None`.
    NotExtentKey(/* hash */ Option<u64>),
}
449
450impl Iterator for ObjectKeyFuzzyHashIterator {
451    type Item = u64;
452
453    fn next(&mut self) -> Option<Self::Item> {
454        match self {
455            Self::ExtentKey(oid, attr_id, extent_keys) => extent_keys.next().map(|range| {
456                let key = ObjectKey::extent(*oid, *attr_id, range);
457                crate::stable_hash::stable_hash(key)
458            }),
459            Self::NotExtentKey(hash) => hash.take(),
460        }
461    }
462}
463
464impl FuzzyHash for ObjectKey {
465    fn fuzzy_hash(&self) -> impl Iterator<Item = u64> {
466        match &self.data {
467            ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent)) => {
468                ObjectKeyFuzzyHashIterator::ExtentKey(
469                    self.object_id,
470                    *attr_id,
471                    extent.fuzzy_hash_partition(),
472                )
473            }
474            _ => {
475                let hash = crate::stable_hash::stable_hash(self);
476                ObjectKeyFuzzyHashIterator::NotExtentKey(Some(hash))
477            }
478        }
479    }
480
481    fn is_range_key(&self) -> bool {
482        match &self.data {
483            ObjectKeyData::Attribute(_, AttributeKey::Extent(_)) => true,
484            _ => false,
485        }
486    }
487}
488
/// UNIX epoch based timestamp in the UTC timezone.
pub type Timestamp = TimestampV49;

/// A timestamp stored as a single count of nanoseconds since the Unix epoch.
///
/// The derived `Default` is the epoch itself (zero nanoseconds), and the derived ordering is
/// chronological because it compares the nanosecond count.
#[derive(
    Copy,
    Clone,
    Debug,
    Default,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct TimestampV49 {
    /// Nanoseconds since the Unix epoch.  `Timestamp::from_secs_and_nanos` stores `u64::MAX`
    /// when the requested time does not fit.
    nanos: u64,
}
509
510impl Timestamp {
511    const NSEC_PER_SEC: u64 = 1_000_000_000;
512
513    pub fn now() -> Self {
514        SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO).into()
515    }
516
517    pub const fn zero() -> Self {
518        Self { nanos: 0 }
519    }
520
521    pub const fn from_nanos(nanos: u64) -> Self {
522        Self { nanos }
523    }
524
525    pub fn from_secs_and_nanos(secs: u64, nanos: u32) -> Self {
526        let Some(secs_in_nanos) = secs.checked_mul(Self::NSEC_PER_SEC) else {
527            error!("Fxfs doesn't support dates past 2554-07-21");
528            return Self { nanos: u64::MAX };
529        };
530        let Some(nanos) = secs_in_nanos.checked_add(nanos as u64) else {
531            error!("Fxfs doesn't support dates past 2554-07-21");
532            return Self { nanos: u64::MAX };
533        };
534        Self { nanos }
535    }
536
537    /// Returns the total number of nanoseconds represented by this `Timestamp` since the Unix
538    /// epoch.
539    pub fn as_nanos(&self) -> u64 {
540        self.nanos
541    }
542
543    /// Returns the fractional nanoseconds represented by this `Timestamp`.
544    pub fn subsec_nanos(&self) -> u32 {
545        (self.nanos % Self::NSEC_PER_SEC) as u32
546    }
547
548    /// Returns the total number of whole seconds represented by this `Timestamp` since the Unix
549    /// epoch.
550    pub fn as_secs(&self) -> u64 {
551        self.nanos / Self::NSEC_PER_SEC
552    }
553}
554
555impl From<std::time::Duration> for Timestamp {
556    fn from(duration: std::time::Duration) -> Self {
557        Self::from_secs_and_nanos(duration.as_secs(), duration.subsec_nanos())
558    }
559}
560
561impl From<Timestamp> for std::time::Duration {
562    fn from(timestamp: Timestamp) -> std::time::Duration {
563        Duration::from_nanos(timestamp.nanos)
564    }
565}
566
/// ObjectKind is the kind-specific payload of an `ObjectValue::Object` record.
pub type ObjectKind = ObjectKindV54;

/// The type of a directory: whether it is encrypted and whether its names are casefolded.
/// Stored in `ObjectKindV54::Directory`.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum DirType {
    /// Neither encrypted nor casefolded.  This is the `Default`.
    Normal,
    /// Encrypted, not casefolded.  Carries the wrapping key ID.
    Encrypted(WrappingKeyId),
    /// Legacy casefolded mode.
    LegacyCasefold,
    /// Casefolded (case insensitive, case preserving names), not encrypted.
    Casefold,
    /// Both encrypted and casefolded.  Carries the wrapping key ID.
    EncryptedCasefold(WrappingKeyId),
}
579
580impl DirType {
581    pub fn is_casefold(&self) -> bool {
582        matches!(self, DirType::LegacyCasefold | DirType::Casefold | DirType::EncryptedCasefold(_))
583    }
584
585    pub fn is_encrypted(&self) -> bool {
586        matches!(self, DirType::Encrypted(_) | DirType::EncryptedCasefold(_))
587    }
588
589    pub fn with_encryption(self, id: WrappingKeyId) -> Self {
590        match self {
591            DirType::Normal => DirType::Encrypted(id),
592            DirType::Casefold => DirType::EncryptedCasefold(id),
593            _ => self,
594        }
595    }
596
597    pub fn with_casefold(self, val: bool) -> Self {
598        match (val, self) {
599            (true, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
600                DirType::EncryptedCasefold(id)
601            }
602            (true, _) => DirType::Casefold,
603            (false, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
604                DirType::Encrypted(id)
605            }
606            (false, _) => DirType::Normal,
607        }
608    }
609
610    pub fn wrapping_key_id(&self) -> Option<WrappingKeyId> {
611        match self {
612            DirType::Encrypted(id) | DirType::EncryptedCasefold(id) => Some(*id),
613            _ => None,
614        }
615    }
616}
617
618impl Default for DirType {
619    fn default() -> Self {
620        DirType::Normal
621    }
622}
623
/// The kind of an object together with its kind-specific state.  This is the `kind` field of
/// `ObjectValueV54::Object`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKindV54 {
    File {
        /// The number of references to this file.
        refs: u64,
    },
    Directory {
        /// The number of sub-directories in this directory.
        sub_dirs: u64,
        /// The type of directory (encryption, casefolding, etc.)
        dir_type: DirType,
    },
    /// A graveyard object; graveyard entry keys are filed under its object ID (see
    /// `ObjectKey::graveyard_entry`).
    Graveyard,
    Symlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
    EncryptedSymlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
        /// same encryption scheme as the one used to encrypt filenames.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
}
657
/// Earlier version of `ObjectKind`, used by `ObjectValueV50`.  It differs from `ObjectKindV54` in
/// the `Directory` variant, which records encryption and casefolding as two separate fields
/// rather than as a single `DirType`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKindV49 {
    File {
        /// The number of references to this file.
        refs: u64,
    },
    Directory {
        /// The number of sub-directories in this directory.
        sub_dirs: u64,
        /// If set, contains the wrapping key id used to encrypt the file contents and filenames in
        /// this directory.
        wrapping_key_id: Option<WrappingKeyId>,
        /// If true, all files and sub-directories created in this directory will support case
        /// insensitive (but case-preserving) file naming.
        casefold: bool,
    },
    /// A graveyard object.
    Graveyard,
    Symlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
    EncryptedSymlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
        /// same encryption scheme as the one used to encrypt filenames.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
}
695
/// This consists of POSIX attributes that are not used in Fxfs but it may be meaningful to some
/// clients to have the ability to set and retrieve these values.
pub type PosixAttributes = PosixAttributesV32;

/// POSIX mode, ownership and device information, stored as an optional part of
/// `ObjectAttributesV49`.  The derived `Default` zeroes every field.
#[derive(Clone, Debug, Copy, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct PosixAttributesV32 {
    /// The mode bits associated with this object
    pub mode: u32,
    /// User ID of owner
    pub uid: u32,
    /// Group ID of owner
    pub gid: u32,
    /// Device ID
    pub rdev: u64,
}
712
/// Object-level attributes.  Note that these are not the same as "attributes" in the
/// ObjectValue::Attribute sense, which refers to an arbitrary data payload associated with an
/// object.  This naming collision is unfortunate.
pub type ObjectAttributes = ObjectAttributesV49;

/// Metadata kept for every object, stored as the `attributes` field of `ObjectValue::Object`.
/// The derived `Default` sets every timestamp to the Unix epoch, every counter to zero and
/// `posix_attributes` to `None`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ObjectAttributesV49 {
    /// The timestamp at which the object was created (i.e. crtime).
    pub creation_time: TimestampV49,
    /// The timestamp at which the object's data was last modified (i.e. mtime).
    pub modification_time: TimestampV49,
    /// The project id to associate this object's resource usage with. Zero means none.
    pub project_id: u64,
    /// Mode, uid, gid, and rdev
    pub posix_attributes: Option<PosixAttributesV32>,
    /// The number of bytes allocated to all extents across all attributes for this object.
    pub allocated_size: u64,
    /// The timestamp at which the object was last read (i.e. atime).
    pub access_time: TimestampV49,
    /// The timestamp at which the object's status was last modified (i.e. ctime).
    pub change_time: TimestampV49,
}
736
/// ExtendedAttributeValue is the payload of an `ObjectValue::ExtendedAttribute` record.
pub type ExtendedAttributeValue = ExtendedAttributeValueV32;

/// The value of an extended attribute: either the bytes themselves or the ID of the attribute
/// that holds them.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ExtendedAttributeValueV32 {
    /// The extended attribute value is stored directly in this object. If the value is above a
    /// certain size, it should be stored as an attribute with extents instead.
    Inline(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
    /// The extended attribute value is stored as an attribute with extents. The attribute id
    /// should be chosen to be within the range of 64-512.
    AttributeId(u64),
}
749
/// Id and descriptor for a child entry.
pub type ChildValue = ChildValueV32;

/// The payload of an `ObjectValue::Child` record: which object a directory entry refers to and
/// what type of object that is.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ChildValueV32 {
    /// The ID of the child object.
    pub object_id: u64,
    /// Describes the type of the child.
    pub object_descriptor: ObjectDescriptorV32,
}
761
/// RootDigest is the root hash carried by `FsverityMetadataV50::Internal`.
pub type RootDigest = RootDigestV33;

/// A root hash tagged with the algorithm that produced it.
#[derive(
    Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize, TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum RootDigestV33 {
    /// A SHA-256 digest, stored as a fixed 32-byte array.
    Sha256([u8; 32]),
    /// A SHA-512 digest, stored as a byte vector; the type itself does not enforce a length.
    Sha512(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
}
772
/// FsverityMetadata is the verification metadata carried by `ObjectValue::VerifiedAttribute`.
pub type FsverityMetadata = FsverityMetadataV50;

/// Where the fsverity root hash and salt for a verified attribute are kept.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum FsverityMetadataV50 {
    /// The root hash and salt.
    Internal(RootDigestV33, #[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
    /// The root hash and salt are in a descriptor inside the merkle attribute.
    // NOTE(review): the range presumably locates that descriptor within the merkle attribute --
    // confirm against the code that reads it.
    F2fs(std::ops::Range<u64>),
}
783
/// EncryptionKey is a single encryption key as stored in `EncryptionKeys`; the type itself comes
/// from `fxfs_crypto`.
pub type EncryptionKey = EncryptionKeyV49;
pub type EncryptionKeyV49 = fxfs_crypto::EncryptionKey;

/// EncryptionKeys is the payload of an `ObjectValue::Keys` record.
pub type EncryptionKeys = EncryptionKeysV49;

/// The encryption keys of an object, held as a list of `(key ID, key)` pairs.
///
/// Because this is a plain list, lookups scan it from the front and nothing in the type prevents
/// two entries from sharing a key ID.
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptionKeysV49(Vec<(u64, EncryptionKeyV49)>);
792
793impl EncryptionKeys {
794    pub fn get(&self, id: u64) -> Option<&EncryptionKey> {
795        self.0.iter().find_map(|(i, key)| (*i == id).then_some(key))
796    }
797
798    pub fn insert(&mut self, id: u64, key: EncryptionKey) {
799        self.0.push((id, key))
800    }
801
802    pub fn remove(&mut self, id: u64) -> Option<EncryptionKey> {
803        if let Some(ix) = self.0.iter().position(|(k, _)| *k == id) {
804            Some(self.0.remove(ix).1)
805        } else {
806            None
807        }
808    }
809}
810
811impl From<EncryptionKeys> for BTreeMap<u64, WrappedKey> {
812    fn from(keys: EncryptionKeys) -> Self {
813        keys.0.into_iter().map(|(id, key)| (id, key.into())).collect()
814    }
815}
816
817impl From<Vec<(u64, EncryptionKey)>> for EncryptionKeys {
818    fn from(value: Vec<(u64, EncryptionKey)>) -> Self {
819        Self(value)
820    }
821}
822
823impl std::ops::Deref for EncryptionKeys {
824    type Target = Vec<(u64, EncryptionKey)>;
825    fn deref(&self) -> &Self::Target {
826        &self.0
827    }
828}
829
/// ObjectValue is the value of an item in the object store.
/// Note that the tree stores deltas on objects, so these values describe deltas. Unless specified
/// otherwise, a value indicates an insert/replace mutation.
pub type ObjectValue = ObjectValueV54;
impl Value for ObjectValue {
    /// `ObjectValue::None` is the value that marks an item as deleted.
    const DELETED_MARKER: Self = Self::None;
}
837
/// The value stored against an `ObjectKey`.  Which variant is meaningful depends on the kind of
/// key; `None` doubles as the deletion marker (it is `Value::DELETED_MARKER` for this type).
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectValueV54 {
    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
    /// value are always filtered when a major compaction is performed, so the meaning must be the
    /// same as if the item was not present.
    None,
    /// Some keys have no value but need to differentiate between a present value and no value
    /// (None) i.e. their value is really a boolean: None => false, Some => true.
    Some,
    /// The value for an ObjectKey::Object record.
    Object { kind: ObjectKindV54, attributes: ObjectAttributesV49 },
    /// Specifies encryption keys to use for an object.
    Keys(EncryptionKeysV49),
    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
    Attribute { size: u64, has_overwrite_extents: bool },
    /// An extent associated with an object.
    Extent(ExtentValueV38),
    /// A child of an object.
    Child(ChildValue),
    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
    /// the bounds of a single transaction.
    Trim,
    /// Added to support tracking Project ID usage and limits.
    BytesAndNodes { bytes: i64, nodes: i64 },
    /// A value for an extended attribute. Either inline or a redirection to an attribute with
    /// extents.
    ExtendedAttribute(ExtendedAttributeValueV32),
    /// An attribute associated with a verified file object. |size| is the size of the attribute
    /// in bytes.
    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
}
871
/// Version 50 of the object store's value type.
///
/// The `Migrate` derive together with `#[migrate_to_version(ObjectValueV54)]` declares
/// `ObjectValueV54` as the version this type is migrated to.  Compared with `ObjectValueV54`, the
/// `Object` variant here carries an `ObjectKindV49` and the `Child` variant is spelled with
/// `ChildValueV32`.
#[derive(Migrate, Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[migrate_to_version(ObjectValueV54)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectValueV50 {
    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
    /// value are always filtered when a major compaction is performed, so the meaning must be the
    /// same as if the item was not present.
    None,
    /// Some keys have no value but need to differentiate between a present value and no value
    /// (None) i.e. their value is really a boolean: None => false, Some => true.
    Some,
    /// The value for an ObjectKey::Object record.
    Object { kind: ObjectKindV49, attributes: ObjectAttributesV49 },
    /// Specifies encryption keys to use for an object.
    Keys(EncryptionKeysV49),
    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
    Attribute { size: u64, has_overwrite_extents: bool },
    /// An extent associated with an object.
    Extent(ExtentValueV38),
    /// A child of an object.
    Child(ChildValueV32),
    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
    /// the bounds of a single transaction.
    Trim,
    /// Added to support tracking Project ID usage and limits.
    BytesAndNodes { bytes: i64, nodes: i64 },
    /// A value for an extended attribute. Either inline or a redirection to an attribute with
    /// extents.
    ExtendedAttribute(ExtendedAttributeValueV32),
    /// An attribute associated with a verified file object. |size| is the size of the attribute
    /// in bytes.
    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
}
906
907impl ObjectValue {
908    /// Creates an ObjectValue for a file object.
909    pub fn file(
910        refs: u64,
911        allocated_size: u64,
912        creation_time: Timestamp,
913        modification_time: Timestamp,
914        access_time: Timestamp,
915        change_time: Timestamp,
916        project_id: u64,
917        posix_attributes: Option<PosixAttributes>,
918    ) -> ObjectValue {
919        ObjectValue::Object {
920            kind: ObjectKind::File { refs },
921            attributes: ObjectAttributes {
922                creation_time,
923                modification_time,
924                project_id,
925                posix_attributes,
926                allocated_size,
927                access_time,
928                change_time,
929            },
930        }
931    }
932    pub fn keys(encryption_keys: EncryptionKeys) -> ObjectValue {
933        ObjectValue::Keys(encryption_keys)
934    }
935    /// Creates an ObjectValue for an object attribute.
936    pub fn attribute(size: u64, has_overwrite_extents: bool) -> ObjectValue {
937        ObjectValue::Attribute { size, has_overwrite_extents }
938    }
939    /// Creates an ObjectValue for an object attribute of a verified file.
940    pub fn verified_attribute(size: u64, fsverity_metadata: FsverityMetadata) -> ObjectValue {
941        ObjectValue::VerifiedAttribute { size, fsverity_metadata }
942    }
943    /// Creates an ObjectValue for an insertion/replacement of an object extent.
944    pub fn extent(device_offset: u64, key_id: u64) -> ObjectValue {
945        ObjectValue::Extent(ExtentValue::new_raw(device_offset, key_id))
946    }
947    /// Creates an ObjectValue for an insertion/replacement of an object extent.
948    pub fn extent_with_checksum(
949        device_offset: u64,
950        checksum: Checksums,
951        key_id: u64,
952    ) -> ObjectValue {
953        ObjectValue::Extent(ExtentValue::with_checksum(device_offset, checksum, key_id))
954    }
955    /// Creates an ObjectValue for a deletion of an object extent.
956    pub fn deleted_extent() -> ObjectValue {
957        ObjectValue::Extent(ExtentValue::deleted_extent())
958    }
959    /// Creates an ObjectValue for an object child.
960    pub fn child(object_id: u64, object_descriptor: ObjectDescriptor) -> ObjectValue {
961        ObjectValue::Child(ChildValue { object_id, object_descriptor })
962    }
963    /// Creates an ObjectValue for an object symlink.
964    pub fn symlink(
965        link: impl Into<Box<[u8]>>,
966        creation_time: Timestamp,
967        modification_time: Timestamp,
968        project_id: u64,
969    ) -> ObjectValue {
970        ObjectValue::Object {
971            kind: ObjectKind::Symlink { refs: 1, link: link.into() },
972            attributes: ObjectAttributes {
973                creation_time,
974                modification_time,
975                project_id,
976                ..Default::default()
977            },
978        }
979    }
980    /// Creates an ObjectValue for an encrypted symlink object.
981    pub fn encrypted_symlink(
982        link: impl Into<Box<[u8]>>,
983        creation_time: Timestamp,
984        modification_time: Timestamp,
985        project_id: u64,
986    ) -> ObjectValue {
987        ObjectValue::Object {
988            kind: ObjectKind::EncryptedSymlink { refs: 1, link: link.into() },
989            attributes: ObjectAttributes {
990                creation_time,
991                modification_time,
992                project_id,
993                ..Default::default()
994            },
995        }
996    }
997    pub fn inline_extended_attribute(value: impl Into<Vec<u8>>) -> ObjectValue {
998        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::Inline(value.into()))
999    }
1000    pub fn extended_attribute(attribute_id: u64) -> ObjectValue {
1001        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::AttributeId(attribute_id))
1002    }
1003}
1004
/// The item type for object store records; currently resolves to `ObjectItemV55`.
pub type ObjectItem = ObjectItemV55;

/// Version 54 item: `ObjectKeyV54` / `ObjectValueV54` stored in the `LegacyItem` container.
pub type ObjectItemV54 = LegacyItem<ObjectKeyV54, ObjectValueV54>;
/// Version 55 item: the same key and value types as `ObjectItemV54`, stored in `Item`.
pub type ObjectItemV55 = Item<ObjectKeyV54, ObjectValueV54>;
1009
1010impl From<ObjectItemV54> for ObjectItemV55 {
1011    fn from(item: ObjectItemV54) -> Self {
1012        Self { key: item.key, value: item.value }
1013    }
1014}
1015
/// Version 50 item: `ObjectKeyV43` / `ObjectValueV50` stored in the `LegacyItem` container.
pub type ObjectItemV50 = LegacyItem<ObjectKeyV43, ObjectValueV50>;
1017
1018impl ObjectItem {
1019    pub fn is_tombstone(&self) -> bool {
1020        matches!(
1021            self,
1022            Item {
1023                key: ObjectKey { data: ObjectKeyData::Object, .. },
1024                value: ObjectValue::None,
1025                ..
1026            }
1027        )
1028    }
1029}
1030
1031// If the given item describes an extent, unwraps it and returns the extent key/value.
1032impl<'a> From<ItemRef<'a, ObjectKey, ObjectValue>>
1033    for Option<(/*object-id*/ u64, /*attribute-id*/ u64, &'a ExtentKey, &'a ExtentValue)>
1034{
1035    fn from(item: ItemRef<'a, ObjectKey, ObjectValue>) -> Self {
1036        match item {
1037            ItemRef {
1038                key:
1039                    ObjectKey {
1040                        object_id,
1041                        data:
1042                            ObjectKeyData::Attribute(
1043                                attribute_id, //
1044                                AttributeKey::Extent(extent_key),
1045                            ),
1046                    },
1047                value: ObjectValue::Extent(extent_value),
1048                ..
1049            } => Some((*object_id, *attribute_id, extent_key, extent_value)),
1050            _ => None,
1051        }
1052    }
1053}
1054
/// The key type used by this module; currently resolves to `FxfsKeyV49`.
pub type FxfsKey = FxfsKeyV49;
/// Version 49 of the key type, re-exported from `fxfs_crypto::FxfsKey`.
pub type FxfsKeyV49 = fxfs_crypto::FxfsKey;
1057
#[cfg(test)]
mod tests {
    use super::{ObjectKey, ObjectKeyV54, TimestampV49};
    use crate::lsm_tree::types::{FuzzyHash as _, LayerKey};
    use std::ops::Add;
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    // Smoke test pinning the fuzzy-hash values of Fxfs object keys.
    // A failure here means the hash algorithm changed, which is not acceptable: Fxfs relies on
    // stable hash values and existing images will appear to be corrupt if they change (see
    // https://fxbug.dev/419133532).
    #[test]
    fn test_hash_stability() {
        // The test is tied to a specific version of ObjectKey.  To delete ObjectKeyV54, point
        // this test at a later key version and re-generate the expected hashes.
        let object_hashes = ObjectKeyV54::object(100).fuzzy_hash().collect::<Vec<_>>();
        assert_eq!(&object_hashes[..], &[11885326717398844384]);

        let extent_hashes =
            ObjectKeyV54::extent(1, 0, 0..2 * 1024 * 1024).fuzzy_hash().collect::<Vec<_>>();
        assert_eq!(&extent_hashes[..], &[11090579907097549012, 2814892992701560424]);
    }

    #[test]
    fn test_next_key() {
        // Extent keys have a successor key; plain object keys do not.
        let successor = ObjectKey::extent(1, 0, 25..100).next_key().unwrap();
        assert_eq!(successor, ObjectKey::extent(1, 0, 0..101));

        assert_eq!(ObjectKey::object(100).next_key(), None);
    }

    #[test]
    fn test_range_key() {
        // Extent keys must be reported as range keys so they cannot be used with point queries.
        // Other object keys should still be allowed with point queries.
        assert!(ObjectKey::extent(1, 0, 0..2 * 1024 * 1024).is_range_key());
        assert!(!ObjectKey::object(100).is_range_key());

        // Object keys overlap only with an identical object key.
        assert!(ObjectKey::object(1).overlaps(&ObjectKey::object(1)));
        assert!(!ObjectKey::object(1).overlaps(&ObjectKey::object(2)));

        // Object keys and extent keys never overlap, whichever side is the receiver.
        assert!(!ObjectKey::extent(1, 0, 0..100).overlaps(&ObjectKey::object(1)));
        assert!(!ObjectKey::object(1).overlaps(&ObjectKey::extent(1, 0, 0..100)));

        // Extents only overlap when both the object id and the attribute id agree.
        assert!(!ObjectKey::extent(1, 0, 0..100).overlaps(&ObjectKey::extent(2, 0, 0..100)));
        assert!(!ObjectKey::extent(1, 0, 0..100).overlaps(&ObjectKey::extent(1, 1, 0..100)));
        assert!(ObjectKey::extent(1, 0, 0..100).overlaps(&ObjectKey::extent(1, 0, 0..100)));

        // Ranges sharing at least one byte overlap, in either direction.
        assert!(ObjectKey::extent(1, 0, 0..50).overlaps(&ObjectKey::extent(1, 0, 49..100)));
        assert!(ObjectKey::extent(1, 0, 49..100).overlaps(&ObjectKey::extent(1, 0, 0..50)));

        // Ranges that merely touch (one ends where the other starts) do not overlap.
        assert!(!ObjectKey::extent(1, 0, 0..50).overlaps(&ObjectKey::extent(1, 0, 50..100)));
        assert!(!ObjectKey::extent(1, 0, 50..100).overlaps(&ObjectKey::extent(1, 0, 0..50)));
    }

    #[test]
    fn test_timestamp() {
        // Converts `expected` to a timestamp and checks that every accessor agrees with it.
        fn check_round_trip(expected: Duration) {
            let timestamp: TimestampV49 = expected.into();
            let round_tripped: Duration = timestamp.into();
            assert_eq!(round_tripped, expected);
            assert_eq!(timestamp.subsec_nanos(), expected.subsec_nanos());
            assert_eq!(timestamp.as_secs(), expected.as_secs());
            assert_eq!(timestamp.as_nanos() as u128, expected.as_nanos());
        }

        check_round_trip(Duration::from_nanos(0));
        check_round_trip(Duration::from_nanos(u64::MAX));
        check_round_trip(SystemTime::now().duration_since(UNIX_EPOCH).unwrap());

        // Durations too large for a u64 nanosecond count are expected to pin `nanos` at u64::MAX.
        let huge: TimestampV49 = Duration::from_secs(u64::MAX - 1).into();
        assert_eq!(huge.nanos, u64::MAX);

        let just_over: TimestampV49 =
            (Duration::from_nanos(u64::MAX).add(Duration::from_nanos(1))).into();
        assert_eq!(just_over.nanos, u64::MAX);
    }
}