Skip to main content

fxfs/object_store/
object_record.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod legacy;
6
7pub use legacy::*;
8
9// TODO(https://fxbug.dev/42178223): need validation after deserialization.
10use crate::checksum::Checksums;
11use crate::log::error;
12use crate::lsm_tree::types::{
13    FuzzyHash, Item, ItemRef, LayerKey, LegacyItem, MergeType, OrdLowerBound, OrdUpperBound,
14    SortByU64, Value,
15};
16use crate::object_store::ProjectId;
17use crate::object_store::extent::{Extent, ExtentPartitionIterator};
18use crate::object_store::extent_record::{ExtentValue, ExtentValueV38};
19use crate::serialized_types::{Migrate, Versioned, migrate_nodefault, migrate_to_version};
20use fprint::TypeFingerprint;
21use fxfs_crypto::{WrappedKey, WrappingKeyId};
22use fxfs_macros::SerializeKey;
23use fxfs_unicode::CasefoldString;
24use serde::{Deserialize, Serialize};
25use std::collections::BTreeMap;
26use std::default::Default;
27use std::hash::Hash;
28use std::time::{Duration, SystemTime, UNIX_EPOCH};
29
/// ObjectDescriptor is the set of possible records in the object store.
pub type ObjectDescriptor = ObjectDescriptorV32;

/// Versioned definition behind the [`ObjectDescriptor`] alias.  `ChildValueV32` stores one of
/// these to describe the type of a child.
// NOTE(review): this type is serialized; variant order is presumably part of the on-disk format,
// so new variants should be appended rather than inserted -- confirm.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectDescriptorV32 {
    /// A file (in the generic sense; i.e. an object with some attributes).
    File,
    /// A directory (in the generic sense; i.e. an object with children).
    Directory,
    /// A volume, which is the root of a distinct object store containing Files and Directories.
    Volume,
    /// A symbolic link.
    Symlink,
}
45
/// For specifying what property of the project is being addressed.
pub type ProjectProperty = ProjectPropertyV32;

/// Versioned definition behind the [`ProjectProperty`] alias.
///
/// Used as the `property` field of `ObjectKeyDataV54::Project`.  Because `Ord` is derived, `Limit`
/// sorts before `Usage` for the same project id.  `LayerKey::merge_type` treats the two
/// differently: `Limit` keys use `MergeType::OptimizedMerge`, `Usage` keys use
/// `MergeType::FullMerge`.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    Ord,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ProjectPropertyV32 {
    /// The configured limit for the project.
    Limit,
    /// The currently tracked usage for the project.
    Usage,
}
69
/// The type-specific part of an [`ObjectKey`]; see [`ObjectKeyDataV54`] for the variants.
pub type ObjectKeyData = ObjectKeyDataV54;

/// Versioned definition behind the [`ObjectKeyData`] alias.
///
/// `Ord` is derived, so variants sort in declaration order.  Do not reorder them: the `Object`
/// variant relies on sorting first (see its comment).
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    PartialEq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKeyDataV54 {
    /// A generic, untyped object.  This must come first and sort before all other keys for a given
    /// object because it's also used as a tombstone and it needs to merge with all following keys.
    Object,
    /// Encryption keys for an object.
    Keys,
    /// An attribute associated with an object.  It has a 64-bit ID.
    Attribute(AttributeId, AttributeKeyV32),
    /// A child of a directory.
    Child { name: String },
    /// A graveyard entry for an entire object.
    GraveyardEntry { object_id: u64 },
    /// Project ID info. This should only be attached to the volume's root node. Used to address the
    /// configured limit and the usage tracking which are ordered after the `project_id` to provide
    /// locality of the two related values.
    Project { project_id: ProjectId, property: ProjectPropertyV32 },
    /// An extended attribute associated with an object. It stores the name used for the extended
    /// attribute, which has a maximum size of 255 bytes enforced by fuchsia.io.
    ExtendedAttribute {
        #[serde(with = "crate::zerocopy_serialization")]
        name: Vec<u8>,
    },
    /// A graveyard entry for an attribute.
    GraveyardAttributeEntry { object_id: u64, attribute_id: AttributeId },
    /// A child of an encrypted directory.  We store the filename in its encrypted form.  hash_code
    /// is the hash of the casefolded human-readable name if a directory is also casefolded.  In
    /// some legacy cases, this is also used in non-casefolded cases, and in some of those cases the
    /// hash code can be 0.  Going forward, these cases are covered by `EncryptedChild` below.
    EncryptedCasefoldChild(EncryptedCasefoldChild),
    /// Case-insensitive child (legacy).
    LegacyCasefoldChild(CasefoldString),
    /// An encrypted child that does not use case folding.
    EncryptedChild(EncryptedChild),
    /// A child of a directory that uses the casefold feature.
    /// (i.e. case insensitive, case preserving names)
    CasefoldChild { hash_code: u32, name: String },
}
123
/// Payload of `ObjectKeyDataV54::EncryptedCasefoldChild`: a hash code plus the encrypted name.
///
/// `Ord` is derived, so entries sort by `hash_code` first and then by `name`.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    PartialEq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptedCasefoldChild {
    /// Hash code supplied by the caller of `ObjectKey::encrypted_child`.
    pub hash_code: u32,
    /// The name bytes supplied by the caller (the filename in its encrypted form).
    #[serde(with = "crate::zerocopy_serialization")]
    pub name: Vec<u8>,
}
143
/// Payload of `ObjectKeyDataV54::EncryptedChild`: the name bytes of an encrypted child that does
/// not use case folding.  `ObjectKey::encrypted_child` builds this when no hash code is given.
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    PartialEq,
    PartialOrd,
    Ord,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptedChild(#[serde(with = "crate::zerocopy_serialization")] pub Vec<u8>);
159
/// The sub-key of an `ObjectKeyData::Attribute` record; see [`AttributeKeyV32`].
pub type AttributeKey = AttributeKeyV32;

/// Versioned definition behind the [`AttributeKey`] alias.
///
/// Distinguishes the attribute record itself (`Attribute`) from the records for its extents
/// (`Extent`).
#[derive(
    Clone,
    Debug,
    Eq,
    Hash,
    Ord,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
    SerializeKey,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum AttributeKeyV32 {
    // Order here is important: code expects Attribute to precede Extent.
    // (`Ord` is derived, so declaration order is the sort order.)
    Attribute,
    Extent(Extent),
}
181
/// ObjectKey is a key in the object store.
pub type ObjectKey = ObjectKeyV54;

/// Versioned definition behind the [`ObjectKey`] alias.
///
/// `Ord` is derived, so keys sort by `object_id` first and then by `data`.  Extent keys are
/// additionally compared with the dedicated `OrdUpperBound` / `OrdLowerBound` implementations.
#[derive(
    Clone,
    Debug,
    Eq,
    Ord,
    Hash,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    SerializeKey,
    TypeFingerprint,
    Versioned,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ObjectKeyV54 {
    /// The ID of the object referred to.
    pub object_id: u64,
    /// The type and data of the key.
    pub data: ObjectKeyDataV54,
}
206
207impl SortByU64 for ObjectKey {
208    fn get_leading_u64(&self) -> u64 {
209        self.object_id
210    }
211}
212
213impl ObjectKey {
214    /// Creates a generic ObjectKey.
215    pub fn object(object_id: u64) -> Self {
216        Self { object_id: object_id, data: ObjectKeyData::Object }
217    }
218
219    /// Creates an ObjectKey for encryption keys.
220    pub fn keys(object_id: u64) -> Self {
221        Self { object_id, data: ObjectKeyData::Keys }
222    }
223
224    /// Creates an ObjectKey for an attribute.
225    pub fn attribute(object_id: u64, attribute_id: AttributeId, key: AttributeKey) -> Self {
226        Self { object_id, data: ObjectKeyData::Attribute(attribute_id, key) }
227    }
228
229    /// Creates an ObjectKey for an extent.
230    pub fn extent(object_id: u64, attribute_id: AttributeId, range: std::ops::Range<u64>) -> Self {
231        Self {
232            object_id,
233            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(Extent(range))),
234        }
235    }
236
237    /// Creates an ObjectKey from an extent.
238    pub fn from_extent(object_id: u64, attribute_id: AttributeId, extent: Extent) -> Self {
239        Self {
240            object_id,
241            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(extent)),
242        }
243    }
244
245    /// Creates an ObjectKey for a child.
246    pub fn child(object_id: u64, name: &str, dir_type: DirType) -> Self {
247        match dir_type {
248            DirType::Casefold => {
249                let casefolded_name: String = fxfs_unicode::casefold(name.chars()).collect();
250                let hash_code = fscrypt::direntry::tea_hash_filename(casefolded_name.as_bytes());
251                Self {
252                    object_id,
253                    data: ObjectKeyData::CasefoldChild { hash_code, name: name.into() },
254                }
255            }
256            DirType::LegacyCasefold => Self {
257                object_id,
258                data: ObjectKeyData::LegacyCasefoldChild(CasefoldString::new(name.into())),
259            },
260            DirType::Normal => Self { object_id, data: ObjectKeyData::Child { name: name.into() } },
261            DirType::Encrypted(_) | DirType::EncryptedCasefold(_) => {
262                // These shouldn't be used directly; encrypted_child should be used instead.
263                panic!("Encrypted modes require an encrypted name");
264            }
265        }
266    }
267
268    /// Creates an ObjectKey for an encrypted child.
269    ///
270    /// The hash_code is important here -- especially for fscrypt as it affects the
271    /// name of locked files.
272    ///
273    /// For case-insensitive lookups in large encrypted directories, we lose the ability to binary
274    /// search for an entry of interest because encryption breaks our sort order. In these cases
275    /// we prefix records with a 32-bit hash based on the stable *casefolded* name. Hash collisions
276    /// aside, this lets us jump straight to the entry of interest, if it exists.
277    pub fn encrypted_child(object_id: u64, name: Vec<u8>, hash_code: Option<u32>) -> Self {
278        if let Some(hash_code) = hash_code {
279            Self {
280                object_id,
281                data: ObjectKeyData::EncryptedCasefoldChild(EncryptedCasefoldChild {
282                    hash_code,
283                    name,
284                }),
285            }
286        } else {
287            Self { object_id, data: ObjectKeyData::EncryptedChild(EncryptedChild(name)) }
288        }
289    }
290
291    /// Creates a graveyard entry for an object.
292    pub fn graveyard_entry(graveyard_object_id: u64, object_id: u64) -> Self {
293        Self { object_id: graveyard_object_id, data: ObjectKeyData::GraveyardEntry { object_id } }
294    }
295
296    /// Creates a graveyard entry for an attribute.
297    pub fn graveyard_attribute_entry(
298        graveyard_object_id: u64,
299        object_id: u64,
300        attribute_id: AttributeId,
301    ) -> Self {
302        Self {
303            object_id: graveyard_object_id,
304            data: ObjectKeyData::GraveyardAttributeEntry { object_id, attribute_id },
305        }
306    }
307
308    /// Creates an ObjectKey for a ProjectLimit entry.
309    pub fn project_limit(object_id: u64, project_id: ProjectId) -> Self {
310        Self {
311            object_id,
312            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Limit },
313        }
314    }
315
316    /// Creates an ObjectKey for a ProjectUsage entry.
317    pub fn project_usage(object_id: u64, project_id: ProjectId) -> Self {
318        Self {
319            object_id,
320            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Usage },
321        }
322    }
323
324    pub fn extended_attribute(object_id: u64, name: Vec<u8>) -> Self {
325        Self { object_id, data: ObjectKeyData::ExtendedAttribute { name } }
326    }
327
328    /// Returns the merge key for this key; that is, a key which is <= this key and any
329    /// other possibly overlapping key, under Ord. This would be used for the hint in |merge_into|.
330    pub fn key_for_merge_into(&self) -> Self {
331        if let Self {
332            object_id,
333            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
334        } = self
335        {
336            Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.key_for_merge_into()))
337        } else {
338            self.clone()
339        }
340    }
341}
342
343impl OrdUpperBound for ObjectKey {
344    fn cmp_upper_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
345        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
346            (
347                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
348                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
349            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_upper_bound(right_extent)),
350            _ => self.data.cmp(&other.data),
351        })
352    }
353}
354
355impl OrdLowerBound for ObjectKey {
356    fn cmp_lower_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
357        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
358            (
359                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
360                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
361            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_lower_bound(right_extent)),
362            _ => self.data.cmp(&other.data),
363        })
364    }
365}
366
367impl LayerKey for ObjectKey {
368    fn merge_type(&self) -> MergeType {
369        // This listing is intentionally exhaustive to force folks to think about how certain
370        // subsets of the keyspace are merged.
371        match self.data {
372            ObjectKeyData::Object
373            | ObjectKeyData::Keys
374            | ObjectKeyData::Attribute(..)
375            | ObjectKeyData::Child { .. }
376            | ObjectKeyData::EncryptedChild(_)
377            | ObjectKeyData::EncryptedCasefoldChild(_)
378            | ObjectKeyData::CasefoldChild { .. }
379            | ObjectKeyData::LegacyCasefoldChild(_)
380            | ObjectKeyData::GraveyardEntry { .. }
381            | ObjectKeyData::GraveyardAttributeEntry { .. }
382            | ObjectKeyData::Project { property: ProjectProperty::Limit, .. }
383            | ObjectKeyData::ExtendedAttribute { .. } => MergeType::OptimizedMerge,
384            ObjectKeyData::Project { property: ProjectProperty::Usage, .. } => MergeType::FullMerge,
385        }
386    }
387
388    fn next_key(&self) -> Option<Self> {
389        match &self.data {
390            ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent)) => {
391                // This key comes before (or is equal to) any extent starting at or after the
392                // end of `self`. Searching for its `search_key` finds extents that end after
393                // the end of `self`.
394                Some(ObjectKey {
395                    object_id: self.object_id,
396                    data: ObjectKeyData::Attribute(
397                        *attr_id,
398                        AttributeKey::Extent(Extent(0..extent.end + 1)),
399                    ),
400                })
401            }
402            _ => None,
403        }
404    }
405
406    fn search_key(&self) -> Option<Self> {
407        if let Self {
408            object_id,
409            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
410        } = self
411        {
412            Some(Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.search_key())))
413        } else {
414            None
415        }
416    }
417
418    fn is_search_key(&self) -> bool {
419        match self {
420            Self { data: ObjectKeyData::Attribute(_, AttributeKey::Extent(e)), .. } => e.start == 0,
421            _ => true,
422        }
423    }
424
425    fn overlaps(&self, other: &Self) -> bool {
426        if self.object_id != other.object_id {
427            return false;
428        }
429        match (&self.data, &other.data) {
430            (
431                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_key)),
432                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_key)),
433            ) if *left_attr_id == *right_attr_id => {
434                left_key.end > right_key.start && left_key.start < right_key.end
435            }
436            (a, b) => a == b,
437        }
438    }
439}
440
/// Iterator returned by `ObjectKey::fuzzy_hash`.
pub enum ObjectKeyFuzzyHashIterator {
    /// For extent keys: yields one hash per range produced by the wrapped
    /// `ExtentPartitionIterator`, each computed over an extent key for that range.
    Extent(/* object_id */ u64, AttributeId, ExtentPartitionIterator),
    /// For all other keys: yields the stored hash once (it is taken out on the first call).
    NotExtent(/* hash */ Option<u64>),
}
445
446impl Iterator for ObjectKeyFuzzyHashIterator {
447    type Item = u64;
448
449    fn next(&mut self) -> Option<Self::Item> {
450        match self {
451            Self::Extent(oid, attr_id, extent_keys) => extent_keys.next().map(|range| {
452                let key = ObjectKey::extent(*oid, *attr_id, range);
453                crate::stable_hash::stable_hash(key)
454            }),
455            Self::NotExtent(hash) => hash.take(),
456        }
457    }
458}
459
460impl FuzzyHash for ObjectKey {
461    fn fuzzy_hash(&self) -> impl Iterator<Item = u64> {
462        match &self.data {
463            ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent)) => {
464                ObjectKeyFuzzyHashIterator::Extent(
465                    self.object_id,
466                    *attr_id,
467                    extent.fuzzy_hash_partition(),
468                )
469            }
470            _ => {
471                let hash = crate::stable_hash::stable_hash(self);
472                ObjectKeyFuzzyHashIterator::NotExtent(Some(hash))
473            }
474        }
475    }
476
477    fn is_range_key(&self) -> bool {
478        match &self.data {
479            ObjectKeyData::Attribute(_, AttributeKey::Extent(_)) => true,
480            _ => false,
481        }
482    }
483}
484
/// UNIX epoch based timestamp in the UTC timezone.
pub type Timestamp = TimestampV49;

/// Versioned definition behind the [`Timestamp`] alias.
///
/// The derived `Default` is the epoch itself (zero nanoseconds).
#[derive(
    Copy,
    Clone,
    Debug,
    Default,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct TimestampV49 {
    /// Nanoseconds since the Unix epoch.
    nanos: u64,
}
505
506impl Timestamp {
507    const NSEC_PER_SEC: u64 = 1_000_000_000;
508
509    pub fn now() -> Self {
510        SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO).into()
511    }
512
513    pub const fn zero() -> Self {
514        Self { nanos: 0 }
515    }
516
517    pub const fn from_nanos(nanos: u64) -> Self {
518        Self { nanos }
519    }
520
521    pub fn from_secs_and_nanos(secs: u64, nanos: u32) -> Self {
522        let Some(secs_in_nanos) = secs.checked_mul(Self::NSEC_PER_SEC) else {
523            error!("Fxfs doesn't support dates past 2554-07-21");
524            return Self { nanos: u64::MAX };
525        };
526        let Some(nanos) = secs_in_nanos.checked_add(nanos as u64) else {
527            error!("Fxfs doesn't support dates past 2554-07-21");
528            return Self { nanos: u64::MAX };
529        };
530        Self { nanos }
531    }
532
533    /// Returns the total number of nanoseconds represented by this `Timestamp` since the Unix
534    /// epoch.
535    pub fn as_nanos(&self) -> u64 {
536        self.nanos
537    }
538
539    /// Returns the fractional nanoseconds represented by this `Timestamp`.
540    pub fn subsec_nanos(&self) -> u32 {
541        (self.nanos % Self::NSEC_PER_SEC) as u32
542    }
543
544    /// Returns the total number of whole seconds represented by this `Timestamp` since the Unix
545    /// epoch.
546    pub fn as_secs(&self) -> u64 {
547        self.nanos / Self::NSEC_PER_SEC
548    }
549}
550
551impl From<std::time::Duration> for Timestamp {
552    fn from(duration: std::time::Duration) -> Self {
553        Self::from_secs_and_nanos(duration.as_secs(), duration.subsec_nanos())
554    }
555}
556
557impl From<Timestamp> for std::time::Duration {
558    fn from(timestamp: Timestamp) -> std::time::Duration {
559        Duration::from_nanos(timestamp.nanos)
560    }
561}
562
/// The kind-specific part of an object record; see [`ObjectKindV54`].
pub type ObjectKind = ObjectKindV54;

/// The type of a directory: whether it is encrypted and whether it uses case folding.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum DirType {
    /// Neither encrypted nor casefolded.  This is the `Default`.
    Normal,
    /// Encrypted with the given wrapping key; not casefolded.
    Encrypted(WrappingKeyId),
    /// Legacy casefolded mode.
    LegacyCasefold,
    /// Casefolded; not encrypted.
    Casefold,
    /// Encrypted with the given wrapping key and casefolded.
    EncryptedCasefold(WrappingKeyId),
}
575
576impl DirType {
577    pub fn is_casefold(&self) -> bool {
578        matches!(self, DirType::LegacyCasefold | DirType::Casefold | DirType::EncryptedCasefold(_))
579    }
580
581    pub fn is_encrypted(&self) -> bool {
582        matches!(self, DirType::Encrypted(_) | DirType::EncryptedCasefold(_))
583    }
584
585    pub fn with_encryption(self, id: WrappingKeyId) -> Self {
586        match self {
587            DirType::Normal => DirType::Encrypted(id),
588            DirType::Casefold => DirType::EncryptedCasefold(id),
589            _ => self,
590        }
591    }
592
593    pub fn with_casefold(self, val: bool) -> Self {
594        match (val, self) {
595            (true, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
596                DirType::EncryptedCasefold(id)
597            }
598            (true, _) => DirType::Casefold,
599            (false, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
600                DirType::Encrypted(id)
601            }
602            (false, _) => DirType::Normal,
603        }
604    }
605
606    pub fn wrapping_key_id(&self) -> Option<WrappingKeyId> {
607        match self {
608            DirType::Encrypted(id) | DirType::EncryptedCasefold(id) => Some(*id),
609            _ => None,
610        }
611    }
612}
613
614impl Default for DirType {
615    fn default() -> Self {
616        DirType::Normal
617    }
618}
619
/// Versioned definition behind the `ObjectKind` alias: the kind of an object together with its
/// kind-specific state.  Stored in `ObjectValueV54::Object`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKindV54 {
    /// A file.
    File {
        /// The number of references to this file.
        refs: u64,
    },
    /// A directory.
    Directory {
        /// The number of sub-directories in this directory.
        sub_dirs: u64,
        /// The type of directory (encryption, casefolding, etc.)
        dir_type: DirType,
    },
    /// A graveyard object; graveyard entries are keyed under its object id (see
    /// `ObjectKey::graveyard_entry`).
    Graveyard,
    /// A symbolic link whose target is stored as-is.
    Symlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
    /// A symbolic link whose target is stored in encrypted form.
    EncryptedSymlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
        /// volume's data key.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
}
653
/// Older version of the object kind, used by `ObjectValueV50::Object`.  It differs from
/// `ObjectKindV54` in the `Directory` variant, which stores `wrapping_key_id` and `casefold`
/// separately instead of a single `DirType`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKindV49 {
    /// A file.
    File {
        /// The number of references to this file.
        refs: u64,
    },
    /// A directory.
    Directory {
        /// The number of sub-directories in this directory.
        sub_dirs: u64,
        /// If set, contains the wrapping key id used to encrypt the file contents and filenames in
        /// this directory.
        wrapping_key_id: Option<WrappingKeyId>,
        /// If true, all files and sub-directories created in this directory will support case
        /// insensitive (but case-preserving) file naming.
        casefold: bool,
    },
    /// A graveyard object.
    Graveyard,
    /// A symbolic link whose target is stored as-is.
    Symlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
    /// A symbolic link whose target is stored in encrypted form.
    EncryptedSymlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
        /// same encryption scheme as the one used to encrypt filenames.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
}
691
/// This consists of POSIX attributes that are not used in Fxfs but it may be meaningful to some
/// clients to have the ability to set and retrieve these values.
pub type PosixAttributes = PosixAttributesV32;

/// Versioned definition behind the [`PosixAttributes`] alias.  The derived `Default` zeroes
/// every field.
#[derive(Clone, Debug, Copy, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct PosixAttributesV32 {
    /// The mode bits associated with this object
    pub mode: u32,
    /// User ID of owner
    pub uid: u32,
    /// Group ID of owner
    pub gid: u32,
    /// Device ID
    pub rdev: u64,
}
708
/// Object-level attributes.  Note that these are not the same as "attributes" in the
/// ObjectValue::Attribute sense, which refers to an arbitrary data payload associated with an
/// object.  This naming collision is unfortunate.
pub type ObjectAttributes = ObjectAttributesV49;

/// Versioned definition behind the [`ObjectAttributes`] alias.  Stored next to the object kind
/// in `ObjectValueV54::Object`.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ObjectAttributesV49 {
    /// The timestamp at which the object was created (i.e. crtime).
    pub creation_time: TimestampV49,
    /// The timestamp at which the object's data was last modified (i.e. mtime).
    pub modification_time: TimestampV49,
    /// The project id to associate this object's resource usage with.
    // Serialized through the custom `optional_project_id` module rather than serde's default
    // `Option` encoding.
    #[serde(with = "crate::object_store::project_id::optional_project_id")]
    pub project_id: Option<ProjectId>,
    /// Mode, uid, gid, and rdev
    pub posix_attributes: Option<PosixAttributesV32>,
    /// The number of bytes allocated to all extents across all attributes for this object.
    pub allocated_size: u64,
    /// The timestamp at which the object was last read (i.e. atime).
    pub access_time: TimestampV49,
    /// The timestamp at which the object's status was last modified (i.e. ctime).
    pub change_time: TimestampV49,
}
733
/// The value of an extended attribute; see [`ExtendedAttributeValueV32`].
pub type ExtendedAttributeValue = ExtendedAttributeValueV32;

/// Versioned definition behind the [`ExtendedAttributeValue`] alias.  Carried by
/// `ObjectValueV54::ExtendedAttribute`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ExtendedAttributeValueV32 {
    /// The extended attribute value is stored directly in this object. If the value is above a
    /// certain size, it should be stored as an attribute with extents instead.
    Inline(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
    /// The extended attribute value is stored as an attribute with extents. The attribute id
    /// should be chosen to be within the range of 64-512.
    AttributeId(AttributeId),
}
746
/// Id and descriptor for a child entry.
pub type ChildValue = ChildValueV32;

/// Versioned definition behind the [`ChildValue`] alias.  Carried by `ObjectValueV54::Child`.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ChildValueV32 {
    /// The ID of the child object.
    pub object_id: u64,
    /// Describes the type of the child.
    pub object_descriptor: ObjectDescriptorV32,
}
758
/// A root digest tagged with its hash algorithm; see [`RootDigestV33`].
pub type RootDigest = RootDigestV33;

/// Versioned definition behind the [`RootDigest`] alias.  Used as the root hash in
/// `FsverityMetadataV50::Internal`.
#[derive(
    Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize, TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum RootDigestV33 {
    /// A SHA-256 digest, stored as a fixed 32-byte array.
    Sha256([u8; 32]),
    /// A SHA-512 digest, stored as a byte vector.
    // NOTE(review): presumably always 64 bytes, but the type does not enforce the length.
    Sha512(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
}
769
/// Fsverity metadata for a verified attribute; see [`FsverityMetadataV50`].
pub type FsverityMetadata = FsverityMetadataV50;

/// Versioned definition behind the [`FsverityMetadata`] alias.  Carried by
/// `ObjectValueV54::VerifiedAttribute`.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum FsverityMetadataV50 {
    /// The root hash and salt.
    Internal(RootDigestV33, #[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
    /// The root hash and salt are in a descriptor inside the merkle attribute.
    // NOTE(review): the range presumably locates that descriptor within the merkle attribute --
    // confirm against the code that reads it.
    F2fs(std::ops::Range<u64>),
}
780
/// A single encryption key, as defined by `fxfs_crypto`.
pub type EncryptionKey = EncryptionKeyV49;
pub type EncryptionKeyV49 = fxfs_crypto::EncryptionKey;

/// The set of encryption keys for an object, stored in `ObjectValueV54::Keys`.
pub type EncryptionKeys = EncryptionKeysV49;

/// Versioned definition behind the [`EncryptionKeys`] alias: a list of `(id, key)` pairs kept in
/// insertion order.  The list itself does not enforce that ids are unique.
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptionKeysV49(Vec<(u64, EncryptionKeyV49)>);
789
790impl EncryptionKeys {
791    pub fn get(&self, id: u64) -> Option<&EncryptionKey> {
792        self.0.iter().find_map(|(i, key)| (*i == id).then_some(key))
793    }
794
795    pub fn insert(&mut self, id: u64, key: EncryptionKey) {
796        self.0.push((id, key))
797    }
798
799    pub fn remove(&mut self, id: u64) -> Option<EncryptionKey> {
800        if let Some(ix) = self.0.iter().position(|(k, _)| *k == id) {
801            Some(self.0.remove(ix).1)
802        } else {
803            None
804        }
805    }
806}
807
808impl From<EncryptionKeys> for BTreeMap<u64, WrappedKey> {
809    fn from(keys: EncryptionKeys) -> Self {
810        keys.0.into_iter().map(|(id, key)| (id, key.into())).collect()
811    }
812}
813
impl From<Vec<(u64, EncryptionKey)>> for EncryptionKeys {
    /// Wraps an existing list of `(id, key)` pairs as-is; the pairs are neither reordered nor
    /// checked for duplicate ids.
    fn from(value: Vec<(u64, EncryptionKey)>) -> Self {
        Self(value)
    }
}
819
820impl std::ops::Deref for EncryptionKeys {
821    type Target = Vec<(u64, EncryptionKey)>;
822    fn deref(&self) -> &Self::Target {
823        &self.0
824    }
825}
826
827/// ObjectValue is the value of an item in the object store.
828/// Note that the tree stores deltas on objects, so these values describe deltas. Unless specified
829/// otherwise, a value indicates an insert/replace mutation.
830pub type ObjectValue = ObjectValueV54;
831impl Value for ObjectValue {
832    const DELETED_MARKER: Self = Self::None;
833}
834
/// Versioned definition behind the `ObjectValue` alias.  `ObjectValueV50` migrates to this
/// version.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectValueV54 {
    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
    /// value are always filtered when a major compaction is performed, so the meaning must be the
    /// same as if the item was not present.
    None,
    /// Some keys have no value but need to differentiate between a present value and no value
    /// (None) i.e. their value is really a boolean: None => false, Some => true.
    Some,
    /// The value for an ObjectKey::Object record.
    Object { kind: ObjectKindV54, attributes: ObjectAttributesV49 },
    /// Specifies encryption keys to use for an object.
    Keys(EncryptionKeysV49),
    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
    Attribute { size: u64, has_overwrite_extents: bool },
    /// An extent associated with an object.
    Extent(ExtentValueV38),
    /// A child of an object.
    Child(ChildValue),
    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
    /// the bounds of a single transaction.
    Trim,
    /// Added to support tracking Project ID usage and limits.
    BytesAndNodes { bytes: i64, nodes: i64 },
    /// A value for an extended attribute. Either inline or a redirection to an attribute with
    /// extents.
    ExtendedAttribute(ExtendedAttributeValueV32),
    /// An attribute associated with a verified file object. |size| is the size of the attribute
    /// in bytes.
    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
}
868
/// Version 50 of the object store value record, kept so older data can be read. The derived
/// `Migrate` implementation converts it to `ObjectValueV54`. Compared with `ObjectValueV54`, the
/// `Object` variant carries an `ObjectKindV49` and the `Child` variant a `ChildValueV32`.
#[derive(Migrate, Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[migrate_to_version(ObjectValueV54)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectValueV50 {
    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
    /// value are always filtered when a major compaction is performed, so the meaning must be the
    /// same as if the item was not present.
    None,
    /// Some keys have no value but need to differentiate between a present value and no value
    /// (None) i.e. their value is really a boolean: None => false, Some => true.
    Some,
    /// The value for an ObjectKey::Object record.
    Object { kind: ObjectKindV49, attributes: ObjectAttributesV49 },
    /// Specifies encryption keys to use for an object.
    Keys(EncryptionKeysV49),
    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
    Attribute { size: u64, has_overwrite_extents: bool },
    /// An extent associated with an object.
    Extent(ExtentValueV38),
    /// A child of an object.
    Child(ChildValueV32),
    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
    /// the bounds of a single transaction.
    Trim,
    /// Added to support tracking Project ID usage and limits.
    BytesAndNodes { bytes: i64, nodes: i64 },
    /// A value for an extended attribute. Either inline or a redirection to an attribute with
    /// extents.
    ExtendedAttribute(ExtendedAttributeValueV32),
    /// An attribute associated with a verified file object. |size| is the size of the attribute
    /// in bytes.
    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
}
903
904impl ObjectValue {
905    /// Creates an ObjectValue for a file object.
906    pub fn file(
907        refs: u64,
908        allocated_size: u64,
909        creation_time: Timestamp,
910        modification_time: Timestamp,
911        access_time: Timestamp,
912        change_time: Timestamp,
913        project_id: Option<ProjectId>,
914        posix_attributes: Option<PosixAttributes>,
915    ) -> ObjectValue {
916        ObjectValue::Object {
917            kind: ObjectKind::File { refs },
918            attributes: ObjectAttributes {
919                creation_time,
920                modification_time,
921                project_id,
922                posix_attributes,
923                allocated_size,
924                access_time,
925                change_time,
926            },
927        }
928    }
929    pub fn keys(encryption_keys: EncryptionKeys) -> ObjectValue {
930        ObjectValue::Keys(encryption_keys)
931    }
932    /// Creates an ObjectValue for an object attribute.
933    pub fn attribute(size: u64, has_overwrite_extents: bool) -> ObjectValue {
934        ObjectValue::Attribute { size, has_overwrite_extents }
935    }
936    /// Creates an ObjectValue for an object attribute of a verified file.
937    pub fn verified_attribute(size: u64, fsverity_metadata: FsverityMetadata) -> ObjectValue {
938        ObjectValue::VerifiedAttribute { size, fsverity_metadata }
939    }
940    /// Creates an ObjectValue for an insertion/replacement of an object extent.
941    pub fn extent(device_offset: u64, key_id: u64) -> ObjectValue {
942        ObjectValue::Extent(ExtentValue::new_raw(device_offset, key_id))
943    }
944    /// Creates an ObjectValue for an insertion/replacement of an object extent.
945    pub fn extent_with_checksum(
946        device_offset: u64,
947        checksum: Checksums,
948        key_id: u64,
949    ) -> ObjectValue {
950        ObjectValue::Extent(ExtentValue::with_checksum(device_offset, checksum, key_id))
951    }
952    /// Creates an ObjectValue for a deletion of an object extent.
953    pub fn deleted_extent() -> ObjectValue {
954        ObjectValue::Extent(ExtentValue::deleted_extent())
955    }
956    /// Creates an ObjectValue for an object child.
957    pub fn child(object_id: u64, object_descriptor: ObjectDescriptor) -> ObjectValue {
958        ObjectValue::Child(ChildValue { object_id, object_descriptor })
959    }
960    /// Creates an ObjectValue for an object symlink.
961    pub fn symlink(
962        link: impl Into<Box<[u8]>>,
963        creation_time: Timestamp,
964        modification_time: Timestamp,
965        project_id: Option<ProjectId>,
966    ) -> ObjectValue {
967        ObjectValue::Object {
968            kind: ObjectKind::Symlink { refs: 1, link: link.into() },
969            attributes: ObjectAttributes {
970                creation_time,
971                modification_time,
972                project_id,
973                ..Default::default()
974            },
975        }
976    }
977    /// Creates an ObjectValue for an encrypted symlink object.
978    pub fn encrypted_symlink(
979        link: impl Into<Box<[u8]>>,
980        creation_time: Timestamp,
981        modification_time: Timestamp,
982        project_id: Option<ProjectId>,
983    ) -> ObjectValue {
984        ObjectValue::Object {
985            kind: ObjectKind::EncryptedSymlink { refs: 1, link: link.into() },
986            attributes: ObjectAttributes {
987                creation_time,
988                modification_time,
989                project_id,
990                ..Default::default()
991            },
992        }
993    }
994    pub fn inline_extended_attribute(value: impl Into<Vec<u8>>) -> ObjectValue {
995        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::Inline(value.into()))
996    }
997    pub fn extended_attribute(attribute_id: AttributeId) -> ObjectValue {
998        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::AttributeId(attribute_id))
999    }
1000}
1001
/// The current object store item type; an alias for `ObjectItemV55`.
pub type ObjectItem = ObjectItemV55;

/// Version 54 item: the V54 key and value held in a `LegacyItem`.
pub type ObjectItemV54 = LegacyItem<ObjectKeyV54, ObjectValueV54>;
/// Version 55 item: the same V54 key and value types, held in an `Item`.
pub type ObjectItemV55 = Item<ObjectKeyV54, ObjectValueV54>;
1006
1007impl From<ObjectItemV54> for ObjectItemV55 {
1008    fn from(item: ObjectItemV54) -> Self {
1009        Self { key: item.key, value: item.value }
1010    }
1011}
1012
/// Version 50 item: an `ObjectKeyV43` key and an `ObjectValueV50` value held in a `LegacyItem`.
pub type ObjectItemV50 = LegacyItem<ObjectKeyV43, ObjectValueV50>;
1014
1015impl ObjectItem {
1016    pub fn is_tombstone(&self) -> bool {
1017        matches!(
1018            self,
1019            Item {
1020                key: ObjectKey { data: ObjectKeyData::Object, .. },
1021                value: ObjectValue::None,
1022                ..
1023            }
1024        )
1025    }
1026}
1027
1028// If the given item describes an extent, unwraps it and returns the extent key/value.
1029impl<'a> From<ItemRef<'a, ObjectKey, ObjectValue>>
1030    for Option<(/*object-id*/ u64, AttributeId, &'a Extent, &'a ExtentValue)>
1031{
1032    fn from(item: ItemRef<'a, ObjectKey, ObjectValue>) -> Self {
1033        match item {
1034            ItemRef {
1035                key:
1036                    ObjectKey {
1037                        object_id,
1038                        data:
1039                            ObjectKeyData::Attribute(
1040                                attribute_id, //
1041                                AttributeKey::Extent(extent_key),
1042                            ),
1043                    },
1044                value: ObjectValue::Extent(extent_value),
1045                ..
1046            } => Some((*object_id, *attribute_id, extent_key, extent_value)),
1047            _ => None,
1048        }
1049    }
1050}
1051
/// The current `FxfsKey` type; an alias for `FxfsKeyV49`.
pub type FxfsKey = FxfsKeyV49;
/// Version 49 of the key type; an alias for `fxfs_crypto::FxfsKey`.
pub type FxfsKeyV49 = fxfs_crypto::FxfsKey;
1054
/// Identifier of an attribute, stored as a transparent wrapper around the raw `u64` value.
#[derive(
    Clone,
    Copy,
    PartialEq,
    Eq,
    PartialOrd,
    Ord,
    Debug,
    Serialize,
    Deserialize,
    Hash,
    SerializeKey,
    TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
#[repr(transparent)]
pub struct AttributeId(pub u64);
1072
1073impl AttributeId {
1074    /// The common case for extents which cover the data payload of an object.
1075    pub const DATA: Self = Self(0);
1076
1077    /// Contains a serialized and versioned `BlobMetadata` struct. Use [`BlobMetadata::read_from`]
1078    /// and [`BlobMetadata::write_to`] to access this attribute.
1079    pub const BLOB_METADATA: Self = Self(3);
1080
1081    /// Contains a serialized `BlobMetadataUnversioned` struct. This attribute may still exist on
1082    /// blobs but should no longer be written. Use `AttributeId::BLOB_METADATA` instead.
1083    pub const BLOB_MERKLE: Self = Self(1);
1084
1085    /// For fsverity files in Fxfs, we store the merkle tree of the verified file at a well-known
1086    /// attribute.
1087    pub const FSVERITY_MERKLE: Self = Self(2);
1088
1089    /// The range of fxfs attribute IDs which are reserved for extended attribute values. Whenever a
1090    /// new attribute is needed, the first unused ID will be chosen from this range. It's
1091    /// technically safe to change these values, but it has potential consequences - they are only
1092    /// used during ID selection, so any existing extended attributes keep their IDs, which means
1093    /// any past or present selected range here could potentially have used attributes unless they
1094    /// are explicitly migrated, which isn't currently done.
1095    pub const XATTR_RANGE_START: Self = Self(64);
1096    pub const XATTR_RANGE_END: Self = Self(512);
1097
1098    /// A semantic alias for the `0` attribute ID, indicating that it is being used as a starting
1099    /// point to iterate over all attributes rather than specifically looking up the primary data
1100    /// attribute [`AttributeId::DATA`].
1101    pub const SORTED_START: Self = Self(0);
1102
1103    /// An attribute ID to use in tests when no particular ID is necessary.
1104    #[cfg(test)]
1105    pub const TEST_ID: Self = Self(u64::MAX - 1000);
1106
1107    pub const fn raw(self) -> u64 {
1108        self.0
1109    }
1110
1111    /// Returns the current id + 1.
1112    pub const fn next(self) -> Self {
1113        Self(self.0 + 1)
1114    }
1115
1116    /// Returns true if the attribute ID is within the range of extended attributes.
1117    pub const fn is_xattr(self) -> bool {
1118        self.0 >= Self::XATTR_RANGE_START.0 && self.0 < Self::XATTR_RANGE_END.0
1119    }
1120}
1121
1122impl log::kv::ToValue for AttributeId {
1123    fn to_value(&self) -> log::kv::Value<'_> {
1124        log::kv::Value::from(self.0)
1125    }
1126}
1127
1128impl std::fmt::Display for AttributeId {
1129    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1130        std::fmt::Display::fmt(&self.0, f)
1131    }
1132}
1133
#[cfg(test)]
mod tests {
    use super::{AttributeId, ObjectKey, ObjectKeyV54, TimestampV49};
    use crate::lsm_tree::types::{FuzzyHash as _, LayerKey};
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    // Smoke test for hash stability of Fxfs object keys.
    // A failure here means the hash algorithm changed. Fxfs depends on stable hash values, and
    // existing images will look corrupt if they change (see https://fxbug.dev/419133532).
    #[test]
    fn test_hash_stability() {
        // This deliberately pins a specific ObjectKey version.  To delete ObjectKeyV54, point the
        // test at a later key version and regenerate the expected hashes.
        let object_hashes: Vec<_> = ObjectKeyV54::object(100).fuzzy_hash().collect();
        assert_eq!(&object_hashes[..], &[11885326717398844384]);

        let extent_key = ObjectKeyV54::extent(1, AttributeId::DATA, 0..2 * 1024 * 1024);
        let extent_hashes: Vec<_> = extent_key.fuzzy_hash().collect();
        assert_eq!(&extent_hashes[..], &[11090579907097549012, 2814892992701560424]);
    }

    #[test]
    fn test_next_key() {
        let successor = ObjectKey::extent(1, AttributeId::TEST_ID, 25..100).next_key();
        assert_eq!(successor.unwrap(), ObjectKey::extent(1, AttributeId::TEST_ID, 0..101));

        let object_successor = ObjectKey::object(100).next_key();
        assert_eq!(object_successor, None);
    }

    #[test]
    fn test_range_key() {
        const ATTR_ID: AttributeId = AttributeId::TEST_ID;
        // Extent keys are range keys, so they must not be used with point queries. Other object
        // keys remain usable with point queries.
        assert!(ObjectKey::extent(1, ATTR_ID, 0..2 * 1024 * 1024).is_range_key());
        assert!(!ObjectKey::object(100).is_range_key());

        // Each entry is (left key, right key, expected result of `left.overlaps(&right)`).
        let cases = [
            (ObjectKey::object(1), ObjectKey::object(1), true),
            (ObjectKey::object(1), ObjectKey::object(2), false),
            (ObjectKey::extent(1, ATTR_ID, 0..100), ObjectKey::object(1), false),
            (ObjectKey::object(1), ObjectKey::extent(1, ATTR_ID, 0..100), false),
            (
                ObjectKey::extent(1, ATTR_ID, 0..100),
                ObjectKey::extent(2, ATTR_ID, 0..100),
                false,
            ),
            (
                ObjectKey::extent(1, ATTR_ID, 0..100),
                ObjectKey::extent(1, ATTR_ID.next(), 0..100),
                false,
            ),
            (
                ObjectKey::extent(1, ATTR_ID, 0..100),
                ObjectKey::extent(1, ATTR_ID, 0..100),
                true,
            ),
            (
                ObjectKey::extent(1, ATTR_ID, 0..50),
                ObjectKey::extent(1, ATTR_ID, 49..100),
                true,
            ),
            (
                ObjectKey::extent(1, ATTR_ID, 49..100),
                ObjectKey::extent(1, ATTR_ID, 0..50),
                true,
            ),
            (
                ObjectKey::extent(1, ATTR_ID, 0..50),
                ObjectKey::extent(1, ATTR_ID, 50..100),
                false,
            ),
            (
                ObjectKey::extent(1, ATTR_ID, 50..100),
                ObjectKey::extent(1, ATTR_ID, 0..50),
                false,
            ),
        ];
        for (left, right, expected) in &cases {
            assert_eq!(left.overlaps(right), *expected, "{left:?} overlaps {right:?}");
        }
    }

    #[test]
    fn test_timestamp() {
        // Converts a `Duration` to a timestamp and checks that every accessor agrees with it.
        fn compare_time(std_time: Duration) {
            let ts_time: TimestampV49 = std_time.into();
            let round_tripped: Duration = ts_time.into();
            assert_eq!(round_tripped, std_time);
            assert_eq!(ts_time.subsec_nanos(), std_time.subsec_nanos());
            assert_eq!(ts_time.as_secs(), std_time.as_secs());
            assert_eq!(ts_time.as_nanos() as u128, std_time.as_nanos());
        }
        compare_time(Duration::from_nanos(0));
        compare_time(Duration::from_nanos(u64::MAX));
        let since_epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
        compare_time(since_epoch);

        // Durations too large for the timestamp are expected to end up with `nanos == u64::MAX`.
        let huge_secs: TimestampV49 = Duration::from_secs(u64::MAX - 1).into();
        assert_eq!(huge_secs.nanos, u64::MAX);

        let one_past_max = Duration::from_nanos(u64::MAX) + Duration::from_nanos(1);
        let just_over: TimestampV49 = one_past_max.into();
        assert_eq!(just_over.nanos, u64::MAX);
    }
}