Skip to main content

fxfs/object_store/
object_record.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod legacy;
6
7pub use legacy::*;
8
9// TODO(https://fxbug.dev/42178223): need validation after deserialization.
10use crate::checksum::Checksums;
11use crate::log::error;
12use crate::lsm_tree::types::{
13    FuzzyHash, Item, ItemRef, LayerKey, LegacyItem, MergeType, OrdLowerBound, OrdUpperBound,
14    RangeKey, SortByU64, Value,
15};
16use crate::object_store::extent_record::{
17    ExtentKey, ExtentKeyPartitionIterator, ExtentKeyV32, ExtentValue, ExtentValueV38,
18};
19use crate::serialized_types::{Migrate, Versioned, migrate_nodefault, migrate_to_version};
20use fprint::TypeFingerprint;
21use fxfs_crypto::{WrappedKey, WrappingKeyId};
22use fxfs_unicode::CasefoldString;
23use serde::{Deserialize, Serialize};
24use std::collections::BTreeMap;
25use std::default::Default;
26use std::hash::Hash;
27use std::time::{Duration, SystemTime, UNIX_EPOCH};
28
/// ObjectDescriptor is the set of possible records in the object store.
///
/// This alias always names the current version, [`ObjectDescriptorV32`].
pub type ObjectDescriptor = ObjectDescriptorV32;

/// Version 32 of the object descriptor.  It is embedded in [`ChildValueV32`] to describe the type
/// of a child entry.
// NOTE(review): this type is serialized; variant order presumably determines the encoded
// discriminant, so confirm before reordering or inserting variants.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectDescriptorV32 {
    /// A file (in the generic sense; i.e. an object with some attributes).
    File,
    /// A directory (in the generic sense; i.e. an object with children).
    Directory,
    /// A volume, which is the root of a distinct object store containing Files and Directories.
    Volume,
    /// A symbolic link.
    Symlink,
}
44
/// For specifying what property of the project is being addressed.
///
/// This alias always names the current version, [`ProjectPropertyV32`].
pub type ProjectProperty = ProjectPropertyV32;

/// Version 32 of the project property selector, used as the `property` field of
/// `ObjectKeyDataV54::Project`.
///
/// `Ord` is derived, so `Limit` sorts before `Usage` for the same project id.
#[derive(
    Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize, TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ProjectPropertyV32 {
    /// The configured limit for the project.
    Limit,
    /// The currently tracked usage for the project.
    Usage,
}
58
/// The type-specific portion of an [`ObjectKey`].
///
/// This alias always names the current version, [`ObjectKeyDataV54`].
pub type ObjectKeyData = ObjectKeyDataV54;

/// Version 54 of the key data stored in an `ObjectKeyV54`.
///
/// `Ord` is derived, so variants sort in declaration order (and then by their fields).  The
/// position of each variant is therefore significant; see the note on `Object`.
#[derive(
    Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize, Deserialize, TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKeyDataV54 {
    /// A generic, untyped object.  This must come first and sort before all other keys for a given
    /// object because it's also used as a tombstone and it needs to merge with all following keys.
    Object,
    /// Encryption keys for an object.
    Keys,
    /// An attribute associated with an object.  It has a 64-bit ID.
    Attribute(u64, AttributeKeyV32),
    /// A child of a directory.
    Child { name: String },
    /// A graveyard entry for an entire object.
    GraveyardEntry { object_id: u64 },
    /// Project ID info. This should only be attached to the volume's root node. Used to address the
    /// configured limit and the usage tracking which are ordered after the `project_id` to provide
    /// locality of the two related values.
    Project { project_id: u64, property: ProjectPropertyV32 },
    /// An extended attribute associated with an object. It stores the name used for the extended
    /// attribute, which has a maximum size of 255 bytes enforced by fuchsia.io.
    ExtendedAttribute {
        #[serde(with = "crate::zerocopy_serialization")]
        name: Vec<u8>,
    },
    /// A graveyard entry for an attribute.
    GraveyardAttributeEntry { object_id: u64, attribute_id: u64 },
    /// A child of an encrypted directory.  We store the filename in its encrypted form.  hash_code
    /// is the hash of the casefolded human-readable name if a directory is also casefolded.  In
    /// some legacy cases, this is also used in non-casefolded cases, and in some of those cases the
    /// hash code can be 0.  Going forward, these cases are covered by `EncryptedChild` below.
    EncryptedCasefoldChild(EncryptedCasefoldChild),
    /// Case-insensitive child (legacy).
    LegacyCasefoldChild(CasefoldString),
    /// An encrypted child that does not use case folding.
    EncryptedChild(EncryptedChild),
    /// A child of a directory that uses the casefold feature.
    /// (i.e. case insensitive, case preserving names)
    CasefoldChild { hash_code: u32, name: String },
}
102
/// Payload of `ObjectKeyDataV54::EncryptedCasefoldChild`: an encrypted child name together with a
/// 32-bit hash code.
///
/// `hash_code` is declared first, so the derived `Ord` compares the hash before the name bytes.
#[derive(
    Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize, Deserialize, TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptedCasefoldChild {
    /// The hash code stored alongside the encrypted name.
    pub hash_code: u32,
    /// The child's name in its encrypted form.
    #[serde(with = "crate::zerocopy_serialization")]
    pub name: Vec<u8>,
}
112
/// Payload of `ObjectKeyDataV54::EncryptedChild`: the encrypted name bytes of a child that does
/// not use case folding.
#[derive(
    Clone, Debug, Eq, Hash, PartialEq, PartialOrd, Ord, Serialize, Deserialize, TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptedChild(#[serde(with = "crate::zerocopy_serialization")] pub Vec<u8>);
118
/// The second component of an `ObjectKeyData::Attribute` key.
///
/// This alias always names the current version, [`AttributeKeyV32`].
pub type AttributeKey = AttributeKeyV32;

/// Version 32 of the attribute key.  Distinguishes the record for the attribute itself from the
/// records for its extents.
#[derive(
    Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize, TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum AttributeKeyV32 {
    // Order here is important: code expects Attribute to precede Extent.
    /// The attribute itself.
    Attribute,
    /// An extent belonging to the attribute.
    Extent(ExtentKeyV32),
}
130
/// ObjectKey is a key in the object store.
///
/// This alias always names the current version, [`ObjectKeyV54`].
pub type ObjectKey = ObjectKeyV54;

/// Version 54 of the object store key.
///
/// `Ord` is derived and `object_id` is declared first, so keys sort by object id and then by
/// `data`.
#[derive(
    Clone,
    Debug,
    Eq,
    Ord,
    Hash,
    PartialEq,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
    Versioned,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ObjectKeyV54 {
    /// The ID of the object referred to.
    pub object_id: u64,
    /// The type and data of the key.
    pub data: ObjectKeyDataV54,
}
154
155impl SortByU64 for ObjectKey {
156    fn get_leading_u64(&self) -> u64 {
157        self.object_id
158    }
159}
160
161impl ObjectKey {
162    /// Creates a generic ObjectKey.
163    pub fn object(object_id: u64) -> Self {
164        Self { object_id: object_id, data: ObjectKeyData::Object }
165    }
166
167    /// Creates an ObjectKey for encryption keys.
168    pub fn keys(object_id: u64) -> Self {
169        Self { object_id, data: ObjectKeyData::Keys }
170    }
171
172    /// Creates an ObjectKey for an attribute.
173    pub fn attribute(object_id: u64, attribute_id: u64, key: AttributeKey) -> Self {
174        Self { object_id, data: ObjectKeyData::Attribute(attribute_id, key) }
175    }
176
177    /// Creates an ObjectKey for an extent.
178    pub fn extent(object_id: u64, attribute_id: u64, range: std::ops::Range<u64>) -> Self {
179        Self {
180            object_id,
181            data: ObjectKeyData::Attribute(
182                attribute_id,
183                AttributeKey::Extent(ExtentKey::new(range)),
184            ),
185        }
186    }
187
188    /// Creates an ObjectKey from an extent.
189    pub fn from_extent(object_id: u64, attribute_id: u64, extent: ExtentKey) -> Self {
190        Self {
191            object_id,
192            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(extent)),
193        }
194    }
195
196    /// Creates an ObjectKey for a child.
197    pub fn child(object_id: u64, name: &str, dir_type: DirType) -> Self {
198        match dir_type {
199            DirType::Casefold => {
200                let casefolded_name: String = fxfs_unicode::casefold(name.chars()).collect();
201                let hash_code = fscrypt::direntry::tea_hash_filename(casefolded_name.as_bytes());
202                Self {
203                    object_id,
204                    data: ObjectKeyData::CasefoldChild { hash_code, name: name.into() },
205                }
206            }
207            DirType::LegacyCasefold => Self {
208                object_id,
209                data: ObjectKeyData::LegacyCasefoldChild(CasefoldString::new(name.into())),
210            },
211            DirType::Normal => Self { object_id, data: ObjectKeyData::Child { name: name.into() } },
212            DirType::Encrypted(_) | DirType::EncryptedCasefold(_) => {
213                // These shouldn't be used directly; encrypted_child should be used instead.
214                panic!("Encrypted modes require an encrypted name");
215            }
216        }
217    }
218
219    /// Creates an ObjectKey for an encrypted child.
220    ///
221    /// The hash_code is important here -- especially for fscrypt as it affects the
222    /// name of locked files.
223    ///
224    /// For case-insensitive lookups in large encrypted directories, we lose the ability to binary
225    /// search for an entry of interest because encryption breaks our sort order. In these cases
226    /// we prefix records with a 32-bit hash based on the stable *casefolded* name. Hash collisions
227    /// aside, this lets us jump straight to the entry of interest, if it exists.
228    pub fn encrypted_child(object_id: u64, name: Vec<u8>, hash_code: Option<u32>) -> Self {
229        if let Some(hash_code) = hash_code {
230            Self {
231                object_id,
232                data: ObjectKeyData::EncryptedCasefoldChild(EncryptedCasefoldChild {
233                    hash_code,
234                    name,
235                }),
236            }
237        } else {
238            Self { object_id, data: ObjectKeyData::EncryptedChild(EncryptedChild(name)) }
239        }
240    }
241
242    /// Creates a graveyard entry for an object.
243    pub fn graveyard_entry(graveyard_object_id: u64, object_id: u64) -> Self {
244        Self { object_id: graveyard_object_id, data: ObjectKeyData::GraveyardEntry { object_id } }
245    }
246
247    /// Creates a graveyard entry for an attribute.
248    pub fn graveyard_attribute_entry(
249        graveyard_object_id: u64,
250        object_id: u64,
251        attribute_id: u64,
252    ) -> Self {
253        Self {
254            object_id: graveyard_object_id,
255            data: ObjectKeyData::GraveyardAttributeEntry { object_id, attribute_id },
256        }
257    }
258
259    /// Creates an ObjectKey for a ProjectLimit entry.
260    pub fn project_limit(object_id: u64, project_id: u64) -> Self {
261        Self {
262            object_id,
263            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Limit },
264        }
265    }
266
267    /// Creates an ObjectKey for a ProjectUsage entry.
268    pub fn project_usage(object_id: u64, project_id: u64) -> Self {
269        Self {
270            object_id,
271            data: ObjectKeyData::Project { project_id, property: ProjectProperty::Usage },
272        }
273    }
274
275    pub fn extended_attribute(object_id: u64, name: Vec<u8>) -> Self {
276        Self { object_id, data: ObjectKeyData::ExtendedAttribute { name } }
277    }
278
279    /// Returns the merge key for this key; that is, a key which is <= this key and any
280    /// other possibly overlapping key, under Ord. This would be used for the hint in |merge_into|.
281    pub fn key_for_merge_into(&self) -> Self {
282        if let Self {
283            object_id,
284            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
285        } = self
286        {
287            Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.key_for_merge_into()))
288        } else {
289            self.clone()
290        }
291    }
292}
293
294impl OrdUpperBound for ObjectKey {
295    fn cmp_upper_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
296        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
297            (
298                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
299                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
300            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_upper_bound(right_extent)),
301            _ => self.data.cmp(&other.data),
302        })
303    }
304}
305
306impl OrdLowerBound for ObjectKey {
307    fn cmp_lower_bound(&self, other: &ObjectKey) -> std::cmp::Ordering {
308        self.object_id.cmp(&other.object_id).then_with(|| match (&self.data, &other.data) {
309            (
310                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_extent)),
311                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_extent)),
312            ) => left_attr_id.cmp(right_attr_id).then(left_extent.cmp_lower_bound(right_extent)),
313            _ => self.data.cmp(&other.data),
314        })
315    }
316}
317
318impl LayerKey for ObjectKey {
319    fn merge_type(&self) -> MergeType {
320        // This listing is intentionally exhaustive to force folks to think about how certain
321        // subsets of the keyspace are merged.
322        match self.data {
323            ObjectKeyData::Object
324            | ObjectKeyData::Keys
325            | ObjectKeyData::Attribute(..)
326            | ObjectKeyData::Child { .. }
327            | ObjectKeyData::EncryptedChild(_)
328            | ObjectKeyData::EncryptedCasefoldChild(_)
329            | ObjectKeyData::CasefoldChild { .. }
330            | ObjectKeyData::LegacyCasefoldChild(_)
331            | ObjectKeyData::GraveyardEntry { .. }
332            | ObjectKeyData::GraveyardAttributeEntry { .. }
333            | ObjectKeyData::Project { property: ProjectProperty::Limit, .. }
334            | ObjectKeyData::ExtendedAttribute { .. } => MergeType::OptimizedMerge,
335            ObjectKeyData::Project { property: ProjectProperty::Usage, .. } => MergeType::FullMerge,
336        }
337    }
338
339    fn next_key(&self) -> Option<Self> {
340        match &self.data {
341            ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(ExtentKey { range })) => {
342                // We want a key such that cmp_lower_bound returns Greater for any key which
343                // starts after end, and a key such that if you search for it, you'll get an
344                // extent whose end > range.end.
345                Some(ObjectKey {
346                    object_id: self.object_id,
347                    data: ObjectKeyData::Attribute(
348                        *attr_id,
349                        AttributeKey::Extent(ExtentKey { range: range.end..range.end + 1 }),
350                    ),
351                })
352            }
353            _ => None,
354        }
355    }
356
357    fn search_key(&self) -> Self {
358        if let Self {
359            object_id,
360            data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Extent(e)),
361        } = self
362        {
363            Self::attribute(*object_id, *attribute_id, AttributeKey::Extent(e.search_key()))
364        } else {
365            self.clone()
366        }
367    }
368}
369
370impl RangeKey for ObjectKey {
371    fn overlaps(&self, other: &Self) -> bool {
372        if self.object_id != other.object_id {
373            return false;
374        }
375        match (&self.data, &other.data) {
376            (
377                ObjectKeyData::Attribute(left_attr_id, AttributeKey::Extent(left_key)),
378                ObjectKeyData::Attribute(right_attr_id, AttributeKey::Extent(right_key)),
379            ) if *left_attr_id == *right_attr_id => {
380                left_key.range.end > right_key.range.start
381                    && left_key.range.start < right_key.range.end
382            }
383            (a, b) => a == b,
384        }
385    }
386}
387
/// Iterator over the fuzzy hashes of an [`ObjectKey`], as returned by its `FuzzyHash`
/// implementation.
pub enum ObjectKeyFuzzyHashIterator {
    /// Yields one hash per range produced by the partition iterator, each computed from an extent
    /// key built from the stored object id, attribute id and that range.
    ExtentKey(/* object_id */ u64, /* attribute_id */ u64, ExtentKeyPartitionIterator),
    /// Yields the stored hash once (if present) and then nothing.
    NotExtentKey(/* hash */ Option<u64>),
}
392
393impl Iterator for ObjectKeyFuzzyHashIterator {
394    type Item = u64;
395
396    fn next(&mut self) -> Option<Self::Item> {
397        match self {
398            Self::ExtentKey(oid, attr_id, extent_keys) => extent_keys.next().map(|range| {
399                let key = ObjectKey::extent(*oid, *attr_id, range);
400                crate::stable_hash::stable_hash(key)
401            }),
402            Self::NotExtentKey(hash) => hash.take(),
403        }
404    }
405}
406
407impl FuzzyHash for ObjectKey {
408    fn fuzzy_hash(&self) -> impl Iterator<Item = u64> {
409        match &self.data {
410            ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent)) => {
411                ObjectKeyFuzzyHashIterator::ExtentKey(
412                    self.object_id,
413                    *attr_id,
414                    extent.fuzzy_hash_partition(),
415                )
416            }
417            _ => {
418                let hash = crate::stable_hash::stable_hash(self);
419                ObjectKeyFuzzyHashIterator::NotExtentKey(Some(hash))
420            }
421        }
422    }
423
424    fn is_range_key(&self) -> bool {
425        match &self.data {
426            ObjectKeyData::Attribute(_, AttributeKey::Extent(_)) => true,
427            _ => false,
428        }
429    }
430}
431
/// UNIX epoch based timestamp in the UTC timezone.
///
/// This alias always names the current version, [`TimestampV49`].
pub type Timestamp = TimestampV49;

/// Version 49 of the timestamp: a single 64-bit count of nanoseconds since the Unix epoch.
///
/// The derived `Default` is the zero timestamp (the epoch itself).
#[derive(
    Copy,
    Clone,
    Debug,
    Default,
    Eq,
    PartialEq,
    Ord,
    PartialOrd,
    Serialize,
    Deserialize,
    TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct TimestampV49 {
    /// Nanoseconds since the Unix epoch.
    nanos: u64,
}
452
453impl Timestamp {
454    const NSEC_PER_SEC: u64 = 1_000_000_000;
455
456    pub fn now() -> Self {
457        SystemTime::now().duration_since(UNIX_EPOCH).unwrap_or(Duration::ZERO).into()
458    }
459
460    pub const fn zero() -> Self {
461        Self { nanos: 0 }
462    }
463
464    pub const fn from_nanos(nanos: u64) -> Self {
465        Self { nanos }
466    }
467
468    pub fn from_secs_and_nanos(secs: u64, nanos: u32) -> Self {
469        let Some(secs_in_nanos) = secs.checked_mul(Self::NSEC_PER_SEC) else {
470            error!("Fxfs doesn't support dates past 2554-07-21");
471            return Self { nanos: u64::MAX };
472        };
473        let Some(nanos) = secs_in_nanos.checked_add(nanos as u64) else {
474            error!("Fxfs doesn't support dates past 2554-07-21");
475            return Self { nanos: u64::MAX };
476        };
477        Self { nanos }
478    }
479
480    /// Returns the total number of nanoseconds represented by this `Timestamp` since the Unix
481    /// epoch.
482    pub fn as_nanos(&self) -> u64 {
483        self.nanos
484    }
485
486    /// Returns the fractional nanoseconds represented by this `Timestamp`.
487    pub fn subsec_nanos(&self) -> u32 {
488        (self.nanos % Self::NSEC_PER_SEC) as u32
489    }
490
491    /// Returns the total number of whole seconds represented by this `Timestamp` since the Unix
492    /// epoch.
493    pub fn as_secs(&self) -> u64 {
494        self.nanos / Self::NSEC_PER_SEC
495    }
496}
497
498impl From<std::time::Duration> for Timestamp {
499    fn from(duration: std::time::Duration) -> Self {
500        Self::from_secs_and_nanos(duration.as_secs(), duration.subsec_nanos())
501    }
502}
503
504impl From<Timestamp> for std::time::Duration {
505    fn from(timestamp: Timestamp) -> std::time::Duration {
506        Duration::from_nanos(timestamp.nanos)
507    }
508}
509
/// The kind of an object.  This alias always names the current version, [`ObjectKindV54`].
pub type ObjectKind = ObjectKindV54;

/// The type of a directory: whether it is encrypted and/or uses case folding.
#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum DirType {
    /// Neither encrypted nor casefolded.  This is the default.
    Normal,
    /// Encrypted, carrying the wrapping key id.
    Encrypted(WrappingKeyId),
    /// Legacy casefolded mode.
    LegacyCasefold,
    /// Casefolded and not encrypted.
    Casefold,
    /// Both encrypted and casefolded, carrying the wrapping key id.
    EncryptedCasefold(WrappingKeyId),
}
522
523impl DirType {
524    pub fn is_casefold(&self) -> bool {
525        matches!(self, DirType::LegacyCasefold | DirType::Casefold | DirType::EncryptedCasefold(_))
526    }
527
528    pub fn is_encrypted(&self) -> bool {
529        matches!(self, DirType::Encrypted(_) | DirType::EncryptedCasefold(_))
530    }
531
532    pub fn with_encryption(self, id: WrappingKeyId) -> Self {
533        match self {
534            DirType::Normal => DirType::Encrypted(id),
535            DirType::Casefold => DirType::EncryptedCasefold(id),
536            _ => self,
537        }
538    }
539
540    pub fn with_casefold(self, val: bool) -> Self {
541        match (val, self) {
542            (true, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
543                DirType::EncryptedCasefold(id)
544            }
545            (true, _) => DirType::Casefold,
546            (false, DirType::Encrypted(id) | DirType::EncryptedCasefold(id)) => {
547                DirType::Encrypted(id)
548            }
549            (false, _) => DirType::Normal,
550        }
551    }
552
553    pub fn wrapping_key_id(&self) -> Option<WrappingKeyId> {
554        match self {
555            DirType::Encrypted(id) | DirType::EncryptedCasefold(id) => Some(*id),
556            _ => None,
557        }
558    }
559}
560
561impl Default for DirType {
562    fn default() -> Self {
563        DirType::Normal
564    }
565}
566
/// Version 54 of the object kind, carried by `ObjectValueV54::Object`.
///
/// Unlike `ObjectKindV49`, directories describe encryption and case folding with a single
/// `DirType` field.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKindV54 {
    /// A file.
    File {
        /// The number of references to this file.
        refs: u64,
    },
    /// A directory.
    Directory {
        /// The number of sub-directories in this directory.
        sub_dirs: u64,
        /// The type of directory (encryption, casefolding, etc.)
        dir_type: DirType,
    },
    /// A graveyard.
    Graveyard,
    /// A symbolic link whose target is stored unencrypted.
    Symlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
    /// A symbolic link whose target is stored in encrypted form.
    EncryptedSymlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
        /// volume's data key.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
}
600
/// Version 49 of the object kind, carried by `ObjectValueV50::Object`.
///
/// Directories in this version describe encryption and case folding with separate
/// `wrapping_key_id` and `casefold` fields.
// NOTE(review): the conversion to `ObjectKindV54` is not visible in this part of the file —
// presumably it lives alongside the other migrations; confirm.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectKindV49 {
    /// A file.
    File {
        /// The number of references to this file.
        refs: u64,
    },
    /// A directory.
    Directory {
        /// The number of sub-directories in this directory.
        sub_dirs: u64,
        /// If set, contains the wrapping key id used to encrypt the file contents and filenames in
        /// this directory.
        wrapping_key_id: Option<WrappingKeyId>,
        /// If true, all files and sub-directories created in this directory will support case
        /// insensitive (but case-preserving) file naming.
        casefold: bool,
    },
    /// A graveyard.
    Graveyard,
    /// A symbolic link whose target is stored unencrypted.
    Symlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
    /// A symbolic link whose target is stored in encrypted form.
    EncryptedSymlink {
        /// The number of references to this symbolic link.
        refs: u64,
        /// `link` is the target of the link and has no meaning within Fxfs; clients are free to
        /// interpret it however they like.
        /// `link` is stored here in encrypted form, encrypted with the symlink's key using the
        /// same encryption scheme as the one used to encrypt filenames.
        #[serde(with = "crate::zerocopy_serialization")]
        link: Box<[u8]>,
    },
}
638
/// This consists of POSIX attributes that are not used in Fxfs but it may be meaningful to some
/// clients to have the ability to set and retrieve these values.
pub type PosixAttributes = PosixAttributesV32;

/// Version 32 of the POSIX attributes.  The derived `Default` sets every field to zero.
#[derive(Clone, Debug, Copy, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct PosixAttributesV32 {
    /// The mode bits associated with this object
    pub mode: u32,
    /// User ID of owner
    pub uid: u32,
    /// Group ID of owner
    pub gid: u32,
    /// Device ID
    pub rdev: u64,
}
655
/// Object-level attributes.  Note that these are not the same as "attributes" in the
/// ObjectValue::Attribute sense, which refers to an arbitrary data payload associated with an
/// object.  This naming collision is unfortunate.
pub type ObjectAttributes = ObjectAttributesV49;

/// Version 49 of the object-level attributes, carried by the `Object` variant of the object
/// value.  The derived `Default` zeroes every timestamp and counter and leaves
/// `posix_attributes` unset.
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ObjectAttributesV49 {
    /// The timestamp at which the object was created (i.e. crtime).
    pub creation_time: TimestampV49,
    /// The timestamp at which the object's data was last modified (i.e. mtime).
    pub modification_time: TimestampV49,
    /// The project id to associate this object's resource usage with. Zero means none.
    pub project_id: u64,
    /// Mode, uid, gid, and rdev
    pub posix_attributes: Option<PosixAttributesV32>,
    /// The number of bytes allocated to all extents across all attributes for this object.
    pub allocated_size: u64,
    /// The timestamp at which the object was last read (i.e. atime).
    pub access_time: TimestampV49,
    /// The timestamp at which the object's status was last modified (i.e. ctime).
    pub change_time: TimestampV49,
}
679
/// The value of an extended attribute.  This alias always names the current version,
/// [`ExtendedAttributeValueV32`].
pub type ExtendedAttributeValue = ExtendedAttributeValueV32;

/// Version 32 of the extended attribute value: either the bytes themselves or the id of the
/// attribute that holds them.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ExtendedAttributeValueV32 {
    /// The extended attribute value is stored directly in this object. If the value is above a
    /// certain size, it should be stored as an attribute with extents instead.
    Inline(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
    /// The extended attribute value is stored as an attribute with extents. The attribute id
    /// should be chosen to be within the range of 64-512.
    AttributeId(u64),
}
692
/// Id and descriptor for a child entry.
///
/// This alias always names the current version, [`ChildValueV32`].
pub type ChildValue = ChildValueV32;

/// Version 32 of the child value, carried by the `Child` variant of the object value.
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct ChildValueV32 {
    /// The ID of the child object.
    pub object_id: u64,
    /// Describes the type of the child.
    pub object_descriptor: ObjectDescriptorV32,
}
704
/// A root digest, used by `FsverityMetadataV50::Internal`.  This alias always names the current
/// version, [`RootDigestV33`].
pub type RootDigest = RootDigestV33;

/// Version 33 of the root digest, tagged by hash algorithm.
#[derive(
    Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd, Serialize, Deserialize, TypeFingerprint,
)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum RootDigestV33 {
    /// A SHA-256 digest, stored as a fixed 32-byte array.
    Sha256([u8; 32]),
    /// A SHA-512 digest, stored as a byte vector.
    // NOTE(review): the length is not enforced by the type — presumably always 64 bytes; confirm.
    Sha512(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
}
715
/// Metadata attached to a verified attribute.  This alias always names the current version,
/// [`FsverityMetadataV50`].
pub type FsverityMetadata = FsverityMetadataV50;

/// Version 50 of the fsverity metadata, carried by the `VerifiedAttribute` variant of the object
/// value.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum FsverityMetadataV50 {
    /// The root hash and salt.
    Internal(RootDigestV33, #[serde(with = "crate::zerocopy_serialization")] Vec<u8>),
    /// The root hash and salt are in a descriptor inside the merkle attribute.
    // NOTE(review): the range presumably locates that descriptor within the attribute; confirm.
    F2fs(std::ops::Range<u64>),
}
726
/// A single encryption key.  This alias always names the current version.
pub type EncryptionKey = EncryptionKeyV49;
/// Version 49 of the encryption key; the type itself is defined in `fxfs_crypto`.
pub type EncryptionKeyV49 = fxfs_crypto::EncryptionKey;

/// The set of encryption keys for an object.  This alias always names the current version,
/// [`EncryptionKeysV49`].
pub type EncryptionKeys = EncryptionKeysV49;

/// Version 49 of the encryption key set: a list of `(id, key)` pairs.  The derived `Default` is
/// the empty list.
#[derive(Clone, Default, Debug, PartialEq, Serialize, Deserialize, TypeFingerprint)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub struct EncryptionKeysV49(Vec<(u64, EncryptionKeyV49)>);
735
736impl EncryptionKeys {
737    pub fn get(&self, id: u64) -> Option<&EncryptionKey> {
738        self.0.iter().find_map(|(i, key)| (*i == id).then_some(key))
739    }
740
741    pub fn insert(&mut self, id: u64, key: EncryptionKey) {
742        self.0.push((id, key))
743    }
744
745    pub fn remove(&mut self, id: u64) -> Option<EncryptionKey> {
746        if let Some(ix) = self.0.iter().position(|(k, _)| *k == id) {
747            Some(self.0.remove(ix).1)
748        } else {
749            None
750        }
751    }
752}
753
754impl From<EncryptionKeys> for BTreeMap<u64, WrappedKey> {
755    fn from(keys: EncryptionKeys) -> Self {
756        keys.0.into_iter().map(|(id, key)| (id, key.into())).collect()
757    }
758}
759
760impl From<Vec<(u64, EncryptionKey)>> for EncryptionKeys {
761    fn from(value: Vec<(u64, EncryptionKey)>) -> Self {
762        Self(value)
763    }
764}
765
766impl std::ops::Deref for EncryptionKeys {
767    type Target = Vec<(u64, EncryptionKey)>;
768    fn deref(&self) -> &Self::Target {
769        &self.0
770    }
771}
772
/// ObjectValue is the value of an item in the object store.
/// Note that the tree stores deltas on objects, so these values describe deltas. Unless specified
/// otherwise, a value indicates an insert/replace mutation.
///
/// This alias always names the current version, [`ObjectValueV54`].
pub type ObjectValue = ObjectValueV54;
impl Value for ObjectValue {
    // The `None` variant is the value that marks an item as deleted.
    const DELETED_MARKER: Self = Self::None;
}
780
781#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
782#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
783pub enum ObjectValueV54 {
784    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
785    /// value are always filtered when a major compaction is performed, so the meaning must be the
786    /// same as if the item was not present.
787    None,
788    /// Some keys have no value but need to differentiate between a present value and no value
789    /// (None) i.e. their value is really a boolean: None => false, Some => true.
790    Some,
791    /// The value for an ObjectKey::Object record.
792    Object { kind: ObjectKindV54, attributes: ObjectAttributesV49 },
793    /// Specifies encryption keys to use for an object.
794    Keys(EncryptionKeysV49),
795    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
796    Attribute { size: u64, has_overwrite_extents: bool },
797    /// An extent associated with an object.
798    Extent(ExtentValueV38),
799    /// A child of an object.
800    Child(ChildValue),
801    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
802    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
803    /// the bounds of a single transaction.
804    Trim,
805    /// Added to support tracking Project ID usage and limits.
806    BytesAndNodes { bytes: i64, nodes: i64 },
807    /// A value for an extended attribute. Either inline or a redirection to an attribute with
808    /// extents.
809    ExtendedAttribute(ExtendedAttributeValueV32),
810    /// An attribute associated with a verified file object. |size| is the size of the attribute
811    /// in bytes.
812    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
813}
814
/// Version 50 of the object store value, kept so that older data can still be read.
///
/// The `Migrate` derive together with `migrate_to_version` declares `ObjectValueV54` as the
/// version this one migrates to.  The visible difference between the two is the `Object`
/// variant, which carries `ObjectKindV49` here.
#[derive(Migrate, Clone, Debug, Serialize, Deserialize, PartialEq, TypeFingerprint, Versioned)]
#[migrate_to_version(ObjectValueV54)]
#[cfg_attr(fuzz, derive(arbitrary::Arbitrary))]
pub enum ObjectValueV50 {
    /// Some keys have no value (this often indicates a tombstone of some sort).  Records with this
    /// value are always filtered when a major compaction is performed, so the meaning must be the
    /// same as if the item was not present.
    None,
    /// Some keys have no value but need to differentiate between a present value and no value
    /// (None) i.e. their value is really a boolean: None => false, Some => true.
    Some,
    /// The value for an ObjectKey::Object record.
    Object { kind: ObjectKindV49, attributes: ObjectAttributesV49 },
    /// Specifies encryption keys to use for an object.
    Keys(EncryptionKeysV49),
    /// An attribute associated with a file object. |size| is the size of the attribute in bytes.
    Attribute { size: u64, has_overwrite_extents: bool },
    /// An extent associated with an object.
    Extent(ExtentValueV38),
    /// A child of an object.
    Child(ChildValueV32),
    /// Graveyard entries can contain these entries which will cause a file that has extents beyond
    /// EOF to be trimmed at mount time.  This is used in cases where shrinking a file can exceed
    /// the bounds of a single transaction.
    Trim,
    /// Added to support tracking Project ID usage and limits.
    BytesAndNodes { bytes: i64, nodes: i64 },
    /// A value for an extended attribute. Either inline or a redirection to an attribute with
    /// extents.
    ExtendedAttribute(ExtendedAttributeValueV32),
    /// An attribute associated with a verified file object. |size| is the size of the attribute
    /// in bytes.
    VerifiedAttribute { size: u64, fsverity_metadata: FsverityMetadataV50 },
}
849
850impl ObjectValue {
851    /// Creates an ObjectValue for a file object.
852    pub fn file(
853        refs: u64,
854        allocated_size: u64,
855        creation_time: Timestamp,
856        modification_time: Timestamp,
857        access_time: Timestamp,
858        change_time: Timestamp,
859        project_id: u64,
860        posix_attributes: Option<PosixAttributes>,
861    ) -> ObjectValue {
862        ObjectValue::Object {
863            kind: ObjectKind::File { refs },
864            attributes: ObjectAttributes {
865                creation_time,
866                modification_time,
867                project_id,
868                posix_attributes,
869                allocated_size,
870                access_time,
871                change_time,
872            },
873        }
874    }
875    pub fn keys(encryption_keys: EncryptionKeys) -> ObjectValue {
876        ObjectValue::Keys(encryption_keys)
877    }
878    /// Creates an ObjectValue for an object attribute.
879    pub fn attribute(size: u64, has_overwrite_extents: bool) -> ObjectValue {
880        ObjectValue::Attribute { size, has_overwrite_extents }
881    }
882    /// Creates an ObjectValue for an object attribute of a verified file.
883    pub fn verified_attribute(size: u64, fsverity_metadata: FsverityMetadata) -> ObjectValue {
884        ObjectValue::VerifiedAttribute { size, fsverity_metadata }
885    }
886    /// Creates an ObjectValue for an insertion/replacement of an object extent.
887    pub fn extent(device_offset: u64, key_id: u64) -> ObjectValue {
888        ObjectValue::Extent(ExtentValue::new_raw(device_offset, key_id))
889    }
890    /// Creates an ObjectValue for an insertion/replacement of an object extent.
891    pub fn extent_with_checksum(
892        device_offset: u64,
893        checksum: Checksums,
894        key_id: u64,
895    ) -> ObjectValue {
896        ObjectValue::Extent(ExtentValue::with_checksum(device_offset, checksum, key_id))
897    }
898    /// Creates an ObjectValue for a deletion of an object extent.
899    pub fn deleted_extent() -> ObjectValue {
900        ObjectValue::Extent(ExtentValue::deleted_extent())
901    }
902    /// Creates an ObjectValue for an object child.
903    pub fn child(object_id: u64, object_descriptor: ObjectDescriptor) -> ObjectValue {
904        ObjectValue::Child(ChildValue { object_id, object_descriptor })
905    }
906    /// Creates an ObjectValue for an object symlink.
907    pub fn symlink(
908        link: impl Into<Box<[u8]>>,
909        creation_time: Timestamp,
910        modification_time: Timestamp,
911        project_id: u64,
912    ) -> ObjectValue {
913        ObjectValue::Object {
914            kind: ObjectKind::Symlink { refs: 1, link: link.into() },
915            attributes: ObjectAttributes {
916                creation_time,
917                modification_time,
918                project_id,
919                ..Default::default()
920            },
921        }
922    }
923    /// Creates an ObjectValue for an encrypted symlink object.
924    pub fn encrypted_symlink(
925        link: impl Into<Box<[u8]>>,
926        creation_time: Timestamp,
927        modification_time: Timestamp,
928        project_id: u64,
929    ) -> ObjectValue {
930        ObjectValue::Object {
931            kind: ObjectKind::EncryptedSymlink { refs: 1, link: link.into() },
932            attributes: ObjectAttributes {
933                creation_time,
934                modification_time,
935                project_id,
936                ..Default::default()
937            },
938        }
939    }
940    pub fn inline_extended_attribute(value: impl Into<Vec<u8>>) -> ObjectValue {
941        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::Inline(value.into()))
942    }
943    pub fn extended_attribute(attribute_id: u64) -> ObjectValue {
944        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::AttributeId(attribute_id))
945    }
946}
947
/// Unversioned name for an object store item; currently an alias for `ObjectItemV55`.
pub type ObjectItem = ObjectItemV55;

/// Version 54 item: an `ObjectKeyV54`/`ObjectValueV54` pair held in a `LegacyItem`.
pub type ObjectItemV54 = LegacyItem<ObjectKeyV54, ObjectValueV54>;
/// Version 55 item: the same key and value types as version 54, held in an `Item` instead of a
/// `LegacyItem`.
pub type ObjectItemV55 = Item<ObjectKeyV54, ObjectValueV54>;
952
953impl From<ObjectItemV54> for ObjectItemV55 {
954    fn from(item: ObjectItemV54) -> Self {
955        Self { key: item.key, value: item.value }
956    }
957}
958
/// Version 50 item: an `ObjectKeyV43`/`ObjectValueV50` pair held in a `LegacyItem`.
pub type ObjectItemV50 = LegacyItem<ObjectKeyV43, ObjectValueV50>;
960
961impl ObjectItem {
962    pub fn is_tombstone(&self) -> bool {
963        matches!(
964            self,
965            Item {
966                key: ObjectKey { data: ObjectKeyData::Object, .. },
967                value: ObjectValue::None,
968                ..
969            }
970        )
971    }
972}
973
974// If the given item describes an extent, unwraps it and returns the extent key/value.
975impl<'a> From<ItemRef<'a, ObjectKey, ObjectValue>>
976    for Option<(/*object-id*/ u64, /*attribute-id*/ u64, &'a ExtentKey, &'a ExtentValue)>
977{
978    fn from(item: ItemRef<'a, ObjectKey, ObjectValue>) -> Self {
979        match item {
980            ItemRef {
981                key:
982                    ObjectKey {
983                        object_id,
984                        data:
985                            ObjectKeyData::Attribute(
986                                attribute_id, //
987                                AttributeKey::Extent(extent_key),
988                            ),
989                    },
990                value: ObjectValue::Extent(extent_value),
991                ..
992            } => Some((*object_id, *attribute_id, extent_key, extent_value)),
993            _ => None,
994        }
995    }
996}
997
/// Unversioned name for the key type; currently an alias for `FxfsKeyV49`.
pub type FxfsKey = FxfsKeyV49;
/// Version 49 of the key type, which is `fxfs_crypto::FxfsKey` itself.
pub type FxfsKeyV49 = fxfs_crypto::FxfsKey;
1000
#[cfg(test)]
mod tests {
    use super::{ObjectKey, ObjectKeyV54, TimestampV49};
    use crate::lsm_tree::types::{
        FuzzyHash as _, LayerKey, OrdLowerBound, OrdUpperBound, RangeKey,
    };
    use std::cmp::Ordering;
    use std::time::{Duration, SystemTime, UNIX_EPOCH};

    // Guards against accidental changes to the hash function used for Fxfs keys.  Fxfs depends on
    // hash values staying stable: if they change, existing images will look corrupt (see
    // https://fxbug.dev/419133532).  A failure here means the hash algorithm changed.
    #[test]
    fn test_hash_stability() {
        // This deliberately pins ObjectKeyV54 rather than the unversioned alias.  When
        // ObjectKeyV54 is removed, switch to a later key version and regenerate the expected
        // hashes below.
        let object_hashes = ObjectKeyV54::object(100).fuzzy_hash().collect::<Vec<_>>();
        assert_eq!(object_hashes.as_slice(), &[11885326717398844384]);

        let extent_hashes =
            ObjectKeyV54::extent(1, 0, 0..2 * 1024 * 1024).fuzzy_hash().collect::<Vec<_>>();
        assert_eq!(extent_hashes.as_slice(), &[11090579907097549012, 2814892992701560424]);
    }

    #[test]
    fn test_next_key() {
        let next_key = ObjectKey::extent(1, 0, 0..100).next_key().unwrap();
        let lower = |key: ObjectKey| key.cmp_lower_bound(&next_key);
        let upper = |key: ObjectKey| key.cmp_upper_bound(&next_key);

        assert_eq!(lower(ObjectKey::extent(1, 0, 101..200)), Ordering::Greater);
        assert_eq!(lower(ObjectKey::extent(1, 0, 100..200)), Ordering::Equal);
        assert_eq!(lower(ObjectKey::extent(1, 0, 100..101)), Ordering::Equal);
        assert_eq!(lower(ObjectKey::extent(1, 0, 99..100)), Ordering::Less);

        assert_eq!(upper(ObjectKey::extent(1, 0, 0..100)), Ordering::Less);
        assert_eq!(upper(ObjectKey::extent(1, 0, 99..100)), Ordering::Less);
        assert_eq!(upper(ObjectKey::extent(1, 0, 100..101)), Ordering::Equal);
        assert_eq!(upper(ObjectKey::extent(1, 0, 100..200)), Ordering::Greater);
        assert_eq!(upper(ObjectKey::extent(1, 0, 50..101)), Ordering::Equal);
        assert_eq!(upper(ObjectKey::extent(1, 0, 50..200)), Ordering::Greater);
    }

    #[test]
    fn test_range_key() {
        // Extent keys must report themselves as range keys so that they cannot be used with
        // point queries; other object keys must remain usable with point queries.
        assert!(ObjectKey::extent(1, 0, 0..2 * 1024 * 1024).is_range_key());
        assert!(!ObjectKey::object(100).is_range_key());

        let overlaps = |a: ObjectKey, b: ObjectKey| a.overlaps(&b);

        // Object keys only overlap themselves, and never overlap extent keys.
        assert!(overlaps(ObjectKey::object(1), ObjectKey::object(1)));
        assert!(!overlaps(ObjectKey::object(1), ObjectKey::object(2)));
        assert!(!overlaps(ObjectKey::extent(1, 0, 0..100), ObjectKey::object(1)));
        assert!(!overlaps(ObjectKey::object(1), ObjectKey::extent(1, 0, 0..100)));

        // Extents of different objects or different attributes never overlap.
        assert!(!overlaps(ObjectKey::extent(1, 0, 0..100), ObjectKey::extent(2, 0, 0..100)));
        assert!(!overlaps(ObjectKey::extent(1, 0, 0..100), ObjectKey::extent(1, 1, 0..100)));
        assert!(overlaps(ObjectKey::extent(1, 0, 0..100), ObjectKey::extent(1, 0, 0..100)));

        // Ranges sharing at least one byte overlap, in either order.
        assert!(overlaps(ObjectKey::extent(1, 0, 0..50), ObjectKey::extent(1, 0, 49..100)));
        assert!(overlaps(ObjectKey::extent(1, 0, 49..100), ObjectKey::extent(1, 0, 0..50)));

        // Ranges that merely touch do not overlap, in either order.
        assert!(!overlaps(ObjectKey::extent(1, 0, 0..50), ObjectKey::extent(1, 0, 50..100)));
        assert!(!overlaps(ObjectKey::extent(1, 0, 50..100), ObjectKey::extent(1, 0, 0..50)));
    }

    #[test]
    fn test_timestamp() {
        // Converts `std_time` to a timestamp and checks that every accessor agrees with the
        // original duration.
        fn assert_round_trips(std_time: Duration) {
            let ts_time: TimestampV49 = std_time.into();
            let round_tripped: Duration = ts_time.into();
            assert_eq!(round_tripped, std_time);
            assert_eq!(ts_time.subsec_nanos(), std_time.subsec_nanos());
            assert_eq!(ts_time.as_secs(), std_time.as_secs());
            assert_eq!(ts_time.as_nanos() as u128, std_time.as_nanos());
        }

        let samples = [
            Duration::from_nanos(0),
            Duration::from_nanos(u64::MAX),
            SystemTime::now().duration_since(UNIX_EPOCH).unwrap(),
        ];
        for sample in samples {
            assert_round_trips(sample);
        }

        // Durations too large to represent are expected to end up as u64::MAX nanoseconds.
        let far_future = Duration::from_secs(u64::MAX - 1);
        let ts: TimestampV49 = far_future.into();
        assert_eq!(ts.nanos, u64::MAX);

        let just_too_big = Duration::from_nanos(u64::MAX) + Duration::from_nanos(1);
        let ts: TimestampV49 = just_too_big.into();
        assert_eq!(ts.nanos, u64::MAX);
    }
}