fxfs/object_store/journal/
super_block.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! We currently store two of these super-blocks (A/B) starting at offset 0 and 512kB.
6//!
7//! Immediately following the serialized `SuperBlockHeader` structure below is a stream of
8//! serialized operations that are replayed into the root parent `ObjectStore`. Note that the root
9//! parent object store exists entirely in RAM until serialized back into the super-block.
10//!
11//! Super-blocks are updated alternately with a monotonically increasing generation number.
12//! At mount time, the super-block used is the valid `SuperBlock` with the highest generation
13//! number.
14//!
15//! Note the asymmetry here regarding load/save:
16//!   * We load a superblock from a Device/SuperBlockInstance and return a
17//!     (SuperBlockHeader, ObjectStore) pair. The ObjectStore is populated directly from device.
18//!   * We save a superblock from a (SuperBlockHeader, Vec<ObjectItem>) pair to a WriteObjectHandle.
19//!
20//! This asymmetry is required for consistency.
21//! The Vec<ObjectItem> is produced by scanning the root_parent_store. This is the responsibility
22//! of the journal code, which must hold a lock to avoid concurrent updates. However, this lock
23//! must NOT be held when saving the superblock as additional extents may need to be allocated as
24//! part of the save process.
25use crate::errors::FxfsError;
26use crate::filesystem::{ApplyContext, ApplyMode, FxFilesystem, JournalingObject};
27use crate::log::*;
28use crate::lsm_tree::types::LayerIterator;
29use crate::lsm_tree::{LSMTree, LayerSet, Query};
30use crate::metrics;
31use crate::object_handle::ObjectHandle as _;
32use crate::object_store::allocator::Reservation;
33use crate::object_store::data_object_handle::{FileExtent, OverwriteOptions};
34use crate::object_store::journal::bootstrap_handle::BootstrapObjectHandle;
35use crate::object_store::journal::reader::{JournalReader, ReadResult};
36use crate::object_store::journal::writer::JournalWriter;
37use crate::object_store::journal::{BLOCK_SIZE, JournalCheckpoint, JournalCheckpointV32};
38use crate::object_store::object_record::{
39    ObjectItem, ObjectItemV40, ObjectItemV41, ObjectItemV43, ObjectItemV46, ObjectItemV47,
40    ObjectItemV49, ObjectItemV50,
41};
42use crate::object_store::transaction::{AssocObj, Options};
43use crate::object_store::tree::MajorCompactable;
44use crate::object_store::{
45    DataObjectHandle, HandleOptions, HandleOwner, Mutation, ObjectKey, ObjectStore, ObjectValue,
46};
47use crate::range::RangeExt;
48use crate::serialized_types::{
49    EARLIEST_SUPPORTED_VERSION, FIRST_EXTENT_IN_SUPERBLOCK_VERSION, Migrate,
50    SMALL_SUPERBLOCK_VERSION, Version, Versioned, VersionedLatest, migrate_to_version,
51};
52use anyhow::{Context, Error, bail, ensure};
53use fprint::TypeFingerprint;
54use fuchsia_inspect::{Property as _, UintProperty};
55use fuchsia_sync::Mutex;
56use futures::FutureExt;
57use rustc_hash::FxHashMap as HashMap;
58use serde::{Deserialize, Serialize};
59use std::collections::VecDeque;
60use std::fmt;
61use std::io::{Read, Write};
62use std::ops::Range;
63use std::sync::Arc;
64use std::time::SystemTime;
65use storage_device::Device;
66use uuid::Uuid;
67
// These only exist in the root store.
const SUPER_BLOCK_A_OBJECT_ID: u64 = 1;
const SUPER_BLOCK_B_OBJECT_ID: u64 = 2;

/// The superblock is extended in units of `SUPER_BLOCK_CHUNK_SIZE` as required.
pub const SUPER_BLOCK_CHUNK_SIZE: u64 = 65536;

/// Each superblock is one block but may contain records that extend its own length.
const MIN_SUPER_BLOCK_SIZE: u64 = 4096;
/// The first 2 * 512 KiB on the disk used to be reserved for two A/B super-blocks.
const LEGACY_MIN_SUPER_BLOCK_SIZE: u64 = 524_288;

/// All superblocks start with the magic bytes "FxfsSupr".
const SUPER_BLOCK_MAGIC: &[u8; 8] = b"FxfsSupr";

/// An enum representing one of our super-block instances.
///
/// This provides hard-coded constants related to the location and properties of the super-blocks
/// that are required to bootstrap the filesystem.
#[derive(Copy, Clone, Debug)]
pub enum SuperBlockInstance {
    A,
    B,
}

impl SuperBlockInstance {
    /// Returns the next [SuperBlockInstance] for use in round-robining writes across super-blocks.
    pub fn next(&self) -> SuperBlockInstance {
        match self {
            SuperBlockInstance::A => SuperBlockInstance::B,
            SuperBlockInstance::B => SuperBlockInstance::A,
        }
    }

    /// Returns the object id under which this instance is stored in the root store.
    pub fn object_id(&self) -> u64 {
        match self {
            SuperBlockInstance::A => SUPER_BLOCK_A_OBJECT_ID,
            SuperBlockInstance::B => SUPER_BLOCK_B_OBJECT_ID,
        }
    }

    /// Returns the device byte offset at which this instance begins.
    ///
    /// Instance B starts one legacy super-block length (512 KiB) into the device in both the
    /// current and the legacy layout, so a single definition serves both extent helpers.
    fn start_offset(&self) -> u64 {
        match self {
            SuperBlockInstance::A => 0,
            SuperBlockInstance::B => LEGACY_MIN_SUPER_BLOCK_SIZE,
        }
    }

    /// Returns the byte range where the first extent of the [SuperBlockInstance] is stored.
    /// (Note that a [SuperBlockInstance] may still have multiple extents.)
    pub fn first_extent(&self) -> Range<u64> {
        let start = self.start_offset();
        start..start + MIN_SUPER_BLOCK_SIZE
    }

    /// We used to allocate 512kB to superblocks but this was almost always more than needed.
    pub fn legacy_first_extent(&self) -> Range<u64> {
        let start = self.start_offset();
        start..start + LEGACY_MIN_SUPER_BLOCK_SIZE
    }
}
126
pub type SuperBlockHeader = SuperBlockHeaderV32;

/// The fixed header that is serialized at the start of each super-block, directly after the
/// magic bytes. The fields are serialized via serde, so their order and types are part of the
/// on-disk format.
#[derive(
    Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize, TypeFingerprint, Versioned,
)]
pub struct SuperBlockHeaderV32 {
    /// The globally unique identifier for the filesystem.
    pub guid: UuidWrapperV32,

    /// There are two super-blocks which are used in an A/B configuration. The super-block with the
    /// greatest generation number is what is used when mounting an Fxfs image; the other is
    /// discarded.
    pub generation: u64,

    /// The root parent store is an in-memory only store and serves as the backing store for the
    /// root store and the journal.  The records for this store are serialized into the super-block
    /// and mutations are also recorded in the journal.
    pub root_parent_store_object_id: u64,

    /// The root parent needs a graveyard and there's nowhere else to store it other than in the
    /// super-block.
    pub root_parent_graveyard_directory_object_id: u64,

    /// The root object store contains all other metadata objects (including the allocator, the
    /// journal and the super-blocks) and is the parent for all other object stores.
    pub root_store_object_id: u64,

    /// This is in the root object store.
    pub allocator_object_id: u64,

    /// This is in the root parent object store.
    pub journal_object_id: u64,

    /// Start checkpoint for the journal file.
    pub journal_checkpoint: JournalCheckpointV32,

    /// Offset of the journal file when the super-block was written.  If no entry is present in
    /// journal_file_offsets for a particular object, then an object might have dependencies on the
    /// journal from super_block_journal_file_offset onwards, but not earlier.
    pub super_block_journal_file_offset: u64,

    /// object id -> journal file offset. Indicates where each object has been flushed to.
    pub journal_file_offsets: HashMap<u64, u64>,

    /// Records the amount of borrowed metadata space as applicable at
    /// `super_block_journal_file_offset`.
    pub borrowed_metadata_space: u64,

    /// The earliest version of Fxfs used to create any still-existing struct in the filesystem.
    ///
    /// Note: structs in the filesystem may have been made with various different versions of
    /// Fxfs.
    pub earliest_version: Version,
}
180
181type UuidWrapper = UuidWrapperV32;
182#[derive(Clone, Default, Eq, PartialEq)]
183pub struct UuidWrapperV32(pub Uuid);
184
185impl UuidWrapper {
186    fn new() -> Self {
187        Self(Uuid::new_v4())
188    }
189    #[cfg(test)]
190    fn nil() -> Self {
191        Self(Uuid::nil())
192    }
193}
194
195impl fmt::Debug for UuidWrapper {
196    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
197        // The UUID uniquely identifies the filesystem, so we should redact it so that we don't leak
198        // it in logs.
199        f.write_str("<redacted>")
200    }
201}
202
203impl TypeFingerprint for UuidWrapper {
204    fn fingerprint() -> String {
205        "<[u8;16]>".to_owned()
206    }
207}
208
209// Uuid serializes like a slice, but SuperBlockHeader used to contain [u8; 16] and we want to remain
210// compatible.
211impl Serialize for UuidWrapper {
212    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
213        self.0.as_bytes().serialize(serializer)
214    }
215}
216
217impl<'de> Deserialize<'de> for UuidWrapper {
218    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
219        <[u8; 16]>::deserialize(deserializer).map(|bytes| UuidWrapperV32(Uuid::from_bytes(bytes)))
220    }
221}
222
pub type SuperBlockRecord = SuperBlockRecordV50;

/// A single record in the stream that follows the serialized super-block header.
///
/// The variants are serialized via serde, so their order is part of the on-disk format.
#[allow(clippy::large_enum_variant)]
#[derive(Debug, Serialize, Deserialize, TypeFingerprint, Versioned)]
pub enum SuperBlockRecordV50 {
    // When reading the super-block we know the initial extent, but not subsequent extents, so these
    // records need to exist to allow us to completely read the super-block.
    Extent(Range<u64>),

    // Following the super-block header are ObjectItem records that are to be replayed into the root
    // parent object store.
    ObjectItem(ObjectItemV50),

    // Marks the end of the full super-block.
    End,
}
239
// Earlier serialized form of `SuperBlockRecord`, carrying `ObjectItemV49` items. The `Migrate`
// derive converts it to `SuperBlockRecordV50`.
#[allow(clippy::large_enum_variant)]
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV50)]
pub enum SuperBlockRecordV49 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV49),
    End,
}
248
// Earlier serialized form of `SuperBlockRecord`, carrying `ObjectItemV47` items. The `Migrate`
// derive converts it to `SuperBlockRecordV49`.
#[allow(clippy::large_enum_variant)]
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV49)]
pub enum SuperBlockRecordV47 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV47),
    End,
}
257
// Earlier serialized form of `SuperBlockRecord`, carrying `ObjectItemV46` items. The `Migrate`
// derive converts it to `SuperBlockRecordV47`.
#[allow(clippy::large_enum_variant)]
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV47)]
pub enum SuperBlockRecordV46 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV46),
    End,
}
266
// Earlier serialized form of `SuperBlockRecord`, carrying `ObjectItemV43` items. The `Migrate`
// derive converts it to `SuperBlockRecordV46`.
#[allow(clippy::large_enum_variant)]
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV46)]
pub enum SuperBlockRecordV43 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV43),
    End,
}
275
// Earlier serialized form of `SuperBlockRecord`, carrying `ObjectItemV41` items. The `Migrate`
// derive converts it to `SuperBlockRecordV43`.
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV43)]
pub enum SuperBlockRecordV41 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV41),
    End,
}
283
// Earlier serialized form of `SuperBlockRecord`, carrying `ObjectItemV40` items. The `Migrate`
// derive converts it to `SuperBlockRecordV41`.
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV41)]
pub enum SuperBlockRecordV40 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV40),
    End,
}
291
/// Inspect properties describing the most recent super-block write. They are updated by
/// `SuperBlockManager::save` after a super-block has been written.
struct SuperBlockMetrics {
    /// Time we wrote the most recent superblock in milliseconds since [`std::time::UNIX_EPOCH`].
    /// Uses [`std::time::SystemTime`] as the clock source.
    last_super_block_update_time_ms: UintProperty,

    /// Offset of the most recent superblock we wrote in the journal.
    last_super_block_offset: UintProperty,
}
300
301impl Default for SuperBlockMetrics {
302    fn default() -> Self {
303        SuperBlockMetrics {
304            last_super_block_update_time_ms: metrics::detail()
305                .create_uint("last_super_block_update_time_ms", 0),
306            last_super_block_offset: metrics::detail().create_uint("last_super_block_offset", 0),
307        }
308    }
309}
310
311/// Reads an individual (A/B) super-block instance and root_parent_store from device.
312/// Users should use SuperBlockManager::load() instead.
313async fn read(
314    device: Arc<dyn Device>,
315    block_size: u64,
316    instance: SuperBlockInstance,
317) -> Result<(SuperBlockHeader, SuperBlockInstance, ObjectStore), Error> {
318    let (super_block_header, mut reader) = SuperBlockHeader::read_header(device.clone(), instance)
319        .await
320        .context("failed to read superblock")?;
321    let root_parent = ObjectStore::new_root_parent(
322        device,
323        block_size,
324        super_block_header.root_parent_store_object_id,
325    );
326    root_parent.set_graveyard_directory_object_id(
327        super_block_header.root_parent_graveyard_directory_object_id,
328    );
329
330    loop {
331        // TODO: Flatten a layer and move reader here?
332        let (mutation, sequence) = match reader.next_item().await? {
333            // RecordReader should filter out extent records.
334            SuperBlockRecord::Extent(_) => bail!("Unexpected extent record"),
335            SuperBlockRecord::ObjectItem(item) => {
336                (Mutation::insert_object(item.key, item.value), item.sequence)
337            }
338            SuperBlockRecord::End => break,
339        };
340        root_parent.apply_mutation(
341            mutation,
342            &ApplyContext {
343                mode: ApplyMode::Replay,
344                checkpoint: JournalCheckpoint { file_offset: sequence, ..Default::default() },
345            },
346            AssocObj::None,
347        )?;
348    }
349    Ok((super_block_header, instance, root_parent))
350}
351
352/// Write a super-block to the given file handle.
353/// Requires that the filesystem is fully loaded and writable as this may require allocation.
354async fn write<S: HandleOwner>(
355    super_block_header: &SuperBlockHeader,
356    items: LayerSet<ObjectKey, ObjectValue>,
357    handle: DataObjectHandle<S>,
358) -> Result<(), Error> {
359    let object_manager = handle.store().filesystem().object_manager().clone();
360    // TODO(https://fxbug.dev/42177407): Don't use the same code here for Journal and SuperBlock. They
361    // aren't the same things and it is already getting convoluted. e.g of diff stream content:
362    //   Superblock:  (Magic, Ver, Header(Ver), Extent(Ver)*, SuperBlockRecord(Ver)*, ...)
363    //   Journal:     (Ver, JournalRecord(Ver)*, RESET, Ver2, JournalRecord(Ver2)*, ...)
364    // We should abstract away the checksum code and implement these separately.
365
366    let mut writer =
367        SuperBlockWriter::new(handle, super_block_header, object_manager.metadata_reservation())
368            .await?;
369    let mut merger = items.merger();
370    let mut iter = LSMTree::major_iter(merger.query(Query::FullScan).await?).await?;
371    while let Some(item) = iter.get() {
372        writer.write_root_parent_item(item.cloned()).await?;
373        iter.advance().await?;
374    }
375    writer.finalize().await
376}
377
// Compacts and returns the *old* snapshot of the root_parent store.
// Must be performed whilst holding a writer lock.
//
// The layer set is captured first; a full scan over it is then copied item by item into a
// fresh mutable layer, which replaces the tree's mutable layer. The layer set captured at the
// start is what gets returned.
//
// Panics if any of the futures driven with `now_or_never` is not immediately ready.
pub fn compact_root_parent(
    root_parent_store: &ObjectStore,
) -> Result<LayerSet<ObjectKey, ObjectValue>, Error> {
    // The root parent always uses in-memory layers which shouldn't be async, so we can use
    // `now_or_never`.
    let tree = root_parent_store.tree();
    let layer_set = tree.layer_set();
    {
        // The merger and iterator are confined to this scope so that they are gone before
        // `layer_set` is returned.
        let mut merger = layer_set.merger();
        let mut iter = LSMTree::major_iter(merger.query(Query::FullScan).now_or_never().unwrap()?)
            .now_or_never()
            .unwrap()?;
        let new_layer = LSMTree::new_mutable_layer();
        while let Some(item_ref) = iter.get() {
            new_layer.insert(item_ref.cloned())?;
            iter.advance().now_or_never().unwrap()?;
        }
        // Install the freshly built layer as the tree's mutable layer.
        tree.set_mutable_layer(new_layer);
    }
    Ok(layer_set)
}
401
/// This encapsulates the A/B alternating super-block logic.
/// All super-block load/save operations should be via the methods on this type.
pub(super) struct SuperBlockManager {
    /// The instance that the next call to `save` will write. `load` sets it to the instance
    /// that was *not* loaded, and `save` flips it each time it is called.
    pub next_instance: Mutex<SuperBlockInstance>,
    /// Inspect properties recording when and where the last super-block was written.
    metrics: SuperBlockMetrics,
}
408
409impl SuperBlockManager {
410    pub fn new() -> Self {
411        Self { next_instance: Mutex::new(SuperBlockInstance::A), metrics: Default::default() }
412    }
413
414    /// Loads both A/B super-blocks and root_parent ObjectStores and and returns the newest valid
415    /// pair. Also ensures the next superblock updated via |save| will be the other instance.
416    pub async fn load(
417        &self,
418        device: Arc<dyn Device>,
419        block_size: u64,
420    ) -> Result<(SuperBlockHeader, ObjectStore), Error> {
421        // Superblocks consume a minimum of one block. We currently hard code the length of
422        // this first extent. It should work with larger block sizes, but has not been tested.
423        // TODO(https://fxbug.dev/42063349): Consider relaxing this.
424        debug_assert!(MIN_SUPER_BLOCK_SIZE == block_size);
425
426        let (super_block, current_super_block, root_parent) = match futures::join!(
427            read(device.clone(), block_size, SuperBlockInstance::A),
428            read(device.clone(), block_size, SuperBlockInstance::B)
429        ) {
430            (Err(e1), Err(e2)) => {
431                bail!("Failed to load both superblocks due to {:?}\nand\n{:?}", e1, e2)
432            }
433            (Ok(result), Err(_)) => result,
434            (Err(_), Ok(result)) => result,
435            (Ok(result1), Ok(result2)) => {
436                // Break the tie by taking the super-block with the greatest generation.
437                if result2.0.generation > result1.0.generation { result2 } else { result1 }
438            }
439        };
440        info!(super_block:?, current_super_block:?; "loaded super-block");
441        *self.next_instance.lock() = current_super_block.next();
442        Ok((super_block, root_parent))
443    }
444
445    /// Writes the provided superblock and root_parent ObjectStore to the device.
446    /// Requires that the filesystem is fully loaded and writable as this may require allocation.
447    pub async fn save(
448        &self,
449        super_block_header: SuperBlockHeader,
450        filesystem: Arc<FxFilesystem>,
451        root_parent: LayerSet<ObjectKey, ObjectValue>,
452    ) -> Result<(), Error> {
453        let root_store = filesystem.root_store();
454        let object_id = {
455            let mut next_instance = self.next_instance.lock();
456            let object_id = next_instance.object_id();
457            *next_instance = next_instance.next();
458            object_id
459        };
460        let handle = ObjectStore::open_object(
461            &root_store,
462            object_id,
463            HandleOptions { skip_journal_checks: true, ..Default::default() },
464            None,
465        )
466        .await
467        .context("Failed to open superblock object")?;
468        write(&super_block_header, root_parent, handle).await?;
469        self.metrics
470            .last_super_block_offset
471            .set(super_block_header.super_block_journal_file_offset);
472        self.metrics.last_super_block_update_time_ms.set(
473            SystemTime::now()
474                .duration_since(SystemTime::UNIX_EPOCH)
475                .unwrap()
476                .as_millis()
477                .try_into()
478                .unwrap_or(0u64),
479        );
480        Ok(())
481    }
482}
483
impl SuperBlockHeader {
    /// Creates a new instance with random GUID.
    ///
    /// The generation starts at 1. Fields that are not passed in
    /// (`super_block_journal_file_offset`, `journal_file_offsets` and
    /// `borrowed_metadata_space`) take their `Default` values.
    pub fn new(
        root_parent_store_object_id: u64,
        root_parent_graveyard_directory_object_id: u64,
        root_store_object_id: u64,
        allocator_object_id: u64,
        journal_object_id: u64,
        journal_checkpoint: JournalCheckpoint,
        earliest_version: Version,
    ) -> Self {
        SuperBlockHeader {
            guid: UuidWrapper::new(),
            generation: 1u64,
            root_parent_store_object_id,
            root_parent_graveyard_directory_object_id,
            root_store_object_id,
            allocator_object_id,
            journal_object_id,
            journal_checkpoint,
            earliest_version,
            ..Default::default()
        }
    }

    /// Read the super-block header, and return it and a reader that produces the records that are
    /// to be replayed in to the root parent object store.
    ///
    /// # Errors
    ///
    /// Fails if the magic bytes do not match, if the header cannot be deserialized, or if the
    /// super-block version, the journal checkpoint version or `earliest_version` is older than
    /// `EARLIEST_SUPPORTED_VERSION`.
    async fn read_header(
        device: Arc<dyn Device>,
        target_super_block: SuperBlockInstance,
    ) -> Result<(SuperBlockHeader, RecordReader), Error> {
        // Bootstrap with only the instance's hard-coded first extent; further extents are
        // pushed onto the handle once the version is known.
        let handle = BootstrapObjectHandle::new(
            target_super_block.object_id(),
            device,
            target_super_block.first_extent(),
        );
        let mut reader = JournalReader::new(handle, &JournalCheckpoint::default());
        reader.set_eof_ok();

        reader.fill_buf().await?;

        // Both are assigned inside the block below, which parses straight out of the reader's
        // buffer and evaluates to the number of bytes parsed so they can be consumed.
        let mut super_block_header;
        let super_block_version;
        reader.consume({
            let mut cursor = std::io::Cursor::new(reader.buffer());
            // Validate magic bytes.
            let mut magic_bytes: [u8; 8] = [0; 8];
            cursor.read_exact(&mut magic_bytes)?;
            if magic_bytes.as_slice() != SUPER_BLOCK_MAGIC.as_slice() {
                bail!("Invalid magic: {:?}", magic_bytes);
            }
            (super_block_header, super_block_version) =
                SuperBlockHeader::deserialize_with_version(&mut cursor)?;

            if super_block_version < EARLIEST_SUPPORTED_VERSION {
                bail!("Unsupported SuperBlock version: {:?}", super_block_version);
            }

            // NOTE: It is possible that data was written to the journal with an old version
            // but no compaction ever happened, so the journal version could potentially be older
            // than the layer file versions.
            if super_block_header.journal_checkpoint.version < EARLIEST_SUPPORTED_VERSION {
                bail!(
                    "Unsupported JournalCheckpoint version: {:?}",
                    super_block_header.journal_checkpoint.version
                );
            }

            if super_block_header.earliest_version < EARLIEST_SUPPORTED_VERSION {
                bail!(
                    "Filesystem contains struct with unsupported version: {:?}",
                    super_block_header.earliest_version
                );
            }

            cursor.position() as usize
        });

        // From version 45 superblocks describe their own extents (a noop here).
        // At version 44, superblocks assume a 4kb first extent.
        // Prior to version 44, superblocks assume a 512kb first extent.
        if super_block_version < SMALL_SUPERBLOCK_VERSION {
            reader.handle().push_extent(0, target_super_block.legacy_first_extent());
        } else if super_block_version < FIRST_EXTENT_IN_SUPERBLOCK_VERSION {
            reader.handle().push_extent(0, target_super_block.first_extent())
        }

        // If guid is zeroed (e.g. in a newly imaged system), assign one randomly.
        if super_block_header.guid.0.is_nil() {
            super_block_header.guid = UuidWrapper::new();
        }
        // The records that follow are decoded using the version the super-block was written
        // with.
        reader.set_version(super_block_version);
        Ok((super_block_header, RecordReader { reader }))
    }
}
579
/// Incrementally serializes a super-block (header, extent records, root parent items and the
/// end marker) and writes the result to the super-block object on finalization.
struct SuperBlockWriter<'a, S: HandleOwner> {
    /// Handle to the super-block object that is being written.
    handle: DataObjectHandle<S>,
    /// Buffers the serialized super-block stream until it is flushed.
    writer: JournalWriter,
    /// Extents the object already had when the writer was created; consumed front to back
    /// before any new space is allocated.
    existing_extents: VecDeque<FileExtent>,
    /// Number of bytes of the file for which extent records have been emitted so far.
    size: u64,
    /// Allocator reservation used for transactions that allocate more space.
    reservation: &'a Reservation,
}
587
588impl<'a, S: HandleOwner> SuperBlockWriter<'a, S> {
589    /// Create a new writer, outputs FXFS magic, version and SuperBlockHeader.
590    /// On success, the writer is ready to accept root parent store mutations.
591    pub async fn new(
592        handle: DataObjectHandle<S>,
593        super_block_header: &SuperBlockHeader,
594        reservation: &'a Reservation,
595    ) -> Result<Self, Error> {
596        let existing_extents = handle.device_extents().await?;
597        let mut this = Self {
598            handle,
599            writer: JournalWriter::new(BLOCK_SIZE as usize, 0),
600            existing_extents: existing_extents.into_iter().collect(),
601            size: 0,
602            reservation,
603        };
604        this.writer.write_all(SUPER_BLOCK_MAGIC)?;
605        super_block_header.serialize_with_version(&mut this.writer)?;
606        Ok(this)
607    }
608
609    /// Internal helper function to pull ranges from a list of existing extents and tack
610    /// corresponding extent records onto the journal.
611    fn try_extend_existing(&mut self, target_size: u64) -> Result<(), Error> {
612        while self.size < target_size {
613            if let Some(extent) = self.existing_extents.pop_front() {
614                ensure!(
615                    extent.logical_range().start == self.size,
616                    "superblock file contains a hole."
617                );
618                self.size += extent.length();
619                SuperBlockRecord::Extent(extent.device_range().clone())
620                    .serialize_into(&mut self.writer)?;
621            } else {
622                break;
623            }
624        }
625        Ok(())
626    }
627
628    pub async fn write_root_parent_item(&mut self, record: ObjectItem) -> Result<(), Error> {
629        let min_len = self.writer.journal_file_checkpoint().file_offset + SUPER_BLOCK_CHUNK_SIZE;
630        self.try_extend_existing(min_len)?;
631        if min_len > self.size {
632            // Need to allocate some more space.
633            let mut transaction = self
634                .handle
635                .new_transaction_with_options(Options {
636                    skip_journal_checks: true,
637                    borrow_metadata_space: true,
638                    allocator_reservation: Some(self.reservation),
639                    ..Default::default()
640                })
641                .await?;
642            let mut file_range = self.size..self.size + SUPER_BLOCK_CHUNK_SIZE;
643            let allocated = self
644                .handle
645                .preallocate_range(&mut transaction, &mut file_range)
646                .await
647                .context("preallocate superblock")?;
648            if file_range.start < file_range.end {
649                bail!("preallocate_range returned too little space");
650            }
651            transaction.commit().await?;
652            for device_range in allocated {
653                self.size += device_range.end - device_range.start;
654                SuperBlockRecord::Extent(device_range).serialize_into(&mut self.writer)?;
655            }
656        }
657        SuperBlockRecord::ObjectItem(record).serialize_into(&mut self.writer)?;
658        Ok(())
659    }
660
661    pub async fn finalize(mut self) -> Result<(), Error> {
662        SuperBlockRecord::End.serialize_into(&mut self.writer)?;
663        self.writer.pad_to_block()?;
664        let mut buf = self.handle.allocate_buffer(self.writer.flushable_bytes()).await;
665        let offset = self.writer.take_flushable(buf.as_mut());
666        self.handle.overwrite(offset, buf.as_mut(), OverwriteOptions::default()).await?;
667        let len =
668            std::cmp::max(MIN_SUPER_BLOCK_SIZE, self.writer.journal_file_checkpoint().file_offset)
669                + SUPER_BLOCK_CHUNK_SIZE;
670        self.handle
671            .truncate_with_options(
672                Options {
673                    skip_journal_checks: true,
674                    borrow_metadata_space: true,
675                    ..Default::default()
676                },
677                len,
678            )
679            .await?;
680        Ok(())
681    }
682}
683
/// Yields the records that follow the super-block header. Extent records are consumed
/// internally (see `next_item`) and never returned to the caller.
pub struct RecordReader {
    /// The underlying reader, positioned just after the header.
    reader: JournalReader,
}
687
688impl RecordReader {
689    pub async fn next_item(&mut self) -> Result<SuperBlockRecord, Error> {
690        loop {
691            match self.reader.deserialize().await? {
692                ReadResult::Reset(_) => bail!("Unexpected reset"),
693                ReadResult::ChecksumMismatch => bail!("Checksum mismatch"),
694                ReadResult::Some(SuperBlockRecord::Extent(extent)) => {
695                    ensure!(extent.is_valid(), FxfsError::Inconsistent);
696                    self.reader.handle().push_extent(0, extent)
697                }
698                ReadResult::Some(x) => return Ok(x),
699            }
700        }
701    }
702}
703
704#[cfg(test)]
705mod tests {
706    use super::{
707        MIN_SUPER_BLOCK_SIZE, SUPER_BLOCK_CHUNK_SIZE, SuperBlockHeader, SuperBlockInstance,
708        UuidWrapper, compact_root_parent, write,
709    };
710    use crate::filesystem::{FxFilesystem, OpenFxFilesystem};
711    use crate::object_handle::ReadObjectHandle;
712    use crate::object_store::journal::JournalCheckpoint;
713    use crate::object_store::transaction::{Options, lock_keys};
714    use crate::object_store::{
715        DataObjectHandle, HandleOptions, ObjectHandle, ObjectKey, ObjectStore,
716    };
717    use crate::serialized_types::LATEST_VERSION;
718    use storage_device::DeviceHolder;
719    use storage_device::fake_device::FakeDevice;
720
721    // We require 512kiB each for A/B super-blocks, 256kiB for the journal (128kiB before flush)
722    // and compactions require double the layer size to complete.
723    const TEST_DEVICE_BLOCK_SIZE: u32 = 512;
724    const TEST_DEVICE_BLOCK_COUNT: u64 = 16384;
725
726    async fn filesystem_and_super_block_handles()
727    -> (OpenFxFilesystem, DataObjectHandle<ObjectStore>, DataObjectHandle<ObjectStore>) {
728        let device =
729            DeviceHolder::new(FakeDevice::new(TEST_DEVICE_BLOCK_COUNT, TEST_DEVICE_BLOCK_SIZE));
730        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
731        fs.close().await.expect("Close failed");
732        let device = fs.take_device().await;
733        device.reopen(false);
734        let fs = FxFilesystem::open(device).await.expect("open failed");
735
736        let handle_a = ObjectStore::open_object(
737            &fs.object_manager().root_store(),
738            SuperBlockInstance::A.object_id(),
739            HandleOptions::default(),
740            None,
741        )
742        .await
743        .expect("open superblock failed");
744
745        let handle_b = ObjectStore::open_object(
746            &fs.object_manager().root_store(),
747            SuperBlockInstance::B.object_id(),
748            HandleOptions::default(),
749            None,
750        )
751        .await
752        .expect("open superblock failed");
753        (fs, handle_a, handle_b)
754    }
755
756    #[fuchsia::test]
757    async fn test_read_written_super_block() {
758        let (fs, _handle_a, _handle_b) = filesystem_and_super_block_handles().await;
759        const JOURNAL_OBJECT_ID: u64 = 5;
760
761        // Confirm that the (first) super-block is expected size.
762        // It should be MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE.
763        assert_eq!(
764            ObjectStore::open_object(
765                &fs.root_store(),
766                SuperBlockInstance::A.object_id(),
767                HandleOptions::default(),
768                None,
769            )
770            .await
771            .expect("open_object failed")
772            .get_size(),
773            MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE
774        );
775
776        // Create a large number of objects in the root parent store so that we test growing
777        // of the super-block file, requiring us to add extents.
778        let mut created_object_ids = vec![];
779        const NUM_ENTRIES: u64 = 16384;
780        for _ in 0..NUM_ENTRIES {
781            let mut transaction = fs
782                .clone()
783                .new_transaction(lock_keys![], Options::default())
784                .await
785                .expect("new_transaction failed");
786            created_object_ids.push(
787                ObjectStore::create_object(
788                    &fs.object_manager().root_parent_store(),
789                    &mut transaction,
790                    HandleOptions::default(),
791                    None,
792                )
793                .await
794                .expect("create_object failed")
795                .object_id(),
796            );
797            transaction.commit().await.expect("commit failed");
798        }
799
800        // Note here that DataObjectHandle caches the size given to it at construction.
801        // If we want to know the true size after a super-block has been written, we need
802        // a new handle.
803        assert!(
804            ObjectStore::open_object(
805                &fs.root_store(),
806                SuperBlockInstance::A.object_id(),
807                HandleOptions::default(),
808                None,
809            )
810            .await
811            .expect("open_object failed")
812            .get_size()
813                > MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE
814        );
815
816        let written_super_block_a =
817            SuperBlockHeader::read_header(fs.device(), SuperBlockInstance::A)
818                .await
819                .expect("read failed");
820        let written_super_block_b =
821            SuperBlockHeader::read_header(fs.device(), SuperBlockInstance::B)
822                .await
823                .expect("read failed");
824
825        // Check that a non-zero GUID has been assigned.
826        assert!(!written_super_block_a.0.guid.0.is_nil());
827
828        // Depending on specific offsets is fragile so we just validate the fields we believe
829        // to be stable.
830        assert_eq!(written_super_block_a.0.guid, written_super_block_b.0.guid);
831        assert_eq!(written_super_block_a.0.guid, written_super_block_b.0.guid);
832        assert!(written_super_block_a.0.generation != written_super_block_b.0.generation);
833        assert_eq!(
834            written_super_block_a.0.root_parent_store_object_id,
835            written_super_block_b.0.root_parent_store_object_id
836        );
837        assert_eq!(
838            written_super_block_a.0.root_parent_graveyard_directory_object_id,
839            written_super_block_b.0.root_parent_graveyard_directory_object_id
840        );
841        assert_eq!(written_super_block_a.0.root_store_object_id, fs.root_store().store_object_id());
842        assert_eq!(
843            written_super_block_a.0.root_store_object_id,
844            written_super_block_b.0.root_store_object_id
845        );
846        assert_eq!(written_super_block_a.0.allocator_object_id, fs.allocator().object_id());
847        assert_eq!(
848            written_super_block_a.0.allocator_object_id,
849            written_super_block_b.0.allocator_object_id
850        );
851        assert_eq!(written_super_block_a.0.journal_object_id, JOURNAL_OBJECT_ID);
852        assert_eq!(
853            written_super_block_a.0.journal_object_id,
854            written_super_block_b.0.journal_object_id
855        );
856        assert!(
857            written_super_block_a.0.journal_checkpoint.file_offset
858                != written_super_block_b.0.journal_checkpoint.file_offset
859        );
860        assert!(
861            written_super_block_a.0.super_block_journal_file_offset
862                != written_super_block_b.0.super_block_journal_file_offset
863        );
864        // Nb: We skip journal_file_offsets and borrowed metadata space checks.
865        assert_eq!(written_super_block_a.0.earliest_version, LATEST_VERSION);
866        assert_eq!(
867            written_super_block_a.0.earliest_version,
868            written_super_block_b.0.earliest_version
869        );
870
871        // Nb: Skip comparison of root_parent store contents because we have no way of anticipating
872        // the extent offsets and it is reasonable that a/b differ.
873
874        // Delete all the objects we just made.
875        for object_id in created_object_ids {
876            let mut transaction = fs
877                .clone()
878                .new_transaction(lock_keys![], Options::default())
879                .await
880                .expect("new_transaction failed");
881            fs.object_manager()
882                .root_parent_store()
883                .adjust_refs(&mut transaction, object_id, -1)
884                .await
885                .expect("adjust_refs failed");
886            transaction.commit().await.expect("commit failed");
887            fs.object_manager()
888                .root_parent_store()
889                .tombstone_object(object_id, Options::default())
890                .await
891                .expect("tombstone failed");
892        }
893        // Write some stuff to the root store to ensure we rotate the journal and produce new
894        // super blocks.
895        for _ in 0..NUM_ENTRIES {
896            let mut transaction = fs
897                .clone()
898                .new_transaction(lock_keys![], Options::default())
899                .await
900                .expect("new_transaction failed");
901            ObjectStore::create_object(
902                &fs.object_manager().root_store(),
903                &mut transaction,
904                HandleOptions::default(),
905                None,
906            )
907            .await
908            .expect("create_object failed");
909            transaction.commit().await.expect("commit failed");
910        }
911
912        assert_eq!(
913            ObjectStore::open_object(
914                &fs.root_store(),
915                SuperBlockInstance::A.object_id(),
916                HandleOptions::default(),
917                None,
918            )
919            .await
920            .expect("open_object failed")
921            .get_size(),
922            MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE
923        );
924    }
925
926    #[fuchsia::test]
927    async fn test_guid_assign_on_read() {
928        let (fs, handle_a, _handle_b) = filesystem_and_super_block_handles().await;
929        const JOURNAL_OBJECT_ID: u64 = 5;
930        let mut super_block_header_a = SuperBlockHeader::new(
931            fs.object_manager().root_parent_store().store_object_id(),
932            /* root_parent_graveyard_directory_object_id: */ 1000,
933            fs.root_store().store_object_id(),
934            fs.allocator().object_id(),
935            JOURNAL_OBJECT_ID,
936            JournalCheckpoint { file_offset: 1234, checksum: 5678, version: LATEST_VERSION },
937            /* earliest_version: */ LATEST_VERSION,
938        );
939        // Ensure the superblock has no set GUID.
940        super_block_header_a.guid = UuidWrapper::nil();
941        write(
942            &super_block_header_a,
943            compact_root_parent(fs.object_manager().root_parent_store().as_ref())
944                .expect("scan failed"),
945            handle_a,
946        )
947        .await
948        .expect("write failed");
949        let super_block_header = SuperBlockHeader::read_header(fs.device(), SuperBlockInstance::A)
950            .await
951            .expect("read failed");
952        // Ensure a GUID has been assigned.
953        assert!(!super_block_header.0.guid.0.is_nil());
954    }
955
956    #[fuchsia::test]
957    async fn test_init_wipes_superblocks() {
958        let device = DeviceHolder::new(FakeDevice::new(8192, TEST_DEVICE_BLOCK_SIZE));
959
960        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
961        let root_store = fs.root_store();
962        // Generate enough work to induce a journal flush and thus a new superblock being written.
963        for _ in 0..6000 {
964            let mut transaction = fs
965                .clone()
966                .new_transaction(lock_keys![], Options::default())
967                .await
968                .expect("new_transaction failed");
969            ObjectStore::create_object(
970                &root_store,
971                &mut transaction,
972                HandleOptions::default(),
973                None,
974            )
975            .await
976            .expect("create_object failed");
977            transaction.commit().await.expect("commit failed");
978        }
979        fs.close().await.expect("Close failed");
980        let device = fs.take_device().await;
981        device.reopen(false);
982
983        SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
984            .await
985            .expect("read failed");
986        let header = SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::B)
987            .await
988            .expect("read failed");
989
990        let old_guid = header.0.guid;
991
992        // Re-initialize the filesystem.  The A and B blocks should be for the new FS.
993        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
994        fs.close().await.expect("Close failed");
995        let device = fs.take_device().await;
996        device.reopen(false);
997
998        let a = SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
999            .await
1000            .expect("read failed");
1001        let b = SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::B)
1002            .await
1003            .expect("read failed");
1004
1005        assert_eq!(a.0.guid, b.0.guid);
1006        assert_ne!(old_guid, a.0.guid);
1007    }
1008
1009    #[fuchsia::test]
1010    async fn test_alternating_super_blocks() {
1011        let device = DeviceHolder::new(FakeDevice::new(8192, TEST_DEVICE_BLOCK_SIZE));
1012
1013        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
1014        fs.close().await.expect("Close failed");
1015        let device = fs.take_device().await;
1016        device.reopen(false);
1017
1018        let (super_block_header_a, _) =
1019            SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
1020                .await
1021                .expect("read failed");
1022
1023        // The second super-block won't be valid at this time so there's no point reading it.
1024
1025        let fs = FxFilesystem::open(device).await.expect("open failed");
1026        let root_store = fs.root_store();
1027        // Generate enough work to induce a journal flush.
1028        for _ in 0..6000 {
1029            let mut transaction = fs
1030                .clone()
1031                .new_transaction(lock_keys![], Options::default())
1032                .await
1033                .expect("new_transaction failed");
1034            ObjectStore::create_object(
1035                &root_store,
1036                &mut transaction,
1037                HandleOptions::default(),
1038                None,
1039            )
1040            .await
1041            .expect("create_object failed");
1042            transaction.commit().await.expect("commit failed");
1043        }
1044        fs.close().await.expect("Close failed");
1045        let device = fs.take_device().await;
1046        device.reopen(false);
1047
1048        let (super_block_header_a_after, _) =
1049            SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
1050                .await
1051                .expect("read failed");
1052        let (super_block_header_b_after, _) =
1053            SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::B)
1054                .await
1055                .expect("read failed");
1056
1057        // It's possible that multiple super-blocks were written, so cater for that.
1058
1059        // The sequence numbers should be one apart.
1060        assert_eq!(
1061            (super_block_header_b_after.generation as i64
1062                - super_block_header_a_after.generation as i64)
1063                .abs(),
1064            1
1065        );
1066
1067        // At least one super-block should have been written.
1068        assert!(
1069            std::cmp::max(
1070                super_block_header_a_after.generation,
1071                super_block_header_b_after.generation
1072            ) > super_block_header_a.generation
1073        );
1074
1075        // They should have the same oddness.
1076        assert_eq!(super_block_header_a_after.generation & 1, super_block_header_a.generation & 1);
1077    }
1078
1079    #[fuchsia::test]
1080    async fn test_root_parent_is_compacted() {
1081        let device = DeviceHolder::new(FakeDevice::new(8192, TEST_DEVICE_BLOCK_SIZE));
1082
1083        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
1084
1085        let mut transaction = fs
1086            .clone()
1087            .new_transaction(lock_keys![], Options::default())
1088            .await
1089            .expect("new_transaction failed");
1090        let store = fs.root_parent_store();
1091        let handle =
1092            ObjectStore::create_object(&store, &mut transaction, HandleOptions::default(), None)
1093                .await
1094                .expect("create_object failed");
1095        transaction.commit().await.expect("commit failed");
1096
1097        store
1098            .tombstone_object(handle.object_id(), Options::default())
1099            .await
1100            .expect("tombstone failed");
1101
1102        // Generate enough work to induce a journal flush.
1103        let root_store = fs.root_store();
1104        for _ in 0..6000 {
1105            let mut transaction = fs
1106                .clone()
1107                .new_transaction(lock_keys![], Options::default())
1108                .await
1109                .expect("new_transaction failed");
1110            ObjectStore::create_object(
1111                &root_store,
1112                &mut transaction,
1113                HandleOptions::default(),
1114                None,
1115            )
1116            .await
1117            .expect("create_object failed");
1118            transaction.commit().await.expect("commit failed");
1119        }
1120
1121        // The root parent store should have been compacted, so we shouldn't be able to find any
1122        // record referring to the object we tombstoned.
1123        assert_eq!(
1124            store.tree().find(&ObjectKey::object(handle.object_id())).await.expect("find failed"),
1125            None
1126        );
1127    }
1128}