fxfs/object_store/journal/super_block.rs

// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! We currently store two of these super-blocks (A/B) starting at offset 0 and 512kB.
//!
//! Immediately following the serialized `SuperBlockHeader` structure below is a stream of
//! serialized operations that are replayed into the root parent `ObjectStore`. Note that the root
//! parent object store exists entirely in RAM until serialized back into the super-block.
//!
//! Super-blocks are updated alternately with a monotonically increasing generation number.
//! At mount time, the super-block used is the valid `SuperBlock` with the highest generation
//! number.
//!
//! Note the asymmetry here regarding load/save:
//!   * We load a superblock from a Device/SuperBlockInstance and return a
//!     (SuperBlockHeader, ObjectStore) pair. The ObjectStore is populated directly from device.
//!   * We save a superblock from a (SuperBlockHeader, Vec<ObjectItem>) pair to a WriteObjectHandle.
//!
//! This asymmetry is required for consistency.
//! The Vec<ObjectItem> is produced by scanning the root_parent_store. This is the responsibility
//! of the journal code, which must hold a lock to avoid concurrent updates. However, this lock
//! must NOT be held when saving the superblock as additional extents may need to be allocated as
//! part of the save process.
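//!
//! As an illustrative sketch only (this is not additional API; `read` is the private helper
//! defined further down in this module), mount-time selection amounts to reading both instances
//! and keeping the readable one with the highest generation:
//!
//! ```ignore
//! let (a, b) = futures::join!(
//!     read(device.clone(), block_size, SuperBlockInstance::A),
//!     read(device.clone(), block_size, SuperBlockInstance::B),
//! );
//! let (header, instance, root_parent) = match (a, b) {
//!     (Ok(a), Ok(b)) => {
//!         if b.0.generation > a.0.generation { b } else { a }
//!     }
//!     (Ok(a), Err(_)) => a,
//!     (Err(_), Ok(b)) => b,
//!     (Err(e1), Err(e2)) => bail!("Failed to load both superblocks: {e1:?} / {e2:?}"),
//! };
//! ```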
use crate::errors::FxfsError;
use crate::filesystem::{ApplyContext, ApplyMode, FxFilesystem, JournalingObject};
use crate::log::*;
use crate::lsm_tree::types::LayerIterator;
use crate::lsm_tree::{LSMTree, LayerSet, Query};
use crate::metrics;
use crate::object_handle::ObjectHandle as _;
use crate::object_store::allocator::Reservation;
use crate::object_store::data_object_handle::{FileExtent, OverwriteOptions};
use crate::object_store::journal::bootstrap_handle::BootstrapObjectHandle;
use crate::object_store::journal::reader::{JournalReader, ReadResult};
use crate::object_store::journal::writer::JournalWriter;
use crate::object_store::journal::{BLOCK_SIZE, JournalCheckpoint, JournalCheckpointV32};
use crate::object_store::object_record::{
    ObjectItem, ObjectItemV40, ObjectItemV41, ObjectItemV43, ObjectItemV46, ObjectItemV47,
    ObjectItemV49,
};
use crate::object_store::transaction::{AssocObj, Options};
use crate::object_store::tree::MajorCompactable;
use crate::object_store::{
    DataObjectHandle, HandleOptions, HandleOwner, Mutation, ObjectKey, ObjectStore, ObjectValue,
};
use crate::range::RangeExt;
use crate::serialized_types::{
    EARLIEST_SUPPORTED_VERSION, FIRST_EXTENT_IN_SUPERBLOCK_VERSION, Migrate,
    SMALL_SUPERBLOCK_VERSION, Version, Versioned, VersionedLatest, migrate_to_version,
};
use anyhow::{Context, Error, bail, ensure};
use fprint::TypeFingerprint;
use fuchsia_inspect::{Property as _, UintProperty};
use fuchsia_sync::Mutex;
use futures::FutureExt;
use rustc_hash::FxHashMap as HashMap;
use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
use std::fmt;
use std::io::{Read, Write};
use std::ops::Range;
use std::sync::Arc;
use std::time::SystemTime;
use storage_device::Device;
use uuid::Uuid;

// These only exist in the root store.
const SUPER_BLOCK_A_OBJECT_ID: u64 = 1;
const SUPER_BLOCK_B_OBJECT_ID: u64 = 2;

/// The superblock is extended in units of `SUPER_BLOCK_CHUNK_SIZE` as required.
pub const SUPER_BLOCK_CHUNK_SIZE: u64 = 65536;

/// Each superblock is one block but may contain records that extend its own length.
const MIN_SUPER_BLOCK_SIZE: u64 = 4096;
/// The first 2 * 512 KiB on the disk used to be reserved for two A/B super-blocks.
const LEGACY_MIN_SUPER_BLOCK_SIZE: u64 = 524_288;

/// All superblocks start with the magic bytes "FxfsSupr".
const SUPER_BLOCK_MAGIC: &[u8; 8] = b"FxfsSupr";

/// An enum representing one of our super-block instances.
///
/// This provides hard-coded constants related to the location and properties of the super-blocks
/// that are required to bootstrap the filesystem.
#[derive(Copy, Clone, Debug)]
pub enum SuperBlockInstance {
    A,
    B,
}

impl SuperBlockInstance {
    /// Returns the next [SuperBlockInstance] for use in round-robining writes across super-blocks.
    pub fn next(&self) -> SuperBlockInstance {
        match self {
            SuperBlockInstance::A => SuperBlockInstance::B,
            SuperBlockInstance::B => SuperBlockInstance::A,
        }
    }

    pub fn object_id(&self) -> u64 {
        match self {
            SuperBlockInstance::A => SUPER_BLOCK_A_OBJECT_ID,
            SuperBlockInstance::B => SUPER_BLOCK_B_OBJECT_ID,
        }
    }

    /// Returns the byte range where the first extent of the [SuperBlockInstance] is stored.
    /// (Note that a [SuperBlockInstance] may still have multiple extents.)
    pub fn first_extent(&self) -> Range<u64> {
        match self {
            SuperBlockInstance::A => 0..MIN_SUPER_BLOCK_SIZE,
            SuperBlockInstance::B => 524288..524288 + MIN_SUPER_BLOCK_SIZE,
        }
    }

    /// We used to allocate 512kB to superblocks but this was almost always more than needed.
    pub fn legacy_first_extent(&self) -> Range<u64> {
        match self {
            SuperBlockInstance::A => 0..LEGACY_MIN_SUPER_BLOCK_SIZE,
            SuperBlockInstance::B => LEGACY_MIN_SUPER_BLOCK_SIZE..2 * LEGACY_MIN_SUPER_BLOCK_SIZE,
        }
    }
}
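
// A quick orientation sketch derived from the constants above (no new behaviour implied): with
// `MIN_SUPER_BLOCK_SIZE` at 4 KiB, the bootstrap extents resolve to
//
//   assert_eq!(SuperBlockInstance::A.first_extent(), 0..4096);
//   assert_eq!(SuperBlockInstance::B.first_extent(), 524288..528384);
//
// whereas images older than `SMALL_SUPERBLOCK_VERSION` reserved 512 KiB per instance:
//
//   assert_eq!(SuperBlockInstance::A.legacy_first_extent(), 0..524288);
//   assert_eq!(SuperBlockInstance::B.legacy_first_extent(), 524288..1048576);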

pub type SuperBlockHeader = SuperBlockHeaderV32;

#[derive(
    Clone, Debug, Default, Eq, PartialEq, Serialize, Deserialize, TypeFingerprint, Versioned,
)]
pub struct SuperBlockHeaderV32 {
    /// The globally unique identifier for the filesystem.
    pub guid: UuidWrapperV32,

    /// There are two super-blocks which are used in an A/B configuration. The super-block with the
    /// greatest generation number is what is used when mounting an Fxfs image; the other is
    /// discarded.
    pub generation: u64,

    /// The root parent store is an in-memory only store and serves as the backing store for the
    /// root store and the journal.  The records for this store are serialized into the super-block
    /// and mutations are also recorded in the journal.
    pub root_parent_store_object_id: u64,

    /// The root parent needs a graveyard and there's nowhere else to store it other than in the
    /// super-block.
    pub root_parent_graveyard_directory_object_id: u64,

    /// The root object store contains all other metadata objects (including the allocator, the
    /// journal and the super-blocks) and is the parent for all other object stores.
    pub root_store_object_id: u64,

    /// This is in the root object store.
    pub allocator_object_id: u64,

    /// This is in the root parent object store.
    pub journal_object_id: u64,

    /// Start checkpoint for the journal file.
    pub journal_checkpoint: JournalCheckpointV32,

    /// Offset of the journal file when the super-block was written.  If no entry is present in
    /// journal_file_offsets for a particular object, then an object might have dependencies on the
    /// journal from super_block_journal_file_offset onwards, but not earlier.
    pub super_block_journal_file_offset: u64,

    /// object id -> journal file offset. Indicates where each object has been flushed to.
    pub journal_file_offsets: HashMap<u64, u64>,

    /// Records the amount of borrowed metadata space as applicable at
    /// `super_block_journal_file_offset`.
    pub borrowed_metadata_space: u64,

    /// The earliest version of Fxfs used to create any still-existing struct in the filesystem.
    ///
    /// Note: structs in the filesystem may have been created with various versions of Fxfs.
    pub earliest_version: Version,
}

type UuidWrapper = UuidWrapperV32;
#[derive(Clone, Default, Eq, PartialEq)]
pub struct UuidWrapperV32(pub Uuid);

impl UuidWrapper {
    fn new() -> Self {
        Self(Uuid::new_v4())
    }
    #[cfg(test)]
    fn nil() -> Self {
        Self(Uuid::nil())
    }
}

impl fmt::Debug for UuidWrapper {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The UUID uniquely identifies the filesystem, so we should redact it so that we don't leak
        // it in logs.
        f.write_str("<redacted>")
    }
}

impl TypeFingerprint for UuidWrapper {
    fn fingerprint() -> String {
        "<[u8;16]>".to_owned()
    }
}

// Uuid serializes like a slice, but SuperBlockHeader used to contain [u8; 16] and we want to remain
// compatible.
impl Serialize for UuidWrapper {
    fn serialize<S: serde::Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
        self.0.as_bytes().serialize(serializer)
    }
}

impl<'de> Deserialize<'de> for UuidWrapper {
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        <[u8; 16]>::deserialize(deserializer).map(|bytes| UuidWrapperV32(Uuid::from_bytes(bytes)))
    }
}

pub type SuperBlockRecord = SuperBlockRecordV49;

#[allow(clippy::large_enum_variant)]
#[derive(Debug, Serialize, Deserialize, TypeFingerprint, Versioned)]
pub enum SuperBlockRecordV49 {
    // When reading the super-block we know the initial extent, but not subsequent extents, so these
    // records need to exist to allow us to completely read the super-block.
    Extent(Range<u64>),

    // Following the super-block header are ObjectItem records that are to be replayed into the root
    // parent object store.
    ObjectItem(ObjectItemV49),

    // Marks the end of the full super-block.
    End,
}
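
// A rough sketch of the serialized super-block stream that these records live in (the code that
// actually emits it is `SuperBlockWriter::new`/`finalize` below; this is the logical ordering,
// not an exact byte layout):
//
//   "FxfsSupr" | version | SuperBlockHeader | (Extent | ObjectItem)* | End | zero padding to block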

#[allow(clippy::large_enum_variant)]
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV49)]
pub enum SuperBlockRecordV47 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV47),
    End,
}

#[allow(clippy::large_enum_variant)]
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV47)]
pub enum SuperBlockRecordV46 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV46),
    End,
}

#[allow(clippy::large_enum_variant)]
#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV46)]
pub enum SuperBlockRecordV43 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV43),
    End,
}

#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV43)]
pub enum SuperBlockRecordV41 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV41),
    End,
}

#[derive(Migrate, Serialize, Deserialize, TypeFingerprint, Versioned)]
#[migrate_to_version(SuperBlockRecordV41)]
pub enum SuperBlockRecordV40 {
    Extent(Range<u64>),
    ObjectItem(ObjectItemV40),
    End,
}

struct SuperBlockMetrics {
    /// Time we wrote the most recent superblock in milliseconds since [`std::time::UNIX_EPOCH`].
    /// Uses [`std::time::SystemTime`] as the clock source.
    last_super_block_update_time_ms: UintProperty,

    /// Offset of the most recent superblock we wrote in the journal.
    last_super_block_offset: UintProperty,
}

impl Default for SuperBlockMetrics {
    fn default() -> Self {
        SuperBlockMetrics {
            last_super_block_update_time_ms: metrics::detail()
                .create_uint("last_super_block_update_time_ms", 0),
            last_super_block_offset: metrics::detail().create_uint("last_super_block_offset", 0),
        }
    }
}

/// Reads an individual (A/B) super-block instance and root_parent_store from device.
/// Users should use SuperBlockManager::load() instead.
async fn read(
    device: Arc<dyn Device>,
    block_size: u64,
    instance: SuperBlockInstance,
) -> Result<(SuperBlockHeader, SuperBlockInstance, ObjectStore), Error> {
    let (super_block_header, mut reader) = SuperBlockHeader::read_header(device.clone(), instance)
        .await
        .context("failed to read superblock")?;
    let root_parent = ObjectStore::new_root_parent(
        device,
        block_size,
        super_block_header.root_parent_store_object_id,
    );
    root_parent.set_graveyard_directory_object_id(
        super_block_header.root_parent_graveyard_directory_object_id,
    );

    loop {
        // TODO: Flatten a layer and move reader here?
        let (mutation, sequence) = match reader.next_item().await? {
            // RecordReader should filter out extent records.
            SuperBlockRecord::Extent(_) => bail!("Unexpected extent record"),
            SuperBlockRecord::ObjectItem(item) => {
                (Mutation::insert_object(item.key, item.value), item.sequence)
            }
            SuperBlockRecord::End => break,
        };
        root_parent.apply_mutation(
            mutation,
            &ApplyContext {
                mode: ApplyMode::Replay,
                checkpoint: JournalCheckpoint { file_offset: sequence, ..Default::default() },
            },
            AssocObj::None,
        )?;
    }
    Ok((super_block_header, instance, root_parent))
}

/// Writes a super-block to the given file handle.
/// Requires that the filesystem is fully loaded and writable as this may require allocation.
async fn write<S: HandleOwner>(
    super_block_header: &SuperBlockHeader,
    items: LayerSet<ObjectKey, ObjectValue>,
    handle: DataObjectHandle<S>,
) -> Result<(), Error> {
    let object_manager = handle.store().filesystem().object_manager().clone();
    // TODO(https://fxbug.dev/42177407): Don't use the same code here for Journal and SuperBlock. They
    // aren't the same thing and it is already getting convoluted, e.g. the stream contents differ:
    //   Superblock:  (Magic, Ver, Header(Ver), Extent(Ver)*, SuperBlockRecord(Ver)*, ...)
    //   Journal:     (Ver, JournalRecord(Ver)*, RESET, Ver2, JournalRecord(Ver2)*, ...)
    // We should abstract away the checksum code and implement these separately.

    let mut writer =
        SuperBlockWriter::new(handle, super_block_header, object_manager.metadata_reservation())
            .await?;
    let mut merger = items.merger();
    let mut iter = LSMTree::major_iter(merger.query(Query::FullScan).await?).await?;
    while let Some(item) = iter.get() {
        writer.write_root_parent_item(item.cloned()).await?;
        iter.advance().await?;
    }
    writer.finalize().await
}

// Compacts and returns the *old* snapshot of the root_parent store.
// Must be performed whilst holding a writer lock.
pub fn compact_root_parent(
    root_parent_store: &ObjectStore,
) -> Result<LayerSet<ObjectKey, ObjectValue>, Error> {
    // The root parent always uses in-memory layers which shouldn't be async, so we can use
    // `now_or_never`.
    let tree = root_parent_store.tree();
    let layer_set = tree.layer_set();
    {
        let mut merger = layer_set.merger();
        let mut iter = LSMTree::major_iter(merger.query(Query::FullScan).now_or_never().unwrap()?)
            .now_or_never()
            .unwrap()?;
        let new_layer = LSMTree::new_mutable_layer();
        while let Some(item_ref) = iter.get() {
            new_layer.insert(item_ref.cloned())?;
            iter.advance().now_or_never().unwrap()?;
        }
        tree.set_mutable_layer(new_layer);
    }
    Ok(layer_set)
}
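
// A hedged usage sketch of the lock discipline described in the module docs. The real call site
// lives in the journal code; `journal.writer_lock()` and `super_block_manager` are assumed names
// used purely for illustration, not part of this module's API:
//
//   let layer_set = {
//       let _guard = journal.writer_lock();  // hold the lock only while taking the snapshot
//       compact_root_parent(&root_parent_store)?
//   };
//   // The lock is released before saving, because saving may need to allocate more extents.
//   super_block_manager.save(header, filesystem, layer_set).await?;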

/// This encapsulates the A/B alternating super-block logic.
/// All super-block load/save operations should be via the methods on this type.
pub(super) struct SuperBlockManager {
    pub next_instance: Mutex<SuperBlockInstance>,
    metrics: SuperBlockMetrics,
}

impl SuperBlockManager {
    pub fn new() -> Self {
        Self { next_instance: Mutex::new(SuperBlockInstance::A), metrics: Default::default() }
    }

    /// Loads both A/B super-blocks and root_parent ObjectStores and returns the newest valid
    /// pair. Also ensures the next superblock updated via |save| will be the other instance.
    pub async fn load(
        &self,
        device: Arc<dyn Device>,
        block_size: u64,
    ) -> Result<(SuperBlockHeader, ObjectStore), Error> {
        // Superblocks consume a minimum of one block. We currently hard code the length of
        // this first extent. It should work with larger block sizes, but has not been tested.
        // TODO(https://fxbug.dev/42063349): Consider relaxing this.
        debug_assert!(MIN_SUPER_BLOCK_SIZE == block_size);

        let (super_block, current_super_block, root_parent) = match futures::join!(
            read(device.clone(), block_size, SuperBlockInstance::A),
            read(device.clone(), block_size, SuperBlockInstance::B)
        ) {
            (Err(e1), Err(e2)) => {
                bail!("Failed to load both superblocks due to {:?}\nand\n{:?}", e1, e2)
            }
            (Ok(result), Err(_)) => result,
            (Err(_), Ok(result)) => result,
            (Ok(result1), Ok(result2)) => {
                // Break the tie by taking the super-block with the greatest generation.
                if result2.0.generation > result1.0.generation { result2 } else { result1 }
            }
        };
        info!(super_block:?, current_super_block:?; "loaded super-block");
        *self.next_instance.lock() = current_super_block.next();
        Ok((super_block, root_parent))
    }

    /// Writes the provided superblock and root_parent ObjectStore to the device.
    /// Requires that the filesystem is fully loaded and writable as this may require allocation.
    pub async fn save(
        &self,
        super_block_header: SuperBlockHeader,
        filesystem: Arc<FxFilesystem>,
        root_parent: LayerSet<ObjectKey, ObjectValue>,
    ) -> Result<(), Error> {
        let root_store = filesystem.root_store();
        let object_id = {
            let mut next_instance = self.next_instance.lock();
            let object_id = next_instance.object_id();
            *next_instance = next_instance.next();
            object_id
        };
        let handle = ObjectStore::open_object(
            &root_store,
            object_id,
            HandleOptions { skip_journal_checks: true, ..Default::default() },
            None,
        )
        .await
        .context("Failed to open superblock object")?;
        write(&super_block_header, root_parent, handle).await?;
        self.metrics
            .last_super_block_offset
            .set(super_block_header.super_block_journal_file_offset);
        self.metrics.last_super_block_update_time_ms.set(
            SystemTime::now()
                .duration_since(SystemTime::UNIX_EPOCH)
                .unwrap()
                .as_millis()
                .try_into()
                .unwrap_or(0u64),
        );
        Ok(())
    }
}

impl SuperBlockHeader {
    /// Creates a new instance with random GUID.
    pub fn new(
        root_parent_store_object_id: u64,
        root_parent_graveyard_directory_object_id: u64,
        root_store_object_id: u64,
        allocator_object_id: u64,
        journal_object_id: u64,
        journal_checkpoint: JournalCheckpoint,
        earliest_version: Version,
    ) -> Self {
        SuperBlockHeader {
            guid: UuidWrapper::new(),
            generation: 1u64,
            root_parent_store_object_id,
            root_parent_graveyard_directory_object_id,
            root_store_object_id,
            allocator_object_id,
            journal_object_id,
            journal_checkpoint,
            earliest_version,
            ..Default::default()
        }
    }

    /// Reads the super-block header, and returns it along with a reader that produces the records
    /// that are to be replayed into the root parent object store.
    async fn read_header(
        device: Arc<dyn Device>,
        target_super_block: SuperBlockInstance,
    ) -> Result<(SuperBlockHeader, RecordReader), Error> {
        let handle = BootstrapObjectHandle::new(
            target_super_block.object_id(),
            device,
            target_super_block.first_extent(),
        );
        let mut reader = JournalReader::new(handle, &JournalCheckpoint::default());
        reader.set_eof_ok();

        reader.fill_buf().await?;

        let mut super_block_header;
        let super_block_version;
        reader.consume({
            let mut cursor = std::io::Cursor::new(reader.buffer());
            // Validate magic bytes.
            let mut magic_bytes: [u8; 8] = [0; 8];
            cursor.read_exact(&mut magic_bytes)?;
            if magic_bytes.as_slice() != SUPER_BLOCK_MAGIC.as_slice() {
                bail!("Invalid magic: {:?}", magic_bytes);
            }
            (super_block_header, super_block_version) =
                SuperBlockHeader::deserialize_with_version(&mut cursor)?;

            if super_block_version < EARLIEST_SUPPORTED_VERSION {
                bail!("Unsupported SuperBlock version: {:?}", super_block_version);
            }

            // NOTE: It is possible that data was written to the journal with an old version
            // but no compaction ever happened, so the journal version could potentially be older
            // than the layer file versions.
            if super_block_header.journal_checkpoint.version < EARLIEST_SUPPORTED_VERSION {
                bail!(
                    "Unsupported JournalCheckpoint version: {:?}",
                    super_block_header.journal_checkpoint.version
                );
            }

            if super_block_header.earliest_version < EARLIEST_SUPPORTED_VERSION {
                bail!(
                    "Filesystem contains struct with unsupported version: {:?}",
                    super_block_header.earliest_version
                );
            }

            cursor.position() as usize
        });

        // From version 45 superblocks describe their own extents (a noop here).
        // At version 44, superblocks assume a 4kb first extent.
        // Prior to version 44, superblocks assume a 512kb first extent.
        if super_block_version < SMALL_SUPERBLOCK_VERSION {
            reader.handle().push_extent(0, target_super_block.legacy_first_extent());
        } else if super_block_version < FIRST_EXTENT_IN_SUPERBLOCK_VERSION {
            reader.handle().push_extent(0, target_super_block.first_extent())
        }

        // If guid is zeroed (e.g. in a newly imaged system), assign one randomly.
        if super_block_header.guid.0.is_nil() {
            super_block_header.guid = UuidWrapper::new();
        }
        reader.set_version(super_block_version);
        Ok((super_block_header, RecordReader { reader }))
    }
}

struct SuperBlockWriter<'a, S: HandleOwner> {
    handle: DataObjectHandle<S>,
    writer: JournalWriter,
    existing_extents: VecDeque<FileExtent>,
    size: u64,
    reservation: &'a Reservation,
}

impl<'a, S: HandleOwner> SuperBlockWriter<'a, S> {
    /// Creates a new writer and emits the FXFS magic, version and SuperBlockHeader.
    /// On success, the writer is ready to accept root parent store mutations.
    pub async fn new(
        handle: DataObjectHandle<S>,
        super_block_header: &SuperBlockHeader,
        reservation: &'a Reservation,
    ) -> Result<Self, Error> {
        let existing_extents = handle.device_extents().await?;
        let mut this = Self {
            handle,
            writer: JournalWriter::new(BLOCK_SIZE as usize, 0),
            existing_extents: existing_extents.into_iter().collect(),
            size: 0,
            reservation,
        };
        this.writer.write_all(SUPER_BLOCK_MAGIC)?;
        super_block_header.serialize_with_version(&mut this.writer)?;
        Ok(this)
    }

    /// Internal helper function to pull ranges from a list of existing extents and tack
    /// corresponding extent records onto the journal.
    fn try_extend_existing(&mut self, target_size: u64) -> Result<(), Error> {
        while self.size < target_size {
            if let Some(extent) = self.existing_extents.pop_front() {
                ensure!(
                    extent.logical_range().start == self.size,
                    "superblock file contains a hole."
                );
                self.size += extent.length();
                SuperBlockRecord::Extent(extent.device_range().clone())
                    .serialize_into(&mut self.writer)?;
            } else {
                break;
            }
        }
        Ok(())
    }

    pub async fn write_root_parent_item(&mut self, record: ObjectItem) -> Result<(), Error> {
        let min_len = self.writer.journal_file_checkpoint().file_offset + SUPER_BLOCK_CHUNK_SIZE;
        self.try_extend_existing(min_len)?;
        if min_len > self.size {
            // Need to allocate some more space.
            let mut transaction = self
                .handle
                .new_transaction_with_options(Options {
                    skip_journal_checks: true,
                    borrow_metadata_space: true,
                    allocator_reservation: Some(self.reservation),
                    ..Default::default()
                })
                .await?;
            let mut file_range = self.size..self.size + SUPER_BLOCK_CHUNK_SIZE;
            let allocated = self
                .handle
                .preallocate_range(&mut transaction, &mut file_range)
                .await
                .context("preallocate superblock")?;
            if file_range.start < file_range.end {
                bail!("preallocate_range returned too little space");
            }
            transaction.commit().await?;
            for device_range in allocated {
                self.size += device_range.end - device_range.start;
                SuperBlockRecord::Extent(device_range).serialize_into(&mut self.writer)?;
            }
        }
        SuperBlockRecord::ObjectItem(record).serialize_into(&mut self.writer)?;
        Ok(())
    }

    pub async fn finalize(mut self) -> Result<(), Error> {
        SuperBlockRecord::End.serialize_into(&mut self.writer)?;
        self.writer.pad_to_block()?;
        let mut buf = self.handle.allocate_buffer(self.writer.flushable_bytes()).await;
        let offset = self.writer.take_flushable(buf.as_mut());
        self.handle.overwrite(offset, buf.as_mut(), OverwriteOptions::default()).await?;
        let len =
            std::cmp::max(MIN_SUPER_BLOCK_SIZE, self.writer.journal_file_checkpoint().file_offset)
                + SUPER_BLOCK_CHUNK_SIZE;
        self.handle
            .truncate_with_options(
                Options {
                    skip_journal_checks: true,
                    borrow_metadata_space: true,
                    ..Default::default()
                },
                len,
            )
            .await?;
        Ok(())
    }
}

pub struct RecordReader {
    reader: JournalReader,
}

impl RecordReader {
    pub async fn next_item(&mut self) -> Result<SuperBlockRecord, Error> {
        loop {
            match self.reader.deserialize().await? {
                ReadResult::Reset(_) => bail!("Unexpected reset"),
                ReadResult::ChecksumMismatch => bail!("Checksum mismatch"),
                ReadResult::Some(SuperBlockRecord::Extent(extent)) => {
                    ensure!(extent.is_valid(), FxfsError::Inconsistent);
                    self.reader.handle().push_extent(0, extent)
                }
                ReadResult::Some(x) => return Ok(x),
            }
        }
    }
}

#[cfg(test)]
mod tests {
    use super::{
        MIN_SUPER_BLOCK_SIZE, SUPER_BLOCK_CHUNK_SIZE, SuperBlockHeader, SuperBlockInstance,
        UuidWrapper, compact_root_parent, write,
    };
    use crate::filesystem::{FxFilesystem, OpenFxFilesystem};
    use crate::object_handle::ReadObjectHandle;
    use crate::object_store::journal::JournalCheckpoint;
    use crate::object_store::transaction::{Options, lock_keys};
    use crate::object_store::{
        DataObjectHandle, HandleOptions, ObjectHandle, ObjectKey, ObjectStore,
    };
    use crate::serialized_types::LATEST_VERSION;
    use storage_device::DeviceHolder;
    use storage_device::fake_device::FakeDevice;

    // We require 512kiB each for A/B super-blocks, 256kiB for the journal (128kiB before flush)
    // and compactions require double the layer size to complete.
    const TEST_DEVICE_BLOCK_SIZE: u32 = 512;
    const TEST_DEVICE_BLOCK_COUNT: u64 = 16384;

    async fn filesystem_and_super_block_handles()
    -> (OpenFxFilesystem, DataObjectHandle<ObjectStore>, DataObjectHandle<ObjectStore>) {
        let device =
            DeviceHolder::new(FakeDevice::new(TEST_DEVICE_BLOCK_COUNT, TEST_DEVICE_BLOCK_SIZE));
        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
        fs.close().await.expect("Close failed");
        let device = fs.take_device().await;
        device.reopen(false);
        let fs = FxFilesystem::open(device).await.expect("open failed");

        let handle_a = ObjectStore::open_object(
            &fs.object_manager().root_store(),
            SuperBlockInstance::A.object_id(),
            HandleOptions::default(),
            None,
        )
        .await
        .expect("open superblock failed");

        let handle_b = ObjectStore::open_object(
            &fs.object_manager().root_store(),
            SuperBlockInstance::B.object_id(),
            HandleOptions::default(),
            None,
        )
        .await
        .expect("open superblock failed");
        (fs, handle_a, handle_b)
    }

    #[fuchsia::test]
    async fn test_read_written_super_block() {
        let (fs, _handle_a, _handle_b) = filesystem_and_super_block_handles().await;
        const JOURNAL_OBJECT_ID: u64 = 5;

        // Confirm that the (first) super-block is expected size.
        // It should be MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE.
        assert_eq!(
            ObjectStore::open_object(
                &fs.root_store(),
                SuperBlockInstance::A.object_id(),
                HandleOptions::default(),
                None,
            )
            .await
            .expect("open_object failed")
            .get_size(),
            MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE
        );

        // Create a large number of objects in the root parent store so that we test growing
        // of the super-block file, requiring us to add extents.
        let mut created_object_ids = vec![];
        const NUM_ENTRIES: u64 = 16384;
        for _ in 0..NUM_ENTRIES {
            let mut transaction = fs
                .clone()
                .new_transaction(lock_keys![], Options::default())
                .await
                .expect("new_transaction failed");
            created_object_ids.push(
                ObjectStore::create_object(
                    &fs.object_manager().root_parent_store(),
                    &mut transaction,
                    HandleOptions::default(),
                    None,
                )
                .await
                .expect("create_object failed")
                .object_id(),
            );
            transaction.commit().await.expect("commit failed");
        }

        // Note here that DataObjectHandle caches the size given to it at construction.
        // If we want to know the true size after a super-block has been written, we need
        // a new handle.
        assert!(
            ObjectStore::open_object(
                &fs.root_store(),
                SuperBlockInstance::A.object_id(),
                HandleOptions::default(),
                None,
            )
            .await
            .expect("open_object failed")
            .get_size()
                > MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE
        );

        let written_super_block_a =
            SuperBlockHeader::read_header(fs.device(), SuperBlockInstance::A)
                .await
                .expect("read failed");
        let written_super_block_b =
            SuperBlockHeader::read_header(fs.device(), SuperBlockInstance::B)
                .await
                .expect("read failed");

        // Check that a non-zero GUID has been assigned.
        assert!(!written_super_block_a.0.guid.0.is_nil());

        // Depending on specific offsets is fragile so we just validate the fields we believe
        // to be stable.
        assert_eq!(written_super_block_a.0.guid, written_super_block_b.0.guid);
        assert!(written_super_block_a.0.generation != written_super_block_b.0.generation);
        assert_eq!(
            written_super_block_a.0.root_parent_store_object_id,
            written_super_block_b.0.root_parent_store_object_id
        );
        assert_eq!(
            written_super_block_a.0.root_parent_graveyard_directory_object_id,
            written_super_block_b.0.root_parent_graveyard_directory_object_id
        );
        assert_eq!(written_super_block_a.0.root_store_object_id, fs.root_store().store_object_id());
        assert_eq!(
            written_super_block_a.0.root_store_object_id,
            written_super_block_b.0.root_store_object_id
        );
        assert_eq!(written_super_block_a.0.allocator_object_id, fs.allocator().object_id());
        assert_eq!(
            written_super_block_a.0.allocator_object_id,
            written_super_block_b.0.allocator_object_id
        );
        assert_eq!(written_super_block_a.0.journal_object_id, JOURNAL_OBJECT_ID);
        assert_eq!(
            written_super_block_a.0.journal_object_id,
            written_super_block_b.0.journal_object_id
        );
        assert!(
            written_super_block_a.0.journal_checkpoint.file_offset
                != written_super_block_b.0.journal_checkpoint.file_offset
        );
        assert!(
            written_super_block_a.0.super_block_journal_file_offset
                != written_super_block_b.0.super_block_journal_file_offset
        );
        // Nb: We skip journal_file_offsets and borrowed metadata space checks.
        assert_eq!(written_super_block_a.0.earliest_version, LATEST_VERSION);
        assert_eq!(
            written_super_block_a.0.earliest_version,
            written_super_block_b.0.earliest_version
        );

        // Nb: Skip comparison of root_parent store contents because we have no way of anticipating
        // the extent offsets and it is reasonable that a/b differ.

        // Delete all the objects we just made.
        for object_id in created_object_ids {
            let mut transaction = fs
                .clone()
                .new_transaction(lock_keys![], Options::default())
                .await
                .expect("new_transaction failed");
            fs.object_manager()
                .root_parent_store()
                .adjust_refs(&mut transaction, object_id, -1)
                .await
                .expect("adjust_refs failed");
            transaction.commit().await.expect("commit failed");
            fs.object_manager()
                .root_parent_store()
                .tombstone_object(object_id, Options::default())
                .await
                .expect("tombstone failed");
        }
        // Write some stuff to the root store to ensure we rotate the journal and produce new
        // super blocks.
        for _ in 0..NUM_ENTRIES {
            let mut transaction = fs
                .clone()
                .new_transaction(lock_keys![], Options::default())
                .await
                .expect("new_transaction failed");
            ObjectStore::create_object(
                &fs.object_manager().root_store(),
                &mut transaction,
                HandleOptions::default(),
                None,
            )
            .await
            .expect("create_object failed");
            transaction.commit().await.expect("commit failed");
        }

        assert_eq!(
            ObjectStore::open_object(
                &fs.root_store(),
                SuperBlockInstance::A.object_id(),
                HandleOptions::default(),
                None,
            )
            .await
            .expect("open_object failed")
            .get_size(),
            MIN_SUPER_BLOCK_SIZE + SUPER_BLOCK_CHUNK_SIZE
        );
    }

    #[fuchsia::test]
    async fn test_guid_assign_on_read() {
        let (fs, handle_a, _handle_b) = filesystem_and_super_block_handles().await;
        const JOURNAL_OBJECT_ID: u64 = 5;
        let mut super_block_header_a = SuperBlockHeader::new(
            fs.object_manager().root_parent_store().store_object_id(),
            /* root_parent_graveyard_directory_object_id: */ 1000,
            fs.root_store().store_object_id(),
            fs.allocator().object_id(),
            JOURNAL_OBJECT_ID,
            JournalCheckpoint { file_offset: 1234, checksum: 5678, version: LATEST_VERSION },
            /* earliest_version: */ LATEST_VERSION,
        );
        // Ensure the superblock has no set GUID.
        super_block_header_a.guid = UuidWrapper::nil();
        write(
            &super_block_header_a,
            compact_root_parent(fs.object_manager().root_parent_store().as_ref())
                .expect("scan failed"),
            handle_a,
        )
        .await
        .expect("write failed");
        let super_block_header = SuperBlockHeader::read_header(fs.device(), SuperBlockInstance::A)
            .await
            .expect("read failed");
        // Ensure a GUID has been assigned.
        assert!(!super_block_header.0.guid.0.is_nil());
    }

    #[fuchsia::test]
    async fn test_init_wipes_superblocks() {
        let device = DeviceHolder::new(FakeDevice::new(8192, TEST_DEVICE_BLOCK_SIZE));

        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
        let root_store = fs.root_store();
        // Generate enough work to induce a journal flush and thus a new superblock being written.
        for _ in 0..6000 {
            let mut transaction = fs
                .clone()
                .new_transaction(lock_keys![], Options::default())
                .await
                .expect("new_transaction failed");
            ObjectStore::create_object(
                &root_store,
                &mut transaction,
                HandleOptions::default(),
                None,
            )
            .await
            .expect("create_object failed");
            transaction.commit().await.expect("commit failed");
        }
        fs.close().await.expect("Close failed");
        let device = fs.take_device().await;
        device.reopen(false);

        SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
            .await
            .expect("read failed");
        let header = SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::B)
            .await
            .expect("read failed");

        let old_guid = header.0.guid;

        // Re-initialize the filesystem.  The A and B blocks should be for the new FS.
        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
        fs.close().await.expect("Close failed");
        let device = fs.take_device().await;
        device.reopen(false);

        let a = SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
            .await
            .expect("read failed");
        let b = SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::B)
            .await
            .expect("read failed");

        assert_eq!(a.0.guid, b.0.guid);
        assert_ne!(old_guid, a.0.guid);
    }

    #[fuchsia::test]
    async fn test_alternating_super_blocks() {
        let device = DeviceHolder::new(FakeDevice::new(8192, TEST_DEVICE_BLOCK_SIZE));

        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
        fs.close().await.expect("Close failed");
        let device = fs.take_device().await;
        device.reopen(false);

        let (super_block_header_a, _) =
            SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
                .await
                .expect("read failed");

        // The second super-block won't be valid at this time so there's no point reading it.

        let fs = FxFilesystem::open(device).await.expect("open failed");
        let root_store = fs.root_store();
        // Generate enough work to induce a journal flush.
        for _ in 0..6000 {
            let mut transaction = fs
                .clone()
                .new_transaction(lock_keys![], Options::default())
                .await
                .expect("new_transaction failed");
            ObjectStore::create_object(
                &root_store,
                &mut transaction,
                HandleOptions::default(),
                None,
            )
            .await
            .expect("create_object failed");
            transaction.commit().await.expect("commit failed");
        }
        fs.close().await.expect("Close failed");
        let device = fs.take_device().await;
        device.reopen(false);

        let (super_block_header_a_after, _) =
            SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::A)
                .await
                .expect("read failed");
        let (super_block_header_b_after, _) =
            SuperBlockHeader::read_header(device.clone(), SuperBlockInstance::B)
                .await
                .expect("read failed");

        // It's possible that multiple super-blocks were written, so cater for that.

        // The generation numbers should be one apart.
        assert_eq!(
            (super_block_header_b_after.generation as i64
                - super_block_header_a_after.generation as i64)
                .abs(),
            1
        );

        // At least one super-block should have been written.
        assert!(
            std::cmp::max(
                super_block_header_a_after.generation,
                super_block_header_b_after.generation
            ) > super_block_header_a.generation
        );

        // Instance A's generation should have the same parity before and after.
        assert_eq!(super_block_header_a_after.generation & 1, super_block_header_a.generation & 1);
    }

    #[fuchsia::test]
    async fn test_root_parent_is_compacted() {
        let device = DeviceHolder::new(FakeDevice::new(8192, TEST_DEVICE_BLOCK_SIZE));

        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");

        let mut transaction = fs
            .clone()
            .new_transaction(lock_keys![], Options::default())
            .await
            .expect("new_transaction failed");
        let store = fs.root_parent_store();
        let handle =
            ObjectStore::create_object(&store, &mut transaction, HandleOptions::default(), None)
                .await
                .expect("create_object failed");
        transaction.commit().await.expect("commit failed");

        store
            .tombstone_object(handle.object_id(), Options::default())
            .await
            .expect("tombstone failed");

        // Generate enough work to induce a journal flush.
        let root_store = fs.root_store();
        for _ in 0..6000 {
            let mut transaction = fs
                .clone()
                .new_transaction(lock_keys![], Options::default())
                .await
                .expect("new_transaction failed");
            ObjectStore::create_object(
                &root_store,
                &mut transaction,
                HandleOptions::default(),
                None,
            )
            .await
            .expect("create_object failed");
            transaction.commit().await.expect("commit failed");
        }

        // The root parent store should have been compacted, so we shouldn't be able to find any
        // record referring to the object we tombstoned.
        assert_eq!(
            store.tree().find(&ObjectKey::object(handle.object_id())).await.expect("find failed"),
            None
        );
    }
}