fxfs/object_store/
volume.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::errors::FxfsError;
6use crate::filesystem::FxFilesystem;
7use crate::object_store::directory::Directory;
8use crate::object_store::transaction::{LockKeys, Mutation, Options, Transaction, lock_keys};
9use crate::object_store::tree_cache::TreeCache;
10use crate::object_store::{
11    ChildValue, INVALID_OBJECT_ID, LockKey, NewChildStoreOptions, ObjectDescriptor, ObjectKey,
12    ObjectStore, ObjectValue, StoreOptions, load_store_info,
13};
14use anyhow::{Context, Error, anyhow, bail, ensure};
15use std::sync::Arc;
16
17// Volumes are a grouping of an object store and a root directory within this object store. They
18// model a hierarchical tree of objects within a single store.
19//
20// Typically there will be one root volume which is referenced directly by the superblock. This root
21// volume stores references to all other volumes on the system (as volumes/foo, volumes/bar, ...).
22// For now, this hierarchy is only one deep.
23
/// Name of the directory under which the root volume keeps its references to other volumes
/// (the `volumes` in the `volumes/foo`, `volumes/bar` layout described above).
pub const VOLUMES_DIRECTORY: &str = "volumes";
25
/// RootVolume is the top-level volume which stores references to all of the other Volumes.
///
/// Instances are obtained from `root_volume()`.
pub struct RootVolume {
    // Root directory of the root store, opened when the `RootVolume` is constructed.  The
    // methods visible in this file never read it (hence the leading underscore); they reach the
    // volume directory through the object manager instead.
    _root_directory: Directory<ObjectStore>,
    // The filesystem this root volume belongs to.  Transactions, the root store, the allocator
    // and the object manager are all obtained through it.
    filesystem: Arc<FxFilesystem>,
}
31
32impl RootVolume {
    /// Returns the directory that holds the volume entries, as provided by the filesystem's
    /// object manager.
    pub fn volume_directory(&self) -> &Directory<ObjectStore> {
        self.filesystem.object_manager().volume_directory()
    }
36
37    /// Creates a new volume under a transaction lock.
38    pub async fn new_volume(
39        &self,
40        volume_name: &str,
41        options: NewChildStoreOptions,
42    ) -> Result<Arc<ObjectStore>, Error> {
43        let root_store = self.filesystem.root_store();
44        let store;
45        let mut transaction = self
46            .filesystem
47            .clone()
48            .new_transaction(
49                lock_keys![LockKey::object(
50                    root_store.store_object_id(),
51                    self.volume_directory().object_id(),
52                )],
53                Options::default(),
54            )
55            .await?;
56
57        ensure!(
58            matches!(self.volume_directory().lookup(volume_name).await?, None),
59            FxfsError::AlreadyExists
60        );
61        store = root_store
62            .new_child_store(&mut transaction, options, Box::new(TreeCache::new()))
63            .await?;
64        store.set_trace(self.filesystem.trace());
65
66        // We must register the store here because create will add mutations for the store.
67        self.filesystem.object_manager().add_store(store.clone());
68
69        // If the transaction fails, we must unregister the store.
70        struct CleanUp<'a>(&'a ObjectStore);
71        impl Drop for CleanUp<'_> {
72            fn drop(&mut self) {
73                self.0.filesystem().object_manager().forget_store(self.0.store_object_id());
74            }
75        }
76        let clean_up = CleanUp(&store);
77
78        // Actually create the store in the transaction.
79        store.create(&mut transaction).await?;
80
81        self.volume_directory()
82            .add_child_volume(&mut transaction, volume_name, store.store_object_id())
83            .await?;
84        transaction.commit().await?;
85
86        std::mem::forget(clean_up);
87
88        Ok(store)
89    }
90
91    /// Returns the volume with the given name.  This is not thread-safe.
92    pub async fn volume(
93        &self,
94        volume_name: &str,
95        options: StoreOptions,
96    ) -> Result<Arc<ObjectStore>, Error> {
97        // Lookup the volume object in the volume directory.
98        let (store_object_id, descriptor, _) = self
99            .volume_directory()
100            .lookup(volume_name)
101            .await
102            .context("Volume lookup failed")?
103            .ok_or(FxfsError::NotFound)
104            .context("Volume missing in volume directory")?;
105        match descriptor {
106            ObjectDescriptor::Volume => (),
107            _ => bail!(anyhow!(FxfsError::Inconsistent).context("Expected volume")),
108        }
109        // Lookup the object store corresponding to the volume.
110        let store = self
111            .filesystem
112            .object_manager()
113            .store(store_object_id)
114            .ok_or(FxfsError::NotFound)
115            .context("Missing volume store")?;
116        store.set_trace(self.filesystem.trace());
117        // Unlock the volume if required.
118        if let Some(crypt) = options.crypt {
119            let read_only = self.filesystem.options().read_only;
120            store
121                .unlock_inner(options.owner, crypt, read_only)
122                .await
123                .context("Failed to unlock volume")?;
124        } else if store.is_locked() {
125            bail!(FxfsError::AccessDenied);
126        }
127        Ok(store)
128    }
129
130    /// Deletes the given volume.  Consumes `transaction` and runs `callback` during commit. The
131    /// caller must have the correct locks for the volumes directory.
132    pub async fn delete_volume(
133        &self,
134        volume_name: &str,
135        mut transaction: Transaction<'_>,
136        callback: impl FnOnce() + Send,
137    ) -> Result<(), Error> {
138        let objects_to_delete = self.delete_volume_impl(volume_name, &mut transaction).await?;
139        transaction.commit_with_callback(|_| callback()).await.context("commit")?;
140        // Tombstone the deleted objects.
141        let root_store = self.filesystem.root_store();
142        for object_id in &objects_to_delete {
143            root_store.tombstone_object(*object_id, Options::default()).await?;
144        }
145        Ok(())
146    }
147
148    async fn delete_volume_impl(
149        &self,
150        volume_name: &str,
151        transaction: &mut Transaction<'_>,
152    ) -> Result<Vec<u64>, Error> {
153        let object_id =
154            match self.volume_directory().lookup(volume_name).await?.ok_or(FxfsError::NotFound)? {
155                (object_id, ObjectDescriptor::Volume, _) => object_id,
156                _ => bail!(anyhow!(FxfsError::Inconsistent).context("Expected volume")),
157            };
158        let root_store = self.filesystem.root_store();
159
160        // Delete all the layers and encrypted mutations stored in root_store for this volume.
161        // This includes the StoreInfo itself.
162        let mut objects_to_delete = load_store_info(&root_store, object_id).await?.parent_objects();
163        objects_to_delete.push(object_id);
164
165        for object_id in &objects_to_delete {
166            root_store.adjust_refs(transaction, *object_id, -1).await?;
167        }
168        // Mark all volume data as deleted.
169        self.filesystem.allocator().mark_for_deletion(transaction, object_id);
170        // Remove the volume entry from the VolumeDirectory.
171        self.volume_directory().delete_child_volume(transaction, volume_name, object_id)?;
172        Ok(objects_to_delete)
173    }
174
175    /// Adds the required mutations to atomically replace a volume, returning a list of object IDs
176    /// of objects which can be deleted. If `dst` does not exist, this is equivalent to renaming the
177    /// volume from `src` to `dst`. The caller must have the correct locks on the volumes directory.
178    pub(crate) async fn replace_volume(
179        &self,
180        transaction: &mut Transaction<'_>,
181        src: &str,
182        dst: &str,
183    ) -> Result<Option<Vec<u64>>, Error> {
184        let src_object_id = match self.volume_directory().lookup(src).await? {
185            Some((object_id, ObjectDescriptor::Volume, _)) => Ok(object_id),
186            Some(_) => Err(FxfsError::Inconsistent),
187            None => Err(FxfsError::NotFound),
188        }?;
189
190        let replaced_objects = if let Some((_, ObjectDescriptor::Volume, _)) =
191            self.volume_directory().lookup(dst).await?
192        {
193            Some(self.delete_volume_impl(dst, transaction).await?)
194        } else {
195            None
196        };
197
198        transaction.add(
199            self.volume_directory().store().store_object_id(),
200            Mutation::replace_or_insert_object(
201                ObjectKey::child(self.volume_directory().object_id(), src, false),
202                ObjectValue::None,
203            ),
204        );
205
206        transaction.add(
207            self.volume_directory().store().store_object_id(),
208            Mutation::replace_or_insert_object(
209                ObjectKey::child(self.volume_directory().object_id(), dst, false),
210                ObjectValue::Child(ChildValue {
211                    object_id: src_object_id,
212                    object_descriptor: ObjectDescriptor::Volume,
213                }),
214            ),
215        );
216
217        Ok(replaced_objects)
218    }
219
    /// Attempts to install the image `image_file` in the volume `src` as the volume `dst`. The
    /// image file should be an fxfs partition image containing a volume matching the name `dst`.
    /// The contents of the `dst` volume in the image will be installed in-place into this
    /// filesystem, replacing an existing `dst` volume if one exists.
    ///
    /// There can be no other objects in `src` with extent records, and neither `src` nor `dst` can
    /// be encrypted.
    ///
    /// This is a thin wrapper: all of the work is done by `ObjectStore::install_volume`, which is
    /// handed this root volume, and its result is returned unchanged.
    pub async fn install_volume(
        &self,
        src: &str,
        image_file: &str,
        dst: &str,
    ) -> Result<(), Error> {
        ObjectStore::install_volume(self, src, image_file, dst).await
    }
235
236    /// Acquires a transaction with appropriate locks to remove volume |name|.
237    /// Also returns the object ID of the store which will be deleted.
238    pub async fn acquire_transaction_for_remove_volume(
239        &self,
240        name: &str,
241        extra_keys: impl IntoIterator<Item = LockKey>,
242        allow_not_found: bool,
243    ) -> Result<(u64, Transaction<'_>), Error> {
244        // Since we don't know the store object ID until we've looked it up in the volumes
245        // directory, we need to loop until we have acquired a lock on a store whose ID is the same
246        // as it was in the last iteration.
247        let volume_dir = self.volume_directory();
248        let store = volume_dir.store();
249        let extra_keys = extra_keys.into_iter();
250        let mut lock_keys = Vec::with_capacity(extra_keys.size_hint().1.unwrap_or(2) + 2);
251        lock_keys.extend(extra_keys);
252        lock_keys.push(LockKey::object(store.store_object_id(), volume_dir.object_id()));
253        let orig_len = lock_keys.len();
254        let mut transaction = None;
255        loop {
256            lock_keys.truncate(orig_len);
257            let object_id = match volume_dir.lookup(name).await? {
258                Some((object_id, ObjectDescriptor::Volume, _)) => {
259                    // We have to ensure that the store isn't flushed while we delete it, because
260                    // deleting the store will remove references to it from ObjectManager which are
261                    // then updated by flushing.
262                    lock_keys.push(LockKey::flush(object_id));
263                    object_id
264                }
265                None => {
266                    if allow_not_found {
267                        INVALID_OBJECT_ID
268                    } else {
269                        bail!(FxfsError::NotFound);
270                    }
271                }
272                _ => bail!(anyhow!(FxfsError::Inconsistent).context("Expected volume")),
273            };
274
275            // If the IDs match, return the transaction now.
276            match transaction {
277                Some(result @ (id, _)) if id == object_id => return Ok(result),
278                _ => {}
279            }
280
281            transaction = Some((
282                object_id,
283                store
284                    .filesystem()
285                    .new_transaction(
286                        LockKeys::Vec(lock_keys.clone()),
287                        Options { borrow_metadata_space: true, ..Default::default() },
288                    )
289                    .await?,
290            ));
291        }
292    }
293}
294
295/// Returns the root volume for the filesystem.
296pub async fn root_volume(filesystem: Arc<FxFilesystem>) -> Result<RootVolume, Error> {
297    let root_store = filesystem.root_store();
298    let root_directory = Directory::open(&root_store, root_store.root_directory_object_id())
299        .await
300        .context("Unable to open root volume directory")?;
301    Ok(RootVolume { _root_directory: root_directory, filesystem })
302}
303
304/// Returns the object IDs for all volumes.
305pub async fn list_volumes(volume_directory: &Directory<ObjectStore>) -> Result<Vec<u64>, Error> {
306    let layer_set = volume_directory.store().tree().layer_set();
307    let mut merger = layer_set.merger();
308    let mut iter = volume_directory.iter(&mut merger).await?;
309    let mut object_ids = vec![];
310    while let Some((_, id, _)) = iter.get() {
311        object_ids.push(id);
312        iter.advance().await?;
313    }
314    Ok(object_ids)
315}
316
317#[cfg(test)]
318mod tests {
319    use super::root_volume;
320    use crate::filesystem::{FxFilesystem, JournalingObject, SyncOptions};
321    use crate::fsck::{FsckOptions, fsck_volume_with_options, fsck_with_options};
322    use crate::object_handle::{ObjectHandle, WriteObjectHandle};
323    use crate::object_store::directory::Directory;
324    use crate::object_store::transaction::{Options, lock_keys};
325    use crate::object_store::{LockKey, NewChildStoreOptions, StoreOptions};
326    use fxfs_crypto::Crypt;
327    use fxfs_insecure_crypto::InsecureCrypt;
328    use std::sync::Arc;
329    use storage_device::DeviceHolder;
330    use storage_device::fake_device::FakeDevice;
331
332    async fn do_fsck(
333        fs: &Arc<FxFilesystem>,
334        volume_name: Option<&str>,
335        crypt: Option<Arc<dyn Crypt>>,
336    ) {
337        let fsck_options = FsckOptions {
338            fail_on_warning: true,
339            on_error: Box::new(|err| eprintln!("fsck error: {:?}", err)),
340            ..Default::default()
341        };
342        fsck_with_options(fs.clone(), &fsck_options).await.expect("fsck filesystem");
343        if let Some(volume_name) = volume_name {
344            let root = root_volume(fs.clone()).await.unwrap();
345            let vol = root
346                .volume(
347                    volume_name,
348                    StoreOptions { crypt: crypt.clone(), ..StoreOptions::default() },
349                )
350                .await
351                .expect("could not open volume");
352            fsck_volume_with_options(&fs, &fsck_options, vol.store_object_id(), crypt)
353                .await
354                .expect("fsck volume");
355        }
356    }
357
358    #[fuchsia::test]
359    async fn test_lookup_nonexistent_volume() {
360        let device = DeviceHolder::new(FakeDevice::new(8192, 512));
361        let filesystem = FxFilesystem::new_empty(device).await.expect("new_empty failed");
362        let root_volume = root_volume(filesystem.clone()).await.expect("root_volume failed");
363        root_volume
364            .volume(
365                "vol",
366                StoreOptions {
367                    crypt: Some(Arc::new(InsecureCrypt::new())),
368                    ..StoreOptions::default()
369                },
370            )
371            .await
372            .err()
373            .expect("Volume shouldn't exist");
374        filesystem.close().await.expect("Close failed");
375    }
376
377    #[fuchsia::test]
378    async fn test_add_volume() {
379        let device = DeviceHolder::new(FakeDevice::new(16384, 512));
380        let filesystem = FxFilesystem::new_empty(device).await.expect("new_empty failed");
381        let crypt = Arc::new(InsecureCrypt::new());
382        {
383            let root_volume = root_volume(filesystem.clone()).await.expect("root_volume failed");
384            let store = root_volume
385                .new_volume(
386                    "vol",
387                    NewChildStoreOptions {
388                        options: StoreOptions {
389                            crypt: Some(crypt.clone()),
390                            ..StoreOptions::default()
391                        },
392                        ..Default::default()
393                    },
394                )
395                .await
396                .expect("new_volume failed");
397            let mut transaction = filesystem
398                .clone()
399                .new_transaction(
400                    lock_keys![LockKey::object(
401                        store.store_object_id(),
402                        store.root_directory_object_id()
403                    )],
404                    Options::default(),
405                )
406                .await
407                .expect("new transaction failed");
408            let root_directory = Directory::open(&store, store.root_directory_object_id())
409                .await
410                .expect("open failed");
411            root_directory
412                .create_child_file(&mut transaction, "foo")
413                .await
414                .expect("create_child_file failed");
415            transaction.commit().await.expect("commit failed");
416            filesystem.sync(SyncOptions::default()).await.expect("sync failed");
417        };
418        {
419            filesystem.close().await.expect("Close failed");
420            let device = filesystem.take_device().await;
421            device.reopen(false);
422            let filesystem = FxFilesystem::open(device).await.expect("open failed");
423            do_fsck(&filesystem, Some("vol"), Some(crypt)).await;
424            let root_volume = root_volume(filesystem.clone()).await.expect("root_volume failed");
425            // NOTE: The volume should have been unlocked by `do_fsck` so we omit `crypt` here.
426            let volume = root_volume
427                .volume("vol", StoreOptions { crypt: None, ..StoreOptions::default() })
428                .await
429                .expect("volume failed");
430            let root_directory = Directory::open(&volume, volume.root_directory_object_id())
431                .await
432                .expect("open failed");
433            root_directory.lookup("foo").await.expect("lookup failed").expect("not found");
434            filesystem.close().await.expect("Close failed");
435        };
436    }
437
    /// Creates an encrypted volume containing 8 KiB of file data, deletes the volume, and checks
    /// that its allocation, its directory entry and its root-store objects are all gone, both
    /// immediately and after the filesystem has been reopened.
    #[fuchsia::test]
    async fn test_delete_volume() {
        let device = DeviceHolder::new(FakeDevice::new(16384, 512));
        let filesystem = FxFilesystem::new_empty(device).await.expect("new_empty failed");
        let crypt = Arc::new(InsecureCrypt::new());
        // Both are assigned inside the block below and used after the filesystem is reopened.
        let store_object_id;
        let parent_objects;
        // Add volume and a file (some data).
        // NOTE: `store_id` ends up with the same value as `store_object_id`; both come from
        // `store.store_object_id()`.
        let store_id = {
            let root_volume = root_volume(filesystem.clone()).await.expect("root_volume failed");
            let store = root_volume
                .new_volume(
                    "vol",
                    NewChildStoreOptions {
                        options: StoreOptions {
                            crypt: Some(crypt.clone()),
                            ..StoreOptions::default()
                        },
                        ..Default::default()
                    },
                )
                .await
                .expect("new_volume failed");
            store_object_id = store.store_object_id();
            let mut transaction = filesystem
                .clone()
                .new_transaction(
                    lock_keys![LockKey::object(store_object_id, store.root_directory_object_id())],
                    Options::default(),
                )
                .await
                .expect("new transaction failed");
            let root_directory = Directory::open(&store, store.root_directory_object_id())
                .await
                .expect("open failed");
            let handle = root_directory
                .create_child_file(&mut transaction, "foo")
                .await
                .expect("create_child_file failed");
            transaction.commit().await.expect("commit failed");

            // Write 8192 bytes of 0xaa so that the volume owns some allocated space.
            let mut buf = handle.allocate_buffer(8192).await;
            buf.as_mut_slice().fill(0xaa);
            handle.write_or_append(Some(0), buf.as_ref()).await.expect("write failed");
            // Flush the store before recording its parent objects.
            store.flush().await.expect("flush failed");
            filesystem.sync(SyncOptions::default()).await.expect("sync failed");
            parent_objects = store.parent_objects();
            // Confirm parent objects exist.
            for object_id in &parent_objects {
                let _ = filesystem
                    .root_store()
                    .get_file_size(*object_id)
                    .await
                    .expect("Layer file missing? Bug in test.");
            }
            store.store_object_id()
        };
        // Reopen the filesystem and make sure the volume is intact before deleting it.
        filesystem.close().await.expect("Close failed");
        let device = filesystem.take_device().await;
        device.reopen(false);
        let filesystem = FxFilesystem::open(device).await.expect("open failed");
        do_fsck(&filesystem, Some("vol"), Some(crypt.clone())).await;
        {
            // Expect 8kiB accounted to the new volume.
            assert_eq!(
                filesystem.allocator().get_owner_allocated_bytes().get(&store_object_id),
                Some(&8192)
            );
            let root = root_volume(filesystem.clone()).await.expect("root_volume failed");
            // Take the volume directory's object lock and the store's flush lock by hand before
            // calling `delete_volume`.
            let transaction = filesystem
                .clone()
                .new_transaction(
                    lock_keys![
                        LockKey::object(
                            root.volume_directory().store().store_object_id(),
                            root.volume_directory().object_id(),
                        ),
                        LockKey::flush(store_id)
                    ],
                    Options { borrow_metadata_space: true, ..Default::default() },
                )
                .await
                .expect("new_transaction failed");
            root.delete_volume("vol", transaction, || {}).await.expect("delete_volume");
            // Confirm data allocation is gone.
            assert_eq!(
                filesystem
                    .allocator()
                    .get_owner_allocated_bytes()
                    .get(&store_object_id)
                    .unwrap_or(&0),
                &0,
            );
            // Confirm volume entry is gone.
            root.volume(
                "vol",
                StoreOptions { crypt: Some(crypt.clone()), ..StoreOptions::default() },
            )
            .await
            .err()
            .expect("volume shouldn't exist anymore.");
        }
        filesystem.close().await.expect("Close failed");
        let device = filesystem.take_device().await;
        device.reopen(false);
        // All artifacts of the original volume should be gone.
        let filesystem = FxFilesystem::open(device).await.expect("open failed");
        do_fsck(&filesystem, None, None).await;
        // The objects recorded before the deletion must no longer be readable in the root store.
        for object_id in &parent_objects {
            let _ = filesystem
                .root_store()
                .get_file_size(*object_id)
                .await
                .err()
                .expect("File wasn't deleted.");
        }
        filesystem.close().await.expect("Close failed");
    }
556
557    #[fuchsia::test]
558    async fn test_replace_volume() {
559        let device = DeviceHolder::new(FakeDevice::new(16384, 512));
560        let fs = FxFilesystem::new_empty(device).await.expect("new_empty failed");
561        // Add volume "vol" with a file "foo".
562        {
563            let root = root_volume(fs.clone()).await.expect("root_volume failed");
564            let store = root.new_volume("vol", NewChildStoreOptions::default()).await.unwrap();
565            let mut transaction = fs
566                .clone()
567                .new_transaction(
568                    lock_keys![LockKey::object(
569                        store.store_object_id(),
570                        store.root_directory_object_id()
571                    )],
572                    Options::default(),
573                )
574                .await
575                .unwrap();
576            let root_directory =
577                Directory::open(&store, store.root_directory_object_id()).await.unwrap();
578            let _ = root_directory.create_child_file(&mut transaction, "foo").await.unwrap();
579            transaction.commit().await.expect("commit failed");
580        }
581        // Add a second volume "vol2" with a file "foo2".
582        {
583            let root = root_volume(fs.clone()).await.expect("root_volume failed");
584            let store = root
585                .new_volume("vol2", NewChildStoreOptions::default())
586                .await
587                .expect("new_volume failed");
588            let mut transaction = fs
589                .clone()
590                .new_transaction(
591                    lock_keys![LockKey::object(
592                        store.store_object_id(),
593                        store.root_directory_object_id()
594                    )],
595                    Options::default(),
596                )
597                .await
598                .expect("new transaction failed");
599            let root_directory = Directory::open(&store, store.root_directory_object_id())
600                .await
601                .expect("open failed");
602            let _ = root_directory
603                .create_child_file(&mut transaction, "foo2")
604                .await
605                .expect("create_child_file failed");
606            transaction.commit().await.expect("commit failed");
607        }
608        // Replace "vol" with "vol2", and ensure the filesystem and installed volume passes fsck.
609        {
610            let root = root_volume(fs.clone()).await.expect("root_volume failed");
611            let mut transaction =
612                root.acquire_transaction_for_remove_volume("vol", [], false).await.unwrap().1;
613            root.replace_volume(&mut transaction, "vol2", "vol").await.unwrap();
614            transaction.commit().await.unwrap();
615            do_fsck(&fs, Some("vol"), None).await;
616        }
617        fs.close().await.expect("Close failed");
618        let device = fs.take_device().await;
619        device.reopen(false);
620        let fs = FxFilesystem::open(device).await.unwrap();
621        do_fsck(&fs, Some("vol"), None).await;
622        {
623            let root = root_volume(fs.clone()).await.unwrap();
624            // vol2 should now have replaced vol
625            root.volume("vol2", StoreOptions::default())
626                .await
627                .err()
628                .expect("vol2 shouldn't exist anymore.");
629            let vol = root.volume("vol", StoreOptions::default()).await.unwrap();
630            let dir = Directory::open(&vol, vol.root_directory_object_id()).await.unwrap();
631            // The contents of "foo" should have been replaced entirely with those from "foo2".
632            assert!(dir.lookup("foo").await.unwrap().is_none(), "foo should not be present");
633            assert!(dir.lookup("foo2").await.unwrap().is_some(), "foo2 should be present");
634        }
635        fs.close().await.unwrap();
636    }
637}