1use crate::checksum::{Checksum, Checksums, fletcher64};
6use crate::errors::FxfsError;
7use crate::log::*;
8use crate::lsm_tree::Query;
9use crate::lsm_tree::merge::MergerIterator;
10use crate::lsm_tree::types::{Item, ItemRef, LayerIterator};
11use crate::object_handle::ObjectHandle;
12use crate::object_store::extent_record::{ExtentKey, ExtentMode, ExtentValue};
13use crate::object_store::object_manager::ObjectManager;
14use crate::object_store::object_record::{
15 AttributeKey, ExtendedAttributeValue, ObjectAttributes, ObjectItem, ObjectKey, ObjectKeyData,
16 ObjectValue, Timestamp,
17};
18use crate::object_store::transaction::{
19 AssocObj, AssociatedObject, LockKey, Mutation, ObjectStoreMutation, Options, ReadGuard,
20 Transaction, lock_keys,
21};
22use crate::object_store::{
23 HandleOptions, HandleOwner, ObjectStore, TrimMode, TrimResult, VOLUME_DATA_KEY_ID,
24};
25use crate::range::RangeExt;
26use crate::round::{round_down, round_up};
27use anyhow::{Context, Error, anyhow, bail, ensure};
28use assert_matches::assert_matches;
29use bit_vec::BitVec;
30use futures::stream::{FuturesOrdered, FuturesUnordered};
31use futures::{TryStreamExt, try_join};
32use fxfs_crypto::{
33 Cipher, CipherHolder, CipherSet, EncryptionKey, FindKeyResult, FxfsCipher, KeyPurpose,
34};
35use fxfs_trace::trace;
36use static_assertions::const_assert;
37use std::cmp::min;
38use std::future::Future;
39use std::ops::Range;
40use std::sync::Arc;
41use std::sync::atomic::{self, AtomicBool, Ordering};
42use storage_device::buffer::{Buffer, BufferFuture, BufferRef, MutableBufferRef};
43use storage_device::{InlineCryptoOptions, ReadOptions, WriteOptions};
44
45use {fidl_fuchsia_io as fio, fuchsia_async as fasync};
46
/// Maximum length, in bytes, of an extended attribute name.
pub const MAX_XATTR_NAME_SIZE: usize = 255;
/// Largest extended-attribute value stored inline in the object record;
/// presumably larger values spill to a separate attribute — TODO confirm against the setters.
pub const MAX_INLINE_XATTR_SIZE: usize = 256;
/// Maximum size, in bytes, of an extended attribute value.
pub const MAX_XATTR_VALUE_SIZE: usize = 64000;
/// First attribute id in the range reserved for extended attributes — assumed; verify against
/// attribute-id allocation elsewhere in this module.
pub const EXTENDED_ATTRIBUTE_RANGE_START: u64 = 64;
/// One past the last attribute id reserved for extended attributes — assumed; verify as above.
pub const EXTENDED_ATTRIBUTE_RANGE_END: u64 = 512;
64
65fn apply_bitmap_zeroing(
67 block_size: usize,
68 bitmap: &bit_vec::BitVec,
69 mut buffer: MutableBufferRef<'_>,
70) {
71 let buf = buffer.as_mut_slice();
72 debug_assert_eq!(bitmap.len() * block_size, buf.len());
73 for (i, block) in bitmap.iter().enumerate() {
74 if !block {
75 let start = i * block_size;
76 buf[start..start + block_size].fill(0);
77 }
78 }
79}
80
/// Per-block checksums produced by a write, or nothing when checksumming is
/// skipped (see `write_aligned`: inline-crypto writes and `skip_checksums`
/// handles produce `None`; otherwise one fletcher64 checksum per block).
#[derive(Debug, Clone, PartialEq)]
pub enum MaybeChecksums {
    None,
    Fletcher(Vec<Checksum>),
}
89
90impl MaybeChecksums {
91 pub fn maybe_as_ref(&self) -> Option<&[Checksum]> {
92 match self {
93 Self::None => None,
94 Self::Fletcher(sums) => Some(&sums),
95 }
96 }
97
98 pub fn split_off(&mut self, at: usize) -> Self {
99 match self {
100 Self::None => Self::None,
101 Self::Fletcher(sums) => Self::Fletcher(sums.split_off(at)),
102 }
103 }
104
105 pub fn to_mode(self) -> ExtentMode {
106 match self {
107 Self::None => ExtentMode::Raw,
108 Self::Fletcher(sums) => ExtentMode::Cow(Checksums::fletcher(sums)),
109 }
110 }
111
112 pub fn into_option(self) -> Option<Vec<Checksum>> {
113 match self {
114 Self::None => None,
115 Self::Fletcher(sums) => Some(sums),
116 }
117 }
118}
119
/// Controls how `set_extended_attribute` behaves with respect to an existing
/// attribute (mirrors `fio::SetExtendedAttributeMode`, see the `From` impl below).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SetExtendedAttributeMode {
    /// Set the value whether or not the attribute already exists.
    Set,
    /// Create a new attribute; presumably fails if it already exists — TODO confirm at use site.
    Create,
    /// Replace an existing attribute; presumably fails if absent — TODO confirm at use site.
    Replace,
}
132
// Direct 1:1 mapping from the FIDL-defined mode to the local enum.
impl From<fio::SetExtendedAttributeMode> for SetExtendedAttributeMode {
    fn from(other: fio::SetExtendedAttributeMode) -> SetExtendedAttributeMode {
        match other {
            fio::SetExtendedAttributeMode::Set => SetExtendedAttributeMode::Set,
            fio::SetExtendedAttributeMode::Create => SetExtendedAttributeMode::Create,
            fio::SetExtendedAttributeMode::Replace => SetExtendedAttributeMode::Replace,
        }
    }
}
142
/// How encryption keys for the object behind a handle are sourced (see `get_key`).
enum Encryption {
    /// The object is not encrypted.
    None,
    /// Keys are fetched through the store's key manager and crypt service on
    /// demand (used when the owning store is encrypted).
    CachedKeys,
    /// Keys are already registered with the key manager; `get_key` expects
    /// `key_manager.get` to succeed and unwraps the result.
    PermanentKeys,
}
154
/// Bitmap state tracked while writing into an overwrite-mode extent.
#[derive(PartialEq, Debug)]
enum OverwriteBitmaps {
    /// No bitmap tracking needed for this write.
    None,
    Some {
        /// Bitmap taken from the extent: one bit per block; a set bit appears to
        /// mean the block was written before — TODO confirm against `ExtentMode::OverwritePartial`.
        extent_bitmap: BitVec,
        /// Bits set for each block touched by the current write (same length as
        /// `extent_bitmap`, initialized to all-false in `new`).
        write_bitmap: BitVec,
        /// Offset into both bitmaps at which indices passed to the accessors start.
        bitmap_offset: usize,
    },
}
168
169impl OverwriteBitmaps {
170 fn new(extent_bitmap: BitVec) -> Self {
171 OverwriteBitmaps::Some {
172 write_bitmap: BitVec::from_elem(extent_bitmap.len(), false),
173 extent_bitmap,
174 bitmap_offset: 0,
175 }
176 }
177
178 fn is_none(&self) -> bool {
179 *self == OverwriteBitmaps::None
180 }
181
182 fn set_offset(&mut self, new_offset: usize) {
183 match self {
184 OverwriteBitmaps::None => (),
185 OverwriteBitmaps::Some { bitmap_offset, .. } => *bitmap_offset = new_offset,
186 }
187 }
188
189 fn get_from_extent_bitmap(&self, i: usize) -> Option<bool> {
190 match self {
191 OverwriteBitmaps::None => None,
192 OverwriteBitmaps::Some { extent_bitmap, bitmap_offset, .. } => {
193 extent_bitmap.get(*bitmap_offset + i)
194 }
195 }
196 }
197
198 fn set_in_write_bitmap(&mut self, i: usize, x: bool) {
199 match self {
200 OverwriteBitmaps::None => (),
201 OverwriteBitmaps::Some { write_bitmap, bitmap_offset, .. } => {
202 write_bitmap.set(*bitmap_offset + i, x)
203 }
204 }
205 }
206
207 fn take_bitmaps(self) -> Option<(BitVec, BitVec)> {
208 match self {
209 OverwriteBitmaps::None => None,
210 OverwriteBitmaps::Some { extent_bitmap, write_bitmap, .. } => {
211 Some((extent_bitmap, write_bitmap))
212 }
213 }
214 }
215}
216
/// A contiguous run of blocks within one write whose blocks all agree on
/// whether this is the first write to them (see `group_first_write_ranges`).
#[derive(PartialEq, Debug)]
struct ChecksumRangeChunk {
    /// Range of per-block checksum indices covered by this chunk.
    checksum_range: Range<usize>,
    /// Matching device byte range.
    device_range: Range<u64>,
    /// True when the covered blocks had their extent-bitmap bit unset, i.e.
    /// they have not been written before.
    is_first_write: bool,
}
226
impl ChecksumRangeChunk {
    /// Partitions `write_device_range` into chunks whose blocks share the same
    /// first-write classification according to `bitmaps`' extent bitmap,
    /// marking every touched block in the write bitmap along the way.
    ///
    /// When `bitmaps` is `None`, the whole range is one chunk with
    /// `is_first_write == false`. `write_device_range` must be a multiple of
    /// `block_size` long.
    fn group_first_write_ranges(
        bitmaps: &mut OverwriteBitmaps,
        block_size: u64,
        write_device_range: Range<u64>,
    ) -> Vec<ChecksumRangeChunk> {
        let write_block_len = (write_device_range.length().unwrap() / block_size) as usize;
        if bitmaps.is_none() {
            vec![ChecksumRangeChunk {
                checksum_range: 0..write_block_len,
                device_range: write_device_range,
                is_first_write: false,
            }]
        } else {
            // Seed with an empty chunk classified by block 0; it grows while
            // subsequent blocks keep the same classification.
            let mut checksum_ranges = vec![ChecksumRangeChunk {
                checksum_range: 0..0,
                device_range: write_device_range.start..write_device_range.start,
                is_first_write: !bitmaps.get_from_extent_bitmap(0).unwrap(),
            }];
            let mut working_range = checksum_ranges.last_mut().unwrap();
            for i in 0..write_block_len {
                bitmaps.set_in_write_bitmap(i, true);

                // `is_first_write != extent_bitmap[i]` holds exactly when block i
                // matches the current chunk (first-write == bit unset), so the
                // chunk extends by one block; otherwise start a new chunk with
                // the opposite classification.
                if working_range.is_first_write != bitmaps.get_from_extent_bitmap(i).unwrap() {
                    working_range.checksum_range.end += 1;
                    working_range.device_range.end += block_size;
                } else {
                    let new_chunk = ChecksumRangeChunk {
                        checksum_range: working_range.checksum_range.end
                            ..working_range.checksum_range.end + 1,
                        device_range: working_range.device_range.end
                            ..working_range.device_range.end + block_size,
                        is_first_write: !working_range.is_first_write,
                    };
                    checksum_ranges.push(new_chunk);
                    working_range = checksum_ranges.last_mut().unwrap();
                }
            }
            checksum_ranges
        }
    }
}
278
/// A handle on an object within an `ObjectStore`, identified by object id.
pub struct StoreObjectHandle<S: HandleOwner> {
    // Owner through which the backing `ObjectStore` is reached.
    owner: Arc<S>,
    // Id of the object this handle refers to.
    object_id: u64,
    // Per-handle options (e.g. `skip_journal_checks`, `skip_checksums`).
    options: HandleOptions,
    // When true, reads/writes/deallocations emit info-level trace logs.
    trace: AtomicBool,
    // How encryption keys for this object are sourced; fixed at construction.
    encryption: Encryption,
}
298
299impl<S: HandleOwner> ObjectHandle for StoreObjectHandle<S> {
300 fn set_trace(&self, v: bool) {
301 info!(store_id = self.store().store_object_id, oid = self.object_id(), trace = v; "trace");
302 self.trace.store(v, atomic::Ordering::Relaxed);
303 }
304
305 fn object_id(&self) -> u64 {
306 return self.object_id;
307 }
308
309 fn allocate_buffer(&self, size: usize) -> BufferFuture<'_> {
310 self.store().device.allocate_buffer(size)
311 }
312
313 fn block_size(&self) -> u64 {
314 self.store().block_size()
315 }
316}
317
/// Holds a background task that periodically invokes a callback; dropping the
/// watchdog cancels the task (the `fasync::Task` is dropped with it).
struct Watchdog {
    _task: fasync::Task<()>,
}
321
322impl Watchdog {
323 fn new(increment_seconds: u64, cb: impl Fn(u64) + Send + 'static) -> Self {
324 Self {
325 _task: fasync::Task::spawn(async move {
326 let increment = increment_seconds.try_into().unwrap();
327 let mut fired_counter = 0;
328 let mut next_wake = fasync::MonotonicInstant::now();
329 loop {
330 next_wake += std::time::Duration::from_secs(increment).into();
331 if fasync::MonotonicInstant::now() < next_wake {
335 fasync::Timer::new(next_wake).await;
336 }
337 fired_counter += 1;
338 cb(fired_counter);
339 }
340 }),
341 }
342 }
343}
344
345impl<S: HandleOwner> StoreObjectHandle<S> {
346 pub fn new(
348 owner: Arc<S>,
349 object_id: u64,
350 permanent_keys: bool,
351 options: HandleOptions,
352 trace: bool,
353 ) -> Self {
354 let encryption = if permanent_keys {
355 Encryption::PermanentKeys
356 } else if owner.as_ref().as_ref().is_encrypted() {
357 Encryption::CachedKeys
358 } else {
359 Encryption::None
360 };
361 Self { owner, object_id, encryption, options, trace: AtomicBool::new(trace) }
362 }
363
    /// Returns the owner of this handle.
    pub fn owner(&self) -> &Arc<S> {
        &self.owner
    }
367
    /// Returns the object store backing this handle (reached via the owner).
    pub fn store(&self) -> &ObjectStore {
        self.owner.as_ref().as_ref()
    }
371
    /// Returns whether trace logging is currently enabled for this handle.
    pub fn trace(&self) -> bool {
        self.trace.load(atomic::Ordering::Relaxed)
    }
375
    /// True unless this handle's encryption mode is `Encryption::None`.
    pub fn is_encrypted(&self) -> bool {
        !matches!(self.encryption, Encryption::None)
    }
379
    /// Returns transaction options that carry this handle's
    /// `skip_journal_checks` setting; all other options are defaulted.
    pub fn default_transaction_options<'b>(&self) -> Options<'b> {
        Options { skip_journal_checks: self.options.skip_journal_checks, ..Default::default() }
    }
385
386 pub async fn new_transaction_with_options<'b>(
387 &self,
388 attribute_id: u64,
389 options: Options<'b>,
390 ) -> Result<Transaction<'b>, Error> {
391 Ok(self
392 .store()
393 .filesystem()
394 .new_transaction(
395 lock_keys![
396 LockKey::object_attribute(
397 self.store().store_object_id(),
398 self.object_id(),
399 attribute_id,
400 ),
401 LockKey::object(self.store().store_object_id(), self.object_id()),
402 ],
403 options,
404 )
405 .await?)
406 }
407
    /// Creates a new transaction using this handle's default options, locking
    /// this object and the given attribute.
    pub async fn new_transaction<'b>(&self, attribute_id: u64) -> Result<Transaction<'b>, Error> {
        self.new_transaction_with_options(attribute_id, self.default_transaction_options()).await
    }
411
    /// Fetches the pending object mutation for this object within
    /// `transaction`, delegating to the store.
    async fn txn_get_object_mutation(
        &self,
        transaction: &Transaction<'_>,
    ) -> Result<ObjectStoreMutation, Error> {
        self.store().txn_get_object_mutation(transaction, self.object_id()).await
    }
420
    /// Deallocates the device space backing extents of `attribute_id` that
    /// overlap `range`, returning the number of bytes deallocated.
    ///
    /// `range` must be block-aligned. Only the allocator is updated here; the
    /// extent records themselves are rewritten by the caller (see `zero`).
    async fn deallocate_old_extents(
        &self,
        transaction: &mut Transaction<'_>,
        attribute_id: u64,
        range: Range<u64>,
    ) -> Result<u64, Error> {
        let block_size = self.block_size();
        assert_eq!(range.start % block_size, 0);
        assert_eq!(range.end % block_size, 0);
        if range.start == range.end {
            return Ok(0);
        }
        let tree = &self.store().tree;
        let layer_set = tree.layer_set();
        let key = ExtentKey { range };
        let lower_bound = ObjectKey::attribute(
            self.object_id(),
            attribute_id,
            AttributeKey::Extent(key.search_key()),
        );
        let mut merger = layer_set.merger();
        let mut iter = merger.query(Query::FullRange(&lower_bound)).await?;
        let allocator = self.store().allocator();
        let mut deallocated = 0;
        let trace = self.trace();
        // Walk extent records for this object/attribute starting at the search
        // key; stop at the first record for another object or attribute.
        while let Some(ItemRef {
            key:
                ObjectKey {
                    object_id,
                    data: ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent_key)),
                },
            value: ObjectValue::Extent(value),
            ..
        }) = iter.get()
        {
            if *object_id != self.object_id() || *attr_id != attribute_id {
                break;
            }
            if let ExtentValue::Some { device_offset, .. } = value {
                if let Some(overlap) = key.overlap(extent_key) {
                    // Translate the overlapping logical range into the device range
                    // backing it within this extent.
                    let range = device_offset + overlap.start - extent_key.range.start
                        ..device_offset + overlap.end - extent_key.range.start;
                    ensure!(range.is_aligned(block_size), FxfsError::Inconsistent);
                    if trace {
                        info!(
                            store_id = self.store().store_object_id(),
                            oid = self.object_id(),
                            device_range:? = range,
                            len = range.end - range.start,
                            extent_key:?;
                            "D",
                        );
                    }
                    allocator
                        .deallocate(transaction, self.store().store_object_id(), range)
                        .await?;
                    deallocated += overlap.end - overlap.start;
                } else {
                    // Past the target range; no further extents can overlap.
                    break;
                }
            }
            iter.advance().await?;
        }
        Ok(deallocated)
    }
487
488 async fn write_aligned(
491 &self,
492 buf: BufferRef<'_>,
493 device_offset: u64,
494 crypt_ctx: Option<(u32, u8)>,
495 ) -> Result<MaybeChecksums, Error> {
496 if self.trace() {
497 info!(
498 store_id = self.store().store_object_id(),
499 oid = self.object_id(),
500 device_range:? = (device_offset..device_offset + buf.len() as u64),
501 len = buf.len();
502 "W",
503 );
504 }
505 let store = self.store();
506 store.device_write_ops.fetch_add(1, Ordering::Relaxed);
507 let mut checksums = Vec::new();
508 let _watchdog = Watchdog::new(10, |count| {
509 warn!("Write has been stalled for {} seconds", count * 10);
510 });
511
512 match crypt_ctx {
513 Some((dun, slot)) => {
514 if !store.filesystem().options().barriers_enabled {
515 return Err(anyhow!(FxfsError::InvalidArgs)
516 .context("Barriers must be enabled for inline encrypted writes."));
517 }
518 store
519 .device
520 .write_with_opts(
521 device_offset as u64,
522 buf,
523 WriteOptions {
524 inline_crypto: InlineCryptoOptions::enabled(slot, dun),
525 ..Default::default()
526 },
527 )
528 .await?;
529 Ok(MaybeChecksums::None)
530 }
531 None => {
532 if self.options.skip_checksums {
533 store
534 .device
535 .write_with_opts(device_offset as u64, buf, WriteOptions::default())
536 .await?;
537 Ok(MaybeChecksums::None)
538 } else {
539 try_join!(store.device.write(device_offset, buf), async {
540 let block_size = self.block_size();
541 for chunk in buf.as_slice().chunks_exact(block_size as usize) {
542 checksums.push(fletcher64(chunk, 0));
543 }
544 Ok(())
545 })?;
546 Ok(MaybeChecksums::Fletcher(checksums))
547 }
548 }
549 }
550 }
551
    /// Flushes the underlying device.
    pub async fn flush_device(&self) -> Result<(), Error> {
        self.store().device().flush().await
    }
556
    /// Adjusts this object's recorded allocated size by `allocated` bytes
    /// gained and `deallocated` bytes released, also merging a project-usage
    /// delta when the object belongs to a project.
    ///
    /// # Errors
    /// Returns `FxfsError::Inconsistent` on overflow/underflow of the size or
    /// if the object record isn't an `ObjectValue::Object`.
    pub async fn update_allocated_size(
        &self,
        transaction: &mut Transaction<'_>,
        allocated: u64,
        deallocated: u64,
    ) -> Result<(), Error> {
        if allocated == deallocated {
            return Ok(());
        }
        let mut mutation = self.txn_get_object_mutation(transaction).await?;
        if let ObjectValue::Object {
            attributes: ObjectAttributes { project_id, allocated_size, .. },
            ..
        } = &mut mutation.item.value
        {
            // Checked arithmetic: going out of range means the store metadata is corrupt.
            *allocated_size = allocated_size
                .checked_add(allocated)
                .ok_or_else(|| anyhow!(FxfsError::Inconsistent).context("Allocated size overflow"))?
                .checked_sub(deallocated)
                .ok_or_else(|| {
                    anyhow!(FxfsError::Inconsistent).context("Allocated size underflow")
                })?;

            if *project_id != 0 {
                // Record the signed byte delta against the project's usage tally.
                let diff = i64::try_from(allocated).unwrap() - i64::try_from(deallocated).unwrap();
                transaction.add(
                    self.store().store_object_id(),
                    Mutation::merge_object(
                        ObjectKey::project_usage(
                            self.store().root_directory_object_id(),
                            *project_id,
                        ),
                        ObjectValue::BytesAndNodes { bytes: diff, nodes: 0 },
                    ),
                );
            }
        } else {
            bail!(anyhow!(FxfsError::Inconsistent).context("Unexpected object value"));
        }
        transaction.add(self.store().store_object_id, Mutation::ObjectStore(mutation));
        Ok(())
    }
604
    /// Updates mutable node attributes and/or the change time within
    /// `transaction`, delegating to the store.
    ///
    /// A `selinux_context` supplied as `fio::SelinuxContext::Data` is stored as
    /// the "security.selinux" extended attribute; any other representation is
    /// rejected with `InvalidArgs`.
    pub async fn update_attributes<'a>(
        &self,
        transaction: &mut Transaction<'a>,
        node_attributes: Option<&fio::MutableNodeAttributes>,
        change_time: Option<Timestamp>,
    ) -> Result<(), Error> {
        if let Some(&fio::MutableNodeAttributes { selinux_context: Some(ref context), .. }) =
            node_attributes
        {
            if let fio::SelinuxContext::Data(context) = context {
                self.set_extended_attribute_impl(
                    "security.selinux".into(),
                    context.clone(),
                    SetExtendedAttributeMode::Set,
                    transaction,
                )
                .await?;
            } else {
                return Err(anyhow!(FxfsError::InvalidArgs)
                    .context("Only set SELinux context with `data` member."));
            }
        }
        self.store()
            .update_attributes(transaction, self.object_id, node_attributes, change_time)
            .await
    }
631
    /// Zeroes `range` of `attribute_id` by deallocating any backing extents
    /// and, if anything was deallocated, merging a deleted-extent record over
    /// the range and updating the allocated size.
    pub async fn zero(
        &self,
        transaction: &mut Transaction<'_>,
        attribute_id: u64,
        range: Range<u64>,
    ) -> Result<(), Error> {
        let deallocated =
            self.deallocate_old_extents(transaction, attribute_id, range.clone()).await?;
        if deallocated > 0 {
            self.update_allocated_size(transaction, 0, deallocated).await?;
            transaction.add(
                self.store().store_object_id,
                Mutation::merge_object(
                    ObjectKey::extent(self.object_id(), attribute_id, range),
                    ObjectValue::Extent(ExtentValue::deleted_extent()),
                ),
            );
        }
        Ok(())
    }
653
    /// Produces a block-aligned copy of `buf` destined for `offset`, reading
    /// existing data to fill the unaligned head/tail blocks.
    ///
    /// Returns the aligned logical range and a new buffer covering it; bytes
    /// of the head/tail blocks beyond what `read` returned are zero-filled.
    pub async fn align_buffer(
        &self,
        attribute_id: u64,
        offset: u64,
        buf: BufferRef<'_>,
    ) -> Result<(std::ops::Range<u64>, Buffer<'_>), Error> {
        let block_size = self.block_size();
        let end = offset + buf.len() as u64;
        let aligned =
            round_down(offset, block_size)..round_up(end, block_size).ok_or(FxfsError::TooBig)?;

        let mut aligned_buf =
            self.store().device.allocate_buffer((aligned.end - aligned.start) as usize).await;

        // Unaligned start: read back the first block so untouched bytes survive.
        if aligned.start < offset {
            let mut head_block = aligned_buf.subslice_mut(..block_size as usize);
            let read = self.read(attribute_id, aligned.start, head_block.reborrow()).await?;
            head_block.as_mut_slice()[read..].fill(0);
        }

        // Unaligned end: same for the last block, unless it is the same block
        // already handled above (offset > end_block_offset).
        if aligned.end > end {
            let end_block_offset = aligned.end - block_size;
            if offset <= end_block_offset {
                let mut tail_block =
                    aligned_buf.subslice_mut(aligned_buf.len() - block_size as usize..);
                let read = self.read(attribute_id, end_block_offset, tail_block.reborrow()).await?;
                tail_block.as_mut_slice()[read..].fill(0);
            }
        }

        // Lay the caller's data over the middle of the aligned buffer.
        aligned_buf.as_mut_slice()
            [(offset - aligned.start) as usize..(end - aligned.start) as usize]
            .copy_from_slice(buf.as_slice());

        Ok((aligned, aligned_buf))
    }
695
    /// Shrinks `attribute_id` down to `size` by trimming extents beyond it.
    ///
    /// If the trim could not complete within this transaction, a graveyard
    /// `Trim` entry is added (unless one, or a tombstone `Some` entry, already
    /// exists) so the remainder is trimmed later; the returned `NeedsTrim`
    /// reflects whether that happened.
    pub async fn shrink(
        &self,
        transaction: &mut Transaction<'_>,
        attribute_id: u64,
        size: u64,
    ) -> Result<NeedsTrim, Error> {
        let store = self.store();
        let needs_trim = matches!(
            store
                .trim_some(transaction, self.object_id(), attribute_id, TrimMode::FromOffset(size))
                .await?,
            TrimResult::Incomplete
        );
        if needs_trim {
            let graveyard_id = store.graveyard_directory_object_id();
            match store
                .tree
                .find(&ObjectKey::graveyard_entry(graveyard_id, self.object_id()))
                .await?
            {
                // Already queued in the graveyard — nothing to add.
                Some(ObjectItem { value: ObjectValue::Some, .. })
                | Some(ObjectItem { value: ObjectValue::Trim, .. }) => {
                }
                _ => {
                    transaction.add(
                        store.store_object_id,
                        Mutation::replace_or_insert_object(
                            ObjectKey::graveyard_entry(graveyard_id, self.object_id()),
                            ObjectValue::Trim,
                        ),
                    );
                }
            }
        }
        Ok(NeedsTrim(needs_trim))
    }
740
    /// Reads `buffer.len()` bytes from `device_offset` into `buffer` and
    /// decrypts them in place using the key identified by `key_id`.
    ///
    /// When the key supports inline crypto (has a crypt context for
    /// `file_offset`), decryption is done by the device; otherwise the data is
    /// read raw and decrypted in software. Unencrypted objects read raw.
    /// A watchdog logs a warning every 10s while the read is stalled.
    pub async fn read_and_decrypt(
        &self,
        device_offset: u64,
        file_offset: u64,
        mut buffer: MutableBufferRef<'_>,
        key_id: u64,
    ) -> Result<(), Error> {
        let store = self.store();
        store.device_read_ops.fetch_add(1, Ordering::Relaxed);

        let _watchdog = Watchdog::new(10, |count| {
            warn!("Read has been stalled for {} seconds", count * 10);
        });

        let (_key_id, key) = self.get_key(Some(key_id)).await?;
        if let Some(key) = key {
            if let Some((dun, slot)) = key.crypt_ctx(self.object_id, file_offset) {
                store
                    .device
                    .read_with_opts(
                        device_offset as u64,
                        buffer.reborrow(),
                        ReadOptions { inline_crypto: InlineCryptoOptions::enabled(slot, dun) },
                    )
                    .await?;
            } else {
                store.device.read(device_offset, buffer.reborrow()).await?;
                key.decrypt(self.object_id, device_offset, file_offset, buffer.as_mut_slice())?;
            }
        } else {
            store.device.read(device_offset, buffer.reborrow()).await?;
        }

        Ok(())
    }
777
    /// Resolves the cipher for this object, returning `(key_id, cipher)`.
    ///
    /// Unencrypted objects return `(VOLUME_DATA_KEY_ID, None)`. With cached
    /// keys, a specific `key_id` is fetched via the key manager; `None` asks
    /// for the fscrypt key if present. Permanent keys must already be in the
    /// key manager (unwrapped). Keys carrying an inline-crypto context require
    /// barriers to be enabled, otherwise `InvalidArgs` is returned.
    pub async fn get_key(
        &self,
        key_id: Option<u64>,
    ) -> Result<(u64, Option<Arc<dyn Cipher>>), Error> {
        let store = self.store();
        let result = match self.encryption {
            Encryption::None => (VOLUME_DATA_KEY_ID, None),
            Encryption::CachedKeys => {
                if let Some(key_id) = key_id {
                    (
                        key_id,
                        Some(
                            store
                                .key_manager
                                .get_key(
                                    self.object_id,
                                    store.crypt().ok_or_else(|| anyhow!("No crypt!"))?.as_ref(),
                                    async || store.get_keys(self.object_id).await,
                                    key_id,
                                )
                                .await?,
                        ),
                    )
                } else {
                    let (key_id, key) = store
                        .key_manager
                        .get_fscrypt_key_if_present(
                            self.object_id,
                            store.crypt().ok_or_else(|| anyhow!("No crypt!"))?.as_ref(),
                            async || store.get_keys(self.object_id).await,
                        )
                        .await?;
                    (key_id, Some(key))
                }
            }
            Encryption::PermanentKeys => {
                (VOLUME_DATA_KEY_ID, Some(store.key_manager.get(self.object_id).await?.unwrap()))
            }
        };

        // Inline-crypto keys depend on write ordering guarantees from barriers.
        if let Some(ref key) = result.1 {
            if key.crypt_ctx(self.object_id, 0).is_some() {
                if !store.filesystem().options().barriers_enabled {
                    return Err(anyhow!(FxfsError::InvalidArgs)
                        .context("Barriers must be enabled for inline encrypted writes."));
                }
            }
        }

        Ok(result)
    }
834
    /// Returns the volume-data cipher for this object, creating and persisting
    /// a new key if none exists yet.
    ///
    /// Lookup order: key-manager cache, then the on-disk keys record. If the
    /// record exists but lacks `VOLUME_DATA_KEY_ID`, a new key is minted via
    /// the crypt service, added to the cipher set, and written back within
    /// `transaction`; the key manager is updated when the mutation applies.
    async fn get_or_create_key(
        &self,
        transaction: &mut Transaction<'_>,
    ) -> Result<Arc<dyn Cipher>, Error> {
        let store = self.store();

        // Fast path: the cipher is already cached.
        if let Some(key) = store.key_manager.get(self.object_id).await.context("get failed")? {
            return Ok(key);
        }

        let crypt = store.crypt().ok_or_else(|| anyhow!("No crypt!"))?;

        // Load existing keys from the tree, if any; a keys record that isn't
        // `ObjectValue::Keys` indicates corruption.
        let (mut encryption_keys, mut cipher_set) = if let Some(item) =
            store.tree.find(&ObjectKey::keys(self.object_id)).await.context("find failed")?
        {
            if let ObjectValue::Keys(encryption_keys) = item.value {
                let cipher_set = store
                    .key_manager
                    .get_keys(
                        self.object_id,
                        crypt.as_ref(),
                        &mut Some(async || Ok(encryption_keys.clone())),
                        false,
                        false,
                    )
                    .await
                    .context("get_keys failed")?;
                match cipher_set.find_key(VOLUME_DATA_KEY_ID) {
                    FindKeyResult::NotFound => {}
                    FindKeyResult::Unavailable => return Err(FxfsError::NoKey.into()),
                    FindKeyResult::Key(key) => return Ok(key),
                }
                (encryption_keys, (*cipher_set).clone())
            } else {
                return Err(anyhow!(FxfsError::Inconsistent));
            }
        } else {
            Default::default()
        };

        // Mint a fresh data key and add it to the in-memory cipher set.
        let (key, unwrapped_key) = crypt.create_key(self.object_id, KeyPurpose::Data).await?;
        let cipher: Arc<dyn Cipher> = Arc::new(FxfsCipher::new(&unwrapped_key));

        cipher_set.add_key(VOLUME_DATA_KEY_ID, CipherHolder::Cipher(cipher.clone()));
        let cipher_set = Arc::new(cipher_set);

        // Associated object that inserts the new cipher set into the key
        // manager at the moment the mutation is applied.
        struct UnwrappedKeys {
            object_id: u64,
            new_keys: Arc<CipherSet>,
        }

        impl AssociatedObject for UnwrappedKeys {
            fn will_apply_mutation(
                &self,
                _mutation: &Mutation,
                object_id: u64,
                manager: &ObjectManager,
            ) {
                manager.store(object_id).unwrap().key_manager.insert(
                    self.object_id,
                    self.new_keys.clone(),
                    false,
                );
            }
        }

        encryption_keys.insert(VOLUME_DATA_KEY_ID, EncryptionKey::Fxfs(key).into());

        transaction.add_with_object(
            store.store_object_id(),
            Mutation::replace_or_insert_object(
                ObjectKey::keys(self.object_id),
                ObjectValue::keys(encryption_keys),
            ),
            AssocObj::Owned(Box::new(UnwrappedKeys {
                object_id: self.object_id,
                new_keys: cipher_set,
            })),
        );

        Ok(cipher)
    }
927
    /// Reads from `attribute_id` at `offset` into `buf`, returning the number
    /// of bytes read (0 when the attribute is absent or `offset` is at/past
    /// the attribute's size). The read is clamped to the attribute size and
    /// performed under the attribute's read lock.
    pub async fn read(
        &self,
        attribute_id: u64,
        offset: u64,
        mut buf: MutableBufferRef<'_>,
    ) -> Result<usize, Error> {
        let fs = self.store().filesystem();
        let guard = fs
            .lock_manager()
            .read_lock(lock_keys![LockKey::object_attribute(
                self.store().store_object_id(),
                self.object_id(),
                attribute_id,
            )])
            .await;

        let key = ObjectKey::attribute(self.object_id(), attribute_id, AttributeKey::Attribute);
        let item = self.store().tree().find(&key).await?;
        let size = match item {
            Some(item) if item.key == key => match item.value {
                ObjectValue::Attribute { size, .. } => size,
                // Anything other than an attribute record here is corruption.
                _ => bail!(FxfsError::Inconsistent),
            },
            _ => return Ok(0),
        };
        if offset >= size {
            return Ok(0);
        }
        let length = min(buf.len() as u64, size - offset) as usize;
        buf = buf.subslice_mut(0..length);
        self.read_unchecked(attribute_id, offset, buf, &guard).await?;
        Ok(length)
    }
961
    /// Fills `buf` from extents of `attribute_id` starting at `offset`,
    /// zeroing gaps (sparse regions) and any unwritten overwrite-mode blocks.
    ///
    /// `offset` must be block-aligned and the caller must hold the attribute's
    /// read lock (witnessed by `_guard`); bounds against the attribute size
    /// are NOT checked here — that is `read`'s job. Device reads for separate
    /// extents are issued concurrently.
    pub(super) async fn read_unchecked(
        &self,
        attribute_id: u64,
        mut offset: u64,
        mut buf: MutableBufferRef<'_>,
        _guard: &ReadGuard<'_>,
    ) -> Result<(), Error> {
        if buf.len() == 0 {
            return Ok(());
        }
        let end_offset = offset + buf.len() as u64;

        self.store().logical_read_ops.fetch_add(1, Ordering::Relaxed);

        let block_size = self.block_size() as u64;
        let device_block_size = self.store().device.block_size() as u64;
        assert_eq!(offset % block_size, 0);
        assert_eq!(buf.range().start as u64 % device_block_size, 0);
        let tree = &self.store().tree;
        let layer_set = tree.layer_set();
        let mut merger = layer_set.merger();
        let mut iter = merger
            .query(Query::LimitedRange(&ObjectKey::extent(
                self.object_id(),
                attribute_id,
                offset..end_offset,
            )))
            .await?;
        // Length of the unaligned tail of the requested range, if any.
        let end_align = ((offset + buf.len() as u64) % block_size) as usize;
        let trace = self.trace();
        let reads = FuturesUnordered::new();
        while let Some(ItemRef {
            key:
                ObjectKey {
                    object_id,
                    data: ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent_key)),
                },
            value: ObjectValue::Extent(extent_value),
            ..
        }) = iter.get()
        {
            if *object_id != self.object_id() || *attr_id != attribute_id {
                break;
            }
            ensure!(
                extent_key.range.is_valid() && extent_key.range.is_aligned(block_size),
                FxfsError::Inconsistent
            );
            // Zero the sparse gap before this extent.
            if extent_key.range.start > offset {
                let to_zero = min(extent_key.range.start - offset, buf.len() as u64) as usize;
                for i in &mut buf.as_mut_slice()[..to_zero] {
                    *i = 0;
                }
                buf = buf.subslice_mut(to_zero..);
                if buf.is_empty() {
                    break;
                }
                offset += to_zero as u64;
            }

            if let ExtentValue::Some { device_offset, key_id, mode } = extent_value {
                let mut device_offset = device_offset + (offset - extent_key.range.start);
                let key_id = *key_id;

                // Whole blocks covered by this extent (excluding the unaligned tail).
                let to_copy = min(buf.len() - end_align, (extent_key.range.end - offset) as usize);
                if to_copy > 0 {
                    if trace {
                        info!(
                            store_id = self.store().store_object_id(),
                            oid = self.object_id(),
                            device_range:? = (device_offset..device_offset + to_copy as u64),
                            offset,
                            range:? = extent_key.range,
                            block_size;
                            "R",
                        );
                    }
                    let (mut head, tail) = buf.split_at_mut(to_copy);
                    // For partially-overwritten extents, slice out the bitmap
                    // window covering just the blocks being read.
                    let maybe_bitmap = match mode {
                        ExtentMode::OverwritePartial(bitmap) => {
                            let mut read_bitmap = bitmap.clone().split_off(
                                ((offset - extent_key.range.start) / block_size) as usize,
                            );
                            read_bitmap.truncate(to_copy / block_size as usize);
                            Some(read_bitmap)
                        }
                        _ => None,
                    };
                    // Queue the read; unwritten overwrite blocks are zeroed after decrypt.
                    reads.push(async move {
                        self.read_and_decrypt(device_offset, offset, head.reborrow(), key_id)
                            .await?;
                        if let Some(bitmap) = maybe_bitmap {
                            apply_bitmap_zeroing(self.block_size() as usize, &bitmap, head);
                        }
                        Ok::<(), Error>(())
                    });
                    buf = tail;
                    if buf.is_empty() {
                        break;
                    }
                    offset += to_copy as u64;
                    device_offset += to_copy as u64;
                }

                // Unaligned tail: read one whole block and copy the needed prefix.
                if offset < extent_key.range.end && end_align > 0 {
                    if let ExtentMode::OverwritePartial(bitmap) = mode {
                        let bitmap_offset = (offset - extent_key.range.start) / block_size;
                        // Tail block was never written: leave it for the final zero-fill.
                        if !bitmap.get(bitmap_offset as usize).ok_or(FxfsError::Inconsistent)? {
                            break;
                        }
                    }
                    let mut align_buf =
                        self.store().device.allocate_buffer(block_size as usize).await;
                    if trace {
                        info!(
                            store_id = self.store().store_object_id(),
                            oid = self.object_id(),
                            device_range:? = (device_offset..device_offset + align_buf.len() as u64);
                            "RT",
                        );
                    }
                    self.read_and_decrypt(device_offset, offset, align_buf.as_mut(), key_id)
                        .await?;
                    buf.as_mut_slice().copy_from_slice(&align_buf.as_slice()[..end_align]);
                    buf = buf.subslice_mut(0..0);
                    break;
                }
            } else if extent_key.range.end >= offset + buf.len() as u64 {
                // A deleted extent covering the rest of the request: remainder is zeros.
                break;
            }

            iter.advance().await?;
        }
        reads.try_collect::<()>().await?;
        // Anything left in `buf` was past the last extent — zero it.
        buf.as_mut_slice().fill(0);
        Ok(())
    }
1114
    /// Reads the entire contents of `attribute_id`, returning `None` when the
    /// attribute does not exist (no record, or a `None` tombstone).
    pub async fn read_attr(&self, attribute_id: u64) -> Result<Option<Box<[u8]>>, Error> {
        let store = self.store();
        let tree = &store.tree;
        let layer_set = tree.layer_set();
        let mut merger = layer_set.merger();
        let key = ObjectKey::attribute(self.object_id(), attribute_id, AttributeKey::Attribute);
        let iter = merger.query(Query::FullRange(&key)).await?;
        match iter.get() {
            Some(item) if item.key == &key => match item.value {
                ObjectValue::Attribute { .. } => Ok(Some(self.read_attr_from_iter(iter).await?)),
                ObjectValue::None => Ok(None),
                _ => Err(FxfsError::Inconsistent.into()),
            },
            _ => Ok(None),
        }
    }
1133
    /// Reads an entire attribute given an iterator positioned at its
    /// `AttributeKey::Attribute` record, following the extent records that
    /// come after it. Sparse gaps and unwritten overwrite-mode blocks are
    /// zero-filled; the result is exactly `size` bytes.
    ///
    /// # Errors
    /// Returns `InvalidArgs` if the iterator is not positioned on an attribute
    /// record for this object.
    pub async fn read_attr_from_iter(
        &self,
        mut iter: MergerIterator<'_, '_, ObjectKey, ObjectValue>,
    ) -> Result<Box<[u8]>, Error> {
        // Allocate a block-rounded buffer sized from the attribute record.
        let (mut buffer, size, attribute_id) = match iter.get() {
            Some(ItemRef {
                key:
                    ObjectKey {
                        object_id,
                        data: ObjectKeyData::Attribute(attribute_id, AttributeKey::Attribute),
                    },
                value: ObjectValue::Attribute { size, .. },
                ..
            }) if *object_id == self.object_id => {
                (
                    self.store()
                        .device
                        .allocate_buffer(round_up(*size, self.block_size()).unwrap() as usize)
                        .await,
                    *size as usize,
                    *attribute_id,
                )
            }
            _ => bail!(FxfsError::InvalidArgs),
        };

        self.store().logical_read_ops.fetch_add(1, Ordering::Relaxed);
        let mut last_offset = 0;
        loop {
            iter.advance().await?;
            match iter.get() {
                Some(ItemRef {
                    key:
                        ObjectKey {
                            object_id,
                            data:
                                ObjectKeyData::Attribute(attr_id, AttributeKey::Extent(extent_key)),
                        },
                    value: ObjectValue::Extent(extent_value),
                    ..
                }) if *object_id == self.object_id() && *attr_id == attribute_id => {
                    if let ExtentValue::Some { device_offset, key_id, mode } = extent_value {
                        let offset = extent_key.range.start as usize;
                        // Zero the sparse gap before this extent.
                        buffer.as_mut_slice()[last_offset..offset].fill(0);
                        let end = std::cmp::min(extent_key.range.end as usize, buffer.len());
                        let maybe_bitmap = match mode {
                            ExtentMode::OverwritePartial(bitmap) => {
                                // Trim the bitmap to the portion of the extent we read.
                                let mut read_bitmap = bitmap.clone();
                                read_bitmap.truncate(
                                    (end - extent_key.range.start as usize)
                                        / self.block_size() as usize,
                                );
                                Some(read_bitmap)
                            }
                            _ => None,
                        };
                        self.read_and_decrypt(
                            *device_offset,
                            extent_key.range.start,
                            buffer.subslice_mut(offset..end as usize),
                            *key_id,
                        )
                        .await?;
                        if let Some(bitmap) = maybe_bitmap {
                            // Zero blocks the bitmap marks as never written.
                            apply_bitmap_zeroing(
                                self.block_size() as usize,
                                &bitmap,
                                buffer.subslice_mut(offset..end as usize),
                            );
                        }
                        last_offset = end;
                        if last_offset >= size {
                            break;
                        }
                    }
                }
                _ => break,
            }
        }
        // Zero the tail beyond the last extent, then trim to the logical size.
        buffer.as_mut_slice()[std::cmp::min(last_offset, size)..].fill(0);
        Ok(buffer.as_slice()[..size].into())
    }
1221
    /// Writes `buf` to `attribute_id` at logical `offset`, landing at
    /// `device_offset` on the device, and returns any checksums produced.
    ///
    /// Unaligned writes are aligned via `align_buffer` (with `device_offset`
    /// adjusted back to the aligned start). If the object's key supports
    /// inline crypto the context is passed down to the device; otherwise the
    /// buffer is encrypted in software before `write_aligned`.
    pub async fn write_at(
        &self,
        attribute_id: u64,
        offset: u64,
        buf: MutableBufferRef<'_>,
        key_id: Option<u64>,
        mut device_offset: u64,
    ) -> Result<MaybeChecksums, Error> {
        let mut transfer_buf;
        let block_size = self.block_size();
        let (range, mut transfer_buf_ref) =
            if offset % block_size == 0 && buf.len() as u64 % block_size == 0 {
                (offset..offset + buf.len() as u64, buf)
            } else {
                let (range, buf) = self.align_buffer(attribute_id, offset, buf.as_ref()).await?;
                transfer_buf = buf;
                // Move the device offset back by the same amount the logical
                // start moved when aligning.
                device_offset -= offset - range.start;
                (range, transfer_buf.as_mut())
            };

        let mut crypt_ctx = None;
        if let (_, Some(key)) = self.get_key(key_id).await? {
            if let Some(ctx) = key.crypt_ctx(self.object_id, range.start) {
                crypt_ctx = Some(ctx);
            } else {
                key.encrypt(
                    self.object_id,
                    device_offset,
                    range.start,
                    transfer_buf_ref.as_mut_slice(),
                )?;
            }
        }
        self.write_aligned(transfer_buf_ref.as_ref(), device_offset, crypt_ctx).await
    }
1264
1265 #[cfg(feature = "migration")]
1270 pub async fn raw_multi_write(
1271 &self,
1272 transaction: &mut Transaction<'_>,
1273 attribute_id: u64,
1274 key_id: Option<u64>,
1275 ranges: &[Range<u64>],
1276 buf: MutableBufferRef<'_>,
1277 ) -> Result<(), Error> {
1278 self.multi_write_internal(transaction, attribute_id, key_id, ranges, buf).await?;
1279 Ok(())
1280 }
1281
    /// Writes `buf` to the logical `ranges` of `attribute_id` as a copy-on-write update:
    /// fresh device extents are allocated for the new data and any old extents covering
    /// `ranges` are deallocated.
    ///
    /// `buf` must cover exactly the bytes described by `ranges` and everything is assumed
    /// block aligned. Returns `(allocated, deallocated)` byte counts; the caller is
    /// responsible for adjusting the object's allocated size.
    async fn multi_write_internal(
        &self,
        transaction: &mut Transaction<'_>,
        attribute_id: u64,
        key_id: Option<u64>,
        ranges: &[Range<u64>],
        mut buf: MutableBufferRef<'_>,
    ) -> Result<(u64, u64), Error> {
        if buf.is_empty() {
            return Ok((0, 0));
        }
        let block_size = self.block_size();
        let store = self.store();
        let store_id = store.store_object_id();

        // Resolve the key. Requesting the volume data key may lazily create it, which adds
        // its own mutation to `transaction`.
        let (key_id, key) = if key_id == Some(VOLUME_DATA_KEY_ID)
            && matches!(self.encryption, Encryption::CachedKeys)
        {
            (
                VOLUME_DATA_KEY_ID,
                Some(
                    self.get_or_create_key(transaction)
                        .await
                        .context("get_or_create_key failed")?,
                ),
            )
        } else {
            self.get_key(key_id).await?
        };
        if let Some(key) = &key {
            // Without inline-crypto support, encrypt the payload in place up front, one
            // logical range at a time.
            if !key.supports_inline_encryption() {
                let mut slice = buf.as_mut_slice();
                for r in ranges {
                    let l = r.end - r.start;
                    let (head, tail) = slice.split_at_mut(l as usize);
                    key.encrypt(
                        self.object_id,
                        0, r.start,
                        head,
                    )?;
                    slice = tail;
                }
            }
        }

        let mut allocated = 0;
        let allocator = store.allocator();
        let trace = self.trace();
        let mut writes = FuturesOrdered::new();

        let mut logical_ranges = ranges.iter();
        let mut current_range = logical_ranges.next().unwrap().clone();

        // Allocate device space and issue the writes. The allocator may return less than
        // requested, and an allocated device range can span several logical ranges (or the
        // reverse), so the buffer is split on both boundaries.
        while !buf.is_empty() {
            let mut device_range = allocator
                .allocate(transaction, store_id, buf.len() as u64)
                .await
                .context("allocation failed")?;
            if trace {
                info!(
                    store_id,
                    oid = self.object_id(),
                    device_range:?,
                    len = device_range.end - device_range.start;
                    "A",
                );
            }
            let mut device_range_len = device_range.end - device_range.start;
            allocated += device_range_len;
            while device_range_len > 0 {
                // Advance to the next logical range once the current one is consumed.
                if current_range.end <= current_range.start {
                    current_range = logical_ranges.next().unwrap().clone();
                }
                // With inline encryption, a crypt context is tied to a logical offset, so
                // writes must be split at logical-range boundaries; otherwise the whole
                // device range can go out as a single write.
                let (crypt_ctx, split) = if let Some(key) = &key {
                    if key.supports_inline_encryption() {
                        let split = std::cmp::min(
                            current_range.end - current_range.start,
                            device_range_len,
                        );
                        let crypt_ctx = key.crypt_ctx(self.object_id, current_range.start);
                        current_range.start += split;
                        (crypt_ctx, split)
                    } else {
                        (None, device_range_len)
                    }
                } else {
                    (None, device_range_len)
                };

                let (head, tail) = buf.split_at_mut(split as usize);
                buf = tail;

                // Writes run concurrently; FuturesOrdered keeps completion results in
                // issue order so they can be re-matched against `ranges` below.
                writes.push_back(async move {
                    let len = head.len() as u64;
                    Result::<_, Error>::Ok((
                        device_range.start,
                        len,
                        self.write_aligned(head.as_ref(), device_range.start, crypt_ctx).await?,
                    ))
                });
                device_range.start += split;
                device_range_len = device_range.end - device_range.start;
            }
        }

        self.store().logical_write_ops.fetch_add(1, Ordering::Relaxed);
        // Concurrently: (a) drain the write results, building extent mutations and checksum
        // records, and (b) deallocate the old extents being replaced.
        let ((mutations, checksums), deallocated) = try_join!(
            async {
                let mut current_range = 0..0;
                let mut mutations = Vec::new();
                let mut out_checksums = Vec::new();
                let mut ranges = ranges.iter();
                while let Some((mut device_offset, mut len, mut checksums)) =
                    writes.try_next().await?
                {
                    // One completed device write may map onto several logical extents;
                    // chunk it against the logical ranges (same walk as above).
                    while len > 0 {
                        if current_range.end <= current_range.start {
                            current_range = ranges.next().unwrap().clone();
                        }
                        let chunk_len = std::cmp::min(len, current_range.end - current_range.start);
                        // Peel off the checksums belonging to this chunk (one per block).
                        let tail = checksums.split_off((chunk_len / block_size) as usize);
                        if let Some(checksums) = checksums.maybe_as_ref() {
                            out_checksums.push((
                                device_offset..device_offset + chunk_len,
                                checksums.to_owned(),
                            ));
                        }
                        mutations.push(Mutation::merge_object(
                            ObjectKey::extent(
                                self.object_id(),
                                attribute_id,
                                current_range.start..current_range.start + chunk_len,
                            ),
                            ObjectValue::Extent(ExtentValue::new(
                                device_offset,
                                checksums.to_mode(),
                                key_id,
                            )),
                        ));
                        checksums = tail;
                        device_offset += chunk_len;
                        len -= chunk_len;
                        current_range.start += chunk_len;
                    }
                }
                Result::<_, Error>::Ok((mutations, out_checksums))
            },
            async {
                let mut deallocated = 0;
                for r in ranges {
                    deallocated +=
                        self.deallocate_old_extents(transaction, attribute_id, r.clone()).await?;
                }
                Result::<_, Error>::Ok(deallocated)
            }
        )?;

        for m in mutations {
            transaction.add(store_id, m);
        }

        // Checksums are only recorded in the journal when barriers are disabled.
        if !store.filesystem().options().barriers_enabled {
            for (r, c) in checksums {
                transaction.add_checksum(r, c, true);
            }
        }
        Ok((allocated, deallocated))
    }
1460
1461 pub async fn multi_write(
1467 &self,
1468 transaction: &mut Transaction<'_>,
1469 attribute_id: u64,
1470 key_id: Option<u64>,
1471 ranges: &[Range<u64>],
1472 buf: MutableBufferRef<'_>,
1473 ) -> Result<(), Error> {
1474 let (allocated, deallocated) =
1475 self.multi_write_internal(transaction, attribute_id, key_id, ranges, buf).await?;
1476 if allocated == 0 && deallocated == 0 {
1477 return Ok(());
1478 }
1479 self.update_allocated_size(transaction, allocated, deallocated).await
1480 }
1481
    /// Writes `buf` into pre-allocated overwrite extents of `attr_id` covering `ranges`.
    ///
    /// Unlike `multi_write`, nothing is allocated here: every target range must already be
    /// covered by extents in `ExtentMode::Overwrite` or `ExtentMode::OverwritePartial`;
    /// anything else (a gap, a deleted extent, or a Raw/Cow extent) is treated as filesystem
    /// inconsistency. Partial-overwrite bitmaps are updated to record which blocks have now
    /// been written, upgrading to full `Overwrite` mode once all blocks are covered.
    pub async fn multi_overwrite<'a>(
        &'a self,
        transaction: &mut Transaction<'a>,
        attr_id: u64,
        ranges: &[Range<u64>],
        mut buf: MutableBufferRef<'_>,
    ) -> Result<(), Error> {
        if buf.is_empty() {
            return Ok(());
        }
        let block_size = self.block_size();
        let store = self.store();
        let tree = store.tree();
        let store_id = store.store_object_id();

        let (key_id, key) = self.get_key(None).await?;
        if let Some(key) = &key {
            // Without inline-crypto support, encrypt in place before writing, one logical
            // range at a time.
            if !key.supports_inline_encryption() {
                let mut slice = buf.as_mut_slice();
                for r in ranges {
                    let l = r.end - r.start;
                    let (head, tail) = slice.split_at_mut(l as usize);
                    key.encrypt(
                        self.object_id,
                        0, r.start,
                        head,
                    )?;
                    slice = tail;
                }
            }
        }

        let mut range_iter = ranges.into_iter();
        // Invariant: `target_range` is the still-unwritten portion of the current logical
        // range.
        let mut target_range = range_iter.next().unwrap().clone();
        let mut mutations = Vec::new();
        let writes = FuturesUnordered::new();

        let layer_set = tree.layer_set();
        let mut merger = layer_set.merger();
        let mut iter = merger
            .query(Query::FullRange(&ObjectKey::attribute(
                self.object_id(),
                attr_id,
                AttributeKey::Extent(ExtentKey::search_key_from_offset(target_range.start)),
            )))
            .await?;

        // Walk the existing extents in order, issuing one device write per overlap with the
        // target ranges.
        loop {
            match iter.get() {
                Some(ItemRef {
                    key:
                        ObjectKey {
                            object_id,
                            data:
                                ObjectKeyData::Attribute(
                                    attribute_id,
                                    AttributeKey::Extent(ExtentKey { range }),
                                ),
                        },
                    value: ObjectValue::Extent(extent_value),
                    ..
                }) if *object_id == self.object_id() && *attribute_id == attr_id => {
                    // Skip extents that end before the region being written.
                    if range.end <= target_range.start {
                        iter.advance().await?;
                        continue;
                    }
                    let (device_offset, mode) = match extent_value {
                        ExtentValue::None => {
                            return Err(anyhow!(FxfsError::Inconsistent)).with_context(|| {
                                format!(
                                    "multi_overwrite failed: target_range ({}, {}) overlaps with \
                                    deleted extent found at ({}, {})",
                                    target_range.start, target_range.end, range.start, range.end,
                                )
                            });
                        }
                        ExtentValue::Some { device_offset, mode, .. } => (device_offset, mode),
                    };
                    // A gap before the next extent means the target blocks were never
                    // allocated for overwrite.
                    if range.start > target_range.start {
                        return Err(anyhow!(FxfsError::Inconsistent)).with_context(|| {
                            format!(
                                "multi_overwrite failed: target range ({}, {}) starts before first \
                                extent found at ({}, {})",
                                target_range.start, target_range.end, range.start, range.end,
                            )
                        });
                    }
                    let mut bitmap = match mode {
                        ExtentMode::Raw | ExtentMode::Cow(_) => {
                            return Err(anyhow!(FxfsError::Inconsistent)).with_context(|| {
                                format!(
                                    "multi_overwrite failed: \
                                    extent from ({}, {}) which overlaps target range ({}, {}) had the \
                                    wrong extent mode",
                                    range.start, range.end, target_range.start, target_range.end,
                                )
                            });
                        }
                        ExtentMode::OverwritePartial(bitmap) => {
                            OverwriteBitmaps::new(bitmap.clone())
                        }
                        ExtentMode::Overwrite => OverwriteBitmaps::None,
                    };
                    // Write out every chunk of this extent that intersects the remaining
                    // target ranges.
                    loop {
                        let offset_within_extent = target_range.start - range.start;
                        let bitmap_offset = offset_within_extent / block_size;
                        let write_device_offset = *device_offset + offset_within_extent;
                        let write_end = min(range.end, target_range.end);
                        let write_len = write_end - target_range.start;
                        let write_device_range =
                            write_device_offset..write_device_offset + write_len;
                        let (current_buf, remaining_buf) = buf.split_at_mut(write_len as usize);

                        bitmap.set_offset(bitmap_offset as usize);
                        // Blocks written for the first time need their checksums recorded
                        // differently from rewrites, so split the device range into chunks
                        // of uniform first-write-ness.
                        let checksum_ranges = ChecksumRangeChunk::group_first_write_ranges(
                            &mut bitmap,
                            block_size,
                            write_device_range,
                        );

                        let crypt_ctx = if let Some(key) = &key {
                            key.crypt_ctx(self.object_id, target_range.start)
                        } else {
                            None
                        };

                        writes.push(async move {
                            let maybe_checksums = self
                                .write_aligned(current_buf.as_ref(), write_device_offset, crypt_ctx)
                                .await?;
                            // Pair each checksum chunk with its device range and whether it
                            // was a first write.
                            Ok::<_, Error>(match maybe_checksums {
                                MaybeChecksums::None => Vec::new(),
                                MaybeChecksums::Fletcher(checksums) => checksum_ranges
                                    .into_iter()
                                    .map(
                                        |ChecksumRangeChunk {
                                             checksum_range,
                                             device_range,
                                             is_first_write,
                                         }| {
                                            (
                                                device_range,
                                                checksums[checksum_range].to_vec(),
                                                is_first_write,
                                            )
                                        },
                                    )
                                    .collect(),
                            })
                        });
                        buf = remaining_buf;
                        target_range.start += write_len;
                        // Advance to the next logical range when this one is finished;
                        // leave this extent once the next write starts beyond it.
                        if target_range.start == target_range.end {
                            match range_iter.next() {
                                None => break,
                                Some(next_range) => target_range = next_range.clone(),
                            }
                        }
                        if range.end <= target_range.start {
                            break;
                        }
                    }
                    // If any partial-overwrite blocks changed state, fold the newly written
                    // bits into the extent's bitmap, upgrading to full Overwrite once every
                    // block has been written.
                    if let Some((mut bitmap, write_bitmap)) = bitmap.take_bitmaps() {
                        if bitmap.or(&write_bitmap) {
                            let mode = if bitmap.all() {
                                ExtentMode::Overwrite
                            } else {
                                ExtentMode::OverwritePartial(bitmap)
                            };
                            mutations.push(Mutation::merge_object(
                                ObjectKey::extent(self.object_id(), attr_id, range.clone()),
                                ObjectValue::Extent(ExtentValue::new(*device_offset, mode, key_id)),
                            ))
                        }
                    }
                    if target_range.start == target_range.end {
                        break;
                    }
                    iter.advance().await?;
                }
                _ => bail!(anyhow!(FxfsError::Internal).context(
                    "found a non-extent object record while there were still ranges to process"
                )),
            }
        }

        // Wait for every device write, then record checksums (only when barriers are
        // disabled) and apply the accumulated bitmap mutations.
        let checksums = writes.try_collect::<Vec<_>>().await?;
        if !store.filesystem().options().barriers_enabled {
            for (r, c, first_write) in checksums.into_iter().flatten() {
                transaction.add_checksum(r, c, first_write);
            }
        }

        for m in mutations {
            transaction.add(store_id, m);
        }

        Ok(())
    }
1696
    /// Writes a brand-new attribute `attribute_id` containing `data`, split into chunks of
    /// `batch_size` bytes with the transaction committed between chunks, so that arbitrarily
    /// large attributes don't have to fit in a single transaction.
    ///
    /// If more than one chunk (and therefore more than one commit) is needed, a graveyard
    /// entry for the attribute is added in the first transaction so a crash between commits
    /// leaves the partial attribute discoverable for cleanup.
    /// NOTE(review): presumably the caller (or graveyard replay) removes that entry once the
    /// attribute is complete — confirm with callers.
    ///
    /// The final chunk is left uncommitted in `transaction` for the caller to commit.
    #[trace]
    pub async fn write_new_attr_in_batches<'a>(
        &'a self,
        transaction: &mut Transaction<'a>,
        attribute_id: u64,
        data: &[u8],
        batch_size: usize,
    ) -> Result<(), Error> {
        // Record the attribute's final size up front.
        transaction.add(
            self.store().store_object_id,
            Mutation::replace_or_insert_object(
                ObjectKey::attribute(self.object_id(), attribute_id, AttributeKey::Attribute),
                ObjectValue::attribute(data.len() as u64, false),
            ),
        );
        let chunks = data.chunks(batch_size);
        let num_chunks = chunks.len();
        if num_chunks > 1 {
            // Multi-transaction write: protect the partial attribute with a graveyard entry.
            transaction.add(
                self.store().store_object_id,
                Mutation::replace_or_insert_object(
                    ObjectKey::graveyard_attribute_entry(
                        self.store().graveyard_directory_object_id(),
                        self.object_id(),
                        attribute_id,
                    ),
                    ObjectValue::Some,
                ),
            );
        }
        let mut start_offset = 0;
        for (i, chunk) in chunks.enumerate() {
            // Zero-pad each chunk up to a block boundary before writing.
            let rounded_len = round_up(chunk.len() as u64, self.block_size()).unwrap();
            let mut buffer = self.store().device.allocate_buffer(rounded_len as usize).await;
            let slice = buffer.as_mut_slice();
            slice[..chunk.len()].copy_from_slice(chunk);
            slice[chunk.len()..].fill(0);
            self.multi_write(
                transaction,
                attribute_id,
                Some(VOLUME_DATA_KEY_ID),
                &[start_offset..start_offset + rounded_len],
                buffer.as_mut(),
            )
            .await?;
            start_offset += rounded_len;
            // Commit after every chunk except the last, which the caller commits.
            if i < num_chunks - 1 {
                transaction.commit_and_continue().await?;
            }
        }
        Ok(())
    }
1756
1757 pub async fn write_attr(
1764 &self,
1765 transaction: &mut Transaction<'_>,
1766 attribute_id: u64,
1767 data: &[u8],
1768 ) -> Result<NeedsTrim, Error> {
1769 let rounded_len = round_up(data.len() as u64, self.block_size()).unwrap();
1770 let store = self.store();
1771 let tree = store.tree();
1772 let should_trim = if let Some(item) = tree
1773 .find(&ObjectKey::attribute(self.object_id(), attribute_id, AttributeKey::Attribute))
1774 .await?
1775 {
1776 match item.value {
1777 ObjectValue::Attribute { size: _, has_overwrite_extents: true } => {
1778 bail!(
1779 anyhow!(FxfsError::Inconsistent)
1780 .context("write_attr on an attribute with overwrite extents")
1781 )
1782 }
1783 ObjectValue::Attribute { size, .. } => (data.len() as u64) < size,
1784 _ => bail!(FxfsError::Inconsistent),
1785 }
1786 } else {
1787 false
1788 };
1789 let mut buffer = self.store().device.allocate_buffer(rounded_len as usize).await;
1790 let slice = buffer.as_mut_slice();
1791 slice[..data.len()].copy_from_slice(data);
1792 slice[data.len()..].fill(0);
1793 self.multi_write(
1794 transaction,
1795 attribute_id,
1796 Some(VOLUME_DATA_KEY_ID),
1797 &[0..rounded_len],
1798 buffer.as_mut(),
1799 )
1800 .await?;
1801 transaction.add(
1802 self.store().store_object_id,
1803 Mutation::replace_or_insert_object(
1804 ObjectKey::attribute(self.object_id(), attribute_id, AttributeKey::Attribute),
1805 ObjectValue::attribute(data.len() as u64, false),
1806 ),
1807 );
1808 if should_trim {
1809 self.shrink(transaction, attribute_id, data.len() as u64).await
1810 } else {
1811 Ok(NeedsTrim(false))
1812 }
1813 }
1814
1815 pub async fn list_extended_attributes(&self) -> Result<Vec<Vec<u8>>, Error> {
1816 let layer_set = self.store().tree().layer_set();
1817 let mut merger = layer_set.merger();
1818 let mut iter = merger
1820 .query(Query::FullRange(&ObjectKey::extended_attribute(self.object_id(), Vec::new())))
1821 .await?;
1822 let mut out = Vec::new();
1823 while let Some(item) = iter.get() {
1824 if item.value != &ObjectValue::None {
1826 match item.key {
1827 ObjectKey { object_id, data: ObjectKeyData::ExtendedAttribute { name } } => {
1828 if self.object_id() != *object_id {
1829 bail!(
1830 anyhow!(FxfsError::Inconsistent)
1831 .context("list_extended_attributes: wrong object id")
1832 )
1833 }
1834 out.push(name.clone());
1835 }
1836 _ => break,
1839 }
1840 }
1841 iter.advance().await?;
1842 }
1843 Ok(out)
1844 }
1845
1846 pub async fn get_inline_selinux_context(&self) -> Result<Option<fio::SelinuxContext>, Error> {
1850 const_assert!(fio::MAX_SELINUX_CONTEXT_ATTRIBUTE_LEN as usize <= MAX_INLINE_XATTR_SIZE);
1853 let item = match self
1854 .store()
1855 .tree()
1856 .find(&ObjectKey::extended_attribute(
1857 self.object_id(),
1858 fio::SELINUX_CONTEXT_NAME.into(),
1859 ))
1860 .await?
1861 {
1862 Some(item) => item,
1863 None => return Ok(None),
1864 };
1865 match item.value {
1866 ObjectValue::ExtendedAttribute(ExtendedAttributeValue::Inline(value)) => {
1867 Ok(Some(fio::SelinuxContext::Data(value)))
1868 }
1869 ObjectValue::ExtendedAttribute(ExtendedAttributeValue::AttributeId(_)) => {
1870 Ok(Some(fio::SelinuxContext::UseExtendedAttributes(fio::EmptyStruct {})))
1871 }
1872 _ => {
1873 bail!(
1874 anyhow!(FxfsError::Inconsistent)
1875 .context("get_inline_extended_attribute: Expected ExtendedAttribute value")
1876 )
1877 }
1878 }
1879 }
1880
1881 pub async fn get_extended_attribute(&self, name: Vec<u8>) -> Result<Vec<u8>, Error> {
1882 let item = self
1883 .store()
1884 .tree()
1885 .find(&ObjectKey::extended_attribute(self.object_id(), name))
1886 .await?
1887 .ok_or(FxfsError::NotFound)?;
1888 match item.value {
1889 ObjectValue::ExtendedAttribute(ExtendedAttributeValue::Inline(value)) => Ok(value),
1890 ObjectValue::ExtendedAttribute(ExtendedAttributeValue::AttributeId(id)) => {
1891 Ok(self.read_attr(id).await?.ok_or(FxfsError::Inconsistent)?.into_vec())
1892 }
1893 _ => {
1894 bail!(
1895 anyhow!(FxfsError::Inconsistent)
1896 .context("get_extended_attribute: Expected ExtendedAttribute value")
1897 )
1898 }
1899 }
1900 }
1901
1902 pub async fn set_extended_attribute(
1903 &self,
1904 name: Vec<u8>,
1905 value: Vec<u8>,
1906 mode: SetExtendedAttributeMode,
1907 ) -> Result<(), Error> {
1908 let store = self.store();
1909 let fs = store.filesystem();
1910 let keys = lock_keys![LockKey::object(store.store_object_id(), self.object_id())];
1913 let mut transaction = fs.new_transaction(keys, Options::default()).await?;
1914 self.set_extended_attribute_impl(name, value, mode, &mut transaction).await?;
1915 transaction.commit().await?;
1916 Ok(())
1917 }
1918
    /// Shared implementation for setting an extended attribute inside a caller-supplied
    /// transaction.
    ///
    /// Values up to `MAX_INLINE_XATTR_SIZE` bytes are stored inline in the xattr record;
    /// larger values are written into a dedicated attribute whose id is chosen from
    /// [`EXTENDED_ATTRIBUTE_RANGE_START`, `EXTENDED_ATTRIBUTE_RANGE_END`). If the xattr
    /// already has an out-of-line value, its existing attribute id is reused regardless of
    /// the new value's size.
    async fn set_extended_attribute_impl(
        &self,
        name: Vec<u8>,
        value: Vec<u8>,
        mode: SetExtendedAttributeMode,
        transaction: &mut Transaction<'_>,
    ) -> Result<(), Error> {
        ensure!(name.len() <= MAX_XATTR_NAME_SIZE, FxfsError::TooBig);
        ensure!(value.len() <= MAX_XATTR_VALUE_SIZE, FxfsError::TooBig);
        let tree = self.store().tree();
        let object_key = ObjectKey::extended_attribute(self.object_id(), name);

        // Look up any existing record, both to enforce `mode` and to reuse an existing
        // out-of-line attribute id.
        let existing_attribute_id = {
            let (found, existing_attribute_id) = match tree.find(&object_key).await? {
                None => (false, None),
                Some(Item { value, .. }) => (
                    true,
                    match value {
                        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::Inline(..)) => None,
                        ObjectValue::ExtendedAttribute(ExtendedAttributeValue::AttributeId(id)) => {
                            Some(id)
                        }
                        _ => bail!(
                            anyhow!(FxfsError::Inconsistent)
                                .context("expected extended attribute value")
                        ),
                    },
                ),
            };
            match mode {
                SetExtendedAttributeMode::Create if found => {
                    bail!(FxfsError::AlreadyExists)
                }
                SetExtendedAttributeMode::Replace if !found => {
                    bail!(FxfsError::NotFound)
                }
                _ => (),
            }
            existing_attribute_id
        };

        if let Some(attribute_id) = existing_attribute_id {
            // Reuse the attribute already backing this xattr.
            // NOTE(review): the NeedsTrim result is discarded here — presumably xattr-sized
            // values always trim fully within write_attr; confirm.
            let _ = self.write_attr(transaction, attribute_id, &value).await?;
        } else if value.len() <= MAX_INLINE_XATTR_SIZE {
            // Small value: store it directly inside the xattr record.
            transaction.add(
                self.store().store_object_id(),
                Mutation::replace_or_insert_object(
                    object_key,
                    ObjectValue::inline_extended_attribute(value),
                ),
            );
        } else {
            // Large value: pick the first unused attribute id in the extended-attribute
            // range by scanning this object's attribute records in key order.
            let mut attribute_id = EXTENDED_ATTRIBUTE_RANGE_START;
            let layer_set = tree.layer_set();
            let mut merger = layer_set.merger();
            let key = ObjectKey::attribute(self.object_id(), attribute_id, AttributeKey::Attribute);
            let mut iter = merger.query(Query::FullRange(&key)).await?;
            loop {
                match iter.get() {
                    None => break,
                    Some(ItemRef {
                        key: ObjectKey { object_id, data: ObjectKeyData::Attribute(attr_id, _) },
                        value,
                        ..
                    }) if *object_id == self.object_id() => {
                        // A tombstone means this id is free to use.
                        if matches!(value, ObjectValue::None) {
                            break;
                        }
                        // A gap before the next in-use id means the candidate is free;
                        // otherwise advance past the in-use id, bailing if the range is
                        // exhausted.
                        if attribute_id < *attr_id {
                            break;
                        } else if attribute_id == *attr_id {
                            attribute_id += 1;
                            if attribute_id == EXTENDED_ATTRIBUTE_RANGE_END {
                                bail!(FxfsError::NoSpace);
                            }
                        }
                    }
                    _ => break,
                }
                iter.advance().await?;
            }

            let _ = self.write_attr(transaction, attribute_id, &value).await?;
            // Point the xattr record at the freshly written attribute.
            transaction.add(
                self.store().store_object_id(),
                Mutation::replace_or_insert_object(
                    object_key,
                    ObjectValue::extended_attribute(attribute_id),
                ),
            );
        }

        Ok(())
    }
2036
2037 pub async fn remove_extended_attribute(&self, name: Vec<u8>) -> Result<(), Error> {
2038 let store = self.store();
2039 let tree = store.tree();
2040 let object_key = ObjectKey::extended_attribute(self.object_id(), name);
2041
2042 let keys = lock_keys![LockKey::object(store.store_object_id(), self.object_id())];
2047 let mut transaction = store.filesystem().new_transaction(keys, Options::default()).await?;
2048
2049 let attribute_to_delete =
2050 match tree.find(&object_key).await?.ok_or(FxfsError::NotFound)?.value {
2051 ObjectValue::ExtendedAttribute(ExtendedAttributeValue::AttributeId(id)) => Some(id),
2052 ObjectValue::ExtendedAttribute(ExtendedAttributeValue::Inline(..)) => None,
2053 _ => {
2054 bail!(
2055 anyhow!(FxfsError::Inconsistent)
2056 .context("remove_extended_attribute: Expected ExtendedAttribute value")
2057 )
2058 }
2059 };
2060
2061 transaction.add(
2062 store.store_object_id(),
2063 Mutation::replace_or_insert_object(object_key, ObjectValue::None),
2064 );
2065
2066 if let Some(attribute_id) = attribute_to_delete {
2073 let trim_result = store
2074 .trim_some(
2075 &mut transaction,
2076 self.object_id(),
2077 attribute_id,
2078 TrimMode::FromOffset(0),
2079 )
2080 .await?;
2081 assert_matches!(trim_result, TrimResult::Done(_));
2084 transaction.add(
2085 store.store_object_id(),
2086 Mutation::replace_or_insert_object(
2087 ObjectKey::attribute(self.object_id, attribute_id, AttributeKey::Attribute),
2088 ObjectValue::None,
2089 ),
2090 );
2091 }
2092
2093 transaction.commit().await?;
2094 Ok(())
2095 }
2096
2097 pub fn pre_fetch_keys(&self) -> Option<impl Future<Output = ()> + use<S>> {
2100 if let Encryption::CachedKeys = self.encryption {
2101 let owner = self.owner.clone();
2102 let object_id = self.object_id;
2103 Some(async move {
2104 let store = owner.as_ref().as_ref();
2105 if let Some(crypt) = store.crypt() {
2106 let _ = store
2107 .key_manager
2108 .get_keys(
2109 object_id,
2110 crypt.as_ref(),
2111 &mut Some(async || store.get_keys(object_id).await),
2112 false,
2113 false,
2114 )
2115 .await;
2116 }
2117 })
2118 } else {
2119 None
2120 }
2121 }
2122}
2123
2124impl<S: HandleOwner> Drop for StoreObjectHandle<S> {
2125 fn drop(&mut self) {
2126 if self.is_encrypted() {
2127 let _ = self.store().key_manager.remove(self.object_id);
2128 }
2129 }
2130}
2131
/// Returned by `write_attr` (via `shrink`) to tell the caller whether extents beyond the
/// newly written length still need trimming. `#[must_use]` so the flag can't be silently
/// dropped.
#[must_use]
pub struct NeedsTrim(pub bool);
2137
2138#[cfg(test)]
2139mod tests {
2140 use super::{ChecksumRangeChunk, OverwriteBitmaps};
2141 use crate::errors::FxfsError;
2142 use crate::filesystem::{FxFilesystem, OpenFxFilesystem};
2143 use crate::object_handle::ObjectHandle;
2144 use crate::object_store::data_object_handle::WRITE_ATTR_BATCH_SIZE;
2145 use crate::object_store::transaction::{Mutation, Options, lock_keys};
2146 use crate::object_store::{
2147 AttributeKey, DataObjectHandle, Directory, FSVERITY_MERKLE_ATTRIBUTE_ID, HandleOptions,
2148 LockKey, ObjectKey, ObjectStore, ObjectValue, SetExtendedAttributeMode, StoreObjectHandle,
2149 };
2150 use bit_vec::BitVec;
2151 use fuchsia_async as fasync;
2152 use futures::join;
2153 use std::sync::Arc;
2154 use storage_device::DeviceHolder;
2155 use storage_device::fake_device::FakeDevice;
2156
2157 const TEST_DEVICE_BLOCK_SIZE: u32 = 512;
2158 const TEST_OBJECT_NAME: &str = "foo";
2159
2160 fn is_error(actual: anyhow::Error, expected: FxfsError) {
2161 assert_eq!(*actual.root_cause().downcast_ref::<FxfsError>().unwrap(), expected)
2162 }
2163
2164 async fn test_filesystem() -> OpenFxFilesystem {
2165 let device = DeviceHolder::new(FakeDevice::new(16384, TEST_DEVICE_BLOCK_SIZE));
2166 FxFilesystem::new_empty(device).await.expect("new_empty failed")
2167 }
2168
2169 async fn test_filesystem_and_empty_object() -> (OpenFxFilesystem, DataObjectHandle<ObjectStore>)
2170 {
2171 let fs = test_filesystem().await;
2172 let store = fs.root_store();
2173
2174 let mut transaction = fs
2175 .clone()
2176 .new_transaction(
2177 lock_keys![LockKey::object(
2178 store.store_object_id(),
2179 store.root_directory_object_id()
2180 )],
2181 Options::default(),
2182 )
2183 .await
2184 .expect("new_transaction failed");
2185
2186 let object =
2187 ObjectStore::create_object(&store, &mut transaction, HandleOptions::default(), None)
2188 .await
2189 .expect("create_object failed");
2190
2191 let root_directory =
2192 Directory::open(&store, store.root_directory_object_id()).await.expect("open failed");
2193 root_directory
2194 .add_child_file(&mut transaction, TEST_OBJECT_NAME, &object)
2195 .await
2196 .expect("add_child_file failed");
2197
2198 transaction.commit().await.expect("commit failed");
2199
2200 (fs, object)
2201 }
2202
    // Races two concurrent removals of the same xattr on a shared StoreObjectHandle:
    // exactly one must succeed and the loser must observe NotFound.
    #[fuchsia::test(threads = 3)]
    async fn extended_attribute_double_remove() {
        let (fs, object) = test_filesystem_and_empty_object().await;
        // Use a bare StoreObjectHandle shared (via Arc) between both tasks.
        let basic = Arc::new(StoreObjectHandle::new(
            object.owner().clone(),
            object.object_id(),
            false,
            HandleOptions::default(),
            false,
        ));
        let basic_a = basic.clone();
        let basic_b = basic.clone();

        basic
            .set_extended_attribute(
                b"security.selinux".to_vec(),
                b"bar".to_vec(),
                SetExtendedAttributeMode::Set,
            )
            .await
            .expect("failed to set attribute");

        // Spawn the two removals on separate tasks so they can genuinely race.
        let a_task = fasync::Task::spawn(async move {
            basic_a.remove_extended_attribute(b"security.selinux".to_vec()).await
        });
        let b_task = fasync::Task::spawn(async move {
            basic_b.remove_extended_attribute(b"security.selinux".to_vec()).await
        });
        match join!(a_task, b_task) {
            (Ok(()), Ok(())) => panic!("both remove calls succeeded"),
            (Err(_), Err(_)) => panic!("both remove calls failed"),
            // Whichever task lost the race must see NotFound.
            (Ok(()), Err(e)) => is_error(e, FxfsError::NotFound),
            (Err(e), Ok(())) => is_error(e, FxfsError::NotFound),
        }

        fs.close().await.expect("Close failed");
    }
2247
    // Races two concurrent Create-mode sets of the same xattr: exactly one must win, the
    // loser must fail with AlreadyExists, and the stored value must be the winner's.
    #[fuchsia::test(threads = 3)]
    async fn extended_attribute_double_create() {
        let (fs, object) = test_filesystem_and_empty_object().await;
        // Use a bare StoreObjectHandle shared (via Arc) between both tasks.
        let basic = Arc::new(StoreObjectHandle::new(
            object.owner().clone(),
            object.object_id(),
            false,
            HandleOptions::default(),
            false,
        ));
        let basic_a = basic.clone();
        let basic_b = basic.clone();

        // Spawn both creates so they can genuinely race.
        let a_task = fasync::Task::spawn(async move {
            basic_a
                .set_extended_attribute(
                    b"security.selinux".to_vec(),
                    b"one".to_vec(),
                    SetExtendedAttributeMode::Create,
                )
                .await
        });
        let b_task = fasync::Task::spawn(async move {
            basic_b
                .set_extended_attribute(
                    b"security.selinux".to_vec(),
                    b"two".to_vec(),
                    SetExtendedAttributeMode::Create,
                )
                .await
        });
        match join!(a_task, b_task) {
            (Ok(()), Ok(())) => panic!("both set calls succeeded"),
            (Err(_), Err(_)) => panic!("both set calls failed"),
            // The surviving value must match whichever call succeeded.
            (Ok(()), Err(e)) => {
                assert_eq!(
                    basic
                        .get_extended_attribute(b"security.selinux".to_vec())
                        .await
                        .expect("failed to get xattr"),
                    b"one"
                );
                is_error(e, FxfsError::AlreadyExists);
            }
            (Err(e), Ok(())) => {
                assert_eq!(
                    basic
                        .get_extended_attribute(b"security.selinux".to_vec())
                        .await
                        .expect("failed to get xattr"),
                    b"two"
                );
                is_error(e, FxfsError::AlreadyExists);
            }
        }

        fs.close().await.expect("Close failed");
    }
2313
2314 struct TestAttr {
2315 name: Vec<u8>,
2316 value: Vec<u8>,
2317 }
2318
2319 impl TestAttr {
2320 fn new(name: impl AsRef<[u8]>, value: impl AsRef<[u8]>) -> Self {
2321 Self { name: name.as_ref().to_vec(), value: value.as_ref().to_vec() }
2322 }
2323 fn name(&self) -> Vec<u8> {
2324 self.name.clone()
2325 }
2326 fn value(&self) -> Vec<u8> {
2327 self.value.clone()
2328 }
2329 }
2330
2331 #[fuchsia::test]
2332 async fn extended_attributes() {
2333 let (fs, object) = test_filesystem_and_empty_object().await;
2334
2335 let test_attr = TestAttr::new(b"security.selinux", b"foo");
2336
2337 assert_eq!(object.list_extended_attributes().await.unwrap(), Vec::<Vec<u8>>::new());
2338 is_error(
2339 object.get_extended_attribute(test_attr.name()).await.unwrap_err(),
2340 FxfsError::NotFound,
2341 );
2342
2343 object
2344 .set_extended_attribute(
2345 test_attr.name(),
2346 test_attr.value(),
2347 SetExtendedAttributeMode::Set,
2348 )
2349 .await
2350 .unwrap();
2351 assert_eq!(object.list_extended_attributes().await.unwrap(), vec![test_attr.name()]);
2352 assert_eq!(
2353 object.get_extended_attribute(test_attr.name()).await.unwrap(),
2354 test_attr.value()
2355 );
2356
2357 object.remove_extended_attribute(test_attr.name()).await.unwrap();
2358 assert_eq!(object.list_extended_attributes().await.unwrap(), Vec::<Vec<u8>>::new());
2359 is_error(
2360 object.get_extended_attribute(test_attr.name()).await.unwrap_err(),
2361 FxfsError::NotFound,
2362 );
2363
2364 object
2366 .set_extended_attribute(
2367 test_attr.name(),
2368 test_attr.value(),
2369 SetExtendedAttributeMode::Set,
2370 )
2371 .await
2372 .unwrap();
2373 assert_eq!(object.list_extended_attributes().await.unwrap(), vec![test_attr.name()]);
2374 assert_eq!(
2375 object.get_extended_attribute(test_attr.name()).await.unwrap(),
2376 test_attr.value()
2377 );
2378
2379 object.remove_extended_attribute(test_attr.name()).await.unwrap();
2380 assert_eq!(object.list_extended_attributes().await.unwrap(), Vec::<Vec<u8>>::new());
2381 is_error(
2382 object.get_extended_attribute(test_attr.name()).await.unwrap_err(),
2383 FxfsError::NotFound,
2384 );
2385
2386 fs.close().await.expect("close failed");
2387 }
2388
2389 #[fuchsia::test]
2390 async fn large_extended_attribute() {
2391 let (fs, object) = test_filesystem_and_empty_object().await;
2392
2393 let test_attr = TestAttr::new(b"security.selinux", vec![3u8; 300]);
2394
2395 object
2396 .set_extended_attribute(
2397 test_attr.name(),
2398 test_attr.value(),
2399 SetExtendedAttributeMode::Set,
2400 )
2401 .await
2402 .unwrap();
2403 assert_eq!(
2404 object.get_extended_attribute(test_attr.name()).await.unwrap(),
2405 test_attr.value()
2406 );
2407
2408 assert_eq!(
2411 object
2412 .read_attr(64)
2413 .await
2414 .expect("read_attr failed")
2415 .expect("read_attr returned none")
2416 .into_vec(),
2417 test_attr.value()
2418 );
2419
2420 object.remove_extended_attribute(test_attr.name()).await.unwrap();
2421 is_error(
2422 object.get_extended_attribute(test_attr.name()).await.unwrap_err(),
2423 FxfsError::NotFound,
2424 );
2425
2426 object
2428 .set_extended_attribute(
2429 test_attr.name(),
2430 test_attr.value(),
2431 SetExtendedAttributeMode::Set,
2432 )
2433 .await
2434 .unwrap();
2435 assert_eq!(
2436 object.get_extended_attribute(test_attr.name()).await.unwrap(),
2437 test_attr.value()
2438 );
2439 object.remove_extended_attribute(test_attr.name()).await.unwrap();
2440 is_error(
2441 object.get_extended_attribute(test_attr.name()).await.unwrap_err(),
2442 FxfsError::NotFound,
2443 );
2444
2445 fs.close().await.expect("close failed");
2446 }
2447
2448 #[fuchsia::test]
2449 async fn multiple_extended_attributes() {
2450 let (fs, object) = test_filesystem_and_empty_object().await;
2451
2452 let attrs = [
2453 TestAttr::new(b"security.selinux", b"foo"),
2454 TestAttr::new(b"large.attribute", vec![3u8; 300]),
2455 TestAttr::new(b"an.attribute", b"asdf"),
2456 TestAttr::new(b"user.big", vec![5u8; 288]),
2457 TestAttr::new(b"user.tiny", b"smol"),
2458 TestAttr::new(b"this string doesn't matter", b"the quick brown fox etc"),
2459 TestAttr::new(b"also big", vec![7u8; 500]),
2460 TestAttr::new(b"all.ones", vec![1u8; 11111]),
2461 ];
2462
2463 for i in 0..attrs.len() {
2464 object
2465 .set_extended_attribute(
2466 attrs[i].name(),
2467 attrs[i].value(),
2468 SetExtendedAttributeMode::Set,
2469 )
2470 .await
2471 .unwrap();
2472 assert_eq!(
2473 object.get_extended_attribute(attrs[i].name()).await.unwrap(),
2474 attrs[i].value()
2475 );
2476 }
2477
2478 for i in 0..attrs.len() {
2479 let mut found_attrs = object.list_extended_attributes().await.unwrap();
2481 let mut expected_attrs: Vec<Vec<u8>> = attrs.iter().skip(i).map(|a| a.name()).collect();
2482 found_attrs.sort();
2483 expected_attrs.sort();
2484 assert_eq!(found_attrs, expected_attrs);
2485 for j in i..attrs.len() {
2486 assert_eq!(
2487 object.get_extended_attribute(attrs[j].name()).await.unwrap(),
2488 attrs[j].value()
2489 );
2490 }
2491
2492 object.remove_extended_attribute(attrs[i].name()).await.expect("failed to remove");
2493 is_error(
2494 object.get_extended_attribute(attrs[i].name()).await.unwrap_err(),
2495 FxfsError::NotFound,
2496 );
2497 }
2498
2499 fs.close().await.expect("close failed");
2500 }
2501
2502 #[fuchsia::test]
2503 async fn multiple_extended_attributes_delete() {
2504 let (fs, object) = test_filesystem_and_empty_object().await;
2505 let store = object.owner().clone();
2506
2507 let attrs = [
2508 TestAttr::new(b"security.selinux", b"foo"),
2509 TestAttr::new(b"large.attribute", vec![3u8; 300]),
2510 TestAttr::new(b"an.attribute", b"asdf"),
2511 TestAttr::new(b"user.big", vec![5u8; 288]),
2512 TestAttr::new(b"user.tiny", b"smol"),
2513 TestAttr::new(b"this string doesn't matter", b"the quick brown fox etc"),
2514 TestAttr::new(b"also big", vec![7u8; 500]),
2515 TestAttr::new(b"all.ones", vec![1u8; 11111]),
2516 ];
2517
2518 for i in 0..attrs.len() {
2519 object
2520 .set_extended_attribute(
2521 attrs[i].name(),
2522 attrs[i].value(),
2523 SetExtendedAttributeMode::Set,
2524 )
2525 .await
2526 .unwrap();
2527 assert_eq!(
2528 object.get_extended_attribute(attrs[i].name()).await.unwrap(),
2529 attrs[i].value()
2530 );
2531 }
2532
2533 let root_directory =
2535 Directory::open(object.owner(), object.store().root_directory_object_id())
2536 .await
2537 .expect("open failed");
2538 let mut transaction = fs
2539 .clone()
2540 .new_transaction(
2541 lock_keys![
2542 LockKey::object(store.store_object_id(), store.root_directory_object_id()),
2543 LockKey::object(store.store_object_id(), object.object_id()),
2544 ],
2545 Options::default(),
2546 )
2547 .await
2548 .expect("new_transaction failed");
2549 crate::object_store::directory::replace_child(
2550 &mut transaction,
2551 None,
2552 (&root_directory, TEST_OBJECT_NAME),
2553 )
2554 .await
2555 .expect("replace_child failed");
2556 transaction.commit().await.unwrap();
2557 store.tombstone_object(object.object_id(), Options::default()).await.unwrap();
2558
2559 crate::fsck::fsck(fs.clone()).await.unwrap();
2560
2561 fs.close().await.expect("close failed");
2562 }
2563
2564 #[fuchsia::test]
2565 async fn extended_attribute_changing_sizes() {
2566 let (fs, object) = test_filesystem_and_empty_object().await;
2567
2568 let test_name = b"security.selinux";
2569 let test_small_attr = TestAttr::new(test_name, b"smol");
2570 let test_large_attr = TestAttr::new(test_name, vec![3u8; 300]);
2571
2572 object
2573 .set_extended_attribute(
2574 test_small_attr.name(),
2575 test_small_attr.value(),
2576 SetExtendedAttributeMode::Set,
2577 )
2578 .await
2579 .unwrap();
2580 assert_eq!(
2581 object.get_extended_attribute(test_small_attr.name()).await.unwrap(),
2582 test_small_attr.value()
2583 );
2584
2585 assert!(object.read_attr(64).await.expect("read_attr failed").is_none());
2587
2588 crate::fsck::fsck(fs.clone()).await.unwrap();
2589
2590 object
2591 .set_extended_attribute(
2592 test_large_attr.name(),
2593 test_large_attr.value(),
2594 SetExtendedAttributeMode::Set,
2595 )
2596 .await
2597 .unwrap();
2598 assert_eq!(
2599 object.get_extended_attribute(test_large_attr.name()).await.unwrap(),
2600 test_large_attr.value()
2601 );
2602
2603 assert_eq!(
2606 object
2607 .read_attr(64)
2608 .await
2609 .expect("read_attr failed")
2610 .expect("read_attr returned none")
2611 .into_vec(),
2612 test_large_attr.value()
2613 );
2614
2615 crate::fsck::fsck(fs.clone()).await.unwrap();
2616
2617 object
2618 .set_extended_attribute(
2619 test_small_attr.name(),
2620 test_small_attr.value(),
2621 SetExtendedAttributeMode::Set,
2622 )
2623 .await
2624 .unwrap();
2625 assert_eq!(
2626 object.get_extended_attribute(test_small_attr.name()).await.unwrap(),
2627 test_small_attr.value()
2628 );
2629
2630 assert_eq!(
2633 object
2634 .read_attr(64)
2635 .await
2636 .expect("read_attr failed")
2637 .expect("read_attr returned none")
2638 .into_vec(),
2639 test_small_attr.value()
2640 );
2641
2642 crate::fsck::fsck(fs.clone()).await.unwrap();
2643
2644 object.remove_extended_attribute(test_small_attr.name()).await.expect("failed to remove");
2645
2646 crate::fsck::fsck(fs.clone()).await.unwrap();
2647
2648 fs.close().await.expect("close failed");
2649 }
2650
2651 #[fuchsia::test]
2652 async fn extended_attribute_max_size() {
2653 let (fs, object) = test_filesystem_and_empty_object().await;
2654
2655 let test_attr = TestAttr::new(
2656 vec![3u8; super::MAX_XATTR_NAME_SIZE],
2657 vec![1u8; super::MAX_XATTR_VALUE_SIZE],
2658 );
2659
2660 object
2661 .set_extended_attribute(
2662 test_attr.name(),
2663 test_attr.value(),
2664 SetExtendedAttributeMode::Set,
2665 )
2666 .await
2667 .unwrap();
2668 assert_eq!(
2669 object.get_extended_attribute(test_attr.name()).await.unwrap(),
2670 test_attr.value()
2671 );
2672 assert_eq!(object.list_extended_attributes().await.unwrap(), vec![test_attr.name()]);
2673 object.remove_extended_attribute(test_attr.name()).await.unwrap();
2674
2675 fs.close().await.expect("close failed");
2676 }
2677
2678 #[fuchsia::test]
2679 async fn extended_attribute_remove_then_create() {
2680 let (fs, object) = test_filesystem_and_empty_object().await;
2681
2682 let test_attr = TestAttr::new(
2683 vec![3u8; super::MAX_XATTR_NAME_SIZE],
2684 vec![1u8; super::MAX_XATTR_VALUE_SIZE],
2685 );
2686
2687 object
2688 .set_extended_attribute(
2689 test_attr.name(),
2690 test_attr.value(),
2691 SetExtendedAttributeMode::Create,
2692 )
2693 .await
2694 .unwrap();
2695 fs.journal().compact().await.unwrap();
2696 object.remove_extended_attribute(test_attr.name()).await.unwrap();
2697 object
2698 .set_extended_attribute(
2699 test_attr.name(),
2700 test_attr.value(),
2701 SetExtendedAttributeMode::Create,
2702 )
2703 .await
2704 .unwrap();
2705
2706 assert_eq!(
2707 object.get_extended_attribute(test_attr.name()).await.unwrap(),
2708 test_attr.value()
2709 );
2710
2711 fs.close().await.expect("close failed");
2712 }
2713
2714 #[fuchsia::test]
2715 async fn large_extended_attribute_max_number() {
2716 let (fs, object) = test_filesystem_and_empty_object().await;
2717
2718 let max_xattrs =
2719 super::EXTENDED_ATTRIBUTE_RANGE_END - super::EXTENDED_ATTRIBUTE_RANGE_START;
2720 for i in 0..max_xattrs {
2721 let test_attr = TestAttr::new(format!("{}", i).as_bytes(), vec![0x3; 300]);
2722 object
2723 .set_extended_attribute(
2724 test_attr.name(),
2725 test_attr.value(),
2726 SetExtendedAttributeMode::Set,
2727 )
2728 .await
2729 .unwrap_or_else(|_| panic!("failed to set xattr number {}", i));
2730 }
2731
2732 match object
2735 .set_extended_attribute(
2736 b"one.too.many".to_vec(),
2737 vec![0x3; 300],
2738 SetExtendedAttributeMode::Set,
2739 )
2740 .await
2741 {
2742 Ok(()) => panic!("set should not succeed"),
2743 Err(e) => is_error(e, FxfsError::NoSpace),
2744 }
2745
2746 object
2748 .set_extended_attribute(
2749 b"this.is.okay".to_vec(),
2750 b"small value".to_vec(),
2751 SetExtendedAttributeMode::Set,
2752 )
2753 .await
2754 .unwrap();
2755
2756 object
2758 .set_extended_attribute(b"11".to_vec(), vec![0x4; 300], SetExtendedAttributeMode::Set)
2759 .await
2760 .unwrap();
2761 object
2762 .set_extended_attribute(
2763 b"12".to_vec(),
2764 vec![0x1; 300],
2765 SetExtendedAttributeMode::Replace,
2766 )
2767 .await
2768 .unwrap();
2769
2770 object.remove_extended_attribute(b"5".to_vec()).await.unwrap();
2772 object
2773 .set_extended_attribute(
2774 b"new attr".to_vec(),
2775 vec![0x3; 300],
2776 SetExtendedAttributeMode::Set,
2777 )
2778 .await
2779 .unwrap();
2780
2781 fs.close().await.expect("close failed");
2782 }
2783
    #[fuchsia::test]
    async fn write_attr_trims_beyond_new_end() {
        let (fs, object) = test_filesystem_and_empty_object().await;

        let block_size = fs.block_size();
        let buf_size = block_size * 2;
        // Arbitrary attribute id for this test.
        let attribute_id = 10;

        // Manually write two blocks of data to the attribute and install an attribute
        // record sized to cover both blocks.
        let mut transaction = (*object).new_transaction(attribute_id).await.unwrap();
        let mut buffer = object.allocate_buffer(buf_size as usize).await;
        buffer.as_mut_slice().fill(3);
        object
            .multi_write(
                &mut transaction,
                attribute_id,
                &[0..block_size, block_size..block_size * 2],
                buffer.as_mut(),
            )
            .await
            .unwrap();
        transaction.add(
            object.store().store_object_id,
            Mutation::replace_or_insert_object(
                ObjectKey::attribute(object.object_id(), attribute_id, AttributeKey::Attribute),
                ObjectValue::attribute(block_size * 2, false),
            ),
        );
        transaction.commit().await.unwrap();

        crate::fsck::fsck(fs.clone()).await.unwrap();

        // Rewrite the attribute with only one block of data.
        // NOTE(review): asserting needs_trim.0 == false — write_attr appears to handle
        // trimming the now-excess second block itself here; confirm the exact semantics
        // of the returned flag against write_attr's definition.
        let mut transaction = (*object).new_transaction(attribute_id).await.unwrap();
        let needs_trim = (*object)
            .write_attr(&mut transaction, attribute_id, &vec![3u8; block_size as usize])
            .await
            .unwrap();
        assert!(!needs_trim.0);
        transaction.commit().await.unwrap();

        // fsck passing confirms no stray extents remain beyond the new attribute end.
        crate::fsck::fsck(fs.clone()).await.unwrap();

        fs.close().await.expect("close failed");
    }
2833
2834 #[fuchsia::test]
2835 async fn write_new_attr_in_batches_multiple_txns() {
2836 let (fs, object) = test_filesystem_and_empty_object().await;
2837 let merkle_tree = vec![1; 3 * WRITE_ATTR_BATCH_SIZE];
2838 let mut transaction =
2839 (*object).new_transaction(FSVERITY_MERKLE_ATTRIBUTE_ID).await.unwrap();
2840 object
2841 .write_new_attr_in_batches(
2842 &mut transaction,
2843 FSVERITY_MERKLE_ATTRIBUTE_ID,
2844 &merkle_tree,
2845 WRITE_ATTR_BATCH_SIZE,
2846 )
2847 .await
2848 .expect("failed to write merkle attribute");
2849
2850 transaction.add(
2851 object.store().store_object_id,
2852 Mutation::replace_or_insert_object(
2853 ObjectKey::graveyard_attribute_entry(
2854 object.store().graveyard_directory_object_id(),
2855 object.object_id(),
2856 FSVERITY_MERKLE_ATTRIBUTE_ID,
2857 ),
2858 ObjectValue::None,
2859 ),
2860 );
2861 transaction.commit().await.unwrap();
2862 assert_eq!(
2863 object.read_attr(FSVERITY_MERKLE_ATTRIBUTE_ID).await.expect("read_attr failed"),
2864 Some(merkle_tree.into())
2865 );
2866
2867 fs.close().await.expect("close failed");
2868 }
2869
    #[cfg(target_os = "fuchsia")]
    #[fuchsia::test(allow_stalls = false)]
    async fn test_watchdog() {
        use super::Watchdog;
        use fuchsia_async::{MonotonicDuration, MonotonicInstant, TestExecutor};
        use std::sync::mpsc::channel;

        // Pin the test executor's fake clock to t=0 so the absolute times below are
        // deterministic.
        TestExecutor::advance_to(make_time(0)).await;
        let (sender, receiver) = channel();

        // Absolute instant `time_secs` seconds after t=0.
        fn make_time(time_secs: i64) -> MonotonicInstant {
            MonotonicInstant::from_nanos(0) + MonotonicDuration::from_seconds(time_secs)
        }

        {
            // Watchdog created with interval 10; each firing reports its count through
            // the channel.
            let _watchdog = Watchdog::new(10, move |count| {
                sender.send(count).expect("Sending value");
            });

            // t=5: before the first interval elapses, nothing should have fired.
            TestExecutor::advance_to(make_time(5)).await;
            receiver.try_recv().expect_err("Should not have message");

            // t=10: the first interval elapses; first firing has count 1.
            TestExecutor::advance_to(make_time(10)).await;
            assert_eq!(1, receiver.recv().expect("Receiving"));

            // t=15: halfway through the next interval, still nothing new.
            TestExecutor::advance_to(make_time(15)).await;
            receiver.try_recv().expect_err("Should not have message");

            // t=30: two more intervals have elapsed; firings 2 and 3 both arrive.
            TestExecutor::advance_to(make_time(30)).await;
            assert_eq!(2, receiver.recv().expect("Receiving"));
            assert_eq!(3, receiver.recv().expect("Receiving"));
        }

        // The watchdog (and with it the sender) was dropped at the end of the scope
        // above, so advancing time further must never produce another message.
        TestExecutor::advance_to(make_time(100)).await;
        receiver.recv().expect_err("Watchdog should be gone");
    }
2912
2913 #[fuchsia::test]
2914 fn test_checksum_range_chunk() {
2915 let block_size = 4096;
2916
2917 assert_eq!(
2919 ChecksumRangeChunk::group_first_write_ranges(
2920 &mut OverwriteBitmaps::None,
2921 block_size,
2922 block_size * 2..block_size * 5,
2923 ),
2924 vec![ChecksumRangeChunk {
2925 checksum_range: 0..3,
2926 device_range: block_size * 2..block_size * 5,
2927 is_first_write: false,
2928 }],
2929 );
2930
2931 let mut bitmaps = OverwriteBitmaps::new(BitVec::from_bytes(&[0b11110000]));
2932 assert_eq!(
2933 ChecksumRangeChunk::group_first_write_ranges(
2934 &mut bitmaps,
2935 block_size,
2936 block_size * 2..block_size * 5,
2937 ),
2938 vec![ChecksumRangeChunk {
2939 checksum_range: 0..3,
2940 device_range: block_size * 2..block_size * 5,
2941 is_first_write: false,
2942 }],
2943 );
2944 assert_eq!(
2945 bitmaps.take_bitmaps(),
2946 Some((BitVec::from_bytes(&[0b11110000]), BitVec::from_bytes(&[0b11100000])))
2947 );
2948
2949 let mut bitmaps = OverwriteBitmaps::new(BitVec::from_bytes(&[0b11110000]));
2950 bitmaps.set_offset(2);
2951 assert_eq!(
2952 ChecksumRangeChunk::group_first_write_ranges(
2953 &mut bitmaps,
2954 block_size,
2955 block_size * 2..block_size * 5,
2956 ),
2957 vec![
2958 ChecksumRangeChunk {
2959 checksum_range: 0..2,
2960 device_range: block_size * 2..block_size * 4,
2961 is_first_write: false,
2962 },
2963 ChecksumRangeChunk {
2964 checksum_range: 2..3,
2965 device_range: block_size * 4..block_size * 5,
2966 is_first_write: true,
2967 },
2968 ],
2969 );
2970 assert_eq!(
2971 bitmaps.take_bitmaps(),
2972 Some((BitVec::from_bytes(&[0b11110000]), BitVec::from_bytes(&[0b00111000])))
2973 );
2974
2975 let mut bitmaps = OverwriteBitmaps::new(BitVec::from_bytes(&[0b11110000]));
2976 bitmaps.set_offset(4);
2977 assert_eq!(
2978 ChecksumRangeChunk::group_first_write_ranges(
2979 &mut bitmaps,
2980 block_size,
2981 block_size * 2..block_size * 5,
2982 ),
2983 vec![ChecksumRangeChunk {
2984 checksum_range: 0..3,
2985 device_range: block_size * 2..block_size * 5,
2986 is_first_write: true,
2987 }],
2988 );
2989 assert_eq!(
2990 bitmaps.take_bitmaps(),
2991 Some((BitVec::from_bytes(&[0b11110000]), BitVec::from_bytes(&[0b00001110])))
2992 );
2993
2994 let mut bitmaps = OverwriteBitmaps::new(BitVec::from_bytes(&[0b01010101]));
2995 assert_eq!(
2996 ChecksumRangeChunk::group_first_write_ranges(
2997 &mut bitmaps,
2998 block_size,
2999 block_size * 2..block_size * 10,
3000 ),
3001 vec![
3002 ChecksumRangeChunk {
3003 checksum_range: 0..1,
3004 device_range: block_size * 2..block_size * 3,
3005 is_first_write: true,
3006 },
3007 ChecksumRangeChunk {
3008 checksum_range: 1..2,
3009 device_range: block_size * 3..block_size * 4,
3010 is_first_write: false,
3011 },
3012 ChecksumRangeChunk {
3013 checksum_range: 2..3,
3014 device_range: block_size * 4..block_size * 5,
3015 is_first_write: true,
3016 },
3017 ChecksumRangeChunk {
3018 checksum_range: 3..4,
3019 device_range: block_size * 5..block_size * 6,
3020 is_first_write: false,
3021 },
3022 ChecksumRangeChunk {
3023 checksum_range: 4..5,
3024 device_range: block_size * 6..block_size * 7,
3025 is_first_write: true,
3026 },
3027 ChecksumRangeChunk {
3028 checksum_range: 5..6,
3029 device_range: block_size * 7..block_size * 8,
3030 is_first_write: false,
3031 },
3032 ChecksumRangeChunk {
3033 checksum_range: 6..7,
3034 device_range: block_size * 8..block_size * 9,
3035 is_first_write: true,
3036 },
3037 ChecksumRangeChunk {
3038 checksum_range: 7..8,
3039 device_range: block_size * 9..block_size * 10,
3040 is_first_write: false,
3041 },
3042 ],
3043 );
3044 assert_eq!(
3045 bitmaps.take_bitmaps(),
3046 Some((BitVec::from_bytes(&[0b01010101]), BitVec::from_bytes(&[0b11111111])))
3047 );
3048 }
3049}