1#![recursion_limit = "512"]
6
7use fuchsia_rcu::RcuReadScope;
8use once_cell::sync::OnceCell;
9use rand::Rng;
10use starnix_core::fs::tmpfs::{TmpFs, TmpFsDirectory};
11use starnix_core::mm::memory::MemoryObject;
12use starnix_core::security;
13use starnix_core::task::{CurrentTask, Kernel};
14use starnix_core::vfs::fs_args::MountParams;
15use starnix_core::vfs::rw_queue::RwQueueReadGuard;
16use starnix_core::vfs::{
17 AlreadyLockedAppendLockStrategy, AppendLockGuard, CacheMode, DirEntry, DirEntryHandle,
18 DirectoryEntryType, DirentSink, FallocMode, FileHandle, FileObject, FileOps, FileSystem,
19 FileSystemHandle, FileSystemOps, FileSystemOptions, FsNode, FsNodeHandle, FsNodeInfo,
20 FsNodeOps, FsStr, FsString, InputBuffer, MountInfo, OutputBuffer, RenameFlags, SeekTarget,
21 SymlinkTarget, UnlinkKind, ValueOrSize, VecInputBuffer, VecOutputBuffer, XattrOp, default_seek,
22 emit_dotdot, fileops_impl_directory, fileops_impl_noop_sync, fileops_impl_seekable,
23};
24use starnix_logging::{log_error, log_warn, track_stub};
25use starnix_sync::{
26 BeforeFsNodeAppend, FileOpsCore, FsNodeAppend, LockEqualOrBefore, Locked, RwLock,
27 RwLockReadGuard, RwLockWriteGuard, Unlocked,
28};
29use starnix_uapi::auth::{Credentials, FsCred};
30use starnix_uapi::device_type::DeviceType;
31use starnix_uapi::errors::{EEXIST, ENOENT, Errno};
32use starnix_uapi::file_mode::{FileMode, mode};
33use starnix_uapi::open_flags::OpenFlags;
34use starnix_uapi::{errno, error, ino_t, off_t, statfs};
35use std::collections::BTreeSet;
36use std::sync::Arc;
37use syncio::zxio_node_attr_has_t;
38
/// Name of the extended attribute that tags a directory as opaque. Written by
/// `ActiveEntry::set_opaque_xattr` and read back by `ActiveEntry::is_opaque_node`.
const OPAQUE_DIR_XATTR: &str = "trusted.overlay.opaque";
/// Value stored under `OPAQUE_DIR_XATTR`; `is_opaque_node` treats any other value (or a
/// failed read) as "not opaque".
const OPAQUE_DIR_XATTR_VALUE: &str = "y";
43
/// One directory entry as captured by `DirentSinkAdapter` while a directory is enumerated.
#[derive(Clone)]
struct DirEntryInfo {
    /// Name of the entry as handed to the sink.
    name: FsString,
    /// Inode number handed to the sink together with the name.
    inode_num: ino_t,
    /// Entry type handed to the sink together with the name.
    entry_type: DirectoryEntryType,
}
50
/// Ordered list of directory entries; `OverlayDirectory` keeps one as its `readdir` snapshot.
type DirEntries = Vec<DirEntryInfo>;
52
/// `DirentSink` implementation that collects the reported entries into a vector instead of
/// writing them to a user buffer.
#[derive(Default)]
struct DirentSinkAdapter {
    /// Entries collected so far (reserved names are filtered out by `add`).
    items: Vec<DirEntryInfo>,
    /// Offset passed with the most recent `add` call; starts at the `off_t` default.
    offset: off_t,
}
58
59impl DirentSink for DirentSinkAdapter {
60 fn add(
61 &mut self,
62 inode_num: ino_t,
63 offset: off_t,
64 entry_type: DirectoryEntryType,
65 name: &FsStr,
66 ) -> Result<(), Errno> {
67 if !DirEntry::is_reserved_name(name) {
68 self.items.push(DirEntryInfo { name: name.to_owned(), inode_num, entry_type });
69 }
70 self.offset = offset;
71 Ok(())
72 }
73
74 fn offset(&self) -> off_t {
75 self.offset
76 }
77}
78
/// Selects how much of a lower-layer node `OverlayNode::ensure_upper_maybe_copy` reproduces
/// in the upper layer.
#[derive(Copy, Clone, Eq, PartialEq)]
enum UpperCopyMode {
    /// Create the upper node without copying file content. `create_file_ops` picks this when
    /// the file is opened for writing with `OpenFlags::TRUNC`.
    MetadataOnly,
    /// For regular files, additionally copy the content via `copy_file_content`.
    CopyAll,
}
84
/// A directory entry of one of the underlying layers, paired with the mount that is passed
/// along whenever the entry is operated on.
#[derive(Clone)]
struct ActiveEntry {
    /// The directory entry in the underlying file system.
    entry: DirEntryHandle,
    /// Mount information handed to the underlying operations (lookup, create, xattr, unlink).
    mount: MountInfo,
}
93
94impl ActiveEntry {
95 fn mapper<'a>(entry: &'a ActiveEntry) -> impl Fn(DirEntryHandle) -> ActiveEntry + 'a {
96 |dir_entry| ActiveEntry { entry: dir_entry, mount: entry.mount.clone() }
97 }
98
99 fn entry(&self) -> &DirEntryHandle {
100 &self.entry
101 }
102
103 fn mount(&self) -> &MountInfo {
104 &self.mount
105 }
106
107 fn component_lookup<L>(
108 &self,
109 locked: &mut Locked<L>,
110 current_task: &CurrentTask,
111 name: &FsStr,
112 ) -> Result<Self, Errno>
113 where
114 L: LockEqualOrBefore<FileOpsCore>,
115 {
116 self.entry()
117 .component_lookup(locked, current_task, self.mount(), name)
118 .map(ActiveEntry::mapper(self))
119 }
120
121 fn create_entry<L>(
122 &self,
123 locked: &mut Locked<L>,
124 current_task: &CurrentTask,
125 name: &FsStr,
126 create_node_fn: impl FnOnce(
127 &mut Locked<L>,
128 &FsNodeHandle,
129 &MountInfo,
130 &FsStr,
131 ) -> Result<FsNodeHandle, Errno>,
132 ) -> Result<Self, Errno>
133 where
134 L: LockEqualOrBefore<FileOpsCore>,
135 {
136 self.entry()
137 .create_entry(locked, current_task, self.mount(), name, create_node_fn)
138 .map(ActiveEntry::mapper(self))
139 }
140
141 fn set_opaque_xattr<L>(
145 &self,
146 locked: &mut Locked<L>,
147 current_task: &CurrentTask,
148 ) -> Result<(), Errno>
149 where
150 L: LockEqualOrBefore<FileOpsCore>,
151 {
152 self.entry().node.set_xattr(
153 locked,
154 current_task,
155 self.mount(),
156 OPAQUE_DIR_XATTR.into(),
157 OPAQUE_DIR_XATTR_VALUE.into(),
158 XattrOp::Set,
159 )
160 }
161
162 fn is_opaque_node<L>(&self, locked: &mut Locked<L>, current_task: &CurrentTask) -> bool
164 where
165 L: LockEqualOrBefore<FileOpsCore>,
166 {
167 match self.entry().node.get_xattr(
168 locked,
169 current_task,
170 self.mount(),
171 OPAQUE_DIR_XATTR.into(),
172 OPAQUE_DIR_XATTR_VALUE.len(),
173 ) {
174 Ok(ValueOrSize::Value(v)) if v == OPAQUE_DIR_XATTR_VALUE => true,
175 _ => false,
176 }
177 }
178
179 fn create_whiteout<L>(
183 &self,
184 locked: &mut Locked<L>,
185 current_task: &CurrentTask,
186 name: &FsStr,
187 ) -> Result<ActiveEntry, Errno>
188 where
189 L: LockEqualOrBefore<FileOpsCore>,
190 {
191 self.create_entry(locked, current_task, name, |locked, dir, mount, name| {
192 dir.create_node(
193 locked,
194 current_task,
195 mount,
196 name,
197 FileMode::IFCHR,
198 DeviceType::NONE,
199 FsCred::root(),
200 )
201 })
202 }
203
204 fn is_whiteout(&self) -> bool {
206 let info = self.entry().node.info();
207 info.mode.is_chr() && info.rdev == DeviceType::NONE
208 }
209
210 fn is_whiteout_child<L>(
214 &self,
215 locked: &mut Locked<L>,
216 current_task: &CurrentTask,
217 info: &DirEntryInfo,
218 ) -> Result<bool, Errno>
219 where
220 L: LockEqualOrBefore<FileOpsCore>,
221 {
222 if info.entry_type != DirectoryEntryType::CHR {
224 return Ok(false);
225 }
226 let entry = self.component_lookup(locked, current_task, info.name.as_ref())?;
227 Ok(entry.is_whiteout())
228 }
229
230 fn read_dir_entries<L>(
231 &self,
232 locked: &mut Locked<L>,
233 current_task: &CurrentTask,
234 ) -> Result<Vec<DirEntryInfo>, Errno>
235 where
236 L: LockEqualOrBefore<FileOpsCore>,
237 {
238 let mut sink = DirentSinkAdapter::default();
239 self.entry().open_anonymous(locked, current_task, OpenFlags::DIRECTORY)?.readdir(
240 locked,
241 current_task,
242 &mut sink,
243 )?;
244 Ok(sink.items)
245 }
246}
247
/// State shared by every overlay `FsNode`: the corresponding entries in the upper and lower
/// layers plus what is needed to create the upper entry on demand.
struct OverlayNode {
    /// The overlay stack this node belongs to.
    stack: Arc<OverlayStack>,

    /// Entry in the upper layer. Either provided at construction or initialized once by
    /// `ensure_upper_maybe_copy`.
    upper: OnceCell<ActiveEntry>,
    /// Entry in the lower layer, if there is one.
    lower: Option<ActiveEntry>,

    /// Set by `prepare_to_unlink` once the upper directory has been tagged opaque; when set,
    /// `lookup` and `refresh_dir_entries` no longer consult the lower layer.
    upper_is_opaque: OnceCell<()>,

    /// Parent overlay node. `init_fs_node_for_child` stores it only when the child has no
    /// upper entry yet; `ensure_upper_maybe_copy` uses it to find where to create one.
    parent: Option<Arc<OverlayNode>>,
}
264
265impl OverlayNode {
266 fn new(
267 stack: Arc<OverlayStack>,
268 lower: Option<ActiveEntry>,
269 upper: Option<ActiveEntry>,
270 parent: Option<Arc<OverlayNode>>,
271 ) -> Arc<Self> {
272 assert!(upper.is_some() || parent.is_some());
273
274 let upper = match upper {
275 Some(entry) => OnceCell::with_value(entry),
276 None => OnceCell::new(),
277 };
278
279 Arc::new(OverlayNode { stack, upper, lower, upper_is_opaque: OnceCell::new(), parent })
280 }
281
282 fn from_fs_node(node: &FsNodeHandle) -> Result<&Arc<Self>, Errno> {
283 Ok(&node.downcast_ops::<OverlayNodeOps>().ok_or_else(|| errno!(EIO))?.node)
284 }
285
286 fn main_entry(&self) -> &ActiveEntry {
287 self.upper.get().or(self.lower.as_ref()).expect("Expected either upper or lower node")
288 }
289
290 fn init_fs_node_for_child(
291 self: &Arc<OverlayNode>,
292 node: &FsNode,
293 lower: Option<ActiveEntry>,
294 upper: Option<ActiveEntry>,
295 ) -> FsNodeHandle {
296 let entry = upper.as_ref().or(lower.as_ref()).expect("expect either lower or upper node");
297 let ino = entry.entry().node.ino;
298 let info = entry.entry().node.info().clone();
299
300 let parent = if upper.is_some() { None } else { Some(self.clone()) };
302
303 let overlay_node =
304 OverlayNodeOps { node: OverlayNode::new(self.stack.clone(), lower, upper, parent) };
305 FsNode::new_uncached(ino, overlay_node, &node.fs(), info)
306 }
307
308 fn ensure_upper<L>(
311 &self,
312 locked: &mut Locked<L>,
313 current_task: &CurrentTask,
314 fs: &FileSystem,
315 ) -> Result<&ActiveEntry, Errno>
316 where
317 L: LockEqualOrBefore<FileOpsCore>,
318 {
319 self.ensure_upper_maybe_copy(locked, current_task, UpperCopyMode::CopyAll, fs)
320 }
321
/// Returns the upper entry of this node, creating it from the lower entry when missing.
///
/// When the upper entry already exists it is returned as is. Otherwise the parent's upper
/// entry is ensured first and a node with the lower entry's name, mode and credentials is
/// created inside it:
/// - symlinks are recreated with the target read from the lower node (`EIO` when the target
///   is not a path);
/// - regular files with `copy_mode == UpperCopyMode::CopyAll` go through
///   `OverlayStack::create_upper_entry`, with `copy_file_content` as the init step;
/// - everything else (including regular files in `MetadataOnly` mode) is created with
///   `create_node` using the lower node's mode and `rdev`.
///
/// Extended attributes are not copied (see the `track_stub!` below).
///
/// # Panics
/// Panics when the upper entry is missing and the node has no lower entry or no parent.
fn ensure_upper_maybe_copy<L>(
    &self,
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    copy_mode: UpperCopyMode,
    fs: &FileSystem,
) -> Result<&ActiveEntry, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // The closure only runs when `upper` has not been initialized yet.
    self.upper.get_or_try_init(|| {
        let lower = self.lower.as_ref().expect("lower is expected when upper is missing");
        let parent = self.parent.as_ref().expect("Parent is expected when upper is missing");
        // The containing directory must exist in the upper layer before the child is created.
        let parent_upper = parent.ensure_upper(locked, current_task, fs)?;
        let name = lower.entry.local_name(&RcuReadScope::new()).to_owned();
        // Clone the info inside a block so the guard returned by `info()` is released
        // before the code below runs.
        let info = {
            let info = lower.entry.node.info();
            info.clone()
        };
        let cred = info.cred();

        // Start from the mounter's credentials, let the security module adjust them for the
        // copy-up, and run the creation under the resulting credentials.
        let mut copy_up_creds = Credentials::clone(&self.stack.mounter);
        security::fs_node_copy_up(current_task, &lower.entry.node, fs, &mut copy_up_creds);
        let res = current_task.override_creds(Arc::new(copy_up_creds), || {
            if info.mode.is_lnk() {
                let link_target = lower.entry.node.readlink(locked, current_task)?;
                let link_path = match &link_target {
                    SymlinkTarget::Node(_) => return error!(EIO),
                    SymlinkTarget::Path(path) => path,
                };
                parent_upper.create_entry(
                    locked,
                    current_task,
                    name.as_ref(),
                    |locked, dir, mount, name| {
                        dir.create_symlink(
                            locked,
                            current_task,
                            mount,
                            name,
                            link_path.as_ref(),
                            cred,
                        )
                    },
                )
            } else if info.mode.is_reg() && copy_mode == UpperCopyMode::CopyAll {
                // Regular file with content: create it through `create_upper_entry` and fill
                // it with `copy_file_content`.
                self.stack.create_upper_entry(
                    locked,
                    current_task,
                    parent_upper,
                    name.as_ref(),
                    |locked, dir, name| {
                        dir.create_entry(
                            locked,
                            current_task,
                            name,
                            |locked, dir_node, mount, name| {
                                dir_node.create_node(
                                    locked,
                                    current_task,
                                    mount,
                                    name,
                                    info.mode,
                                    DeviceType::NONE,
                                    cred,
                                )
                            },
                        )
                    },
                    |locked, entry| copy_file_content(locked, current_task, lower, &entry),
                )
            } else {
                // Directories, special files, and regular files in `MetadataOnly` mode: no
                // content to copy, so the node is created directly in the parent.
                parent_upper.create_entry(
                    locked,
                    current_task,
                    name.as_ref(),
                    |locked, dir, mount, name| {
                        dir.create_node(
                            locked,
                            current_task,
                            mount,
                            name,
                            info.mode,
                            info.rdev,
                            cred,
                        )
                    },
                )
            }
        });

        track_stub!(TODO("https://fxbug.dev/322874151"), "overlayfs copy xattrs");
        res
    })
}
419
420 fn has_lower(&self) -> bool {
422 self.lower.is_some()
423 }
424
425 fn lower_entry_exists<L>(
427 &self,
428 locked: &mut Locked<L>,
429 current_task: &CurrentTask,
430 name: &FsStr,
431 ) -> Result<bool, Errno>
432 where
433 L: LockEqualOrBefore<FileOpsCore>,
434 {
435 match &self.lower {
436 Some(lower) => match lower.component_lookup(locked, current_task, name) {
437 Ok(entry) => Ok(!entry.is_whiteout()),
438 Err(err) if err.code == ENOENT => Ok(false),
439 Err(err) => Err(err),
440 },
441 None => Ok(false),
442 }
443 }
444
445 fn create_entry<F, L>(
454 self: &Arc<OverlayNode>,
455 locked: &mut Locked<L>,
456 node: &FsNode,
457 current_task: &CurrentTask,
458 name: &FsStr,
459 do_create: F,
460 ) -> Result<ActiveEntry, Errno>
461 where
462 F: Fn(&mut Locked<L>, &ActiveEntry, &FsStr) -> Result<ActiveEntry, Errno>,
463 L: LockEqualOrBefore<FileOpsCore>,
464 {
465 let upper = self.ensure_upper(locked, current_task, &node.fs())?;
466
467 match upper.component_lookup(locked, current_task, name) {
468 Ok(existing) => {
469 if !existing.is_whiteout() {
471 return error!(EEXIST);
472 }
473 }
474
475 Err(e) if e.code == ENOENT => {
476 if self.lower_entry_exists(locked, current_task, name)? {
478 return error!(EEXIST);
479 }
480 }
481 Err(e) => return Err(e),
482 };
483
484 self.stack.create_upper_entry(
485 locked,
486 current_task,
487 upper,
488 name,
489 |locked, entry, fs| do_create(locked, entry, fs),
490 |_, _entry| Ok(()),
491 )
492 }
493
494 fn prepare_to_unlink<L>(
500 self: &Arc<OverlayNode>,
501 locked: &mut Locked<L>,
502 current_task: &CurrentTask,
503 ) -> Result<(), Errno>
504 where
505 L: LockEqualOrBefore<FileOpsCore>,
506 {
507 if self.main_entry().entry().node.is_dir() {
508 let mut lower_entries = BTreeSet::new();
509 if let Some(dir) = &self.lower {
510 for item in dir.read_dir_entries(locked, current_task)?.drain(..) {
511 if !dir.is_whiteout_child(locked, current_task, &item)? {
512 lower_entries.insert(item.name);
513 }
514 }
515 }
516
517 if let Some(dir) = self.upper.get() {
518 let mut to_remove = Vec::<FsString>::new();
519 for item in dir.read_dir_entries(locked, current_task)?.drain(..) {
520 if !dir.is_whiteout_child(locked, current_task, &item)? {
521 return error!(ENOTEMPTY);
522 }
523 lower_entries.remove(&item.name);
524 to_remove.push(item.name);
525 }
526
527 if !lower_entries.is_empty() {
528 return error!(ENOTEMPTY);
529 }
530
531 dir.set_opaque_xattr(locked, current_task)?;
533 let _ = self.upper_is_opaque.set(());
534
535 for name in to_remove.iter() {
537 dir.entry().unlink(
538 locked,
539 current_task,
540 dir.mount(),
541 name.as_ref(),
542 UnlinkKind::NonDirectory,
543 false,
544 )?;
545 }
546 }
547 }
548
549 Ok(())
550 }
551
552 fn as_mounter<R, F: FnOnce() -> R>(&self, current_task: &CurrentTask, do_work: F) -> R {
553 current_task.override_creds(self.stack.mounter.clone(), do_work)
554 }
555}
556
/// `FsNodeOps` implementation for overlay nodes; a thin wrapper around the shared
/// `OverlayNode` state.
struct OverlayNodeOps {
    /// Overlay state of the node these ops belong to.
    node: Arc<OverlayNode>,
}
560
561impl FsNodeOps for OverlayNodeOps {
562 fn create_file_ops(
563 &self,
564 locked: &mut Locked<FileOpsCore>,
565 node: &FsNode,
566 current_task: &CurrentTask,
567 flags: OpenFlags,
568 ) -> Result<Box<dyn FileOps>, Errno> {
569 self.node.as_mounter(current_task, || {
570 if flags.can_write() {
571 let copy_mode = if flags.contains(OpenFlags::TRUNC) {
573 UpperCopyMode::MetadataOnly
574 } else {
575 UpperCopyMode::CopyAll
576 };
577 self.node.ensure_upper_maybe_copy(locked, current_task, copy_mode, &node.fs())?;
578 }
579
580 let ops: Box<dyn FileOps> = if node.is_dir() {
581 Box::new(OverlayDirectory {
582 node: self.node.clone(),
583 dir_entries: Default::default(),
584 })
585 } else {
586 let state =
587 match (self.node.upper.get(), &self.node.lower) {
588 (Some(upper), _) => OverlayFileState::Upper(upper.entry().open_anonymous(
589 locked,
590 current_task,
591 flags,
592 )?),
593 (None, Some(lower)) => OverlayFileState::Lower(
594 lower.entry().open_anonymous(locked, current_task, flags)?,
595 ),
596 _ => panic!("Expected either upper or lower node"),
597 };
598
599 Box::new(OverlayFile { node: self.node.clone(), flags, state: RwLock::new(state) })
600 };
601
602 Ok(ops)
603 })
604 }
605
606 fn lookup(
607 &self,
608 locked: &mut Locked<FileOpsCore>,
609 node: &FsNode,
610 current_task: &CurrentTask,
611 name: &FsStr,
612 ) -> Result<FsNodeHandle, Errno> {
613 self.node.as_mounter(current_task, || {
614 let resolve_child = |locked: &mut Locked<FileOpsCore>,
615 dir_opt: Option<&ActiveEntry>| {
616 dir_opt
618 .as_ref()
619 .map(|dir| match dir.component_lookup(locked, current_task, name) {
620 Ok(entry) => Some(Ok(entry)),
621 Err(e) if e.code == ENOENT => None,
622 Err(e) => Some(Err(e)),
623 })
624 .flatten()
625 .transpose()
626 };
627
628 let upper: Option<ActiveEntry> = resolve_child(locked, self.node.upper.get())?;
629
630 let (upper_is_dir, upper_is_opaque) = match &upper {
631 Some(upper) if upper.is_whiteout() => return error!(ENOENT),
632 Some(upper) => {
633 let is_dir = upper.entry().node.is_dir();
634 let is_opaque = !is_dir || upper.is_opaque_node(locked, current_task);
635 (is_dir, is_opaque)
636 }
637 None => (false, false),
638 };
639
640 let parent_upper_is_opaque = self.node.upper_is_opaque.get().is_some();
641
642 let lookup_lower = !parent_upper_is_opaque && !upper_is_opaque;
644 let lower: Option<ActiveEntry> = if lookup_lower {
645 match resolve_child(locked, self.node.lower.as_ref())? {
646 Some(lower) if upper_is_dir && !lower.entry().node.is_dir() => None,
648 Some(lower) if lower.is_whiteout() => None,
649 result => result,
650 }
651 } else {
652 None
653 };
654
655 if upper.is_none() && lower.is_none() {
656 return error!(ENOENT);
657 }
658
659 Ok(self.node.init_fs_node_for_child(node, lower, upper))
660 })
661 }
662
663 fn mknod(
664 &self,
665 locked: &mut Locked<FileOpsCore>,
666 node: &FsNode,
667 current_task: &CurrentTask,
668 name: &FsStr,
669 mode: FileMode,
670 dev: DeviceType,
671 owner: FsCred,
672 ) -> Result<FsNodeHandle, Errno> {
673 let mut creds = Credentials::clone(&self.node.stack.mounter);
674 security::dentry_create_files_as(current_task, node, mode, name, &mut creds)?;
675 current_task.override_creds(Arc::new(creds), || {
676 let new_upper_node = self.node.create_entry(
677 locked,
678 node,
679 current_task,
680 name,
681 |locked, dir, temp_name| {
682 dir.create_entry(
683 locked,
684 current_task,
685 temp_name,
686 |locked, dir_node, mount, name| {
687 dir_node.create_node(
688 locked,
689 current_task,
690 mount,
691 name,
692 mode,
693 dev,
694 owner.clone(),
695 )
696 },
697 )
698 },
699 )?;
700 Ok(self.node.init_fs_node_for_child(node, None, Some(new_upper_node)))
701 })
702 }
703
704 fn mkdir(
705 &self,
706 locked: &mut Locked<FileOpsCore>,
707 node: &FsNode,
708 current_task: &CurrentTask,
709 name: &FsStr,
710 mode: FileMode,
711 owner: FsCred,
712 ) -> Result<FsNodeHandle, Errno> {
713 let mut creds = Credentials::clone(&self.node.stack.mounter);
714 security::dentry_create_files_as(current_task, node, mode, name, &mut creds)?;
715 current_task.override_creds(Arc::new(creds), || {
716 let new_upper_node = self.node.create_entry(
717 locked,
718 node,
719 current_task,
720 name,
721 |locked, dir, temp_name| {
722 let entry = dir.create_entry(
723 locked,
724 current_task,
725 temp_name,
726 |locked, dir_node, mount, name| {
727 dir_node.create_node(
728 locked,
729 current_task,
730 mount,
731 name,
732 mode,
733 DeviceType::NONE,
734 owner.clone(),
735 )
736 },
737 )?;
738
739 entry.set_opaque_xattr(locked, current_task)?;
741
742 Ok(entry)
743 },
744 )?;
745
746 Ok(self.node.init_fs_node_for_child(node, None, Some(new_upper_node)))
747 })
748 }
749
750 fn create_symlink(
751 &self,
752 locked: &mut Locked<FileOpsCore>,
753 node: &FsNode,
754 current_task: &CurrentTask,
755 name: &FsStr,
756 target: &FsStr,
757 owner: FsCred,
758 ) -> Result<FsNodeHandle, Errno> {
759 let mut creds = Credentials::clone(&self.node.stack.mounter);
760 security::dentry_create_files_as(current_task, node, FileMode::IFLNK, name, &mut creds)?;
761 current_task.override_creds(Arc::new(creds), || {
762 let new_upper_node = self.node.create_entry(
763 locked,
764 node,
765 current_task,
766 name,
767 |locked, dir, temp_name| {
768 dir.create_entry(
769 locked,
770 current_task,
771 temp_name,
772 |locked, dir_node, mount, name| {
773 dir_node.create_symlink(
774 locked,
775 current_task,
776 mount,
777 name,
778 target,
779 owner.clone(),
780 )
781 },
782 )
783 },
784 )?;
785 Ok(self.node.init_fs_node_for_child(node, None, Some(new_upper_node)))
786 })
787 }
788
789 fn readlink(
790 &self,
791 locked: &mut Locked<FileOpsCore>,
792 _node: &FsNode,
793 current_task: &CurrentTask,
794 ) -> Result<SymlinkTarget, Errno> {
795 self.node.as_mounter(current_task, || {
796 self.node.main_entry().entry().node.readlink(locked, current_task)
797 })
798 }
799
800 fn link(
801 &self,
802 locked: &mut Locked<FileOpsCore>,
803 node: &FsNode,
804 current_task: &CurrentTask,
805 name: &FsStr,
806 child: &FsNodeHandle,
807 ) -> Result<(), Errno> {
808 self.node.as_mounter(current_task, || {
809 let child_overlay = OverlayNode::from_fs_node(child)?;
810 let upper_child = child_overlay.ensure_upper(locked, current_task, &node.fs())?;
811 self.node.create_entry(
812 locked,
813 node,
814 current_task,
815 name,
816 |locked, dir, temp_name| {
817 dir.create_entry(
818 locked,
819 current_task,
820 temp_name,
821 |locked, dir_node, mount, name| {
822 dir_node.link(
823 locked,
824 current_task,
825 mount,
826 name,
827 &upper_child.entry().node,
828 )
829 },
830 )
831 },
832 )?;
833 Ok(())
834 })
835 }
836
837 fn unlink(
838 &self,
839 locked: &mut Locked<FileOpsCore>,
840 node: &FsNode,
841 current_task: &CurrentTask,
842 name: &FsStr,
843 child: &FsNodeHandle,
844 ) -> Result<(), Errno> {
845 self.node.as_mounter(current_task, || {
846 let upper = self.node.ensure_upper(locked, current_task, &node.fs())?;
847 let child_overlay = OverlayNode::from_fs_node(child)?;
848 child_overlay.prepare_to_unlink(locked, current_task)?;
849
850 let need_whiteout = self.node.lower_entry_exists(locked, current_task, name)?;
851 if need_whiteout {
852 self.node.stack.create_upper_entry(
853 locked,
854 current_task,
855 &upper,
856 &name,
857 |locked, work, name| work.create_whiteout(locked, current_task, name),
858 |_, _entry| Ok(()),
859 )?;
860 } else if let Some(child_upper) = child_overlay.upper.get() {
861 let kind = if child_upper.entry().node.is_dir() {
862 UnlinkKind::Directory
863 } else {
864 UnlinkKind::NonDirectory
865 };
866 upper.entry().unlink(locked, current_task, upper.mount(), name, kind, false)?;
867 }
868
869 Ok(())
870 })
871 }
872
873 fn fetch_and_refresh_info<'a>(
874 &self,
875 locked: &mut Locked<FileOpsCore>,
876 _node: &FsNode,
877 current_task: &CurrentTask,
878 info: &'a RwLock<FsNodeInfo>,
879 ) -> Result<RwLockReadGuard<'a, FsNodeInfo>, Errno> {
880 self.node.as_mounter(current_task, || {
881 let real_info = self
882 .node
883 .main_entry()
884 .entry()
885 .node
886 .fetch_and_refresh_info(locked, current_task)?
887 .clone();
888 let mut lock = info.write();
889 *lock = real_info;
890 Ok(RwLockWriteGuard::downgrade(lock))
891 })
892 }
893
894 fn update_attributes(
895 &self,
896 locked: &mut Locked<FileOpsCore>,
897 node: &FsNode,
898 current_task: &CurrentTask,
899 new_info: &FsNodeInfo,
900 has: zxio_node_attr_has_t,
901 ) -> Result<(), Errno> {
902 self.node.as_mounter(current_task, || {
903 let upper = self.node.ensure_upper(locked, current_task, &node.fs())?.entry();
904 upper.node.update_attributes(locked, current_task, |info| {
905 if has.modification_time {
906 info.time_modify = new_info.time_modify;
907 }
908 if has.access_time {
909 info.time_access = new_info.time_access;
910 }
911 if has.mode {
912 info.mode = new_info.mode;
913 }
914 if has.uid {
915 info.uid = new_info.uid;
916 }
917 if has.gid {
918 info.gid = new_info.gid;
919 }
920 if has.rdev {
921 info.rdev = new_info.rdev;
922 }
923 Ok(())
924 })
925 })
926 }
927
928 fn append_lock_read<'a>(
929 &'a self,
930 locked: &'a mut Locked<BeforeFsNodeAppend>,
931 node: &'a FsNode,
932 current_task: &CurrentTask,
933 ) -> Result<(RwQueueReadGuard<'a, FsNodeAppend>, &'a mut Locked<FsNodeAppend>), Errno> {
934 self.node.as_mounter(current_task, || {
935 let upper_node =
936 self.node.ensure_upper(locked, current_task, &node.fs())?.entry.node.as_ref();
937 upper_node.ops().append_lock_read(locked, upper_node, current_task)
938 })
939 }
940
941 fn truncate(
942 &self,
943 locked: &mut Locked<FileOpsCore>,
944 guard: &AppendLockGuard<'_>,
945 node: &FsNode,
946 current_task: &CurrentTask,
947 length: u64,
948 ) -> Result<(), Errno> {
949 self.node.as_mounter(current_task, || {
950 let upper = self.node.ensure_upper(locked, current_task, &node.fs())?;
951
952 upper.entry().node.truncate_with_strategy(
953 locked,
954 AlreadyLockedAppendLockStrategy::new(guard),
955 current_task,
956 upper.mount(),
957 length,
958 )
959 })
960 }
961
962 fn allocate(
963 &self,
964 locked: &mut Locked<FileOpsCore>,
965 guard: &AppendLockGuard<'_>,
966 node: &FsNode,
967 current_task: &CurrentTask,
968 mode: FallocMode,
969 offset: u64,
970 length: u64,
971 ) -> Result<(), Errno> {
972 self.node.as_mounter(current_task, || {
973 let node = &self.node.ensure_upper(locked, current_task, &node.fs())?.entry().node;
974 node.fallocate_with_strategy(
975 locked,
976 AlreadyLockedAppendLockStrategy::new(guard),
977 current_task,
978 mode,
979 offset,
980 length,
981 )
982 })
983 }
984
985 fn get_xattr(
986 &self,
987 locked: &mut Locked<FileOpsCore>,
988 _node: &FsNode,
989 current_task: &CurrentTask,
990 name: &FsStr,
991 max_size: usize,
992 ) -> Result<ValueOrSize<FsString>, Errno> {
993 let entry = self
994 .node
995 .upper
996 .get()
997 .or(self.node.lower.as_ref())
998 .expect("expect either lower or upper node");
999 self.node.as_mounter(current_task, || {
1000 entry.entry().node.get_xattr(locked, current_task, &entry.mount, name, max_size)
1001 })
1002 }
1003
1004 fn set_xattr(
1005 &self,
1006 locked: &mut Locked<FileOpsCore>,
1007 node: &FsNode,
1008 current_task: &CurrentTask,
1009 name: &FsStr,
1010 value: &FsStr,
1011 op: XattrOp,
1012 ) -> Result<(), Errno> {
1013 self.node.as_mounter(current_task, || {
1014 let upper = self.node.ensure_upper(locked, current_task, &node.fs())?;
1015 upper.entry().node.set_xattr(locked, current_task, &upper.mount, name, value, op)
1016 })
1017 }
1018
1019 fn remove_xattr(
1020 &self,
1021 locked: &mut Locked<FileOpsCore>,
1022 node: &FsNode,
1023 current_task: &CurrentTask,
1024 name: &FsStr,
1025 ) -> Result<(), Errno> {
1026 self.node.as_mounter(current_task, || {
1027 let upper = self.node.ensure_upper(locked, current_task, &node.fs())?;
1028 upper.entry().node.remove_xattr(locked, current_task, &upper.mount, name)
1029 })
1030 }
1031
1032 fn list_xattrs(
1033 &self,
1034 locked: &mut Locked<FileOpsCore>,
1035 _node: &FsNode,
1036 current_task: &CurrentTask,
1037 max_size: usize,
1038 ) -> Result<ValueOrSize<Vec<FsString>>, Errno> {
1039 self.node.as_mounter(current_task, || {
1040 let entry = self
1041 .node
1042 .upper
1043 .get()
1044 .or(self.node.lower.as_ref())
1045 .expect("expect either lower or upper node");
1046 entry.entry().node.list_xattrs(locked, current_task, max_size)
1047 })
1048 }
1049}
/// `FileOps` state for an open overlay directory.
struct OverlayDirectory {
    /// Overlay node of the directory.
    node: Arc<OverlayNode>,
    /// Snapshot of the merged entries, rebuilt by `refresh_dir_entries` whenever `readdir`
    /// starts from offset 0.
    dir_entries: RwLock<DirEntries>,
}
1054
1055impl OverlayDirectory {
1056 fn refresh_dir_entries<L>(
1057 &self,
1058 locked: &mut Locked<L>,
1059 current_task: &CurrentTask,
1060 ) -> Result<(), Errno>
1061 where
1062 L: LockEqualOrBefore<FileOpsCore>,
1063 {
1064 let mut entries = DirEntries::new();
1065
1066 let upper_is_opaque = self.node.upper_is_opaque.get().is_some();
1067 let merge_with_lower = self.node.lower.is_some() && !upper_is_opaque;
1068
1069 let mut upper_set = BTreeSet::new();
1072 if let Some(dir) = self.node.upper.get() {
1073 for item in dir.read_dir_entries(locked, current_task)?.drain(..) {
1074 if merge_with_lower {
1076 upper_set.insert(item.name.clone());
1077 }
1078 if !dir.is_whiteout_child(locked, current_task, &item)? {
1079 entries.push(item);
1080 }
1081 }
1082 }
1083
1084 if merge_with_lower {
1085 if let Some(dir) = &self.node.lower {
1086 for item in dir.read_dir_entries(locked, current_task)?.drain(..) {
1087 if !upper_set.contains(&item.name)
1088 && !dir.is_whiteout_child(locked, current_task, &item)?
1089 {
1090 entries.push(item);
1091 }
1092 }
1093 }
1094 }
1095
1096 *self.dir_entries.write() = entries;
1097
1098 Ok(())
1099 }
1100}
1101
1102impl FileOps for OverlayDirectory {
1103 fileops_impl_directory!();
1104 fileops_impl_noop_sync!();
1105
1106 fn seek(
1107 &self,
1108 _locked: &mut Locked<FileOpsCore>,
1109 _file: &FileObject,
1110 current_task: &CurrentTask,
1111 current_offset: off_t,
1112 target: SeekTarget,
1113 ) -> Result<off_t, Errno> {
1114 self.node
1115 .as_mounter(current_task, || default_seek(current_offset, target, || error!(EINVAL)))
1116 }
1117
1118 fn readdir(
1119 &self,
1120 locked: &mut Locked<FileOpsCore>,
1121 file: &FileObject,
1122 current_task: &CurrentTask,
1123 sink: &mut dyn DirentSink,
1124 ) -> Result<(), Errno> {
1125 self.node.as_mounter(current_task, || {
1126 if sink.offset() == 0 {
1127 self.refresh_dir_entries(locked, current_task)?;
1128 }
1129
1130 emit_dotdot(file, sink)?;
1131
1132 for item in self.dir_entries.read().iter().skip(sink.offset() as usize - 2) {
1133 sink.add(item.inode_num, sink.offset() + 1, item.entry_type, item.name.as_ref())?;
1134 }
1135
1136 Ok(())
1137 })
1138 }
1139}
1140
/// The underlying file an `OverlayFile` currently forwards to.
enum OverlayFileState {
    /// File opened from the lower layer.
    Lower(FileHandle),
    /// File opened from the upper layer.
    Upper(FileHandle),
}
1145
1146impl OverlayFileState {
1147 fn file(&self) -> &FileHandle {
1148 match self {
1149 Self::Lower(f) | Self::Upper(f) => f,
1150 }
1151 }
1152}
1153
/// `FileOps` state for an open overlay file (anything that is not a directory).
struct OverlayFile {
    /// Overlay node of the file.
    node: Arc<OverlayNode>,
    /// Flags the file was opened with; reused when `read` re-opens the file from the upper
    /// layer.
    flags: OpenFlags,
    /// The underlying file currently in use; `read` switches it from lower to upper once an
    /// upper entry exists.
    state: RwLock<OverlayFileState>,
}
1159
1160impl FileOps for OverlayFile {
1161 fileops_impl_seekable!();
1162
1163 fn read(
1164 &self,
1165 locked: &mut Locked<FileOpsCore>,
1166 _file: &FileObject,
1167 current_task: &CurrentTask,
1168 offset: usize,
1169 data: &mut dyn OutputBuffer,
1170 ) -> Result<usize, Errno> {
1171 self.node.as_mounter(current_task, || {
1172 let mut state = self.state.read();
1173
1174 if let Some(upper) = self.node.upper.get() {
1177 if matches!(*state, OverlayFileState::Lower(_)) {
1178 std::mem::drop(state);
1179
1180 {
1181 let mut write_state = self.state.write();
1182
1183 *write_state = OverlayFileState::Upper(upper.entry().open_anonymous(
1186 locked,
1187 current_task,
1188 self.flags,
1189 )?);
1190 }
1191 state = self.state.read();
1192 }
1193 }
1194
1195 state.file().read_at(locked, current_task, offset, data)
1197 })
1198 }
1199
1200 fn write(
1201 &self,
1202 locked: &mut Locked<FileOpsCore>,
1203 _file: &FileObject,
1204 current_task: &CurrentTask,
1205 offset: usize,
1206 data: &mut dyn InputBuffer,
1207 ) -> Result<usize, Errno> {
1208 self.node.as_mounter(current_task, || {
1209 let state = self.state.read();
1210 let file = match &*state {
1211 OverlayFileState::Upper(f) => f.clone(),
1212
1213 OverlayFileState::Lower(_) => panic!("write() called for a lower FS file."),
1216 };
1217 std::mem::drop(state);
1218 file.write_at(locked, current_task, offset, data)
1219 })
1220 }
1221
1222 fn sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1223 self.node.as_mounter(current_task, || self.state.read().file().sync(current_task))
1224 }
1225
1226 fn get_memory(
1227 &self,
1228 locked: &mut Locked<FileOpsCore>,
1229 _file: &FileObject,
1230 current_task: &CurrentTask,
1231 length: Option<usize>,
1232 prot: starnix_core::mm::ProtectionFlags,
1233 ) -> Result<Arc<MemoryObject>, Errno> {
1234 self.node.as_mounter(current_task, || {
1235 self.state.read().file().get_memory(locked, current_task, length, prot)
1239 })
1240 }
1241}
1242
1243pub fn new_overlay_fs(
1244 locked: &mut Locked<Unlocked>,
1245 current_task: &CurrentTask,
1246 options: FileSystemOptions,
1247) -> Result<FileSystemHandle, Errno> {
1248 OverlayStack::new_fs(locked, current_task, options)
1249}
1250
/// The layers an overlay file system is built from, plus the credentials used to access
/// them.
pub struct OverlayStack {
    /// File system that contains the lower directory.
    #[allow(unused)]
    lower_fs: FileSystemHandle,
    /// File system that contains the upper directory.
    upper_fs: FileSystemHandle,

    /// Work directory. `new_fs` requires it to be on the same file system as the upper
    /// directory.
    work: ActiveEntry,

    /// Credentials captured when the overlay was set up (the mounting task's credentials in
    /// `new_fs`, root in `wrap_fs_in_writable_layer`); `OverlayNode::as_mounter` runs the
    /// underlying operations with them.
    mounter: Arc<Credentials>,
}
1263
1264impl OverlayStack {
1265 fn new_fs(
1266 locked: &mut Locked<Unlocked>,
1267 current_task: &CurrentTask,
1268 options: FileSystemOptions,
1269 ) -> Result<FileSystemHandle, Errno> {
1270 match options.params.get("redirect_dir".as_bytes()) {
1271 None => (),
1272 Some(o) if o == "off" => (),
1273 Some(_) => {
1274 track_stub!(TODO("https://fxbug.dev/322874205"), "overlayfs redirect_dir");
1275 return error!(ENOTSUP);
1276 }
1277 }
1278
1279 let lower = resolve_dir_param(locked, current_task, &options.params, "lowerdir".into())?;
1280 let upper = resolve_dir_param(locked, current_task, &options.params, "upperdir".into())?;
1281 let work = resolve_dir_param(locked, current_task, &options.params, "workdir".into())?;
1282
1283 let lower_fs = lower.entry().node.fs();
1284 let upper_fs = upper.entry().node.fs();
1285
1286 if !Arc::ptr_eq(&upper_fs, &work.entry().node.fs()) {
1287 log_error!("overlayfs: upperdir and workdir must be on the same FS");
1288 return error!(EINVAL);
1289 }
1290
1291 let kernel = current_task.kernel();
1292 let mounter = current_task.current_creds().clone();
1293 let stack = Arc::new(OverlayStack { lower_fs, upper_fs, work, mounter });
1294 let root_node = OverlayNode::new(stack.clone(), Some(lower), Some(upper), None);
1295 let fs =
1296 FileSystem::new(locked, kernel, CacheMode::Uncached, OverlayFs { stack }, options)?;
1297 let root_ino = fs.allocate_ino();
1298 fs.create_root(root_ino, OverlayNodeOps { node: root_node });
1299 Ok(fs)
1300 }
1301
1302 pub fn wrap_fs_in_writable_layer<L>(
1304 locked: &mut Locked<L>,
1305 kernel: &Kernel,
1306 rootfs: FileSystemHandle,
1307 ) -> Result<FileSystemHandle, Errno>
1308 where
1309 L: LockEqualOrBefore<FileOpsCore>,
1310 {
1311 let lower = ActiveEntry { entry: rootfs.root().clone(), mount: MountInfo::detached() };
1312
1313 let invisible_tmp = TmpFs::new_fs(locked, kernel);
1315
1316 let create_directory = |fs: &FileSystemHandle| {
1317 let ino = fs.allocate_ino();
1318 let info = FsNodeInfo::new(mode!(IFDIR, 0o777), FsCred::root());
1319 let node = fs.create_detached_node(ino, TmpFsDirectory::new(), info);
1320 let dir_entry = DirEntry::new(node, None, FsString::default());
1321
1322 security::fs_node_init_with_dentry_deferred(kernel, &dir_entry);
1325
1326 dir_entry
1327 };
1328
1329 let upper =
1330 ActiveEntry { entry: create_directory(&invisible_tmp), mount: MountInfo::detached() };
1331 let work =
1332 ActiveEntry { entry: create_directory(&invisible_tmp), mount: MountInfo::detached() };
1333
1334 let lower_fs = rootfs;
1335 let upper_fs = invisible_tmp;
1336
1337 let mounter = Credentials::root();
1338 let stack = Arc::new(OverlayStack { lower_fs, upper_fs, work, mounter });
1339 let root_node = OverlayNode::new(stack.clone(), Some(lower), Some(upper), None);
1340 let fs = FileSystem::new(
1341 locked,
1342 kernel,
1343 CacheMode::Uncached,
1344 OverlayFs { stack },
1345 FileSystemOptions::default(),
1346 )?;
1347 let root_ino = fs.allocate_ino();
1348 fs.create_root(root_ino, OverlayNodeOps { node: root_node });
1349 Ok(fs)
1350 }
1351
1352 fn create_upper_entry<FCreate, FInit, L>(
1360 &self,
1361 locked: &mut Locked<L>,
1362 current_task: &CurrentTask,
1363 target_dir: &ActiveEntry,
1364 name: &FsStr,
1365 try_create: FCreate,
1366 do_init: FInit,
1367 ) -> Result<ActiveEntry, Errno>
1368 where
1369 L: LockEqualOrBefore<FileOpsCore>,
1370 FCreate: Fn(&mut Locked<L>, &ActiveEntry, &FsStr) -> Result<ActiveEntry, Errno>,
1371 FInit: FnOnce(&mut Locked<L>, &ActiveEntry) -> Result<(), Errno>,
1372 {
1373 let mut rng = rand::rng();
1374 let (temp_name, entry) = loop {
1375 let x: u64 = rng.random();
1376 let temp_name = FsString::from(format!("tmp{:x}", x));
1377 match try_create(locked, &self.work, temp_name.as_ref()) {
1378 Err(err) if err.code == EEXIST => continue,
1379 Err(err) => return Err(err),
1380 Ok(entry) => break (temp_name, entry),
1381 }
1382 };
1383
1384 do_init(locked, &entry)
1385 .and_then(|()| {
1386 DirEntry::rename(
1387 locked,
1388 current_task,
1389 self.work.entry(),
1390 self.work.mount(),
1391 temp_name.as_ref(),
1392 target_dir.entry(),
1393 target_dir.mount(),
1394 name,
1395 RenameFlags::REPLACE_ANY,
1396 )
1397 })
1398 .map_err(|e| {
1399 self.work
1401 .entry()
1402 .unlink(
1403 locked,
1404 current_task,
1405 self.work.mount(),
1406 temp_name.as_ref(),
1407 UnlinkKind::NonDirectory,
1408 false,
1409 )
1410 .unwrap_or_else(|e| {
1411 log_error!("Failed to cleanup work dir after an error: {}", e)
1412 });
1413 e
1414 })?;
1415
1416 Ok(entry)
1417 }
1418}
1419
/// `FileSystemOps` implementation for overlayfs.
struct OverlayFs {
    // Shared overlay state: layer filesystems, work directory and mounter credentials.
    stack: Arc<OverlayStack>,
}
1423
1424impl FileSystemOps for OverlayFs {
1425 fn statfs(
1426 &self,
1427 locked: &mut Locked<FileOpsCore>,
1428 _fs: &FileSystem,
1429 current_task: &CurrentTask,
1430 ) -> Result<statfs, Errno> {
1431 current_task.override_creds(self.stack.mounter.clone(), || {
1432 self.stack.upper_fs.statfs(locked, current_task)
1433 })
1434 }
1435
1436 fn name(&self) -> &'static FsStr {
1437 "overlay".into()
1438 }
1439
1440 fn rename(
1441 &self,
1442 locked: &mut Locked<FileOpsCore>,
1443 _fs: &FileSystem,
1444 current_task: &CurrentTask,
1445 old_parent: &FsNodeHandle,
1446 old_name: &FsStr,
1447 new_parent: &FsNodeHandle,
1448 new_name: &FsStr,
1449 renamed: &FsNodeHandle,
1450 _replaced: Option<&FsNodeHandle>,
1451 ) -> Result<(), Errno> {
1452 current_task.override_creds(self.stack.mounter.clone(), || {
1453 let renamed_overlay = OverlayNode::from_fs_node(renamed)?;
1454 if renamed_overlay.has_lower() && renamed_overlay.main_entry().entry().node.is_dir() {
1455 return error!(EXDEV);
1459 }
1460 renamed_overlay.ensure_upper(locked, current_task, &renamed.fs())?;
1461
1462 let old_parent_overlay = OverlayNode::from_fs_node(old_parent)?;
1463 let old_parent_upper =
1464 old_parent_overlay.ensure_upper(locked, current_task, &renamed.fs())?;
1465
1466 let new_parent_overlay = OverlayNode::from_fs_node(new_parent)?;
1467 let new_parent_upper =
1468 new_parent_overlay.ensure_upper(locked, current_task, &renamed.fs())?;
1469
1470 let need_whiteout =
1471 old_parent_overlay.lower_entry_exists(locked, current_task, old_name)?;
1472
1473 DirEntry::rename(
1474 locked,
1475 current_task,
1476 old_parent_upper.entry(),
1477 old_parent_upper.mount(),
1478 old_name,
1479 new_parent_upper.entry(),
1480 new_parent_upper.mount(),
1481 new_name,
1482 RenameFlags::REPLACE_ANY,
1483 )?;
1484
1485 if need_whiteout {
1488 match old_parent_upper.create_whiteout(locked, current_task, old_name) {
1489 Err(e) => log_warn!("overlayfs: failed to create whiteout for {old_name}: {e}"),
1490 Ok(_) => (),
1491 }
1492 }
1493
1494 Ok(())
1495 })
1496 }
1497
1498 fn unmount(&self) {}
1499}
1500
1501fn resolve_dir_param(
1505 locked: &mut Locked<Unlocked>,
1506 current_task: &CurrentTask,
1507 params: &MountParams,
1508 name: &FsStr,
1509) -> Result<ActiveEntry, Errno> {
1510 let path = params.get(&**name).ok_or_else(|| {
1511 log_error!("overlayfs: {name} was not specified");
1512 errno!(EINVAL)
1513 })?;
1514
1515 current_task
1516 .open_file(locked, path.as_ref(), OpenFlags::RDONLY | OpenFlags::DIRECTORY)
1517 .map(|f| ActiveEntry { entry: f.name.entry.clone(), mount: f.name.mount.clone() })
1518 .map_err(|e| {
1519 log_error!("overlayfs: Failed to lookup {path}: {}", e);
1520 e
1521 })
1522}
1523
1524fn copy_file_content<L>(
1526 locked: &mut Locked<L>,
1527 current_task: &CurrentTask,
1528 from: &ActiveEntry,
1529 to: &ActiveEntry,
1530) -> Result<(), Errno>
1531where
1532 L: LockEqualOrBefore<FileOpsCore>,
1533{
1534 let from_file = from.entry().open_anonymous(locked, current_task, OpenFlags::RDONLY)?;
1535 let to_file = to.entry().open_anonymous(locked, current_task, OpenFlags::WRONLY)?;
1536
1537 const BUFFER_SIZE: usize = 4096;
1538
1539 loop {
1540 let mut output_buffer = VecOutputBuffer::new(BUFFER_SIZE);
1543 let bytes_read = from_file.read(locked, current_task, &mut output_buffer)?;
1544 if bytes_read == 0 {
1545 break;
1546 }
1547
1548 let buffer: Vec<u8> = output_buffer.into();
1549 let mut input_buffer = VecInputBuffer::from(buffer);
1550 while input_buffer.available() > 0 {
1551 to_file.write(locked, current_task, &mut input_buffer)?;
1552 }
1553 }
1554
1555 to_file.data_sync(current_task)?;
1556
1557 Ok(())
1558}