Skip to main content

starnix_core/vfs/
dir_entry.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::security;
6use crate::task::CurrentTask;
7use crate::vfs::{
8    CheckAccessReason, FileHandle, FileObject, FsLockDepType, FsNodeHandle, FsNodeLinkBehavior,
9    FsStr, FsString, LookupVec, MountInfo, Mounts, NamespaceNode, UnlinkKind, path,
10};
11use atomic_bitflags::atomic_bitflags;
12use bitflags::bitflags;
13use fuchsia_rcu::{RcuOptionArc, RcuReadScope};
14use fuchsia_sync::ResetDependencies;
15use starnix_rcu::RcuString;
16use starnix_sync::{
17    DirEntryChildrenLevel, DirEntryChildrenRecursiveLevel, DynamicLockDepRwLock, FileOpsCore,
18    FuseDirEntryChildrenLevel, LockDepWriteGuard, LockEqualOrBefore, Locked, allow_subclass,
19};
20use starnix_uapi::auth::FsCred;
21use starnix_uapi::errors::{ENOENT, Errno};
22use starnix_uapi::file_mode::{Access, FileMode};
23use starnix_uapi::inotify_mask::InotifyMask;
24use starnix_uapi::open_flags::OpenFlags;
25use starnix_uapi::{NAME_MAX, RENAME_EXCHANGE, RENAME_NOREPLACE, RENAME_WHITEOUT, error};
26use std::collections::BTreeMap;
27use std::collections::btree_map::Entry;
28use std::fmt;
29use std::ops::Deref;
30use std::sync::atomic::Ordering;
31use std::sync::{Arc, Weak};
32
33bitflags! {
34    #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
35    pub struct RenameFlags: u32 {
36        // Exchange the entries.
37        const EXCHANGE = RENAME_EXCHANGE;
38
39        // Don't overwrite an existing DirEntry.
40        const NOREPLACE = RENAME_NOREPLACE;
41
42        // Create a "whiteout" object to replace the file.
43        const WHITEOUT = RENAME_WHITEOUT;
44
45        // Allow replacing any file with a directory. This is an internal flag used only
46        // internally inside Starnix for OverlayFS.
47        const REPLACE_ANY = 1 << 31;
48
49        // Internal flags that cannot be passed to `sys_rename()`
50        const INTERNAL = Self::REPLACE_ANY.bits();
51    }
52}
53
54pub trait DirEntryOps: Send + Sync + 'static {
55    /// Revalidate the [`DirEntry`], if needed.
56    ///
57    /// Most filesystems don't need to do any revalidations because they are "local"
58    /// and all changes to nodes go through the kernel. However some filesystems
59    /// allow changes to happen through other means (e.g. NFS, FUSE) and these
60    /// filesystems need a way to let the kernel know it may need to refresh its
61    /// cached metadata. This method provides that hook for such filesystems.
62    ///
63    /// For more details, see:
64    ///  - https://www.halolinux.us/kernel-reference/the-dentry-cache.html
65    ///  - https://www.kernel.org/doc/html/latest/filesystems/path-lookup.html#revalidation-and-automounts
66    ///  - https://lwn.net/Articles/649115/
67    ///  - https://www.infradead.org/~mchehab/kernel_docs/filesystems/path-walking.html
68    ///
69    /// Returns `Ok(valid)` where `valid` indicates if the `DirEntry` is still valid,
70    /// or an error.
71    fn revalidate(
72        &self,
73        _locked: &mut Locked<FileOpsCore>,
74        _: &CurrentTask,
75        _: &DirEntry,
76    ) -> Result<bool, Errno> {
77        Ok(true)
78    }
79}
80
81atomic_bitflags! {
82    #[derive(Clone, Copy, Debug, PartialEq, Eq)]
83    pub struct DirEntryFlags: u8 {
84        /// Whether this directory entry has been removed from the tree.
85        const IS_DEAD = 1 << 0;
86
87        /// Whether the entry has filesystems mounted on top of it.
88        const HAS_MOUNTS = 1 << 1;
89    }
90}
91
92pub struct DefaultDirEntryOps;
93
94impl DirEntryOps for DefaultDirEntryOps {}
95
96/// An entry in a directory.
97///
98/// This structure assigns a name to an FsNode in a given file system. An
99/// FsNode might have multiple directory entries, for example if there are more
100/// than one hard link to the same FsNode. In those cases, each hard link will
101/// have a different parent and a different local_name because each hard link
102/// has its own DirEntry object.
103///
104/// A directory cannot have more than one hard link, which means there is a
105/// single DirEntry for each Directory FsNode. That invariant lets us store the
106/// children for a directory in the DirEntry rather than in the FsNode.
107pub struct DirEntry {
108    /// The FsNode referenced by this DirEntry.
109    ///
110    /// A given FsNode can be referenced by multiple DirEntry objects, for
111    /// example if there are multiple hard links to a given FsNode.
112    pub node: FsNodeHandle,
113
114    /// The [`DirEntryOps`] for this `DirEntry`.
115    ///
116    /// The `DirEntryOps` are implemented by the individual file systems to provide
117    /// specific behaviours for this `DirEntry`.
118    ops: Box<dyn DirEntryOps>,
119
120    /// The parent DirEntry.
121    ///
122    /// The DirEntry tree has strong references from child-to-parent and weak
123    /// references from parent-to-child. This design ensures that the parent
124    /// chain is always populated in the cache, but some children might be
125    /// missing from the cache.
126    parent: RcuOptionArc<DirEntry>,
127
128    /// The [`DirEntryFlags`] for this `DirEntry`.
129    flags: AtomicDirEntryFlags,
130
131    /// The name that this parent calls this child.
132    ///
133    /// This name might not be reflected in the full path in the namespace that
134    /// contains this DirEntry. For example, this DirEntry might be the root of
135    /// a chroot.
136    ///
137    /// Most callers that want to work with names for DirEntries should use the
138    /// NamespaceNodes.
139    local_name: RcuString,
140
141    /// A partial cache of the children of this DirEntry.
142    ///
143    /// DirEntries are added to this cache when they are looked up and removed
144    /// when they are no longer referenced.
145    ///
146    // FIXME(b/379929394): The lock ordering here assumes parent-to-child lock acquisition, which
147    // a number of algorithms in the DirEntry operations also assume. This assumption can be broken
148    // by the rename operation, which can move nodes around the hierarchy. See the referenced bug
149    // for more details, the current mitigations, and potentials for long-term solutions.
150    children: DynamicLockDepRwLock<DirEntryChildren>,
151}
152type DirEntryChildren = BTreeMap<FsString, Weak<DirEntry>>;
153
154pub type DirEntryHandle = Arc<DirEntry>;
155
156impl DirEntry {
157    #[allow(clippy::let_and_return)]
158    pub fn new_uncached(
159        node: FsNodeHandle,
160        parent: Option<DirEntryHandle>,
161        local_name: FsString,
162    ) -> DirEntryHandle {
163        let ops = node.create_dir_entry_ops();
164        let fs_lockdep_type = node.fs().fs_lockdep_type();
165        let result = Arc::new(DirEntry {
166            node,
167            ops,
168            parent: RcuOptionArc::new(parent),
169            flags: Default::default(),
170            local_name: local_name.into(),
171            children: match fs_lockdep_type {
172                FsLockDepType::Normal => {
173                    DynamicLockDepRwLock::new::<DirEntryChildrenLevel>(Default::default())
174                }
175                FsLockDepType::Recursive => {
176                    DynamicLockDepRwLock::new::<DirEntryChildrenRecursiveLevel>(Default::default())
177                }
178                FsLockDepType::Fuse => {
179                    DynamicLockDepRwLock::new::<FuseDirEntryChildrenLevel>(Default::default())
180                }
181            },
182        });
183        #[cfg(any(test, debug_assertions))]
184        {
185            // Taking this lock tells the lock tracing system about the parent/child ordering
186            // relation.
187            let _token = allow_subclass();
188            let _l1 = result.children.read();
189        }
190        result
191    }
192
193    pub fn new(
194        node: FsNodeHandle,
195        parent: Option<DirEntryHandle>,
196        local_name: FsString,
197    ) -> DirEntryHandle {
198        let result = Self::new_uncached(node, parent, local_name);
199        result.node.fs().did_create_dir_entry(&result);
200        result
201    }
202
203    /// Returns a new DirEntry for the given `node` without parent. The entry has no local name and
204    /// is not cached.
205    pub fn new_unrooted(node: FsNodeHandle) -> DirEntryHandle {
206        Self::new_uncached(node, None, FsString::default())
207    }
208
209    /// Returns a new `DirEntry` that is ready marked as having been deleted.
210    pub fn new_deleted(
211        node: FsNodeHandle,
212        parent: Option<DirEntryHandle>,
213        local_name: FsString,
214    ) -> DirEntryHandle {
215        let entry = DirEntry::new_uncached(node, parent, local_name);
216        entry.raise_flags(DirEntryFlags::IS_DEAD);
217        entry
218    }
219
220    /// Returns a file handle to this entry, associated with an anonymous namespace.
221    pub fn open_anonymous<L>(
222        self: &DirEntryHandle,
223        locked: &mut Locked<L>,
224        current_task: &CurrentTask,
225        flags: OpenFlags,
226    ) -> Result<FileHandle, Errno>
227    where
228        L: LockEqualOrBefore<FileOpsCore>,
229    {
230        let ops = self.node.create_file_ops(locked, current_task, flags)?;
231        FileObject::new(
232            locked,
233            current_task,
234            ops,
235            NamespaceNode::new_anonymous(self.clone()),
236            flags,
237        )
238    }
239
240    /// Set the children of this DirEntry to the given `children`. This should only ever be called
241    /// when children is empty.
242    pub fn set_children(self: &DirEntryHandle, children: BTreeMap<FsString, DirEntryHandle>) {
243        let mut dir_entry_children = self.lock_children();
244        assert!(dir_entry_children.children.is_empty());
245        for (name, child) in children.into_iter() {
246            child.set_parent(self.clone());
247            dir_entry_children.children.insert(name, Arc::downgrade(&child));
248        }
249    }
250
251    fn lock_children<'a>(self: &'a DirEntryHandle) -> DirEntryLockedChildren<'a> {
252        DirEntryLockedChildren { entry: self, children: self.children.write() }
253    }
254
255    /// The parent DirEntry.
256    pub fn parent(&self) -> Option<DirEntryHandle> {
257        self.parent.to_option_arc()
258    }
259
260    /// Returns a reference to the parent DirEntry.
261    ///
262    /// The reference is only valid for the duration of the RCU read scope.
263    pub fn parent_ref<'a>(&'a self, scope: &'a RcuReadScope) -> Option<&'a DirEntry> {
264        self.parent.as_ref(scope)
265    }
266
267    /// Set the parent of this DirEntry.
268    pub fn set_parent(&self, parent: DirEntryHandle) {
269        self.parent.update(Some(parent));
270    }
271
272    /// The parent DirEntry object or this DirEntry if this entry is the root.
273    ///
274    /// Useful when traversing up the tree if you always want to find a parent
275    /// (e.g., for "..").
276    ///
277    /// Be aware that the root of one file system might be mounted as a child
278    /// in another file system. For that reason, consider walking the
279    /// NamespaceNode tree (which understands mounts) rather than the DirEntry
280    /// tree.
281    pub fn parent_or_self(self: &DirEntryHandle) -> DirEntryHandle {
282        self.parent().unwrap_or_else(|| self.clone())
283    }
284
285    /// The name that this parent calls this child.
286    ///
287    /// The reference is only valid for the duration of the RCU read scope.
288    pub fn local_name<'a>(&self, scope: &'a RcuReadScope) -> &'a FsStr {
289        self.local_name.read(scope)
290    }
291
292    /// Whether the given name has special semantics as a directory entry.
293    ///
294    /// Specifically, whether the name is empty (which means "self"), dot
295    /// (which also means "self"), or dot dot (which means "parent").
296    pub fn is_reserved_name(name: &FsStr) -> bool {
297        name.is_empty() || name == "." || name == ".."
298    }
299
300    /// Returns the flags of this DirEntry.
301    pub fn flags(&self) -> DirEntryFlags {
302        self.flags.load(Ordering::Acquire)
303    }
304
305    /// Raises the flags of this DirEntry.
306    ///
307    /// Returns the flags of this DirEntry before the flags were raised.
308    pub fn raise_flags(&self, flags: DirEntryFlags) -> DirEntryFlags {
309        self.flags.fetch_or(flags, Ordering::AcqRel)
310    }
311
312    /// Lowers the flags of this DirEntry.
313    ///
314    /// Returns the flags of this DirEntry before the flags were lowered.
315    pub fn lower_flags(&self, flags: DirEntryFlags) -> DirEntryFlags {
316        self.flags.fetch_and(!flags, Ordering::AcqRel)
317    }
318
319    /// Returns true if this DirEntry is dead.
320    pub fn is_dead(&self) -> bool {
321        self.flags().contains(DirEntryFlags::IS_DEAD)
322    }
323
324    /// Look up a directory entry with the given name as direct child of this
325    /// entry.
326    pub fn component_lookup<L>(
327        self: &DirEntryHandle,
328        locked: &mut Locked<L>,
329        current_task: &CurrentTask,
330        mount: &MountInfo,
331        name: &FsStr,
332    ) -> Result<DirEntryHandle, Errno>
333    where
334        L: LockEqualOrBefore<FileOpsCore>,
335    {
336        let (node, _) = self.get_or_create_child(
337            locked,
338            current_task,
339            mount,
340            name,
341            |locked, d, mount, name| d.lookup(locked, current_task, mount, name),
342        )?;
343        Ok(node)
344    }
345
346    pub fn get_children_pipelined<L>(
347        self: &DirEntryHandle,
348        locked: &mut Locked<L>,
349        current_task: &CurrentTask,
350        mount: &MountInfo,
351        names: &[&FsStr],
352    ) -> LookupVec<Result<DirEntryHandle, Errno>>
353    where
354        L: LockEqualOrBefore<FileOpsCore>,
355    {
356        let locked = locked.cast_locked::<FileOpsCore>();
357
358        let mut nodes = LookupVec::new();
359        let mut results = LookupVec::new();
360        let mut current_parent = self.clone();
361        for i in 0..names.len() {
362            let next_node = nodes.pop();
363            match current_parent.get_or_create_child(
364                locked,
365                current_task,
366                mount,
367                names[i],
368                |locked, parent_node, _mount, _name| {
369                    if let Some(node) = next_node {
370                        return node;
371                    }
372                    nodes = parent_node.ops().lookup_pipelined(
373                        locked,
374                        parent_node,
375                        current_task,
376                        &names[i..],
377                    );
378                    nodes.reverse();
379                    nodes.pop().unwrap()
380                },
381            ) {
382                Ok((entry, _)) => {
383                    results.push(Ok(entry.clone()));
384                    current_parent = entry;
385                }
386                Err(e) => {
387                    results.push(Err(e));
388                    break;
389                }
390            }
391        }
392        results
393    }
394
395    /// Creates a new DirEntry
396    ///
397    /// The create_node_fn function is called to create the underlying FsNode
398    /// for the DirEntry.
399    ///
400    /// If the entry already exists, create_node_fn is not called, and EEXIST is
401    /// returned.
402    pub fn create_entry<L>(
403        self: &DirEntryHandle,
404        locked: &mut Locked<L>,
405        current_task: &CurrentTask,
406        mount: &MountInfo,
407        name: &FsStr,
408        create_node_fn: impl FnOnce(
409            &mut Locked<L>,
410            &FsNodeHandle,
411            &MountInfo,
412            &FsStr,
413        ) -> Result<FsNodeHandle, Errno>,
414    ) -> Result<DirEntryHandle, Errno>
415    where
416        L: LockEqualOrBefore<FileOpsCore>,
417    {
418        let (entry, exists) =
419            self.create_entry_internal(locked, current_task, mount, name, create_node_fn)?;
420        if exists {
421            return error!(EEXIST);
422        }
423        Ok(entry)
424    }
425
426    /// Creates a new DirEntry. Works just like create_entry, except if the entry already exists,
427    /// it is returned.
428    pub fn get_or_create_entry<L>(
429        self: &DirEntryHandle,
430        locked: &mut Locked<L>,
431        current_task: &CurrentTask,
432        mount: &MountInfo,
433        name: &FsStr,
434        create_node_fn: impl FnOnce(
435            &mut Locked<L>,
436            &FsNodeHandle,
437            &MountInfo,
438            &FsStr,
439        ) -> Result<FsNodeHandle, Errno>,
440    ) -> Result<DirEntryHandle, Errno>
441    where
442        L: LockEqualOrBefore<FileOpsCore>,
443    {
444        let (entry, _exists) =
445            self.create_entry_internal(locked, current_task, mount, name, create_node_fn)?;
446        Ok(entry)
447    }
448
449    fn create_entry_internal<L>(
450        self: &DirEntryHandle,
451        locked: &mut Locked<L>,
452        current_task: &CurrentTask,
453        mount: &MountInfo,
454        name: &FsStr,
455        create_node_fn: impl FnOnce(
456            &mut Locked<L>,
457            &FsNodeHandle,
458            &MountInfo,
459            &FsStr,
460        ) -> Result<FsNodeHandle, Errno>,
461    ) -> Result<(DirEntryHandle, bool), Errno>
462    where
463        L: LockEqualOrBefore<FileOpsCore>,
464    {
465        if DirEntry::is_reserved_name(name) {
466            return error!(EEXIST);
467        }
468        // TODO: Do we need to check name for embedded NUL characters?
469        if name.len() > NAME_MAX as usize {
470            return error!(ENAMETOOLONG);
471        }
472        if name.contains(&path::SEPARATOR) {
473            return error!(EINVAL);
474        }
475        let (entry, exists) =
476            self.get_or_create_child(locked, current_task, mount, name, create_node_fn)?;
477        if !exists {
478            // An entry was created. Update the ctime and mtime of this directory.
479            self.node.update_ctime_mtime();
480            entry.notify_creation();
481        }
482        Ok((entry, exists))
483    }
484
485    // This is marked as test-only because it sets the owner/group to root instead of the current
486    // user to save a bit of typing in tests, but this shouldn't happen silently in production.
487    #[cfg(test)]
488    pub fn create_dir<L>(
489        self: &DirEntryHandle,
490        locked: &mut starnix_sync::Locked<L>,
491        current_task: &CurrentTask,
492        name: &FsStr,
493    ) -> Result<DirEntryHandle, Errno>
494    where
495        L: LockEqualOrBefore<FileOpsCore>,
496    {
497        self.create_dir_for_testing(locked, current_task, name)
498    }
499
500    // This function is for testing because it sets the owner/group to root instead of the current
501    // user to save a bit of typing in tests, but this shouldn't happen silently in production.
502    pub fn create_dir_for_testing<L>(
503        self: &DirEntryHandle,
504        locked: &mut Locked<L>,
505        current_task: &CurrentTask,
506        name: &FsStr,
507    ) -> Result<DirEntryHandle, Errno>
508    where
509        L: LockEqualOrBefore<FileOpsCore>,
510    {
511        // TODO: apply_umask
512        self.create_entry(
513            locked,
514            current_task,
515            &MountInfo::detached(),
516            name,
517            |locked, dir, mount, name| {
518                dir.create_node(
519                    locked,
520                    current_task,
521                    mount,
522                    name,
523                    starnix_uapi::file_mode::mode!(IFDIR, 0o777),
524                    starnix_uapi::device_id::DeviceId::NONE,
525                    FsCred::root(),
526                )
527            },
528        )
529    }
530
531    /// Creates an anonymous file.
532    ///
533    /// The FileMode::IFMT of the FileMode is always FileMode::IFREG.
534    ///
535    /// Used by O_TMPFILE.
536    pub fn create_tmpfile<L>(
537        self: &DirEntryHandle,
538        locked: &mut Locked<L>,
539        current_task: &CurrentTask,
540        mount: &MountInfo,
541        mode: FileMode,
542        owner: FsCred,
543        flags: OpenFlags,
544    ) -> Result<DirEntryHandle, Errno>
545    where
546        L: LockEqualOrBefore<FileOpsCore>,
547    {
548        // Only directories can have children.
549        if !self.node.is_dir() {
550            return error!(ENOTDIR);
551        }
552        assert!(mode.is_reg());
553
554        // From <https://man7.org/linux/man-pages/man2/open.2.html>:
555        //
556        //   Specifying O_EXCL in conjunction with O_TMPFILE prevents a
557        //   temporary file from being linked into the filesystem in
558        //   the above manner.  (Note that the meaning of O_EXCL in
559        //   this case is different from the meaning of O_EXCL
560        //   otherwise.)
561        let link_behavior = if flags.contains(OpenFlags::EXCL) {
562            FsNodeLinkBehavior::Disallowed
563        } else {
564            FsNodeLinkBehavior::Allowed
565        };
566
567        let node =
568            self.node.create_tmpfile(locked, current_task, mount, mode, owner, link_behavior)?;
569        let local_name = format!("#{}", node.ino).into();
570        Ok(DirEntry::new_deleted(node, Some(self.clone()), local_name))
571    }
572
573    pub fn unlink<L>(
574        self: &DirEntryHandle,
575        locked: &mut Locked<L>,
576        current_task: &CurrentTask,
577        mount: &MountInfo,
578        name: &FsStr,
579        kind: UnlinkKind,
580        must_be_directory: bool,
581    ) -> Result<(), Errno>
582    where
583        L: LockEqualOrBefore<FileOpsCore>,
584    {
585        assert!(!DirEntry::is_reserved_name(name));
586
587        // child_to_unlink *must* be dropped after self_children (even in the error paths).
588        let child_to_unlink;
589
590        let mut self_children = self.lock_children();
591        child_to_unlink = self_children.component_lookup(locked, current_task, mount, name)?;
592        child_to_unlink.require_no_mounts(mount)?;
593
594        // Check that this filesystem entry must be a directory. This can
595        // happen if the path terminates with a trailing slash.
596        //
597        // Example: If we're unlinking a symlink `/foo/bar/`, this would
598        // result in `ENOTDIR` because of the trailing slash, even if
599        // `UnlinkKind::NonDirectory` was used.
600        if must_be_directory && !child_to_unlink.node.is_dir() {
601            return error!(ENOTDIR);
602        }
603
604        match kind {
605            UnlinkKind::Directory => {
606                if !child_to_unlink.node.is_dir() {
607                    return error!(ENOTDIR);
608                }
609            }
610            UnlinkKind::NonDirectory => {
611                if child_to_unlink.node.is_dir() {
612                    return error!(EISDIR);
613                }
614            }
615        }
616
617        self.node.unlink(locked, current_task, mount, name, &child_to_unlink.node)?;
618        self_children.children.remove(name);
619
620        std::mem::drop(self_children);
621        child_to_unlink.destroy(&current_task.kernel().mounts);
622
623        Ok(())
624    }
625
626    /// Destroy this directory entry.
627    ///
628    /// Notice that this method takes `self` by value to destroy this reference.
629    fn destroy(self: DirEntryHandle, mounts: &Mounts) {
630        let was_already_dead =
631            self.raise_flags(DirEntryFlags::IS_DEAD).contains(DirEntryFlags::IS_DEAD);
632        if was_already_dead {
633            return;
634        }
635        let unmount =
636            self.lower_flags(DirEntryFlags::HAS_MOUNTS).contains(DirEntryFlags::HAS_MOUNTS);
637        self.node.fs().will_destroy_dir_entry(&self);
638        if unmount {
639            mounts.unmount(&self);
640        }
641        self.notify_deletion();
642    }
643
644    /// Returns whether this entry is a descendant of |other|.
645    pub fn is_descendant_of(self: &DirEntryHandle, other: &DirEntryHandle) -> bool {
646        let scope = RcuReadScope::new();
647        let mut current = self.deref();
648        loop {
649            if std::ptr::eq(current, other.deref()) {
650                // We found |other|.
651                return true;
652            }
653            if let Some(parent) = current.parent_ref(&scope) {
654                current = parent;
655            } else {
656                // We reached the root of the file system.
657                return false;
658            }
659        }
660    }
661
662    /// Rename the file with old_basename in old_parent to new_basename in
663    /// new_parent.
664    ///
665    /// old_parent and new_parent must belong to the same file system.
666    pub fn rename<L>(
667        locked: &mut Locked<L>,
668        current_task: &CurrentTask,
669        old_parent: &DirEntryHandle,
670        old_mount: &MountInfo,
671        old_basename: &FsStr,
672        new_parent: &DirEntryHandle,
673        new_mount: &MountInfo,
674        new_basename: &FsStr,
675        flags: RenameFlags,
676    ) -> Result<(), Errno>
677    where
678        L: LockEqualOrBefore<FileOpsCore>,
679    {
680        // The nodes we are touching must be part of the same mount.
681        if old_mount != new_mount {
682            return error!(EXDEV);
683        }
684
685        // The mounts are equals, choose one.
686        let mount = old_mount;
687
688        // If either the old_basename or the new_basename is a reserved name
689        // (e.g., "." or ".."), then we cannot do the rename.
690        if DirEntry::is_reserved_name(old_basename) || DirEntry::is_reserved_name(new_basename) {
691            if flags.contains(RenameFlags::NOREPLACE) {
692                return error!(EEXIST);
693            }
694            return error!(EBUSY);
695        }
696
697        // If the names and parents are the same, then there's nothing to do
698        // and we can report success.
699        if Arc::ptr_eq(&old_parent.node, &new_parent.node) && old_basename == new_basename {
700            return Ok(());
701        }
702
703        // This task must have write access to the old and new parent nodes.
704        old_parent.node.check_access(
705            locked,
706            current_task,
707            mount,
708            Access::WRITE,
709            CheckAccessReason::InternalPermissionChecks,
710            old_parent,
711        )?;
712        new_parent.node.check_access(
713            locked,
714            current_task,
715            mount,
716            Access::WRITE,
717            CheckAccessReason::InternalPermissionChecks,
718            new_parent,
719        )?;
720
721        // The mount check ensures that the nodes we're touching are part of the
722        // same file system. It doesn't matter where we grab the FileSystem reference from.
723        let fs = old_parent.node.fs();
724
725        // We need to hold these DirEntryHandles until after we drop all the
726        // locks so that we do not deadlock when we drop them.
727        let renamed;
728        let mut maybe_replaced = None;
729
730        {
731            // Before we take any locks, we need to take the rename mutex on
732            // the file system. This lock ensures that no other rename
733            // operations are happening in this file system while we're
734            // analyzing this rename operation.
735            //
736            // For example, we grab writer locks on both old_parent and
737            // new_parent. If there was another rename operation in flight with
738            // old_parent and new_parent reversed, then we could deadlock while
739            // trying to acquire these locks.
740            let _lock = fs.rename_mutex.lock();
741
742            // We cannot simply grab the locks on old_parent and new_parent
743            // independently because old_parent and new_parent might be the
744            // same directory entry. Instead, we use the RenameGuard helper to
745            // grab the appropriate locks.
746            let mut state = RenameGuard::lock(old_parent, new_parent);
747
748            // Now that we know the old_parent child list cannot change, we
749            // establish the DirEntry that we are going to try to rename.
750            renamed = state.old_parent_children().component_lookup(
751                locked,
752                current_task,
753                mount,
754                old_basename,
755            )?;
756
757            // We need to check if there is already a DirEntry with
758            // new_basename in new_parent. If so, there are additional checks
759            // we need to perform.
760            // This must be done BEFORE locking info to avoid self-deadlock.
761            let lookup_replaced = state.new_parent_children().component_lookup(
762                locked,
763                current_task,
764                mount,
765                new_basename,
766            );
767
768            // If the target entry is an ancestor of the source parent, the
769            // rename would create a cycle (for EXCHANGE) or attempt to
770            // overwrite a non-empty directory.
771            // We check this before acquiring child locks to avoid
772            // deadlocks from bottom-up locking (locking ancestor after
773            // descendant).
774            // This is done early (before parent locks) as a fail-fast
775            // optimization since we already have the lookup result.
776            if let Ok(replaced) = &lookup_replaced {
777                if old_parent.is_descendant_of(replaced) {
778                    if flags.contains(RenameFlags::EXCHANGE) {
779                        return error!(EINVAL);
780                    } else {
781                        return error!(ENOTEMPTY);
782                    }
783                }
784            }
785
786            // Lock the info for the parents to ensure that subsequent checks on their
787            // state (e.g., sticky bit checks) and the actual rename operation are not racy.
788            let mut state =
789                state.lock_info(old_parent, new_parent, &renamed, lookup_replaced.as_ref().ok());
790
791            // Check whether the sticky bit on the old parent prevents us from
792            // removing this child.
793            {
794                // Safe because the parent is locked first, and then we check the
795                // sticky bit of the child. This parent -> child acquisition follows
796                // the hierarchical lock ordering.
797                let _token = allow_subclass();
798                old_parent.node.check_sticky_bit(
799                    current_task,
800                    &renamed.node,
801                    state.old_parent_info(),
802                )?;
803            }
804
805            // If new_parent is a descendant of renamed, the operation would
806            // create a cycle. That's disallowed.
807            if new_parent.is_descendant_of(&renamed) {
808                return error!(EINVAL);
809            }
810
811            // Check whether the renamed entry is a mountpoint.
812            // TODO: We should hold a read lock on the mount points for this
813            //       namespace to prevent the child from becoming a mount point
814            //       while this function is executing.
815            renamed.require_no_mounts(mount)?;
816
817            // We lookup the replaced entry before locking info to avoid deadlock, but we match
818            // on the result here under the parent info locks. This ensures that checks on the
819            // replaced entry (e.g., existence, directory status, identity) are consistent and
820            // do not race with concurrent operations.
821            match &lookup_replaced {
822                Ok(replaced) => {
823                    // Set `maybe_replaced` now to ensure it gets dropped in the right order.
824                    let replaced = maybe_replaced.insert(replaced.clone());
825
826                    if flags.contains(RenameFlags::NOREPLACE) {
827                        return error!(EEXIST);
828                    }
829
830                    // Sayeth https://man7.org/linux/man-pages/man2/rename.2.html:
831                    //
832                    // "If oldpath and newpath are existing hard links referring to the
833                    // same file, then rename() does nothing, and returns a success
834                    // status."
835                    if Arc::ptr_eq(&renamed.node, &replaced.node) {
836                        return Ok(());
837                    }
838
839                    // Sayeth https://man7.org/linux/man-pages/man2/rename.2.html:
840                    //
841                    // "oldpath can specify a directory.  In this case, newpath must"
842                    // either not exist, or it must specify an empty directory."
843                    if state.replaced_is_dir() {
844                        // Check whether the replaced entry is a mountpoint.
845                        // TODO: We should hold a read lock on the mount points for this
846                        //       namespace to prevent the child from becoming a mount point
847                        //       while this function is executing.
848                        replaced.require_no_mounts(mount)?;
849                    }
850
851                    if !flags.intersects(RenameFlags::EXCHANGE | RenameFlags::REPLACE_ANY) {
852                        let renamed_is_dir = state.renamed_is_dir();
853                        let replaced_is_dir = state.replaced_is_dir();
854                        if renamed_is_dir && !replaced_is_dir {
855                            return error!(ENOTDIR);
856                        } else if !renamed_is_dir && replaced_is_dir {
857                            return error!(EISDIR);
858                        }
859                    }
860                }
861                // It's fine for the lookup to fail to find a child.
862                Err(errno) if *errno == ENOENT => {
863                    if flags.contains(RenameFlags::EXCHANGE) {
864                        return error!(ENOENT);
865                    }
866                }
867                // However, other errors are fatal.
868                Err(e) => return Err(e.clone()),
869            }
870
871            security::check_fs_node_rename_access(
872                current_task,
873                &old_parent.node,
874                &renamed.node,
875                &new_parent.node,
876                maybe_replaced.as_ref().map(|dir_entry| dir_entry.node.deref().as_ref()),
877                old_basename,
878                new_basename,
879            )?;
880
881            if let Some(replaced) = maybe_replaced.as_ref() {
882                // Safe because the parent is locked first, and then we check the
883                // sticky bit of the child. This parent -> child acquisition
884                // follows the hierarchical lock ordering.
885                let _token = allow_subclass();
886                new_parent.node.check_sticky_bit(
887                    current_task,
888                    &replaced.node,
889                    state.new_parent_info().unwrap_or_else(|| state.old_parent_info()),
890                )?;
891            }
892
893            // We've found all the errors that we know how to find. Ask the
894            // file system to actually execute the rename operation. Once the
895            // file system has executed the rename, we are no longer allowed to
896            // fail because we will not be able to return the system to a
897            // consistent state.
898
899            if flags.contains(RenameFlags::EXCHANGE) {
900                fs.exchange(current_task, &mut state, old_basename, new_basename)?;
901            } else {
902                fs.rename(locked, current_task, &mut state, old_basename, new_basename)?;
903            }
904
905            // We need to update the parent and local name for the DirEntry
906            // we are renaming to reflect its new parent and its new name.
907            renamed.set_parent(new_parent.clone());
908            renamed.local_name.update(new_basename.to_owned());
909
910            // Actually add the renamed child to the new_parent's child list.
911            // This operation implicitly removes the replaced child (if any)
912            // from the child list.
913            state
914                .new_parent_children()
915                .children
916                .insert(new_basename.into(), Arc::downgrade(&renamed));
917
918            // Lock ordering is enforced from parent-to-child, and therefore we need to
919            // reset the lock ordering constraints when we reorder the tree nodes.
920            // SAFETY: We manually clear the dependency graph for these locks.
921            // This is safe because `fs.rename_mutex` is held during this operation, which
922            // prevents the tree topology from changing concurrently. This allows us to safely
923            // dynamically enforce a sound locking order (e.g. by memory address in `RenameGuard`)
924            // to avoid deadlocks. Clearing the graph prevents false-positive cycle panics from `tracing-mutex`
925            // after the node is reparented.
926            unsafe {
927                renamed.children.reset_dependencies();
928                renamed.node.info_lock().reset_dependencies();
929                old_parent.children.reset_dependencies();
930                old_parent.node.info_lock().reset_dependencies();
931                new_parent.children.reset_dependencies();
932                new_parent.node.info_lock().reset_dependencies();
933            }
934
935            if flags.contains(RenameFlags::EXCHANGE) {
936                // Reparent `replaced` when exchanging.
937                let replaced =
938                    maybe_replaced.as_ref().expect("replaced expected with RENAME_EXCHANGE");
939                replaced.set_parent(old_parent.clone());
940                replaced.local_name.update(old_basename.to_owned());
941                state
942                    .old_parent_children()
943                    .children
944                    .insert(old_basename.into(), Arc::downgrade(replaced));
945
946                // Lock ordering is enforced from parent-to-child, and therefore we need to
947                // reset the lock ordering constraints when we reorder the tree nodes.
948                // SAFETY: See the comment above for `renamed` lock resetting.
949                unsafe {
950                    replaced.children.reset_dependencies();
951                    replaced.node.info_lock().reset_dependencies();
952                }
953            } else {
954                // Remove the renamed child from the old_parent's child list.
955                state.old_parent_children().children.remove(old_basename);
956            }
957        };
958
959        fs.purge_old_entries();
960
961        if let Some(replaced) = maybe_replaced {
962            if !flags.contains(RenameFlags::EXCHANGE) {
963                replaced.destroy(&current_task.kernel().mounts);
964            }
965        }
966
967        // Renaming a file updates its ctime.
968        renamed.node.update_ctime();
969
970        let mode = renamed.node.info().mode;
971        let cookie = current_task.kernel().get_next_inotify_cookie();
972        old_parent.node.notify(InotifyMask::MOVE_FROM, cookie, old_basename, mode, false);
973        new_parent.node.notify(InotifyMask::MOVE_TO, cookie, new_basename, mode, false);
974        renamed.node.notify(InotifyMask::MOVE_SELF, 0, Default::default(), mode, false);
975
976        Ok(())
977    }
978
979    pub fn get_children<F, T>(&self, callback: F) -> T
980    where
981        F: FnOnce(&DirEntryChildren) -> T,
982    {
983        let children = self.children.read();
984        callback(&children)
985    }
986
987    /// Remove the child with the given name from the children cache.  The child must not have any
988    /// mounts.
989    pub fn remove_child(&self, name: &FsStr, mounts: &Mounts) {
990        let mut children = self.children.write();
991        let child = children.get(name).and_then(Weak::upgrade);
992        if let Some(child) = child {
993            children.remove(name);
994            std::mem::drop(children);
995            child.destroy(mounts);
996        }
997    }
998
999    fn get_or_create_child<L>(
1000        self: &DirEntryHandle,
1001        locked: &mut Locked<L>,
1002        current_task: &CurrentTask,
1003        mount: &MountInfo,
1004        name: &FsStr,
1005        create_fn: impl FnOnce(
1006            &mut Locked<L>,
1007            &FsNodeHandle,
1008            &MountInfo,
1009            &FsStr,
1010        ) -> Result<FsNodeHandle, Errno>,
1011    ) -> Result<(DirEntryHandle, bool), Errno>
1012    where
1013        L: LockEqualOrBefore<FileOpsCore>,
1014    {
1015        assert!(!DirEntry::is_reserved_name(name));
1016        // Only directories can have children.
1017        if !self.node.is_dir() {
1018            return error!(ENOTDIR);
1019        }
1020        // The user must be able to search the directory (requires the EXEC permission)
1021        self.node.check_access(
1022            locked,
1023            current_task,
1024            mount,
1025            Access::EXEC,
1026            CheckAccessReason::InternalPermissionChecks,
1027            self,
1028        )?;
1029
1030        // Check if the child is already in children. In that case, we can
1031        // simply return the child and we do not need to call init_fn.
1032        let child = self.children.read().get(name).and_then(Weak::upgrade);
1033        let (child, create_result) = if let Some(child) = child {
1034            // Do not cache a child in a locked directory
1035            if self.node.fail_if_locked(current_task, &self.node.info()).is_ok() {
1036                child.node.fs().did_access_dir_entry(&child);
1037            }
1038            (child, CreationResult::Existed { create_fn })
1039        } else {
1040            let (child, create_result) = self.lock_children().get_or_create_child(
1041                locked,
1042                current_task,
1043                mount,
1044                name,
1045                create_fn,
1046            )?;
1047            child.node.fs().purge_old_entries();
1048            (child, create_result)
1049        };
1050
1051        let (child, exists) = match create_result {
1052            CreationResult::Created => (child, false),
1053            CreationResult::Existed { create_fn } => {
1054                if child.ops.revalidate(
1055                    locked.cast_locked::<FileOpsCore>(),
1056                    current_task,
1057                    &child,
1058                )? {
1059                    (child, true)
1060                } else {
1061                    self.internal_remove_child(&child);
1062                    child.destroy(&current_task.kernel().mounts);
1063
1064                    let (child, create_result) = self.lock_children().get_or_create_child(
1065                        locked,
1066                        current_task,
1067                        mount,
1068                        name,
1069                        create_fn,
1070                    )?;
1071                    child.node.fs().purge_old_entries();
1072                    (child, matches!(create_result, CreationResult::Existed { .. }))
1073                }
1074            }
1075        };
1076
1077        Ok((child, exists))
1078    }
1079
1080    // This function is only useful for tests and has some oddities.
1081    //
1082    // For example, not all the children might have been looked up yet, which
1083    // means the returned vector could be missing some names.
1084    //
1085    // Also, the vector might have "extra" names that are in the process of
1086    // being looked up. If the lookup fails, they'll be removed.
1087    #[cfg(test)]
1088    pub fn copy_child_names(&self) -> Vec<FsString> {
1089        let scope = RcuReadScope::new();
1090        self.children
1091            .read()
1092            .values()
1093            .filter_map(|child| Weak::upgrade(child).map(|c| c.local_name.read(&scope).to_owned()))
1094            .collect()
1095    }
1096
1097    fn internal_remove_child(&self, child: &DirEntry) {
1098        let mut children = self.children.write();
1099        let scope = RcuReadScope::new();
1100        let local_name = child.local_name.read(&scope);
1101        if let Some(weak_child) = children.get(local_name) {
1102            // If this entry is occupied, we need to check whether child is
1103            // the current occupant. If so, we should remove the entry
1104            // because the child no longer exists.
1105            if std::ptr::eq(weak_child.as_ptr(), child) {
1106                children.remove(local_name);
1107            }
1108        }
1109    }
1110
1111    /// Notifies watchers on the current node and its parent about an event.
1112    pub fn notify(&self, event_mask: InotifyMask) {
1113        self.notify_watchers(event_mask, self.is_dead());
1114    }
1115
1116    /// Notifies watchers on the current node and its parent about an event.
1117    ///
1118    /// Used for FSNOTIFY_EVENT_INODE events, which ignore IN_EXCL_UNLINK.
1119    pub fn notify_ignoring_excl_unlink(&self, event_mask: InotifyMask) {
1120        // We pretend that this directory entry is not dead to ignore IN_EXCL_UNLINK.
1121        self.notify_watchers(event_mask, false);
1122    }
1123
1124    fn notify_watchers(&self, event_mask: InotifyMask, is_dead: bool) {
1125        let mode = self.node.info().mode;
1126        {
1127            let scope = RcuReadScope::new();
1128            if let Some(parent) = self.parent_ref(&scope) {
1129                let local_name = self.local_name.read(&scope);
1130                parent.node.notify(event_mask, 0, local_name, mode, is_dead);
1131            }
1132        }
1133        self.node.notify(event_mask, 0, Default::default(), mode, is_dead);
1134    }
1135
1136    /// Notifies parents about creation, and notifies current node about link_count change.
1137    fn notify_creation(&self) {
1138        let mode = self.node.info().mode;
1139        if Arc::strong_count(&self.node) > 1 {
1140            // Notify about link change only if there is already a hardlink.
1141            self.node.notify(InotifyMask::ATTRIB, 0, Default::default(), mode, false);
1142        }
1143        let scope = RcuReadScope::new();
1144        if let Some(parent) = self.parent_ref(&scope) {
1145            let local_name = self.local_name.read(&scope);
1146            parent.node.notify(InotifyMask::CREATE, 0, local_name, mode, false);
1147        }
1148    }
1149
1150    /// Notifies watchers on the current node about deletion if this is the
1151    /// last hardlink, and drops the DirEntryHandle kept by Inotify.
1152    /// Parent is also notified about deletion.
1153    fn notify_deletion(&self) {
1154        let mode = self.node.info().mode;
1155        if !mode.is_dir() {
1156            // Linux notifies link count change for non-directories.
1157            self.node.notify(InotifyMask::ATTRIB, 0, Default::default(), mode, false);
1158        }
1159
1160        // This check is incorrect if there's another hard link to this FsNode that isn't in
1161        // memory at the moment.
1162        if Arc::strong_count(&self.node) == 1 {
1163            self.node.notify(InotifyMask::DELETE_SELF, 0, Default::default(), mode, false);
1164        }
1165
1166        let scope = RcuReadScope::new();
1167        if let Some(parent) = self.parent_ref(&scope) {
1168            let local_name = self.local_name.read(&scope);
1169            parent.node.notify(InotifyMask::DELETE, 0, local_name, mode, false);
1170        }
1171    }
1172
1173    /// Returns true if this entry has mounts.
1174    pub fn has_mounts(&self) -> bool {
1175        self.flags().contains(DirEntryFlags::HAS_MOUNTS)
1176    }
1177
1178    /// Records whether or not the entry has mounts.
1179    pub fn set_has_mounts(&self, v: bool) {
1180        if v {
1181            self.raise_flags(DirEntryFlags::HAS_MOUNTS);
1182        } else {
1183            self.lower_flags(DirEntryFlags::HAS_MOUNTS);
1184        }
1185    }
1186
1187    /// Verifies this directory has nothing mounted on it.
1188    fn require_no_mounts(self: &Arc<Self>, parent_mount: &MountInfo) -> Result<(), Errno> {
1189        if self.has_mounts() {
1190            if let Some(mount) = parent_mount.as_ref() {
1191                if mount.read().has_submount(self) {
1192                    return error!(EBUSY);
1193                }
1194            }
1195        }
1196        Ok(())
1197    }
1198}
1199
1200struct DirEntryLockedChildren<'a> {
1201    entry: &'a DirEntryHandle,
1202    children: LockDepWriteGuard<'a, DirEntryChildren>,
1203}
1204
1205enum CreationResult<F> {
1206    Created,
1207    Existed { create_fn: F },
1208}
1209
1210impl<'a> DirEntryLockedChildren<'a> {
1211    fn component_lookup<L>(
1212        &mut self,
1213        locked: &mut Locked<L>,
1214        current_task: &CurrentTask,
1215        mount: &MountInfo,
1216        name: &FsStr,
1217    ) -> Result<DirEntryHandle, Errno>
1218    where
1219        L: LockEqualOrBefore<FileOpsCore>,
1220    {
1221        assert!(!DirEntry::is_reserved_name(name));
1222        let (node, _) =
1223            self.get_or_create_child(locked, current_task, mount, name, |_, _, _, _| {
1224                error!(ENOENT)
1225            })?;
1226        Ok(node)
1227    }
1228
1229    fn get_or_create_child<
1230        L,
1231        F: FnOnce(&mut Locked<L>, &FsNodeHandle, &MountInfo, &FsStr) -> Result<FsNodeHandle, Errno>,
1232    >(
1233        &mut self,
1234        locked: &mut Locked<L>,
1235        current_task: &CurrentTask,
1236        mount: &MountInfo,
1237        name: &FsStr,
1238        create_fn: F,
1239    ) -> Result<(DirEntryHandle, CreationResult<F>), Errno>
1240    where
1241        L: LockEqualOrBefore<FileOpsCore>,
1242    {
1243        let create_child = |locked: &mut Locked<L>, create_fn: F| {
1244            // Before creating the child, check for existence.
1245            let (node, create_result) =
1246                match self.entry.node.lookup(locked, current_task, mount, name) {
1247                    Ok(node) => (node, CreationResult::Existed { create_fn }),
1248                    Err(e) if e == ENOENT => {
1249                        (create_fn(locked, &self.entry.node, mount, name)?, CreationResult::Created)
1250                    }
1251                    Err(e) => return Err(e),
1252                };
1253
1254            assert!(
1255                node.info().mode & FileMode::IFMT != FileMode::EMPTY,
1256                "FsNode initialization did not populate the FileMode in FsNodeInfo."
1257            );
1258
1259            let entry = DirEntry::new(node, Some(self.entry.clone()), name.to_owned());
1260
1261            if let Err(err) = security::fs_node_init_with_dentry(locked, current_task, &entry) {
1262                // Null out the `parent` reference from `entry` otherwise dropping `entry` will
1263                // attempt to remove itself from `parent`, triggering a deadlock with `self`.
1264                entry.parent.update(None);
1265                return Err(err);
1266            }
1267
1268            Ok((entry, create_result))
1269        };
1270
1271        let (child, create_result) = match self.children.entry(name.to_owned()) {
1272            Entry::Vacant(entry) => {
1273                let (child, create_result) = create_child(locked, create_fn)?;
1274                // Do not cache a child in a locked directory
1275                if self.entry.node.fail_if_locked(current_task, &self.entry.node.info()).is_ok() {
1276                    entry.insert(Arc::downgrade(&child));
1277                }
1278                (child, create_result)
1279            }
1280            Entry::Occupied(mut entry) => {
1281                // It's possible that the upgrade will succeed this time around because we dropped
1282                // the read lock before acquiring the write lock. Another thread might have
1283                // populated this entry while we were not holding any locks.
1284                if let Some(child) = Weak::upgrade(entry.get()) {
1285                    // Do not cache a child in a locked directory
1286                    if self.entry.node.fail_if_locked(current_task, &self.entry.node.info()).is_ok()
1287                    {
1288                        child.node.fs().did_access_dir_entry(&child);
1289                    }
1290                    return Ok((child, CreationResult::Existed { create_fn }));
1291                }
1292                let (child, create_result) = create_child(locked, create_fn)?;
1293                // Do not cache a child in a locked directory
1294                if self.entry.node.fail_if_locked(current_task, &self.entry.node.info()).is_ok() {
1295                    entry.insert(Arc::downgrade(&child));
1296                }
1297                (child, create_result)
1298            }
1299        };
1300
1301        Ok((child, create_result))
1302    }
1303}
1304
1305impl fmt::Debug for DirEntry {
1306    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
1307        let scope = RcuReadScope::new();
1308        let mut parents = vec![];
1309        let mut maybe_parent = self.parent_ref(&scope);
1310        while let Some(parent) = maybe_parent {
1311            parents.push(parent.local_name.read(&scope));
1312            maybe_parent = parent.parent_ref(&scope);
1313        }
1314        let mut builder = f.debug_struct("DirEntry");
1315        builder.field("id", &(self as *const DirEntry));
1316        builder.field("local_name", &self.local_name.read(&scope).to_owned());
1317        if !parents.is_empty() {
1318            builder.field("parents", &parents);
1319        }
1320        builder.finish()
1321    }
1322}
1323
1324#[derive(Copy, Clone, Debug, Eq, PartialEq)]
1325enum RenameRelationship {
1326    Same,
1327    NewIsDescendant,
1328    OldIsDescendant,
1329    Independent,
1330}
1331
1332struct RenameGuard<'a> {
1333    old_parent_guard: DirEntryLockedChildren<'a>,
1334    new_parent_guard: Option<DirEntryLockedChildren<'a>>,
1335    relationship: RenameRelationship,
1336}
1337
1338impl<'a> RenameGuard<'a> {
1339    fn lock(old_parent: &'a DirEntryHandle, new_parent: &'a DirEntryHandle) -> Self {
1340        if Arc::ptr_eq(old_parent, new_parent) {
1341            let old_parent_guard = old_parent.lock_children();
1342            Self {
1343                old_parent_guard,
1344                new_parent_guard: None,
1345                relationship: RenameRelationship::Same,
1346            }
1347        } else if new_parent.is_descendant_of(old_parent) {
1348            let old_parent_guard = old_parent.lock_children();
1349            let _token = allow_subclass();
1350            let new_parent_guard = new_parent.lock_children();
1351            Self {
1352                old_parent_guard,
1353                new_parent_guard: Some(new_parent_guard),
1354                relationship: RenameRelationship::NewIsDescendant,
1355            }
1356        } else if old_parent.is_descendant_of(new_parent) {
1357            let new_parent_guard = new_parent.lock_children();
1358            let _token = allow_subclass();
1359            let old_parent_guard = old_parent.lock_children();
1360            Self {
1361                old_parent_guard,
1362                new_parent_guard: Some(new_parent_guard),
1363                relationship: RenameRelationship::OldIsDescendant,
1364            }
1365        } else {
1366            // Independent directories can be locked in address order.
1367            let (g1, g2) =
1368                starnix_sync::ordered_write_lock(&old_parent.children, &new_parent.children);
1369            let old_parent_guard = DirEntryLockedChildren { entry: old_parent, children: g1 };
1370            let new_parent_guard = DirEntryLockedChildren { entry: new_parent, children: g2 };
1371            Self {
1372                old_parent_guard,
1373                new_parent_guard: Some(new_parent_guard),
1374                relationship: RenameRelationship::Independent,
1375            }
1376        }
1377    }
1378
1379    /// Consumes the `RenameGuard` (which only holds children locks) and locks the `info`
1380    /// of the parent directories in a safe order to prevent deadlocks. Returns a
1381    /// `RenameGuardLocked` which encapsulates all acquired locks (both children and info).
1382    fn lock_info(
1383        self,
1384        old_parent: &'a DirEntryHandle,
1385        new_parent: &'a DirEntryHandle,
1386        renamed: &'a DirEntryHandle,
1387        replaced: Option<&'a DirEntryHandle>,
1388    ) -> RenameContext<'a> {
1389        let (g1, g2) = match self.relationship {
1390            RenameRelationship::Same => (old_parent.node.info_lock().write(), None),
1391            RenameRelationship::NewIsDescendant => {
1392                let g1 = old_parent.node.info_lock().write();
1393                let _token = allow_subclass();
1394                let g2 = new_parent.node.info_lock().write();
1395                (g1, Some(g2))
1396            }
1397            RenameRelationship::OldIsDescendant => {
1398                let g2 = new_parent.node.info_lock().write();
1399                let _token = allow_subclass();
1400                let g1 = old_parent.node.info_lock().write();
1401                (g1, Some(g2))
1402            }
1403            RenameRelationship::Independent => {
1404                let (g1, g2) = starnix_sync::ordered_write_lock(
1405                    old_parent.node.info_lock(),
1406                    new_parent.node.info_lock(),
1407                );
1408                (g1, Some(g2))
1409            }
1410        };
1411
1412        RenameContext {
1413            renamed,
1414            replaced,
1415            old_parent_guard: self.old_parent_guard,
1416            new_parent_guard: self.new_parent_guard,
1417            old_parent_info_guard: g1,
1418            new_parent_info_guard: g2,
1419        }
1420    }
1421
1422    fn old_parent_children(&mut self) -> &mut DirEntryLockedChildren<'a> {
1423        &mut self.old_parent_guard
1424    }
1425
1426    fn new_parent_children(&mut self) -> &mut DirEntryLockedChildren<'a> {
1427        if let Some(new_guard) = self.new_parent_guard.as_mut() {
1428            new_guard
1429        } else {
1430            &mut self.old_parent_guard
1431        }
1432    }
1433}
1434
1435/// A context that holds the locked children and info of the parents during a
1436/// rename operation.
1437///
1438/// The context is constructed by locking the parent directories
1439/// (`old_parent`, `new_parent`) for write. These parent locks are held for
1440/// the duration of the context's lifetime, preventing concurrent
1441/// modifications to the parents.
1442///
1443/// Child nodes (`renamed` and `replaced`) are *not* locked upon
1444/// construction. To query properties of the children (like whether they are
1445/// directories) without risking deadlocks, use the provided helper methods
1446/// (`renamed_is_dir`, `replaced_is_dir`) instead of locking them directly.
1447pub struct RenameContext<'a> {
1448    pub renamed: &'a DirEntryHandle,
1449    pub replaced: Option<&'a DirEntryHandle>,
1450    old_parent_guard: DirEntryLockedChildren<'a>,
1451    new_parent_guard: Option<DirEntryLockedChildren<'a>>,
1452    old_parent_info_guard: LockDepWriteGuard<'a, crate::vfs::FsNodeInfo>,
1453    new_parent_info_guard: Option<LockDepWriteGuard<'a, crate::vfs::FsNodeInfo>>,
1454}
1455
1456impl<'a> RenameContext<'a> {
1457    /// Returns whether the renamed child node is a directory.
1458    ///
1459    /// This method safely handles child info locking under the parent locks.
1460    /// If the renamed node is same as a parent (which is already locked),
1461    /// it uses the parent's guard to avoid self-deadlock. Otherwise, it
1462    /// locks the child info using `allow_subclass`.
1463    pub fn renamed_is_dir(&self) -> bool {
1464        self.is_dir(&self.renamed.node)
1465    }
1466
1467    /// Returns whether the replaced child node is a directory.
1468    ///
1469    /// Returns `false` if `replaced` is `None`.
1470    ///
1471    /// This method safely handles child info locking under the parent locks.
1472    /// If the replaced node is same as a parent (which is already locked),
1473    /// it uses the parent's guard to avoid self-deadlock. Otherwise, it
1474    /// locks the child info using `allow_subclass`.
1475    pub fn replaced_is_dir(&self) -> bool {
1476        self.replaced.map(|r| self.is_dir(&r.node)).unwrap_or(false)
1477    }
1478
1479    /// Returns the old parent directory entry handle.
1480    ///
1481    /// The old parent's child list is write-locked for the lifetime of the
1482    /// context.
1483    pub fn old_parent(&self) -> &DirEntryHandle {
1484        self.old_parent_guard.entry
1485    }
1486
1487    /// Returns the new parent directory entry handle.
1488    ///
1489    /// The new parent's child list is write-locked for the lifetime of the
1490    /// context.
1491    pub fn new_parent(&self) -> &DirEntryHandle {
1492        self.new_parent_guard.as_ref().map(|g| g.entry).unwrap_or(self.old_parent_guard.entry)
1493    }
1494
1495    /// Returns mutable references to the `FsNodeInfo` of both parent
1496    /// directories.
1497    ///
1498    /// The references are returned as a tuple to allow them to be borrowed
1499    /// mutably at the same time (e.g., to update link counts in both).
1500    ///
1501    /// If `new_parent` is the same as `old_parent`, the second element of the
1502    /// tuple will be `None` to prevent mutable aliasing of the same guard.
1503    pub fn parent_infos_mut(
1504        &mut self,
1505    ) -> (&mut crate::vfs::FsNodeInfo, Option<&mut crate::vfs::FsNodeInfo>) {
1506        let old = &mut *self.old_parent_info_guard;
1507        let new = self.new_parent_info_guard.as_mut().map(|g| &mut **g);
1508        (old, new)
1509    }
1510
1511    /// Returns a shared reference to the `FsNodeInfo` of the old parent
1512    /// directory.
1513    pub fn old_parent_info(&self) -> &crate::vfs::FsNodeInfo {
1514        &self.old_parent_info_guard
1515    }
1516    /// Returns a shared reference to the `FsNodeInfo` of the new parent
1517    /// directory.
1518    ///
1519    /// Returns `None` if `new_parent` is the same as `old_parent`.
1520    pub fn new_parent_info(&self) -> Option<&crate::vfs::FsNodeInfo> {
1521        self.new_parent_info_guard.as_deref()
1522    }
1523
1524    fn new_parent_children(&mut self) -> &mut DirEntryLockedChildren<'a> {
1525        self.new_parent_guard.as_mut().unwrap_or(&mut self.old_parent_guard)
1526    }
1527
1528    fn old_parent_children(&mut self) -> &mut DirEntryLockedChildren<'a> {
1529        &mut self.old_parent_guard
1530    }
1531
1532    fn is_dir(&self, node: &FsNodeHandle) -> bool {
1533        if Arc::ptr_eq(node, &self.old_parent().node) {
1534            self.old_parent_info_guard.mode.is_dir()
1535        } else if Arc::ptr_eq(node, &self.new_parent().node) {
1536            self.new_parent_info_guard
1537                .as_ref()
1538                .map(|g| g.mode.is_dir())
1539                .unwrap_or_else(|| self.old_parent_info_guard.mode.is_dir())
1540        } else {
1541            let _token = allow_subclass();
1542            node.is_dir()
1543        }
1544    }
1545}
1546
1547/// The Drop trait for DirEntry removes the entry from the child list of the
1548/// parent entry, which means we cannot drop DirEntry objects while holding a
1549/// lock on the parent's child list.
1550impl Drop for DirEntry {
1551    fn drop(&mut self) {
1552        let maybe_parent = self.parent();
1553        self.parent.update(None);
1554        if let Some(parent) = maybe_parent {
1555            parent.internal_remove_child(self);
1556        }
1557    }
1558}