Skip to main content

starnix_core/vfs/
fs_node.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::device::DeviceMode;
6use crate::mm::PAGE_SIZE;
7use crate::security::{self, Auditable, PermissionFlags};
8use crate::signals::{SignalInfo, send_standard_signal};
9use crate::task::{CurrentTask, CurrentTaskAndLocked, WaitQueue, Waiter, register_delayed_release};
10use crate::time::utc;
11use crate::vfs::fsverity::FsVerityState;
12use crate::vfs::pipe::{Pipe, PipeHandle};
13use crate::vfs::rw_queue::{RwQueue, RwQueueReadGuard};
14use crate::vfs::socket::SocketHandle;
15use crate::vfs::{
16    DefaultDirEntryOps, DirEntryOps, FileObject, FileObjectState, FileOps, FileSystem,
17    FileSystemHandle, FileWriteGuardState, FsStr, FsString, MAX_LFS_FILESIZE, MountInfo,
18    NamespaceNode, OPathOps, RecordLockCommand, RecordLockOwner, RecordLocks, WeakFileHandle,
19    checked_add_offset_and_length, inotify,
20};
21use bitflags::bitflags;
22use fuchsia_runtime::UtcInstant;
23use linux_uapi::{XATTR_SECURITY_PREFIX, XATTR_SYSTEM_PREFIX, XATTR_TRUSTED_PREFIX};
24use once_cell::race::OnceBool;
25use starnix_crypt::EncryptionKeyId;
26use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
27use starnix_logging::{log_error, track_stub};
28use starnix_sync::{
29    BeforeFsNodeAppend, FileOpsCore, FsNodeAppend, LockBefore, LockEqualOrBefore, Locked, Mutex,
30    RwLock, RwLockReadGuard, Unlocked,
31};
32use starnix_types::ownership::{Releasable, ReleaseGuard};
33use starnix_types::time::{NANOS_PER_SECOND, timespec_from_time};
34use starnix_uapi::as_any::AsAny;
35use starnix_uapi::auth::{
36    CAP_CHOWN, CAP_DAC_OVERRIDE, CAP_DAC_READ_SEARCH, CAP_FOWNER, CAP_FSETID, CAP_MKNOD,
37    CAP_SYS_ADMIN, CAP_SYS_RESOURCE, FsCred, UserAndOrGroupId,
38};
39use starnix_uapi::device_type::DeviceType;
40use starnix_uapi::errors::{EACCES, ENOTSUP, EPERM, Errno};
41use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
42use starnix_uapi::inotify_mask::InotifyMask;
43use starnix_uapi::mount_flags::MountFlags;
44use starnix_uapi::open_flags::OpenFlags;
45use starnix_uapi::resource_limits::Resource;
46use starnix_uapi::seal_flags::SealFlags;
47use starnix_uapi::signals::SIGXFSZ;
48use starnix_uapi::{
49    FALLOC_FL_COLLAPSE_RANGE, FALLOC_FL_INSERT_RANGE, FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE,
50    FALLOC_FL_UNSHARE_RANGE, FALLOC_FL_ZERO_RANGE, LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN,
51    STATX__RESERVED, STATX_ATIME, STATX_ATTR_VERITY, STATX_BASIC_STATS, STATX_BLOCKS, STATX_CTIME,
52    STATX_GID, STATX_INO, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_UID, XATTR_USER_PREFIX,
53    errno, error, fsverity_descriptor, gid_t, ino_t, statx, statx_timestamp, timespec, uapi, uid_t,
54};
55use std::sync::atomic::Ordering;
56use std::sync::{Arc, OnceLock, Weak};
57use syncio::zxio_node_attr_has_t;
58
/// Whether a node can be linked into a directory.
///
/// The default is [`FsNodeLinkBehavior::Allowed`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum FsNodeLinkBehavior {
    /// The node may be linked into a directory.
    #[default]
    Allowed,
    /// The node may not be linked into a directory.
    Disallowed,
}
70
/// Proof that the append lock of an [`FsNode`] is held while `truncate`/`allocate` run.
pub enum AppendLockGuard<'a> {
    /// A read guard on the append lock that is owned by this value.
    Read(RwQueueReadGuard<'a, FsNodeAppend>),
    /// A borrow of a guard that was already acquired by the caller.
    AlreadyLocked(&'a AppendLockGuard<'a>),
}
75
/// Strategy used to obtain an [`AppendLockGuard`] for a node.
///
/// `L` is the lock level of the `Locked` token the caller passes to [`AppendLockStrategy::lock`].
pub trait AppendLockStrategy<L> {
    /// Helper method for acquiring append lock in `truncate`/`allocate`. Acquires the lock when it's not already acquired.
    ///
    /// On success, returns the guard together with a `Locked<FileOpsCore>` token derived from
    /// `locked`.
    fn lock<'a>(
        &'a self,
        locked: &'a mut Locked<L>,
        current_task: &CurrentTask,
        node: &'a FsNode,
    ) -> Result<(AppendLockGuard<'a>, &'a mut Locked<FileOpsCore>), Errno>;
}
85
86struct RealAppendLockStrategy {}
87
88impl AppendLockStrategy<BeforeFsNodeAppend> for RealAppendLockStrategy {
89    fn lock<'a>(
90        &'a self,
91        locked: &'a mut Locked<BeforeFsNodeAppend>,
92        current_task: &CurrentTask,
93        node: &'a FsNode,
94    ) -> Result<(AppendLockGuard<'a>, &'a mut Locked<FileOpsCore>), Errno> {
95        let (guard, new_locked) = node.ops().append_lock_read(locked, node, current_task)?;
96        Ok((AppendLockGuard::Read(guard), new_locked.cast_locked()))
97    }
98}
99
100pub struct AlreadyLockedAppendLockStrategy<'a> {
101    // Keep the reference to the guard, which will be returned in subsequent attempts to acquire this lock.
102    guard: &'a AppendLockGuard<'a>,
103}
104
105impl<'a> AlreadyLockedAppendLockStrategy<'a> {
106    pub fn new(guard: &'a AppendLockGuard<'a>) -> Self {
107        Self { guard }
108    }
109}
110
111impl AppendLockStrategy<FileOpsCore> for AlreadyLockedAppendLockStrategy<'_> {
112    fn lock<'a>(
113        &'a self,
114        locked: &'a mut Locked<FileOpsCore>,
115        _current_task: &CurrentTask,
116        _node: &'a FsNode,
117    ) -> Result<(AppendLockGuard<'a>, &'a mut Locked<FileOpsCore>), Errno> {
118        Ok((AppendLockGuard::AlreadyLocked(self.guard), locked.cast_locked::<FileOpsCore>()))
119    }
120}
121
/// A node of a file system, identified by its inode number.
///
/// Behavior specific to a file system is provided through the boxed [`FsNodeOps`].
pub struct FsNode {
    /// The inode number for this FsNode.
    pub ino: ino_t,

    /// The FsNodeOps for this FsNode.
    ///
    /// The FsNodeOps are implemented by the individual file systems to provide
    /// specific behaviors for this FsNode.
    ops: Box<dyn FsNodeOps>,

    /// The FileSystem that owns this FsNode's tree.
    ///
    /// Held as a `Weak` reference, so it does not keep the file system alive.
    fs: Weak<FileSystem>,

    /// A RwLock to synchronize append operations for this node.
    ///
    /// FileObjects writing with O_APPEND should grab a write() lock on this
    /// field to ensure they operate sequentially. FileObjects writing without
    /// O_APPEND should grab read() lock so that they can operate in parallel.
    pub append_lock: RwQueue<FsNodeAppend>,

    /// Mutable information about this node.
    ///
    /// This data is used to populate the uapi::stat structure.
    info: RwLock<FsNodeInfo>,

    /// Data associated with an FsNode that is rarely needed.
    ///
    /// Lazily initialized: the `OnceLock` stays empty until the data is first needed.
    rare_data: OnceLock<Box<FsNodeRareData>>,

    /// Tracks lock state for this file.
    pub write_guard_state: Mutex<FileWriteGuardState>,

    /// Cached FsVerity state associated with this node.
    pub fsverity: Mutex<FsVerityState>,

    /// The security state associated with this node. Must always be acquired last
    /// relative to other `FsNode` locks.
    pub security_state: security::FsNodeState,
}
160
/// Data associated with an `FsNode` that is rarely needed.
///
/// Stored boxed behind `FsNode::rare_data`.
#[derive(Default)]
struct FsNodeRareData {
    /// The pipe located at this node, if any.
    ///
    /// Used if, and only if, the node has a mode of FileMode::IFIFO.
    fifo: OnceLock<PipeHandle>,

    /// The UNIX domain socket bound to this node, if any.
    bound_socket: OnceLock<SocketHandle>,

    /// Information about the locking information on this node.
    ///
    /// No other lock on this object may be taken while this lock is held.
    flock_info: Mutex<FlockInfo>,

    /// Records locks associated with this node.
    record_locks: RecordLocks,

    /// Whether this node can be linked into a directory.
    ///
    /// Only set for nodes created with `O_TMPFILE`.
    link_behavior: OnceLock<FsNodeLinkBehavior>,

    /// Inotify watchers on this node. See inotify(7).
    watchers: inotify::InotifyWatchers,
}
187
188impl FsNodeRareData {
189    fn ensure_fifo(&self, current_task: &CurrentTask) -> &PipeHandle {
190        self.fifo.get_or_init(|| {
191            let mut default_pipe_capacity = (*PAGE_SIZE * 16) as usize;
192            if !security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE) {
193                let kernel = current_task.kernel();
194                let max_size = kernel.system_limits.pipe_max_size.load(Ordering::Relaxed);
195                default_pipe_capacity = std::cmp::min(default_pipe_capacity, max_size);
196            }
197            Pipe::new(default_pipe_capacity)
198        })
199    }
200}
201
/// Release action for [`FsNode`]s. This enum has no variants and is only used as a type
/// parameter of [`FsNodeReleaser`].
pub enum FsNodeReleaserAction {}
impl ReleaserAction<FsNode> for FsNodeReleaserAction {
    /// Hands the node to `register_delayed_release` instead of releasing it inline.
    fn release(fs_node: ReleaseGuard<FsNode>) {
        register_delayed_release(fs_node);
    }
}
/// An [`FsNode`] wrapped in an `ObjectReleaser` that uses [`FsNodeReleaserAction`].
pub type FsNodeReleaser = ObjectReleaser<FsNode, FsNodeReleaserAction>;
/// Strong, shared handle to an [`FsNode`].
pub type FsNodeHandle = Arc<FsNodeReleaser>;
/// Weak counterpart of [`FsNodeHandle`].
pub type WeakFsNodeHandle = Weak<FsNodeReleaser>;
211
/// Mutable information about a node, used to populate the `uapi::stat` structure.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct FsNodeInfo {
    /// File type and permission bits.
    pub mode: FileMode,
    /// Link count; `FsNodeInfo::new` starts it at 2 for directories and 1 otherwise.
    pub link_count: usize,
    /// User id of the owner.
    pub uid: uid_t,
    /// Group id of the owner.
    pub gid: gid_t,
    pub rdev: DeviceType,
    /// Size of the file.
    pub size: usize,
    /// Block size; multiplied by `blocks` in `storage_size`.
    pub blksize: usize,
    /// Number of blocks; multiplied by `blksize` in `storage_size`.
    pub blocks: usize,
    pub time_status_change: UtcInstant,
    pub time_access: UtcInstant,
    pub time_modify: UtcInstant,
    pub casefold: bool,

    /// If this node is fscrypt encrypted, stores the id of the user wrapping key used to encrypt it.
    pub wrapping_key_id: Option<[u8; 16]>,

    /// Used to indicate to filesystems that manage timestamps that an access has occurred and to
    /// update the node's atime.
    /// This only impacts accesses within Starnix. Most Fuchsia programs are not expected to maintain
    /// access times. If the file handle is transferred out of Starnix, there may be inconsistencies.
    pub pending_time_access_update: bool,
}
236
237impl FsNodeInfo {
238    pub fn new(mode: FileMode, owner: FsCred) -> Self {
239        let now = utc::utc_now();
240        Self {
241            mode,
242            link_count: if mode.is_dir() { 2 } else { 1 },
243            uid: owner.uid,
244            gid: owner.gid,
245            blksize: DEFAULT_BYTES_PER_BLOCK,
246            time_status_change: now,
247            time_access: now,
248            time_modify: now,
249            ..Default::default()
250        }
251    }
252
253    pub fn storage_size(&self) -> usize {
254        self.blksize.saturating_mul(self.blocks)
255    }
256
257    pub fn chmod(&mut self, mode: FileMode) {
258        self.mode = (self.mode & !FileMode::PERMISSIONS) | (mode & FileMode::PERMISSIONS);
259    }
260
261    pub fn chown(&mut self, owner: Option<uid_t>, group: Option<gid_t>) {
262        if let Some(owner) = owner {
263            self.uid = owner;
264        }
265        if let Some(group) = group {
266            self.gid = group;
267        }
268        // Clear the setuid and setgid bits if the file is executable and a regular file.
269        if self.mode.is_reg() {
270            self.mode &= !FileMode::ISUID;
271            self.clear_sgid_bit();
272        }
273    }
274
275    fn clear_sgid_bit(&mut self) {
276        // If the group execute bit is not set, the setgid bit actually indicates mandatory
277        // locking and should not be cleared.
278        if self.mode.intersects(FileMode::IXGRP) {
279            self.mode &= !FileMode::ISGID;
280        }
281    }
282
283    fn clear_suid_and_sgid_bits(&mut self) {
284        self.mode &= !FileMode::ISUID;
285        self.clear_sgid_bit();
286    }
287
288    pub fn cred(&self) -> FsCred {
289        FsCred { uid: self.uid, gid: self.gid }
290    }
291
292    pub fn suid_and_sgid(
293        &self,
294        current_task: &CurrentTask,
295        fs_node: &FsNode,
296    ) -> Result<UserAndOrGroupId, Errno> {
297        let uid = self.mode.contains(FileMode::ISUID).then_some(self.uid);
298
299        // See <https://man7.org/linux/man-pages/man7/inode.7.html>:
300        //
301        //   For an executable file, the set-group-ID bit causes the
302        //   effective group ID of a process that executes the file to change
303        //   as described in execve(2).  For a file that does not have the
304        //   group execution bit (S_IXGRP) set, the set-group-ID bit indicates
305        //   mandatory file/record locking.
306        let gid = self.mode.contains(FileMode::ISGID | FileMode::IXGRP).then_some(self.gid);
307
308        let maybe_set_id = UserAndOrGroupId { uid, gid };
309        if maybe_set_id.is_some() {
310            // Check that uid and gid actually have execute access before
311            // returning them as the SUID or SGID.
312            check_access(
313                fs_node,
314                current_task,
315                security::PermissionFlags::EXEC,
316                self.uid,
317                self.gid,
318                self.mode,
319            )?;
320        }
321        Ok(maybe_set_id)
322    }
323}
324
/// State of the flock(2)-style advisory lock on a node.
#[derive(Default)]
struct FlockInfo {
    /// Whether the node is currently locked. The meaning of the different values are:
    /// - `None`: The node is not locked.
    /// - `Some(false)`: The node is locked non exclusively.
    /// - `Some(true)`: The node is locked exclusively.
    locked_exclusive: Option<bool>,
    /// The FileObjects that hold the lock, kept as weak handles.
    locking_handles: Vec<WeakFileHandle>,
    /// The queue used to notify the processes waiting on the lock.
    wait_queue: WaitQueue,
}
337
338impl FlockInfo {
339    /// Removes all file handle not holding `predicate` from the list of object holding the lock. If
340    /// this empties the list, unlocks the node and notifies all waiting processes.
341    pub fn retain<F>(&mut self, predicate: F)
342    where
343        F: Fn(&FileObject) -> bool,
344    {
345        if !self.locking_handles.is_empty() {
346            self.locking_handles
347                .retain(|w| if let Some(fh) = w.upgrade() { predicate(&fh) } else { false });
348            if self.locking_handles.is_empty() {
349                self.locked_exclusive = None;
350                self.wait_queue.notify_all();
351            }
352        }
353    }
354}
355
/// Default block size, in bytes, stored in `FsNodeInfo::blksize` by `FsNodeInfo::new`.
///
/// NOTE(review): stat(2) defines `st_blocks` (not `st_blksize`) as the field counted in
/// 512-byte units — confirm how this constant is meant to map onto those fields.
pub const DEFAULT_BYTES_PER_BLOCK: usize = 512;
358
359pub struct FlockOperation {
360    operation: u32,
361}
362
363impl FlockOperation {
364    pub fn from_flags(operation: u32) -> Result<Self, Errno> {
365        if operation & !(LOCK_SH | LOCK_EX | LOCK_UN | LOCK_NB) != 0 {
366            return error!(EINVAL);
367        }
368        if [LOCK_SH, LOCK_EX, LOCK_UN].iter().filter(|&&o| operation & o == o).count() != 1 {
369            return error!(EINVAL);
370        }
371        Ok(Self { operation })
372    }
373
374    pub fn is_unlock(&self) -> bool {
375        self.operation & LOCK_UN > 0
376    }
377
378    pub fn is_lock_exclusive(&self) -> bool {
379        self.operation & LOCK_EX > 0
380    }
381
382    pub fn is_blocking(&self) -> bool {
383        self.operation & LOCK_NB == 0
384    }
385}
386
impl FileObject {
    /// Advisory locking.
    ///
    /// See flock(2).
    ///
    /// Applies `operation` to the flock state of the node behind this file object. The lock
    /// is tracked per `FileObject`: holders are compared by pointer identity with `self`.
    ///
    /// Returns:
    /// - `EBADF` if the file was opened with `O_PATH`.
    /// - `EAGAIN` if the lock cannot be granted right now and `operation` is non-blocking.
    /// - Any error returned by the waiter while blocked waiting for the lock.
    pub fn flock(
        &self,
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        operation: FlockOperation,
    ) -> Result<(), Errno> {
        if self.flags().contains(OpenFlags::PATH) {
            return error!(EBADF);
        }
        // Each iteration re-acquires the `flock_info` mutex and re-evaluates the lock state
        // from scratch.
        loop {
            let mut flock_info = self.name.entry.node.ensure_rare_data().flock_info.lock();
            if operation.is_unlock() {
                // Drop this file object from the holders; `retain` wakes the waiters if it was
                // the last one.
                flock_info.retain(|fh| !std::ptr::eq(fh, self));
                return Ok(());
            }
            // Operation is a locking operation.
            // 1. File is not locked
            if flock_info.locked_exclusive.is_none() {
                flock_info.locked_exclusive = Some(operation.is_lock_exclusive());
                flock_info.locking_handles.push(self.weak_handle.clone());
                return Ok(());
            }

            let file_lock_is_exclusive = flock_info.locked_exclusive == Some(true);
            // Whether this very file object is among the current holders.
            let fd_has_lock = flock_info
                .locking_handles
                .iter()
                .find_map(|w| {
                    w.upgrade().and_then(|fh| {
                        if std::ptr::eq(&fh as &FileObject, self) { Some(()) } else { None }
                    })
                })
                .is_some();

            // 2. File is locked, but fd already has a lock
            if fd_has_lock {
                if operation.is_lock_exclusive() == file_lock_is_exclusive {
                    // Correct lock is already held, return.
                    return Ok(());
                } else {
                    // Incorrect lock is held. Release the lock and loop back to try to reacquire
                    // it. flock doesn't guarantee atomic lock type switching.
                    flock_info.retain(|fh| !std::ptr::eq(fh, self));
                    continue;
                }
            }

            // 3. File is locked, and fd doesn't have a lock.
            if !file_lock_is_exclusive && !operation.is_lock_exclusive() {
                // The lock is not exclusive, let's grab it.
                flock_info.locking_handles.push(self.weak_handle.clone());
                return Ok(());
            }

            // 4. The operation cannot be done at this time.
            if !operation.is_blocking() {
                return error!(EAGAIN);
            }

            // Register a waiter to be notified when the lock is released. Release the lock on
            // FlockInfo, and wait.
            // The waiter is registered before the mutex is dropped so that a notification sent
            // in between is not missed.
            let waiter = Waiter::new();
            flock_info.wait_queue.wait_async(&waiter);
            std::mem::drop(flock_info);
            waiter.wait(locked, current_task)?;
        }
    }
}
459
// The inner mod is required because bitflags cannot pass the attribute through to the single
// variant, and attributes cannot be applied to macro invocations.
mod inner_flags {
    // Part of the code for the AT_STATX_SYNC_AS_STAT case that's produced by the macro triggers the
    // lint, but as a whole, the produced code is still correct.
    #![allow(clippy::bad_bit_mask)] // TODO(b/303500202) Remove once addressed in bitflags.
    use super::{bitflags, uapi};

    bitflags! {
        // Flag bits for statx, each taken from the `uapi` constant of the same name.
        // NOTE(review): `STATX_ATTR_VERITY` is named like an attribute bit rather than an `AT_*`
        // flag — confirm that it is meant to be part of this set.
        #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub struct StatxFlags: u32 {
            const AT_SYMLINK_NOFOLLOW = uapi::AT_SYMLINK_NOFOLLOW;
            const AT_EMPTY_PATH = uapi::AT_EMPTY_PATH;
            const AT_NO_AUTOMOUNT = uapi::AT_NO_AUTOMOUNT;
            const AT_STATX_SYNC_AS_STAT = uapi::AT_STATX_SYNC_AS_STAT;
            const AT_STATX_FORCE_SYNC = uapi::AT_STATX_FORCE_SYNC;
            const AT_STATX_DONT_SYNC = uapi::AT_STATX_DONT_SYNC;
            const STATX_ATTR_VERITY = uapi::STATX_ATTR_VERITY;
        }
    }
}
481
482pub use inner_flags::StatxFlags;
483
/// The kind of child that an unlink operation targets.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum UnlinkKind {
    /// Unlink a directory.
    Directory,

    /// Unlink a non-directory.
    NonDirectory,
}
492
/// The target of a symlink, as returned by `FsNodeOps::readlink`.
pub enum SymlinkTarget {
    /// The target is given as a path.
    Path(FsString),
    /// The target is given directly as a node in a namespace.
    Node(NamespaceNode),
}
497
498#[derive(Clone, Copy, PartialEq, Eq)]
499pub enum XattrOp {
500    /// Set the value of the extended attribute regardless of whether it exists.
501    Set,
502    /// Create a new extended attribute. Fail if it already exists.
503    Create,
504    /// Replace the value of the extended attribute. Fail if it doesn't exist.
505    Replace,
506}
507
508impl XattrOp {
509    pub fn into_flags(self) -> u32 {
510        match self {
511            Self::Set => 0,
512            Self::Create => uapi::XATTR_CREATE,
513            Self::Replace => uapi::XATTR_REPLACE,
514        }
515    }
516}
517
/// Either a value, or the size required to contain it.
#[derive(Clone, Debug, PartialEq)]
pub enum ValueOrSize<T> {
    /// The value itself.
    Value(T),
    /// The size needed to hold the value.
    Size(usize),
}

impl<T> ValueOrSize<T> {
    /// Applies `f` to a contained value; a `Size` is passed through unchanged.
    pub fn map<F, U>(self, f: F) -> ValueOrSize<U>
    where
        F: FnOnce(T) -> U,
    {
        match self {
            ValueOrSize::Value(value) => ValueOrSize::Value(f(value)),
            ValueOrSize::Size(size) => ValueOrSize::Size(size),
        }
    }

    /// Returns the contained value. Test-only.
    ///
    /// # Panics
    ///
    /// Panics if `self` is a `Size`.
    #[cfg(test)]
    pub fn unwrap(self) -> T {
        let Self::Value(value) = self else {
            panic!("Unwrap ValueOrSize that is a Size");
        };
        value
    }
}

impl<T> From<T> for ValueOrSize<T> {
    /// Wraps `t` as a `Value`.
    fn from(t: T) -> Self {
        ValueOrSize::Value(t)
    }
}
550
551#[derive(Copy, Clone, Eq, PartialEq, Debug)]
552pub enum FallocMode {
553    Allocate { keep_size: bool },
554    PunchHole,
555    Collapse,
556    Zero { keep_size: bool },
557    InsertRange,
558    UnshareRange,
559}
560
561impl FallocMode {
562    pub fn from_bits(mode: u32) -> Option<Self> {
563        // `fallocate()` allows only the following values for `mode`.
564        if mode == 0 {
565            Some(Self::Allocate { keep_size: false })
566        } else if mode == FALLOC_FL_KEEP_SIZE {
567            Some(Self::Allocate { keep_size: true })
568        } else if mode == FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE {
569            Some(Self::PunchHole)
570        } else if mode == FALLOC_FL_COLLAPSE_RANGE {
571            Some(Self::Collapse)
572        } else if mode == FALLOC_FL_ZERO_RANGE {
573            Some(Self::Zero { keep_size: false })
574        } else if mode == FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE {
575            Some(Self::Zero { keep_size: true })
576        } else if mode == FALLOC_FL_INSERT_RANGE {
577            Some(Self::InsertRange)
578        } else if mode == FALLOC_FL_UNSHARE_RANGE {
579            Some(Self::UnshareRange)
580        } else {
581            None
582        }
583    }
584}
585
/// The reason why an access check is being performed; passed to `FsNodeOps::check_access`.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum CheckAccessReason {
    Access,
    Chdir,
    Chroot,
    Exec,
    // NOTE(review): `now` presumably tells whether the timestamps are being set to the current
    // time — confirm against the callers.
    ChangeTimestamps { now: bool },
    InternalPermissionChecks,
}
595
596pub trait FsNodeOps: Send + Sync + AsAny + 'static {
597    /// Delegate the access check to the node.
598    fn check_access(
599        &self,
600        _locked: &mut Locked<FileOpsCore>,
601        node: &FsNode,
602        current_task: &CurrentTask,
603        access: security::PermissionFlags,
604        info: &RwLock<FsNodeInfo>,
605        reason: CheckAccessReason,
606        audit_context: security::Auditable<'_>,
607    ) -> Result<(), Errno> {
608        node.default_check_access_impl(current_task, access, reason, info.read(), audit_context)
609    }
610
611    /// Build the [`DirEntryOps`] for a new [`DirEntry`] that will be associated
612    /// to this node.
613    fn create_dir_entry_ops(&self) -> Box<dyn DirEntryOps> {
614        Box::new(DefaultDirEntryOps)
615    }
616
617    /// Build the `FileOps` for the file associated to this node.
618    ///
619    /// The returned FileOps will be used to create a FileObject, which might
620    /// be assigned an FdNumber.
621    fn create_file_ops(
622        &self,
623        locked: &mut Locked<FileOpsCore>,
624        node: &FsNode,
625        _current_task: &CurrentTask,
626        flags: OpenFlags,
627    ) -> Result<Box<dyn FileOps>, Errno>;
628
629    /// Find an existing child node and populate the child parameter. Return the node.
630    ///
631    /// The child parameter is an empty node. Operations other than initialize may panic before
632    /// initialize is called.
633    fn lookup(
634        &self,
635        _locked: &mut Locked<FileOpsCore>,
636        _node: &FsNode,
637        _current_task: &CurrentTask,
638        name: &FsStr,
639    ) -> Result<FsNodeHandle, Errno> {
640        // The default implementation here is suitable for filesystems that have permanent entries;
641        // entries that already exist will get found in the cache and shouldn't get this far.
642        error!(ENOENT, format!("looking for {name}"))
643    }
644
645    /// Create and return the given child node.
646    ///
647    /// The mode field of the FsNodeInfo indicates what kind of child to
648    /// create.
649    ///
650    /// This function is never called with FileMode::IFDIR. The mkdir function
651    /// is used to create directories instead.
652    fn mknod(
653        &self,
654        locked: &mut Locked<FileOpsCore>,
655        _node: &FsNode,
656        _current_task: &CurrentTask,
657        _name: &FsStr,
658        _mode: FileMode,
659        _dev: DeviceType,
660        _owner: FsCred,
661    ) -> Result<FsNodeHandle, Errno>;
662
663    /// Create and return the given child node as a subdirectory.
664    fn mkdir(
665        &self,
666        locked: &mut Locked<FileOpsCore>,
667        _node: &FsNode,
668        _current_task: &CurrentTask,
669        _name: &FsStr,
670        _mode: FileMode,
671        _owner: FsCred,
672    ) -> Result<FsNodeHandle, Errno>;
673
674    /// Creates a symlink with the given `target` path.
675    fn create_symlink(
676        &self,
677        locked: &mut Locked<FileOpsCore>,
678        _node: &FsNode,
679        _current_task: &CurrentTask,
680        _name: &FsStr,
681        _target: &FsStr,
682        _owner: FsCred,
683    ) -> Result<FsNodeHandle, Errno>;
684
685    /// Creates an anonymous file.
686    ///
687    /// The FileMode::IFMT of the FileMode is always FileMode::IFREG.
688    ///
689    /// Used by O_TMPFILE.
690    fn create_tmpfile(
691        &self,
692        _node: &FsNode,
693        _current_task: &CurrentTask,
694        _mode: FileMode,
695        _owner: FsCred,
696    ) -> Result<FsNodeHandle, Errno> {
697        error!(EOPNOTSUPP)
698    }
699
700    /// Reads the symlink from this node.
701    fn readlink(
702        &self,
703        _locked: &mut Locked<FileOpsCore>,
704        _node: &FsNode,
705        _current_task: &CurrentTask,
706    ) -> Result<SymlinkTarget, Errno> {
707        error!(EINVAL)
708    }
709
710    /// Create a hard link with the given name to the given child.
711    fn link(
712        &self,
713        _locked: &mut Locked<FileOpsCore>,
714        _node: &FsNode,
715        _current_task: &CurrentTask,
716        _name: &FsStr,
717        _child: &FsNodeHandle,
718    ) -> Result<(), Errno> {
719        error!(EPERM)
720    }
721
722    /// Remove the child with the given name, if the child exists.
723    ///
724    /// The UnlinkKind parameter indicates whether the caller intends to unlink
725    /// a directory or a non-directory child.
726    fn unlink(
727        &self,
728        locked: &mut Locked<FileOpsCore>,
729        _node: &FsNode,
730        _current_task: &CurrentTask,
731        _name: &FsStr,
732        _child: &FsNodeHandle,
733    ) -> Result<(), Errno>;
734
735    /// Acquire the necessary append lock for the operations that depend on them.
736    /// Should be done before calling `allocate` or `truncate` to avoid lock ordering issues.
737    fn append_lock_read<'a>(
738        &'a self,
739        locked: &'a mut Locked<BeforeFsNodeAppend>,
740        node: &'a FsNode,
741        current_task: &CurrentTask,
742    ) -> Result<(RwQueueReadGuard<'a, FsNodeAppend>, &'a mut Locked<FsNodeAppend>), Errno> {
743        return node.append_lock.read_and(locked, current_task);
744    }
745
746    /// Change the length of the file.
747    fn truncate(
748        &self,
749        _locked: &mut Locked<FileOpsCore>,
750        _guard: &AppendLockGuard<'_>,
751        _node: &FsNode,
752        _current_task: &CurrentTask,
753        _length: u64,
754    ) -> Result<(), Errno> {
755        error!(EINVAL)
756    }
757
758    /// Manipulate allocated disk space for the file.
759    fn allocate(
760        &self,
761        _locked: &mut Locked<FileOpsCore>,
762        _guard: &AppendLockGuard<'_>,
763        _node: &FsNode,
764        _current_task: &CurrentTask,
765        _mode: FallocMode,
766        _offset: u64,
767        _length: u64,
768    ) -> Result<(), Errno> {
769        error!(EINVAL)
770    }
771
772    /// Update the supplied info with initial state (e.g. size) for the node.
773    ///
774    /// FsNode calls this method when created, to allow the FsNodeOps to
775    /// set appropriate initial values in the FsNodeInfo.
776    fn initial_info(&self, _info: &mut FsNodeInfo) {}
777
778    /// Update node.info as needed.
779    ///
780    /// FsNode calls this method before converting the FsNodeInfo struct into
781    /// the uapi::stat struct to give the file system a chance to update this data
782    /// before it is used by clients.
783    ///
784    /// File systems that keep the FsNodeInfo up-to-date do not need to
785    /// override this function.
786    ///
787    /// Return a read guard for the updated information.
788    fn fetch_and_refresh_info<'a>(
789        &self,
790        _locked: &mut Locked<FileOpsCore>,
791        _node: &FsNode,
792        _current_task: &CurrentTask,
793        info: &'a RwLock<FsNodeInfo>,
794    ) -> Result<RwLockReadGuard<'a, FsNodeInfo>, Errno> {
795        Ok(info.read())
796    }
797
798    /// Syncs cached data to persistent storage.
799    fn sync(&self, _node: &FsNode, _current_task: &CurrentTask) -> Result<(), Errno> {
800        Ok(())
801    }
802
803    /// Update node attributes persistently.
804    fn update_attributes(
805        &self,
806        _locked: &mut Locked<FileOpsCore>,
807        _node: &FsNode,
808        _current_task: &CurrentTask,
809        _info: &FsNodeInfo,
810        _has: zxio_node_attr_has_t,
811    ) -> Result<(), Errno> {
812        Ok(())
813    }
814
815    /// Get an extended attribute on the node.
816    ///
817    /// An implementation can systematically return a value. Otherwise, if `max_size` is 0, it can
818    /// instead return the size of the attribute, and can return an ERANGE error if max_size is not
819    /// 0, and lesser than the required size.
820    fn get_xattr(
821        &self,
822        _locked: &mut Locked<FileOpsCore>,
823        _node: &FsNode,
824        _current_task: &CurrentTask,
825        _name: &FsStr,
826        _max_size: usize,
827    ) -> Result<ValueOrSize<FsString>, Errno> {
828        error!(ENOTSUP)
829    }
830
831    /// Set an extended attribute on the node.
832    fn set_xattr(
833        &self,
834        _locked: &mut Locked<FileOpsCore>,
835        _node: &FsNode,
836        _current_task: &CurrentTask,
837        _name: &FsStr,
838        _value: &FsStr,
839        _op: XattrOp,
840    ) -> Result<(), Errno> {
841        error!(ENOTSUP)
842    }
843
844    fn remove_xattr(
845        &self,
846        _locked: &mut Locked<FileOpsCore>,
847        _node: &FsNode,
848        _current_task: &CurrentTask,
849        _name: &FsStr,
850    ) -> Result<(), Errno> {
851        error!(ENOTSUP)
852    }
853
854    /// An implementation can systematically return a value. Otherwise, if `max_size` is 0, it can
855    /// instead return the size of the 0 separated string needed to represent the value, and can
856    /// return an ERANGE error if max_size is not 0, and lesser than the required size.
857    fn list_xattrs(
858        &self,
859        _locked: &mut Locked<FileOpsCore>,
860        _node: &FsNode,
861        _current_task: &CurrentTask,
862        _max_size: usize,
863    ) -> Result<ValueOrSize<Vec<FsString>>, Errno> {
864        error!(ENOTSUP)
865    }
866
867    /// Called when the FsNode is freed by the Kernel.
868    fn forget(
869        self: Box<Self>,
870        _locked: &mut Locked<FileOpsCore>,
871        _current_task: &CurrentTask,
872        _info: FsNodeInfo,
873    ) -> Result<(), Errno> {
874        Ok(())
875    }
876
877    ////////////////////
878    // FS-Verity operations
879
880    /// Marks that FS-Verity is being built. Writes fsverity descriptor and merkle tree, the latter
881    /// computed by the filesystem.
882    /// This should ensure there are no writable file handles. Returns EEXIST if the file was
883    /// already fsverity-enabled. Returns EBUSY if this ioctl was already running on this file.
884    fn enable_fsverity(
885        &self,
886        _locked: &mut Locked<FileOpsCore>,
887        _node: &FsNode,
888        _current_task: &CurrentTask,
889        _descriptor: &fsverity_descriptor,
890    ) -> Result<(), Errno> {
891        error!(ENOTSUP)
892    }
893
894    /// Read fsverity descriptor, if the node is fsverity-enabled. Else returns ENODATA.
895    fn get_fsverity_descriptor(&self, _log_blocksize: u8) -> Result<fsverity_descriptor, Errno> {
896        error!(ENOTSUP)
897    }
898
899    /// Returns a descriptive name for this node, suitable to report to userspace in situations
900    /// where the node's path is unavailable (e.g. because it is anonymous, and has no path).
901    /// If no name is returned then a default name of the form "<class:[<node_id>]" will be used.
902    fn internal_name(&self, _node: &FsNode) -> Option<FsString> {
903        None
904    }
905
906    /// The key used to identify this node in the file system's node cache.
907    ///
908    /// For many file systems, this will be the same as the inode number. However, some file
909    /// systems, such as FUSE, sometimes use different `node_key` and inode numbers.
910    fn node_key(&self, node: &FsNode) -> ino_t {
911        node.ino
912    }
913
914    /// Whether this node is private to the kernel/filesystem.
915    fn is_private(&self) -> bool {
916        false
917    }
918
919    /// Returns the size of the file.
920    fn get_size(
921        &self,
922        locked: &mut Locked<FileOpsCore>,
923        node: &FsNode,
924        current_task: &CurrentTask,
925    ) -> Result<usize, Errno> {
926        let info = node.fetch_and_refresh_info(locked, current_task)?;
927        Ok(info.size.try_into().map_err(|_| errno!(EINVAL))?)
928    }
929}
930
931impl<T> From<T> for Box<dyn FsNodeOps>
932where
933    T: FsNodeOps,
934{
935    fn from(ops: T) -> Box<dyn FsNodeOps> {
936        Box::new(ops)
937    }
938}
939
/// Implements [`FsNodeOps`] methods in a way that makes sense for symlinks.
/// You must implement [`FsNodeOps::readlink`].
///
/// Expands to everything provided by [`fs_node_impl_not_dir`] plus a `create_file_ops` that
/// asserts the node is a symlink and then panics, because symlink nodes are never opened.
///
/// All types named by the expansion are fully qualified, so the invoking module does not need
/// to import any of them.
#[macro_export]
macro_rules! fs_node_impl_symlink {
    () => {
        $crate::vfs::fs_node_impl_not_dir!();

        fn create_file_ops(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _flags: starnix_uapi::open_flags::OpenFlags,
        ) -> Result<Box<dyn $crate::vfs::FileOps>, starnix_uapi::errors::Errno> {
            assert!(node.is_lnk());
            unreachable!("Symlink nodes cannot be opened.");
        }
    };
}
959
/// Implements the mutating directory methods of [`FsNodeOps`] for a read-only directory.
///
/// `check_access` rejects any request that includes write access with `EROFS` and otherwise
/// defers to `FsNode::default_check_access_impl`. `mkdir`, `mknod`, `create_symlink`, `link`
/// and `unlink` all fail with `EROFS`.
///
/// All types named by the expansion are fully qualified, so the invoking module does not need
/// to import any of them.
#[macro_export]
macro_rules! fs_node_impl_dir_readonly {
    () => {
        fn check_access(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            node: &$crate::vfs::FsNode,
            current_task: &$crate::task::CurrentTask,
            permission_flags: $crate::security::PermissionFlags,
            info: &starnix_sync::RwLock<$crate::vfs::FsNodeInfo>,
            reason: $crate::vfs::CheckAccessReason,
            audit_context: $crate::security::Auditable<'_>,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            // Writes are refused before any other permission check is consulted.
            let access = permission_flags.as_access();
            if access.contains(starnix_uapi::file_mode::Access::WRITE) {
                return starnix_uapi::error!(
                    EROFS,
                    format!("check_access failed: read-only directory")
                );
            }
            node.default_check_access_impl(
                current_task,
                permission_flags,
                reason,
                info.read(),
                audit_context,
            )
        }

        fn mkdir(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("mkdir failed: {:?}", name))
        }

        fn mknod(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _dev: starnix_uapi::device_type::DeviceType,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("mknod failed: {:?}", name))
        }

        fn create_symlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _target: &$crate::vfs::FsStr,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("symlink failed: {:?}", name))
        }

        fn link(
            &self,
            // Fully qualified like every other method here, so the expansion does not depend
            // on `Locked` / `FileOpsCore` being imported at the call site.
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _child: &$crate::vfs::FsNodeHandle,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("link failed: {:?}", name))
        }

        fn unlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _child: &$crate::vfs::FsNodeHandle,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("unlink failed: {:?}", name))
        }
    };
}
1049
1050/// Trait that objects can implement if they need to handle extended attribute storage. Allows
1051/// delegating extended attribute operations in [`FsNodeOps`] to another object.
1052///
1053/// See [`fs_node_impl_xattr_delegate`] for usage details.
1054pub trait XattrStorage {
1055    /// Delegate for [`FsNodeOps::get_xattr`].
1056    fn get_xattr(&self, locked: &mut Locked<FileOpsCore>, name: &FsStr) -> Result<FsString, Errno>;
1057
1058    /// Delegate for [`FsNodeOps::set_xattr`].
1059    fn set_xattr(
1060        &self,
1061        locked: &mut Locked<FileOpsCore>,
1062        name: &FsStr,
1063        value: &FsStr,
1064        op: XattrOp,
1065    ) -> Result<(), Errno>;
1066
1067    /// Delegate for [`FsNodeOps::remove_xattr`].
1068    fn remove_xattr(&self, locked: &mut Locked<FileOpsCore>, name: &FsStr) -> Result<(), Errno>;
1069
1070    /// Delegate for [`FsNodeOps::list_xattrs`].
1071    fn list_xattrs(&self, locked: &mut Locked<FileOpsCore>) -> Result<Vec<FsString>, Errno>;
1072}
1073
/// Implements extended attribute ops for [`FsNodeOps`] by delegating to another object which
/// implements the [`XattrStorage`] trait or a similar interface. For example:
///
/// ```
/// struct Xattrs {}
///
/// impl XattrStorage for Xattrs {
///     // implement XattrStorage
/// }
///
/// struct Node {
///     xattrs: Xattrs
/// }
///
/// impl FsNodeOps for Node {
///     // Delegate extended attribute ops in FsNodeOps to self.xattrs
///     fs_node_impl_xattr_delegate!(self, self.xattrs);
///
///     // add other FsNodeOps impls here
/// }
/// ```
///
/// The generated `get_xattr` and `list_xattrs` ignore the size hint they are given and always
/// return the delegate's full value, converted with `.into()`.
///
/// All types named by the expansion are fully qualified, so the invoking module does not need
/// to import any of them.
#[macro_export]
macro_rules! fs_node_impl_xattr_delegate {
    ($self:ident, $delegate:expr) => {
        fn get_xattr(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _size: usize,
        ) -> Result<$crate::vfs::ValueOrSize<$crate::vfs::FsString>, starnix_uapi::errors::Errno> {
            Ok($delegate.get_xattr(locked, name)?.into())
        }

        fn set_xattr(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            value: &$crate::vfs::FsStr,
            op: $crate::vfs::XattrOp,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            $delegate.set_xattr(locked, name, value, op)
        }

        fn remove_xattr(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            $delegate.remove_xattr(locked, name)
        }

        fn list_xattrs(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _size: usize,
        ) -> Result<$crate::vfs::ValueOrSize<Vec<$crate::vfs::FsString>>, starnix_uapi::errors::Errno> {
            Ok($delegate.list_xattrs(locked)?.into())
        }
    };
}
1142
/// Provides [`FsNodeOps`] implementations of the directory-only methods for nodes that are not
/// directories.
///
/// Each generated method (`lookup`, `mknod`, `mkdir`, `create_symlink` and `unlink`) ignores
/// its arguments and fails with `ENOTDIR`.
#[macro_export]
macro_rules! fs_node_impl_not_dir {
    () => {
        // Name resolution below a non-directory.
        fn lookup(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }

        // Node creation below a non-directory.
        fn mknod(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _dev: starnix_uapi::device_type::DeviceType,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }

        // Directory creation below a non-directory.
        fn mkdir(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }

        // Symlink creation below a non-directory.
        fn create_symlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
            _target: &$crate::vfs::FsStr,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }

        // Child removal below a non-directory.
        fn unlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
            _child: &$crate::vfs::FsNodeHandle,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }
    };
}
1206
1207#[derive(Copy, Clone, Debug, PartialEq, Eq)]
1208pub enum TimeUpdateType {
1209    Now,
1210    Omit,
1211    Time(UtcInstant),
1212}
1213
1214// Public re-export of macros allows them to be used like regular rust items.
1215pub use fs_node_impl_dir_readonly;
1216pub use fs_node_impl_not_dir;
1217pub use fs_node_impl_symlink;
1218pub use fs_node_impl_xattr_delegate;
1219
1220pub struct SpecialNode;
1221
1222impl FsNodeOps for SpecialNode {
1223    fs_node_impl_not_dir!();
1224
1225    fn create_file_ops(
1226        &self,
1227        _locked: &mut Locked<FileOpsCore>,
1228        _node: &FsNode,
1229        _current_task: &CurrentTask,
1230        _flags: OpenFlags,
1231    ) -> Result<Box<dyn FileOps>, Errno> {
1232        unreachable!("Special nodes cannot be opened.");
1233    }
1234}
1235
1236impl FsNode {
1237    /// Returns true if the `fs_node` is private to the `Kernel`/`FileSystem`, in which
1238    /// case both MAC and DAC checks should be skipped.
1239    pub fn is_private(&self) -> bool {
1240        self.ops().is_private()
1241    }
1242
1243    /// Create a node without inserting it into the FileSystem node cache.
1244    ///
1245    /// This is usually not what you want!
1246    /// Only use if you're also using get_or_create_node, like ext4.
1247    pub fn new_uncached(
1248        ino: ino_t,
1249        ops: impl Into<Box<dyn FsNodeOps>>,
1250        fs: &FileSystemHandle,
1251        info: FsNodeInfo,
1252    ) -> FsNodeHandle {
1253        let ops = ops.into();
1254        FsNodeHandle::new(Self::new_internal(ino, ops, Arc::downgrade(fs), info).into())
1255    }
1256
1257    fn new_internal(
1258        ino: ino_t,
1259        ops: Box<dyn FsNodeOps>,
1260        fs: Weak<FileSystem>,
1261        info: FsNodeInfo,
1262    ) -> Self {
1263        // Allow the FsNodeOps to populate initial info.
1264        let info = {
1265            let mut info = info;
1266            ops.initial_info(&mut info);
1267            info
1268        };
1269
1270        // The linter will fail in non test mode as it will not see the lock check.
1271        #[allow(clippy::let_and_return)]
1272        {
1273            let result = Self {
1274                ino,
1275                ops,
1276                fs,
1277                info: RwLock::new(info),
1278                append_lock: Default::default(),
1279                rare_data: Default::default(),
1280                write_guard_state: Default::default(),
1281                fsverity: Mutex::new(FsVerityState::None),
1282                security_state: Default::default(),
1283            };
1284            #[cfg(any(test, debug_assertions))]
1285            {
1286                #[allow(
1287                    clippy::undocumented_unsafe_blocks,
1288                    reason = "Force documented unsafe blocks in Starnix"
1289                )]
1290                let locked = unsafe { Unlocked::new() };
1291                let _l1 = result.append_lock.read_for_lock_ordering(locked);
1292                let _l2 = result.info.read();
1293                let _l3 = result.write_guard_state.lock();
1294                let _l4 = result.fsverity.lock();
1295                // TODO(https://fxbug.dev/367585803): Add lock levels to SELinux implementation.
1296                let _l5 = result.security_state.lock();
1297            }
1298            result
1299        }
1300    }
1301
1302    pub fn fs(&self) -> FileSystemHandle {
1303        self.fs.upgrade().expect("FileSystem did not live long enough")
1304    }
1305
1306    pub fn ops(&self) -> &dyn FsNodeOps {
1307        self.ops.as_ref()
1308    }
1309
1310    /// Returns an error if this node is encrypted and locked. Does not require
1311    /// fetch_and_refresh_info because FS_IOC_SET_ENCRYPTION_POLICY updates info and once a node is
1312    /// encrypted, it remains encrypted forever.
1313    pub fn fail_if_locked(&self, _current_task: &CurrentTask) -> Result<(), Errno> {
1314        let node_info = self.info();
1315        if let Some(wrapping_key_id) = node_info.wrapping_key_id {
1316            let crypt_service = self.fs().crypt_service().ok_or_else(|| errno!(ENOKEY))?;
1317            if !crypt_service.contains_key(EncryptionKeyId::from(wrapping_key_id)) {
1318                return error!(ENOKEY);
1319            }
1320        }
1321        Ok(())
1322    }
1323
1324    /// Returns the `FsNode`'s `FsNodeOps` as a `&T`, or `None` if the downcast fails.
1325    pub fn downcast_ops<T>(&self) -> Option<&T>
1326    where
1327        T: 'static,
1328    {
1329        self.ops().as_any().downcast_ref::<T>()
1330    }
1331
1332    pub fn on_file_closed(&self, file: &FileObjectState) {
1333        if let Some(rare_data) = self.rare_data.get() {
1334            let mut flock_info = rare_data.flock_info.lock();
1335            // This function will drop the flock from `file` because the `WeakFileHandle` for
1336            // `file` will no longer upgrade to an `FileHandle`.
1337            flock_info.retain(|_| true);
1338        }
1339        self.record_lock_release(RecordLockOwner::FileObject(file.id));
1340    }
1341
1342    pub fn record_lock(
1343        &self,
1344        locked: &mut Locked<Unlocked>,
1345        current_task: &CurrentTask,
1346        file: &FileObject,
1347        cmd: RecordLockCommand,
1348        flock: uapi::flock,
1349    ) -> Result<Option<uapi::flock>, Errno> {
1350        self.ensure_rare_data().record_locks.lock(locked, current_task, file, cmd, flock)
1351    }
1352
1353    /// Release all record locks acquired by the given owner.
1354    pub fn record_lock_release(&self, owner: RecordLockOwner) {
1355        if let Some(rare_data) = self.rare_data.get() {
1356            rare_data.record_locks.release_locks(owner);
1357        }
1358    }
1359
1360    pub fn create_dir_entry_ops(&self) -> Box<dyn DirEntryOps> {
1361        self.ops().create_dir_entry_ops()
1362    }
1363
1364    pub fn create_file_ops<L>(
1365        &self,
1366        locked: &mut Locked<L>,
1367        current_task: &CurrentTask,
1368        flags: OpenFlags,
1369    ) -> Result<Box<dyn FileOps>, Errno>
1370    where
1371        L: LockEqualOrBefore<FileOpsCore>,
1372    {
1373        let locked = locked.cast_locked::<FileOpsCore>();
1374        self.ops().create_file_ops(locked, self, current_task, flags)
1375    }
1376
1377    pub fn open(
1378        &self,
1379        locked: &mut Locked<Unlocked>,
1380        current_task: &CurrentTask,
1381        namespace_node: &NamespaceNode,
1382        flags: OpenFlags,
1383        access_check: AccessCheck,
1384    ) -> Result<Box<dyn FileOps>, Errno> {
1385        // If O_PATH is set, there is no need to create a real FileOps because
1386        // most file operations are disabled.
1387        if flags.contains(OpenFlags::PATH) {
1388            return Ok(Box::new(OPathOps::new()));
1389        }
1390
1391        let access = access_check.resolve(flags);
1392        if access.is_nontrivial() {
1393            if flags.contains(OpenFlags::NOATIME) {
1394                self.check_o_noatime_allowed(current_task)?;
1395            }
1396
1397            // `flags` doesn't contain any information about the EXEC permission. Instead the syscalls
1398            // used to execute a file (`sys_execve` and `sys_execveat`) call `open()` with the EXEC
1399            // permission request in `access`.
1400            let mut permission_flags = PermissionFlags::from(access);
1401
1402            // The `APPEND` flag exists only in `flags`, to modify the behaviour of
1403            // `PermissionFlags::WRITE`
1404            if flags.contains(OpenFlags::APPEND) {
1405                permission_flags |= security::PermissionFlags::APPEND;
1406            }
1407
1408            // TODO: https://fxbug.dev/455782510 - Remove this once non-open() checks are fully
1409            // enforced.
1410            permission_flags |= security::PermissionFlags::FOR_OPEN;
1411
1412            self.check_access(
1413                locked,
1414                current_task,
1415                &namespace_node.mount,
1416                permission_flags,
1417                CheckAccessReason::InternalPermissionChecks,
1418                namespace_node,
1419            )?;
1420        }
1421
1422        let (mode, rdev) = {
1423            // Don't hold the info lock while calling into open_device or self.ops().
1424            // TODO: The mode and rdev are immutable and shouldn't require a lock to read.
1425            let info = self.info();
1426            (info.mode, info.rdev)
1427        };
1428
1429        match mode & FileMode::IFMT {
1430            FileMode::IFCHR => {
1431                if namespace_node.mount.flags().contains(MountFlags::NODEV) {
1432                    return error!(EACCES);
1433                }
1434                current_task.kernel().open_device(
1435                    locked,
1436                    current_task,
1437                    namespace_node,
1438                    flags,
1439                    rdev,
1440                    DeviceMode::Char,
1441                )
1442            }
1443            FileMode::IFBLK => {
1444                if namespace_node.mount.flags().contains(MountFlags::NODEV) {
1445                    return error!(EACCES);
1446                }
1447                current_task.kernel().open_device(
1448                    locked,
1449                    current_task,
1450                    namespace_node,
1451                    flags,
1452                    rdev,
1453                    DeviceMode::Block,
1454                )
1455            }
1456            FileMode::IFIFO => Pipe::open(locked, current_task, self.fifo(current_task), flags),
1457            // UNIX domain sockets can't be opened.
1458            FileMode::IFSOCK => error!(ENXIO),
1459            _ => self.create_file_ops(locked, current_task, flags),
1460        }
1461    }
1462
1463    pub fn lookup<L>(
1464        &self,
1465        locked: &mut Locked<L>,
1466        current_task: &CurrentTask,
1467        mount: &MountInfo,
1468        name: &FsStr,
1469    ) -> Result<FsNodeHandle, Errno>
1470    where
1471        L: LockEqualOrBefore<FileOpsCore>,
1472    {
1473        self.check_access(
1474            locked,
1475            current_task,
1476            mount,
1477            Access::EXEC,
1478            CheckAccessReason::InternalPermissionChecks,
1479            &[Auditable::Name(name), std::panic::Location::caller().into()],
1480        )?;
1481        let locked = locked.cast_locked::<FileOpsCore>();
1482        self.ops().lookup(locked, self, current_task, name)
1483    }
1484
1485    pub fn create_node<L>(
1486        &self,
1487        locked: &mut Locked<L>,
1488        current_task: &CurrentTask,
1489        mount: &MountInfo,
1490        name: &FsStr,
1491        mut mode: FileMode,
1492        dev: DeviceType,
1493        mut owner: FsCred,
1494    ) -> Result<FsNodeHandle, Errno>
1495    where
1496        L: LockEqualOrBefore<FileOpsCore>,
1497    {
1498        assert!(mode & FileMode::IFMT != FileMode::EMPTY, "mknod called without node type.");
1499        self.check_access(
1500            locked,
1501            current_task,
1502            mount,
1503            Access::WRITE,
1504            CheckAccessReason::InternalPermissionChecks,
1505            security::Auditable::Name(name),
1506        )?;
1507        if mode.is_reg() {
1508            security::check_fs_node_create_access(current_task, self, mode, name)?;
1509        } else if mode.is_dir() {
1510            // Even though the man page for mknod(2) says that mknod "cannot be used to create
1511            // directories" in starnix the mkdir syscall (`sys_mkdirat`) ends up calling
1512            //create_node.
1513            security::check_fs_node_mkdir_access(current_task, self, mode, name)?;
1514        } else if !matches!(
1515            mode.fmt(),
1516            FileMode::IFCHR | FileMode::IFBLK | FileMode::IFIFO | FileMode::IFSOCK
1517        ) {
1518            security::check_fs_node_mknod_access(current_task, self, mode, name, dev)?;
1519        }
1520
1521        self.update_metadata_for_child(current_task, &mut mode, &mut owner);
1522
1523        let new_node = if mode.is_dir() {
1524            let locked = locked.cast_locked::<FileOpsCore>();
1525            self.ops().mkdir(locked, self, current_task, name, mode, owner)?
1526        } else {
1527            // https://man7.org/linux/man-pages/man2/mknod.2.html says on error EPERM:
1528            //
1529            //   mode requested creation of something other than a regular
1530            //   file, FIFO (named pipe), or UNIX domain socket, and the
1531            //   caller is not privileged (Linux: does not have the
1532            //   CAP_MKNOD capability); also returned if the filesystem
1533            //   containing pathname does not support the type of node
1534            //   requested.
1535            if !matches!(mode.fmt(), FileMode::IFREG | FileMode::IFIFO | FileMode::IFSOCK) {
1536                security::check_task_capable(current_task, CAP_MKNOD)?;
1537            }
1538            let locked = locked.cast_locked::<FileOpsCore>();
1539            self.ops().mknod(locked, self, current_task, name, mode, dev, owner)?
1540        };
1541
1542        self.init_new_node_security_on_create(locked, current_task, &new_node, name)?;
1543
1544        Ok(new_node)
1545    }
1546
1547    pub fn create_symlink<L>(
1548        &self,
1549        locked: &mut Locked<L>,
1550        current_task: &CurrentTask,
1551        mount: &MountInfo,
1552        name: &FsStr,
1553        target: &FsStr,
1554        owner: FsCred,
1555    ) -> Result<FsNodeHandle, Errno>
1556    where
1557        L: LockEqualOrBefore<FileOpsCore>,
1558    {
1559        self.check_access(
1560            locked,
1561            current_task,
1562            mount,
1563            Access::WRITE,
1564            CheckAccessReason::InternalPermissionChecks,
1565            security::Auditable::Name(name),
1566        )?;
1567        security::check_fs_node_symlink_access(current_task, self, name, target)?;
1568
1569        let locked = locked.cast_locked::<FileOpsCore>();
1570        let new_node =
1571            self.ops().create_symlink(locked, self, current_task, name, target, owner)?;
1572
1573        self.init_new_node_security_on_create(locked, current_task, &new_node, name)?;
1574
1575        Ok(new_node)
1576    }
1577
1578    /// Requests that the LSM initialise a security label for the `new_node`, and optionally provide
1579    /// an extended attribute to write to the file to persist it.  If no LSM is enabled, no extended
1580    /// attribute returned, or if the filesystem does not support extended attributes, then the call
1581    /// returns success. All other failure modes return an `Errno` that should be early-returned.
1582    fn init_new_node_security_on_create<L>(
1583        &self,
1584        locked: &mut Locked<L>,
1585        current_task: &CurrentTask,
1586        new_node: &FsNode,
1587        name: &FsStr,
1588    ) -> Result<(), Errno>
1589    where
1590        L: LockEqualOrBefore<FileOpsCore>,
1591    {
1592        let locked = locked.cast_locked::<FileOpsCore>();
1593        security::fs_node_init_on_create(current_task, &new_node, self, name)?
1594            .map(|xattr| {
1595                match new_node.ops().set_xattr(
1596                    locked,
1597                    &new_node,
1598                    current_task,
1599                    xattr.name,
1600                    xattr.value.as_slice().into(),
1601                    XattrOp::Create,
1602                ) {
1603                    Err(e) => {
1604                        if e.code == ENOTSUP {
1605                            // This should only occur if a task has an "fscreate" context set, and
1606                            // creates a new file in a filesystem that does not support xattrs.
1607                            Ok(())
1608                        } else {
1609                            Err(e)
1610                        }
1611                    }
1612                    result => result,
1613                }
1614            })
1615            .unwrap_or_else(|| Ok(()))
1616    }
1617
1618    pub fn create_tmpfile<L>(
1619        &self,
1620        locked: &mut Locked<L>,
1621        current_task: &CurrentTask,
1622        mount: &MountInfo,
1623        mut mode: FileMode,
1624        mut owner: FsCred,
1625        link_behavior: FsNodeLinkBehavior,
1626    ) -> Result<FsNodeHandle, Errno>
1627    where
1628        L: LockEqualOrBefore<FileOpsCore>,
1629    {
1630        self.check_access(
1631            locked,
1632            current_task,
1633            mount,
1634            Access::WRITE,
1635            CheckAccessReason::InternalPermissionChecks,
1636            security::Auditable::Location(std::panic::Location::caller()),
1637        )?;
1638        self.update_metadata_for_child(current_task, &mut mode, &mut owner);
1639        let node = self.ops().create_tmpfile(self, current_task, mode, owner)?;
1640        self.init_new_node_security_on_create(locked, current_task, &node, "".into())?;
1641        if link_behavior == FsNodeLinkBehavior::Disallowed {
1642            node.ensure_rare_data().link_behavior.set(link_behavior).unwrap();
1643        }
1644        Ok(node)
1645    }
1646
1647    // This method does not attempt to update the atime of the node.
1648    // Use `NamespaceNode::readlink` which checks the mount flags and updates the atime accordingly.
1649    pub fn readlink<L>(
1650        &self,
1651        locked: &mut Locked<L>,
1652        current_task: &CurrentTask,
1653    ) -> Result<SymlinkTarget, Errno>
1654    where
1655        L: LockEqualOrBefore<FileOpsCore>,
1656    {
1657        // TODO: 378864856 - Is there a permission check here other than security checks?
1658        security::check_fs_node_read_link_access(current_task, self)?;
1659        self.ops().readlink(locked.cast_locked::<FileOpsCore>(), self, current_task)
1660    }
1661
1662    pub fn link<L>(
1663        &self,
1664        locked: &mut Locked<L>,
1665        current_task: &CurrentTask,
1666        mount: &MountInfo,
1667        name: &FsStr,
1668        child: &FsNodeHandle,
1669    ) -> Result<FsNodeHandle, Errno>
1670    where
1671        L: LockEqualOrBefore<FileOpsCore>,
1672    {
1673        self.check_access(
1674            locked,
1675            current_task,
1676            mount,
1677            Access::WRITE,
1678            CheckAccessReason::InternalPermissionChecks,
1679            security::Auditable::Location(std::panic::Location::caller()),
1680        )?;
1681
1682        if child.is_dir() {
1683            return error!(EPERM);
1684        }
1685
1686        if let Some(child_rare_data) = child.rare_data.get() {
1687            if matches!(child_rare_data.link_behavior.get(), Some(FsNodeLinkBehavior::Disallowed)) {
1688                return error!(ENOENT);
1689            }
1690        }
1691
1692        // Check that `current_task` has permission to create the hard link.
1693        //
1694        // See description of /proc/sys/fs/protected_hardlinks in
1695        // https://man7.org/linux/man-pages/man5/proc.5.html for details of the security
1696        // vulnerabilities.
1697        //
1698        let (child_uid, mode) = {
1699            let info = child.info();
1700            (info.uid, info.mode)
1701        };
1702        // Check that the the filesystem UID of the calling process (`current_task`) is the same as
1703        // the UID of the existing file. The check can be bypassed if the calling process has
1704        // `CAP_FOWNER` capability.
1705        if child_uid != current_task.current_creds().fsuid
1706            && !security::is_task_capable_noaudit(current_task, CAP_FOWNER)
1707        {
1708            // If current_task is not the user of the existing file, it needs to have read and write
1709            // access to the existing file.
1710            child
1711                .check_access(
1712                    locked,
1713                    current_task,
1714                    mount,
1715                    Access::READ | Access::WRITE,
1716                    CheckAccessReason::InternalPermissionChecks,
1717                    security::Auditable::Name(name),
1718                )
1719                .map_err(|e| {
1720                    // `check_access(..)` returns EACCES when the access rights doesn't match - change
1721                    // it to EPERM to match Linux standards.
1722                    if e == EACCES { errno!(EPERM) } else { e }
1723                })?;
1724            // There are also security issues that may arise when users link to setuid, setgid, or
1725            // special files.
1726            if mode.contains(FileMode::ISGID | FileMode::IXGRP) {
1727                return error!(EPERM);
1728            };
1729            if mode.contains(FileMode::ISUID) {
1730                return error!(EPERM);
1731            };
1732            if !mode.contains(FileMode::IFREG) {
1733                return error!(EPERM);
1734            };
1735        }
1736
1737        security::check_fs_node_link_access(current_task, self, child)?;
1738
1739        let locked = locked.cast_locked::<FileOpsCore>();
1740        self.ops().link(locked, self, current_task, name, child)?;
1741        Ok(child.clone())
1742    }
1743
1744    pub fn unlink<L>(
1745        &self,
1746        locked: &mut Locked<L>,
1747        current_task: &CurrentTask,
1748        mount: &MountInfo,
1749        name: &FsStr,
1750        child: &FsNodeHandle,
1751    ) -> Result<(), Errno>
1752    where
1753        L: LockEqualOrBefore<FileOpsCore>,
1754    {
1755        // The user must be able to search and write to the directory.
1756        self.check_access(
1757            locked,
1758            current_task,
1759            mount,
1760            Access::EXEC | Access::WRITE,
1761            CheckAccessReason::InternalPermissionChecks,
1762            security::Auditable::Name(name),
1763        )?;
1764        self.check_sticky_bit(current_task, child)?;
1765        if child.is_dir() {
1766            security::check_fs_node_rmdir_access(current_task, self, child, name)?;
1767        } else {
1768            security::check_fs_node_unlink_access(current_task, self, child, name)?;
1769        }
1770        let locked = locked.cast_locked::<FileOpsCore>();
1771        self.ops().unlink(locked, self, current_task, name, child)?;
1772        self.update_ctime_mtime();
1773        Ok(())
1774    }
1775
1776    pub fn truncate<L>(
1777        &self,
1778        locked: &mut Locked<L>,
1779        current_task: &CurrentTask,
1780        mount: &MountInfo,
1781        length: u64,
1782    ) -> Result<(), Errno>
1783    where
1784        L: LockEqualOrBefore<BeforeFsNodeAppend>,
1785    {
1786        self.truncate_with_strategy(locked, RealAppendLockStrategy {}, current_task, mount, length)
1787    }
1788
1789    pub fn truncate_with_strategy<L, M>(
1790        &self,
1791        locked: &mut Locked<L>,
1792        strategy: impl AppendLockStrategy<M>,
1793        current_task: &CurrentTask,
1794        mount: &MountInfo,
1795        length: u64,
1796    ) -> Result<(), Errno>
1797    where
1798        M: LockEqualOrBefore<FileOpsCore>,
1799        L: LockEqualOrBefore<M>,
1800    {
1801        if self.is_dir() {
1802            return error!(EISDIR);
1803        }
1804
1805        {
1806            let locked = locked.cast_locked::<M>();
1807            self.check_access(
1808                locked,
1809                current_task,
1810                mount,
1811                Access::WRITE,
1812                CheckAccessReason::InternalPermissionChecks,
1813                security::Auditable::Location(std::panic::Location::caller()),
1814            )?;
1815        }
1816
1817        self.truncate_common(locked, strategy, current_task, length)
1818    }
1819
1820    /// Avoid calling this method directly. You probably want to call `FileObject::ftruncate()`
1821    /// which will also perform all file-descriptor based verifications.
1822    pub fn ftruncate<L>(
1823        &self,
1824        locked: &mut Locked<L>,
1825        current_task: &CurrentTask,
1826        length: u64,
1827    ) -> Result<(), Errno>
1828    where
1829        L: LockEqualOrBefore<BeforeFsNodeAppend>,
1830    {
1831        if self.is_dir() {
1832            // When truncating a file descriptor, if the descriptor references a directory,
1833            // return EINVAL. This is different from the truncate() syscall which returns EISDIR.
1834            //
1835            // See https://man7.org/linux/man-pages/man2/ftruncate.2.html#ERRORS
1836            return error!(EINVAL);
1837        }
1838
1839        // For ftruncate, we do not need to check that the file node is writable.
1840        //
1841        // The file object that calls this method must verify that the file was opened
1842        // with write permissions.
1843        //
1844        // This matters because a file could be opened with O_CREAT + O_RDWR + 0444 mode.
1845        // The file descriptor returned from such an operation can be truncated, even
1846        // though the file was created with a read-only mode.
1847        //
1848        // See https://man7.org/linux/man-pages/man2/ftruncate.2.html#DESCRIPTION
1849        // which says:
1850        //
1851        // "With ftruncate(), the file must be open for writing; with truncate(),
1852        // the file must be writable."
1853
1854        self.truncate_common(locked, RealAppendLockStrategy {}, current_task, length)
1855    }
1856
1857    // Called by `truncate` and `ftruncate` above.
1858    fn truncate_common<L, M>(
1859        &self,
1860        locked: &mut Locked<L>,
1861        strategy: impl AppendLockStrategy<M>,
1862        current_task: &CurrentTask,
1863        length: u64,
1864    ) -> Result<(), Errno>
1865    where
1866        M: LockEqualOrBefore<FileOpsCore>,
1867        L: LockEqualOrBefore<M>,
1868    {
1869        if length > MAX_LFS_FILESIZE as u64 {
1870            return error!(EINVAL);
1871        }
1872        {
1873            let locked = locked.cast_locked::<M>().cast_locked::<FileOpsCore>();
1874            if length > current_task.thread_group().get_rlimit(locked, Resource::FSIZE) {
1875                send_standard_signal(locked, current_task, SignalInfo::kernel(SIGXFSZ));
1876                return error!(EFBIG);
1877            }
1878        }
1879        let locked = locked.cast_locked::<M>();
1880        self.clear_suid_and_sgid_bits(locked, current_task)?;
1881        // We have to take the append lock since otherwise it would be possible to truncate and for
1882        // an append to continue using the old size.
1883        let (guard, locked) = strategy.lock(locked, current_task, self)?;
1884        self.ops().truncate(locked, &guard, self, current_task, length)?;
1885        self.update_ctime_mtime();
1886        Ok(())
1887    }
1888
1889    /// Avoid calling this method directly. You probably want to call `FileObject::fallocate()`
1890    /// which will also perform additional verifications.
1891    pub fn fallocate<L>(
1892        &self,
1893        locked: &mut Locked<L>,
1894        current_task: &CurrentTask,
1895        mode: FallocMode,
1896        offset: u64,
1897        length: u64,
1898    ) -> Result<(), Errno>
1899    where
1900        L: LockBefore<BeforeFsNodeAppend>,
1901    {
1902        self.fallocate_with_strategy(
1903            locked,
1904            RealAppendLockStrategy {},
1905            current_task,
1906            mode,
1907            offset,
1908            length,
1909        )
1910    }
1911
1912    pub fn fallocate_with_strategy<L, M>(
1913        &self,
1914        locked: &mut Locked<L>,
1915        strategy: impl AppendLockStrategy<M>,
1916        current_task: &CurrentTask,
1917        mode: FallocMode,
1918        offset: u64,
1919        length: u64,
1920    ) -> Result<(), Errno>
1921    where
1922        M: LockEqualOrBefore<FileOpsCore>,
1923        L: LockEqualOrBefore<M>,
1924    {
1925        let allocate_size = checked_add_offset_and_length(offset as usize, length as usize)
1926            .map_err(|_| errno!(EFBIG))? as u64;
1927        {
1928            let locked = locked.cast_locked::<M>().cast_locked::<FileOpsCore>();
1929            if allocate_size > current_task.thread_group().get_rlimit(locked, Resource::FSIZE) {
1930                send_standard_signal(locked, current_task, SignalInfo::kernel(SIGXFSZ));
1931                return error!(EFBIG);
1932            }
1933        }
1934
1935        let locked = locked.cast_locked::<M>();
1936        self.clear_suid_and_sgid_bits(locked, current_task)?;
1937        let (guard, locked) = strategy.lock(locked, current_task, self)?;
1938        self.ops().allocate(locked, &guard, self, current_task, mode, offset, length)?;
1939        self.update_ctime_mtime();
1940        Ok(())
1941    }
1942
1943    fn update_metadata_for_child(
1944        &self,
1945        current_task: &CurrentTask,
1946        mode: &mut FileMode,
1947        owner: &mut FsCred,
1948    ) {
1949        // The setgid bit on a directory causes the gid to be inherited by new children and the
1950        // setgid bit to be inherited by new child directories. See SetgidDirTest in gvisor.
1951        {
1952            let self_info = self.info();
1953            if self_info.mode.contains(FileMode::ISGID) {
1954                owner.gid = self_info.gid;
1955                if mode.is_dir() {
1956                    *mode |= FileMode::ISGID;
1957                }
1958            }
1959        }
1960
1961        if !mode.is_dir() {
1962            // https://man7.org/linux/man-pages/man7/inode.7.html says:
1963            //
1964            //   For an executable file, the set-group-ID bit causes the
1965            //   effective group ID of a process that executes the file to change
1966            //   as described in execve(2).
1967            //
1968            // We need to check whether the current task has permission to create such a file.
1969            // See a similar check in `FsNode::chmod`.
1970            let current_creds = current_task.current_creds();
1971            if owner.gid != current_creds.fsgid
1972                && !current_creds.is_in_group(owner.gid)
1973                && !security::is_task_capable_noaudit(current_task, CAP_FOWNER)
1974            {
1975                *mode &= !FileMode::ISGID;
1976            }
1977        }
1978    }
1979
1980    /// Checks if O_NOATIME is allowed,
1981    pub fn check_o_noatime_allowed(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1982        // Per open(2),
1983        //
1984        //   O_NOATIME (since Linux 2.6.8)
1985        //      ...
1986        //
1987        //      This flag can be employed only if one of the following
1988        //      conditions is true:
1989        //
1990        //      *  The effective UID of the process matches the owner UID
1991        //         of the file.
1992        //
1993        //      *  The calling process has the CAP_FOWNER capability in
1994        //         its user namespace and the owner UID of the file has a
1995        //         mapping in the namespace.
1996        if current_task.current_creds().fsuid != self.info().uid {
1997            security::check_task_capable(current_task, CAP_FOWNER)?;
1998        }
1999        Ok(())
2000    }
2001
2002    pub fn default_check_access_impl(
2003        &self,
2004        current_task: &CurrentTask,
2005        permission_flags: security::PermissionFlags,
2006        reason: CheckAccessReason,
2007        info: RwLockReadGuard<'_, FsNodeInfo>,
2008        audit_context: Auditable<'_>,
2009    ) -> Result<(), Errno> {
2010        let (node_uid, node_gid, mode) = (info.uid, info.gid, info.mode);
2011        std::mem::drop(info);
2012        if let CheckAccessReason::ChangeTimestamps { now } = reason {
2013            // To set the timestamps to the current time the caller must either have write access to
2014            // the file, be the file owner, or hold the CAP_DAC_OVERRIDE or CAP_FOWNER capability.
2015            // To set the timestamps to other values the caller must either be the file owner or hold
2016            // the CAP_FOWNER capability.
2017            if current_task.current_creds().fsuid == node_uid {
2018                return Ok(());
2019            }
2020            if now {
2021                if security::is_task_capable_noaudit(current_task, CAP_FOWNER) {
2022                    return Ok(());
2023                }
2024            } else {
2025                security::check_task_capable(current_task, CAP_FOWNER)?;
2026                return Ok(());
2027            }
2028        }
2029        check_access(self, current_task, permission_flags, node_uid, node_gid, mode)?;
2030        security::fs_node_permission(current_task, self, permission_flags, audit_context)
2031    }
2032
2033    /// Check whether the node can be accessed in the current context with the specified access
2034    /// flags (read, write, or exec). Accounts for capabilities and whether the current user is the
2035    /// owner or is in the file's group.
2036    pub fn check_access<'a, L>(
2037        &self,
2038        locked: &mut Locked<L>,
2039        current_task: &CurrentTask,
2040        mount: &MountInfo,
2041        access: impl Into<security::PermissionFlags>,
2042        reason: CheckAccessReason,
2043        audit_context: impl Into<security::Auditable<'a>>,
2044    ) -> Result<(), Errno>
2045    where
2046        L: LockEqualOrBefore<FileOpsCore>,
2047    {
2048        let mut permission_flags = access.into();
2049        if permission_flags.contains(security::PermissionFlags::WRITE) {
2050            mount.check_readonly_filesystem()?;
2051        }
2052        if permission_flags.contains(security::PermissionFlags::EXEC) && !self.is_dir() {
2053            mount.check_noexec_filesystem()?;
2054        }
2055        if reason == CheckAccessReason::Access {
2056            permission_flags |= PermissionFlags::ACCESS;
2057        }
2058        self.ops().check_access(
2059            locked.cast_locked::<FileOpsCore>(),
2060            self,
2061            current_task,
2062            permission_flags,
2063            &self.info,
2064            reason,
2065            audit_context.into(),
2066        )
2067    }
2068
2069    /// Check whether the stick bit, `S_ISVTX`, forbids the `current_task` from removing the given
2070    /// `child`. If this node has `S_ISVTX`, then either the child must be owned by the `fsuid` of
2071    /// `current_task` or `current_task` must have `CAP_FOWNER`.
2072    pub fn check_sticky_bit(
2073        &self,
2074        current_task: &CurrentTask,
2075        child: &FsNodeHandle,
2076    ) -> Result<(), Errno> {
2077        if self.info().mode.contains(FileMode::ISVTX)
2078            && child.info().uid != current_task.current_creds().fsuid
2079        {
2080            security::check_task_capable(current_task, CAP_FOWNER)?;
2081        }
2082        Ok(())
2083    }
2084
2085    pub fn fifo(&self, current_task: &CurrentTask) -> &PipeHandle {
2086        assert!(self.is_fifo());
2087        self.ensure_rare_data().ensure_fifo(current_task)
2088    }
2089
2090    /// Returns the UNIX domain socket bound to this node, if any.
2091    pub fn bound_socket(&self) -> Option<&SocketHandle> {
2092        if let Some(rare_data) = self.rare_data.get() { rare_data.bound_socket.get() } else { None }
2093    }
2094
    /// Register the provided socket as the UNIX domain socket bound to this node.
    ///
    /// # Panics
    ///
    /// It is a fatal error to call this method again if it has already been called on this node:
    /// the assertion below fails when a socket is already registered.
    pub fn set_bound_socket(&self, socket: SocketHandle) {
        // `set` reports failure when a socket was registered earlier.
        assert!(self.ensure_rare_data().bound_socket.set(socket).is_ok());
    }
2101
2102    pub fn update_attributes<L, F>(
2103        &self,
2104        locked: &mut Locked<L>,
2105        current_task: &CurrentTask,
2106        mutator: F,
2107    ) -> Result<(), Errno>
2108    where
2109        L: LockEqualOrBefore<FileOpsCore>,
2110        F: FnOnce(&mut FsNodeInfo) -> Result<(), Errno>,
2111    {
2112        let mut info = self.info.write();
2113        let mut new_info = info.clone();
2114        mutator(&mut new_info)?;
2115
2116        let new_access = new_info.mode.user_access()
2117            | new_info.mode.group_access()
2118            | new_info.mode.other_access();
2119
2120        if new_access.intersects(Access::EXEC) {
2121            let write_guard_state = self.write_guard_state.lock();
2122            if let Ok(seals) = write_guard_state.get_seals() {
2123                if seals.contains(SealFlags::NO_EXEC) {
2124                    return error!(EPERM);
2125                }
2126            }
2127        }
2128
2129        // `mutator`s should not update the attribute change time, which is managed by this API.
2130        assert_eq!(info.time_status_change, new_info.time_status_change);
2131        if *info == new_info {
2132            return Ok(());
2133        }
2134        new_info.time_status_change = utc::utc_now();
2135
2136        let mut has = zxio_node_attr_has_t { ..Default::default() };
2137        has.modification_time = info.time_modify != new_info.time_modify;
2138        has.access_time = info.time_access != new_info.time_access;
2139        has.mode = info.mode != new_info.mode;
2140        has.uid = info.uid != new_info.uid;
2141        has.gid = info.gid != new_info.gid;
2142        has.rdev = info.rdev != new_info.rdev;
2143        has.casefold = info.casefold != new_info.casefold;
2144        has.wrapping_key_id = info.wrapping_key_id != new_info.wrapping_key_id;
2145
2146        security::check_fs_node_setattr_access(current_task, &self, &has)?;
2147
2148        // Call `update_attributes(..)` to persist the changes for the following fields.
2149        if has.modification_time
2150            || has.access_time
2151            || has.mode
2152            || has.uid
2153            || has.gid
2154            || has.rdev
2155            || has.casefold
2156            || has.wrapping_key_id
2157        {
2158            let locked = locked.cast_locked::<FileOpsCore>();
2159            self.ops().update_attributes(locked, self, current_task, &new_info, has)?;
2160        }
2161
2162        *info = new_info;
2163        Ok(())
2164    }
2165
2166    /// Set the permissions on this FsNode to the given values.
2167    ///
2168    /// Does not change the IFMT of the node.
2169    pub fn chmod<L>(
2170        &self,
2171        locked: &mut Locked<L>,
2172        current_task: &CurrentTask,
2173        mount: &MountInfo,
2174        mut mode: FileMode,
2175    ) -> Result<(), Errno>
2176    where
2177        L: LockEqualOrBefore<FileOpsCore>,
2178    {
2179        mount.check_readonly_filesystem()?;
2180        self.update_attributes(locked, current_task, |info| {
2181            let current_creds = current_task.current_creds();
2182            if info.uid != current_creds.euid {
2183                security::check_task_capable(current_task, CAP_FOWNER)?;
2184            } else if info.gid != current_creds.egid
2185                && !current_creds.is_in_group(info.gid)
2186                && mode.intersects(FileMode::ISGID)
2187                && !security::is_task_capable_noaudit(current_task, CAP_FOWNER)
2188            {
2189                mode &= !FileMode::ISGID;
2190            }
2191            info.chmod(mode);
2192            Ok(())
2193        })
2194    }
2195
2196    /// Sets the owner and/or group on this FsNode.
2197    pub fn chown<L>(
2198        &self,
2199        locked: &mut Locked<L>,
2200        current_task: &CurrentTask,
2201        mount: &MountInfo,
2202        owner: Option<uid_t>,
2203        group: Option<gid_t>,
2204    ) -> Result<(), Errno>
2205    where
2206        L: LockEqualOrBefore<FileOpsCore>,
2207    {
2208        mount.check_readonly_filesystem()?;
2209        self.update_attributes(locked, current_task, |info| {
2210            if security::is_task_capable_noaudit(current_task, CAP_CHOWN) {
2211                info.chown(owner, group);
2212                return Ok(());
2213            }
2214
2215            // Nobody can change the owner.
2216            if let Some(uid) = owner {
2217                if info.uid != uid {
2218                    return error!(EPERM);
2219                }
2220            }
2221
2222            let (euid, is_in_group) = {
2223                let current_creds = current_task.current_creds();
2224                (current_creds.euid, group.map(|gid| current_creds.is_in_group(gid)))
2225            };
2226
2227            // The owner can change the group.
2228            if info.uid == euid {
2229                // To a group that it belongs.
2230                if let Some(is_in_group) = is_in_group {
2231                    if !is_in_group {
2232                        return error!(EPERM);
2233                    }
2234                }
2235                info.chown(None, group);
2236                return Ok(());
2237            }
2238
2239            // Any other user can call chown(file, -1, -1)
2240            if owner.is_some() || group.is_some() {
2241                return error!(EPERM);
2242            }
2243
2244            // But not on set-user-ID or set-group-ID files.
2245            // If we were to chown them, they would drop the set-ID bit.
2246            if info.mode.is_reg()
2247                && (info.mode.contains(FileMode::ISUID)
2248                    || info.mode.contains(FileMode::ISGID | FileMode::IXGRP))
2249            {
2250                return error!(EPERM);
2251            }
2252
2253            info.chown(None, None);
2254            Ok(())
2255        })
2256    }
2257
2258    /// Forcefully change the owner and group of this node.
2259    ///
2260    /// # Safety
2261    ///
2262    /// This function skips all the security checks and just updates the owner and group. Also, does
2263    /// not check if the filesystem is read-only and does not update the attribute change time.
2264    ///
2265    /// This function is used to set the owner and group of /proc/pid to the credentials of the
2266    /// current task. Please consider carefully whether you want to use this function for another
2267    /// purpose.
2268    pub unsafe fn force_chown(&self, creds: FsCred) {
2269        self.update_info(|info| {
2270            info.chown(Some(creds.uid), Some(creds.gid));
2271        });
2272    }
2273
2274    /// Whether this node is a regular file.
2275    pub fn is_reg(&self) -> bool {
2276        self.info().mode.is_reg()
2277    }
2278
2279    /// Whether this node is a directory.
2280    pub fn is_dir(&self) -> bool {
2281        self.info().mode.is_dir()
2282    }
2283
2284    /// Whether this node is a socket.
2285    pub fn is_sock(&self) -> bool {
2286        self.info().mode.is_sock()
2287    }
2288
2289    /// Whether this node is a FIFO.
2290    pub fn is_fifo(&self) -> bool {
2291        self.info().mode.is_fifo()
2292    }
2293
2294    /// Whether this node is a symbolic link.
2295    pub fn is_lnk(&self) -> bool {
2296        self.info().mode.is_lnk()
2297    }
2298
2299    pub fn dev(&self) -> DeviceType {
2300        self.fs().dev_id
2301    }
2302
2303    pub fn stat<L>(
2304        &self,
2305        locked: &mut Locked<L>,
2306        current_task: &CurrentTask,
2307    ) -> Result<uapi::stat, Errno>
2308    where
2309        L: LockEqualOrBefore<FileOpsCore>,
2310    {
2311        security::check_fs_node_getattr_access(current_task, self)?;
2312
2313        let info = self.fetch_and_refresh_info(locked, current_task)?;
2314
2315        let time_to_kernel_timespec_pair = |t| {
2316            let timespec { tv_sec, tv_nsec } = timespec_from_time(t);
2317            let time = tv_sec.try_into().map_err(|_| errno!(EINVAL))?;
2318            let time_nsec = tv_nsec.try_into().map_err(|_| errno!(EINVAL))?;
2319            Ok((time, time_nsec))
2320        };
2321
2322        let (st_atime, st_atime_nsec) = time_to_kernel_timespec_pair(info.time_access)?;
2323        let (st_mtime, st_mtime_nsec) = time_to_kernel_timespec_pair(info.time_modify)?;
2324        let (st_ctime, st_ctime_nsec) = time_to_kernel_timespec_pair(info.time_status_change)?;
2325
2326        Ok(uapi::stat {
2327            st_dev: self.dev().bits(),
2328            st_ino: self.ino,
2329            st_nlink: info.link_count.try_into().map_err(|_| errno!(EINVAL))?,
2330            st_mode: info.mode.bits(),
2331            st_uid: info.uid,
2332            st_gid: info.gid,
2333            st_rdev: info.rdev.bits(),
2334            st_size: info.size.try_into().map_err(|_| errno!(EINVAL))?,
2335            st_blksize: info.blksize.try_into().map_err(|_| errno!(EINVAL))?,
2336            st_blocks: info.blocks.try_into().map_err(|_| errno!(EINVAL))?,
2337            st_atime,
2338            st_atime_nsec,
2339            st_mtime,
2340            st_mtime_nsec,
2341            st_ctime,
2342            st_ctime_nsec,
2343            ..Default::default()
2344        })
2345    }
2346
2347    /// Returns the current size of the file.  This is inherently racy, so any caller that
2348    /// might want to use the value returned should hold their own locks if necessary.  For
2349    /// example, if using the value here to implement append (which is the case at the time
2350    /// of writing this comment), locks must be held to prevent the file size being changed
2351    /// concurrently.
2352    // TODO(https://fxbug.dev/454730248): This is probably the wrong way to implement O_APPEND.
2353    pub fn get_size<L>(
2354        &self,
2355        locked: &mut Locked<L>,
2356        current_task: &CurrentTask,
2357    ) -> Result<usize, Errno>
2358    where
2359        L: LockEqualOrBefore<FileOpsCore>,
2360    {
2361        self.ops().get_size(locked.cast_locked::<FileOpsCore>(), self, current_task)
2362    }
2363
2364    fn statx_timestamp_from_time(time: UtcInstant) -> statx_timestamp {
2365        let nanos = time.into_nanos();
2366        statx_timestamp {
2367            tv_sec: nanos / NANOS_PER_SECOND,
2368            tv_nsec: (nanos % NANOS_PER_SECOND) as u32,
2369            ..Default::default()
2370        }
2371    }
2372
2373    pub fn statx<L>(
2374        &self,
2375        locked: &mut Locked<L>,
2376        current_task: &CurrentTask,
2377        flags: StatxFlags,
2378        mask: u32,
2379    ) -> Result<statx, Errno>
2380    where
2381        L: LockEqualOrBefore<FileOpsCore>,
2382    {
2383        security::check_fs_node_getattr_access(current_task, self)?;
2384
2385        // Ignore mask for now and fill in all of the fields.
2386        let info = if flags.contains(StatxFlags::AT_STATX_DONT_SYNC) {
2387            self.info()
2388        } else {
2389            self.fetch_and_refresh_info(locked, current_task)?
2390        };
2391        if mask & STATX__RESERVED == STATX__RESERVED {
2392            return error!(EINVAL);
2393        }
2394
2395        track_stub!(TODO("https://fxbug.dev/302594110"), "statx attributes");
2396        let stx_mnt_id = 0;
2397        let mut stx_attributes = 0;
2398        let stx_attributes_mask = STATX_ATTR_VERITY as u64;
2399
2400        if matches!(*self.fsverity.lock(), FsVerityState::FsVerity) {
2401            stx_attributes |= STATX_ATTR_VERITY as u64;
2402        }
2403
2404        Ok(statx {
2405            stx_mask: STATX_NLINK
2406                | STATX_UID
2407                | STATX_GID
2408                | STATX_ATIME
2409                | STATX_MTIME
2410                | STATX_CTIME
2411                | STATX_INO
2412                | STATX_SIZE
2413                | STATX_BLOCKS
2414                | STATX_BASIC_STATS,
2415            stx_blksize: info.blksize.try_into().map_err(|_| errno!(EINVAL))?,
2416            stx_attributes,
2417            stx_nlink: info.link_count.try_into().map_err(|_| errno!(EINVAL))?,
2418            stx_uid: info.uid,
2419            stx_gid: info.gid,
2420            stx_mode: info.mode.bits().try_into().map_err(|_| errno!(EINVAL))?,
2421            stx_ino: self.ino,
2422            stx_size: info.size.try_into().map_err(|_| errno!(EINVAL))?,
2423            stx_blocks: info.blocks.try_into().map_err(|_| errno!(EINVAL))?,
2424            stx_attributes_mask,
2425            stx_ctime: Self::statx_timestamp_from_time(info.time_status_change),
2426            stx_mtime: Self::statx_timestamp_from_time(info.time_modify),
2427            stx_atime: Self::statx_timestamp_from_time(info.time_access),
2428
2429            stx_rdev_major: info.rdev.major(),
2430            stx_rdev_minor: info.rdev.minor(),
2431
2432            stx_dev_major: self.fs().dev_id.major(),
2433            stx_dev_minor: self.fs().dev_id.minor(),
2434            stx_mnt_id,
2435            ..Default::default()
2436        })
2437    }
2438
2439    /// Checks whether `current_task` has capabilities required for the specified `access` to the
2440    /// extended attribute `name`.
2441    fn check_xattr_access<L>(
2442        &self,
2443        locked: &mut Locked<L>,
2444        current_task: &CurrentTask,
2445        mount: &MountInfo,
2446        name: &FsStr,
2447        access: Access,
2448    ) -> Result<(), Errno>
2449    where
2450        L: LockEqualOrBefore<FileOpsCore>,
2451    {
2452        assert!(access == Access::READ || access == Access::WRITE);
2453
2454        let enodata_if_read =
2455            |e: Errno| if access == Access::READ && e.code == EPERM { errno!(ENODATA) } else { e };
2456
2457        // man xattr(7) describes the different access checks applied to each extended attribute
2458        // namespace.
2459        if name.starts_with(XATTR_USER_PREFIX.to_bytes()) {
2460            {
2461                let info = self.info();
2462                if !info.mode.is_reg() && !info.mode.is_dir() {
2463                    return Err(enodata_if_read(errno!(EPERM)));
2464                }
2465            }
2466
2467            // TODO: https://fxbug.dev/460734830 - Perform capability check(s) if file has sticky
2468            // bit set.
2469
2470            self.check_access(
2471                locked,
2472                current_task,
2473                mount,
2474                access,
2475                CheckAccessReason::InternalPermissionChecks,
2476                security::Auditable::Name(name),
2477            )?;
2478        } else if name.starts_with(XATTR_TRUSTED_PREFIX.to_bytes()) {
2479            // Trusted extended attributes require `CAP_SYS_ADMIN` to read or write.
2480            security::check_task_capable(current_task, CAP_SYS_ADMIN).map_err(enodata_if_read)?;
2481        } else if name.starts_with(XATTR_SYSTEM_PREFIX.to_bytes()) {
2482            // System extended attributes have attribute-specific access policy.
2483            // TODO: https://fxbug.dev/460734830 -  Revise how system extended attributes are
2484            // access-controlled.
2485            security::check_task_capable(current_task, CAP_SYS_ADMIN).map_err(enodata_if_read)?;
2486        } else if name.starts_with(XATTR_SECURITY_PREFIX.to_bytes()) {
2487            if access == Access::WRITE {
2488                // Writes require `CAP_SYS_ADMIN`, unless the LSM owning `name` specifies to skip.
2489                if !security::fs_node_xattr_skipcap(current_task, name) {
2490                    security::check_task_capable(current_task, CAP_SYS_ADMIN)
2491                        .map_err(enodata_if_read)?;
2492                }
2493            }
2494        } else {
2495            panic!("Unknown extended attribute prefix: {}", name);
2496        }
2497        Ok(())
2498    }
2499
2500    pub fn get_xattr<L>(
2501        &self,
2502        locked: &mut Locked<L>,
2503        current_task: &CurrentTask,
2504        mount: &MountInfo,
2505        name: &FsStr,
2506        max_size: usize,
2507    ) -> Result<ValueOrSize<FsString>, Errno>
2508    where
2509        L: LockEqualOrBefore<FileOpsCore>,
2510    {
2511        // Perform discretionary capability & access checks appropriate to the xattr prefix.
2512        self.check_xattr_access(locked, current_task, mount, name, Access::READ)?;
2513
2514        // LSM access checks must be performed after discretionary checks.
2515        security::check_fs_node_getxattr_access(current_task, self, name)?;
2516
2517        if name.starts_with(XATTR_SECURITY_PREFIX.to_bytes()) {
2518            // If the attribute is in the security.* domain then allow the LSM to handle the
2519            // request, or to delegate to `FsNodeOps::get_xattr()`.
2520            security::fs_node_getsecurity(locked, current_task, self, name, max_size)
2521        } else {
2522            // If the attribute is outside security.*, delegate the read to the `FsNodeOps`.
2523            self.ops().get_xattr(
2524                locked.cast_locked::<FileOpsCore>(),
2525                self,
2526                current_task,
2527                name,
2528                max_size,
2529            )
2530        }
2531    }
2532
2533    pub fn set_xattr<L>(
2534        &self,
2535        locked: &mut Locked<L>,
2536        current_task: &CurrentTask,
2537        mount: &MountInfo,
2538        name: &FsStr,
2539        value: &FsStr,
2540        op: XattrOp,
2541    ) -> Result<(), Errno>
2542    where
2543        L: LockEqualOrBefore<FileOpsCore>,
2544    {
2545        // Perform discretionary capability & access checks appropriate to the xattr prefix.
2546        self.check_xattr_access(locked, current_task, mount, name, Access::WRITE)?;
2547
2548        // LSM access checks must be performed after discretionary checks.
2549        security::check_fs_node_setxattr_access(current_task, self, name, value, op)?;
2550
2551        if name.starts_with(XATTR_SECURITY_PREFIX.to_bytes()) {
2552            // If the attribute is in the security.* domain then allow the LSM to handle the
2553            // request, or to delegate to `FsNodeOps::set_xattr()`.
2554            security::fs_node_setsecurity(locked, current_task, self, name, value, op)
2555        } else {
2556            // If the attribute is outside security.*, delegate the read to the `FsNodeOps`.
2557            self.ops().set_xattr(
2558                locked.cast_locked::<FileOpsCore>(),
2559                self,
2560                current_task,
2561                name,
2562                value,
2563                op,
2564            )
2565        }
2566    }
2567
2568    pub fn remove_xattr<L>(
2569        &self,
2570        locked: &mut Locked<L>,
2571        current_task: &CurrentTask,
2572        mount: &MountInfo,
2573        name: &FsStr,
2574    ) -> Result<(), Errno>
2575    where
2576        L: LockEqualOrBefore<FileOpsCore>,
2577    {
2578        // Perform discretionary capability & access checks appropriate to the xattr prefix.
2579        self.check_xattr_access(locked, current_task, mount, name, Access::WRITE)?;
2580
2581        // LSM access checks must be performed after discretionary checks.
2582        security::check_fs_node_removexattr_access(current_task, self, name)?;
2583        self.ops().remove_xattr(locked.cast_locked::<FileOpsCore>(), self, current_task, name)
2584    }
2585
2586    pub fn list_xattrs<L>(
2587        &self,
2588        locked: &mut Locked<L>,
2589        current_task: &CurrentTask,
2590        max_size: usize,
2591    ) -> Result<ValueOrSize<Vec<FsString>>, Errno>
2592    where
2593        L: LockEqualOrBefore<FileOpsCore>,
2594    {
2595        security::check_fs_node_listxattr_access(current_task, self)?;
2596        Ok(self
2597            .ops()
2598            .list_xattrs(locked.cast_locked::<FileOpsCore>(), self, current_task, max_size)?
2599            .map(|mut v| {
2600                // Extended attributes may be listed even if the caller would not be able to read
2601                // (or modify) the attribute's value.
2602                // trusted.* attributes are only accessible with CAP_SYS_ADMIN and are omitted by
2603                // `listxattr()` unless the caller holds CAP_SYS_ADMIN.
2604                if !security::is_task_capable_noaudit(current_task, CAP_SYS_ADMIN) {
2605                    v.retain(|name| !name.starts_with(XATTR_TRUSTED_PREFIX.to_bytes()));
2606                }
2607                v
2608            }))
2609    }
2610
    /// Returns current `FsNodeInfo`.
    ///
    /// The result is a read guard on the node's info lock, so the info stays read-locked for as
    /// long as the returned guard is alive. No refresh is requested from the `FsNodeOps` here.
    pub fn info(&self) -> RwLockReadGuard<'_, FsNodeInfo> {
        self.info.read()
    }
2615
2616    /// Refreshes the `FsNodeInfo` if necessary and returns a read guard.
2617    pub fn fetch_and_refresh_info<L>(
2618        &self,
2619        locked: &mut Locked<L>,
2620        current_task: &CurrentTask,
2621    ) -> Result<RwLockReadGuard<'_, FsNodeInfo>, Errno>
2622    where
2623        L: LockEqualOrBefore<FileOpsCore>,
2624    {
2625        self.ops().fetch_and_refresh_info(
2626            locked.cast_locked::<FileOpsCore>(),
2627            self,
2628            current_task,
2629            &self.info,
2630        )
2631    }
2632
2633    pub fn update_info<F, T>(&self, mutator: F) -> T
2634    where
2635        F: FnOnce(&mut FsNodeInfo) -> T,
2636    {
2637        let mut info = self.info.write();
2638        mutator(&mut info)
2639    }
2640
2641    /// Clear the SUID and SGID bits unless the `current_task` has `CAP_FSETID`
2642    pub fn clear_suid_and_sgid_bits<L>(
2643        &self,
2644        locked: &mut Locked<L>,
2645        current_task: &CurrentTask,
2646    ) -> Result<(), Errno>
2647    where
2648        L: LockEqualOrBefore<FileOpsCore>,
2649    {
2650        if !security::is_task_capable_noaudit(current_task, CAP_FSETID) {
2651            self.update_attributes(locked, current_task, |info| {
2652                info.clear_suid_and_sgid_bits();
2653                Ok(())
2654            })?;
2655        }
2656        Ok(())
2657    }
2658
2659    /// Update the ctime and mtime of a file to now.
2660    pub fn update_ctime_mtime(&self) {
2661        if self.fs().manages_timestamps() {
2662            return;
2663        }
2664        self.update_info(|info| {
2665            let now = utc::utc_now();
2666            info.time_status_change = now;
2667            info.time_modify = now;
2668        });
2669    }
2670
2671    /// Update the ctime of a file to now.
2672    pub fn update_ctime(&self) {
2673        if self.fs().manages_timestamps() {
2674            return;
2675        }
2676        self.update_info(|info| {
2677            let now = utc::utc_now();
2678            info.time_status_change = now;
2679        });
2680    }
2681
2682    /// Update the atime and mtime if the `current_task` has write access, is the file owner, or
2683    /// holds either the CAP_DAC_OVERRIDE or CAP_FOWNER capability.
2684    pub fn update_atime_mtime<L>(
2685        &self,
2686        locked: &mut Locked<L>,
2687        current_task: &CurrentTask,
2688        mount: &MountInfo,
2689        atime: TimeUpdateType,
2690        mtime: TimeUpdateType,
2691    ) -> Result<(), Errno>
2692    where
2693        L: LockEqualOrBefore<FileOpsCore>,
2694    {
2695        // If the filesystem is read-only, this always fail.
2696        mount.check_readonly_filesystem()?;
2697
2698        let now = matches!((atime, mtime), (TimeUpdateType::Now, TimeUpdateType::Now));
2699        self.check_access(
2700            locked,
2701            current_task,
2702            mount,
2703            Access::WRITE,
2704            CheckAccessReason::ChangeTimestamps { now },
2705            security::Auditable::Location(std::panic::Location::caller()),
2706        )?;
2707
2708        if !matches!((atime, mtime), (TimeUpdateType::Omit, TimeUpdateType::Omit)) {
2709            // This function is called by `utimes(..)` which will update the access and
2710            // modification time. We need to call `update_attributes()` to update the mtime of
2711            // filesystems that manages file timestamps.
2712            self.update_attributes(locked, current_task, |info| {
2713                let now = utc::utc_now();
2714                let get_time = |time: TimeUpdateType| match time {
2715                    TimeUpdateType::Now => Some(now),
2716                    TimeUpdateType::Time(t) => Some(t),
2717                    TimeUpdateType::Omit => None,
2718                };
2719                if let Some(time) = get_time(atime) {
2720                    info.time_access = time;
2721                }
2722                if let Some(time) = get_time(mtime) {
2723                    info.time_modify = time;
2724                }
2725                Ok(())
2726            })?;
2727        }
2728        Ok(())
2729    }
2730
2731    /// Returns a string describing this `FsNode` in the format used by "/proc/../fd" for anonymous
2732    /// file descriptors. By default this is in the form:
2733    ///   <class>:[<node_id>]
2734    /// though `FsNodeOps` may customize this as required.
2735    pub fn internal_name(&self) -> FsString {
2736        if let Some(name) = self.ops().internal_name(self) {
2737            return name;
2738        };
2739        let class = if self.is_sock() {
2740            "socket"
2741        } else if self.is_fifo() {
2742            "pipe"
2743        } else {
2744            "file"
2745        };
2746        format!("{}:[{}]", class, self.ino).into()
2747    }
2748
    /// The key used to identify this node in the file system's node cache.
    ///
    /// For many file systems, this will be the same as the inode number. However, some file
    /// systems, such as FUSE, sometimes use different `node_key` and inode numbers.
    ///
    /// The value is obtained from the node's `FsNodeOps` on every call.
    pub fn node_key(&self) -> ino_t {
        self.ops().node_key(self)
    }
2756
    /// Returns this node's `FsNodeRareData`, creating a default-initialized, boxed instance the
    /// first time it is requested.
    fn ensure_rare_data(&self) -> &FsNodeRareData {
        self.rare_data.get_or_init(|| Box::new(FsNodeRareData::default()))
    }
2760
    /// Returns the set of watchers for this node.
    ///
    /// Only call this function if you require this node to actually store a list of watchers. If
    /// you just wish to notify any watchers that might exist, please use `notify` instead.
    ///
    /// The watchers live in the node's rare data, which this call initializes if needed.
    pub fn ensure_watchers(&self) -> &inotify::InotifyWatchers {
        &self.ensure_rare_data().watchers
    }
2768
2769    /// Notify the watchers of the given event.
2770    pub fn notify(
2771        &self,
2772        event_mask: InotifyMask,
2773        cookie: u32,
2774        name: &FsStr,
2775        mode: FileMode,
2776        is_dead: bool,
2777    ) {
2778        if let Some(rare_data) = self.rare_data.get() {
2779            rare_data.watchers.notify(event_mask, cookie, name, mode, is_dead);
2780        }
2781    }
2782
2783    /// Calls through to the filesystem to enable fs-verity on this file.
2784    pub fn enable_fsverity<L>(
2785        &self,
2786        locked: &mut Locked<L>,
2787        current_task: &CurrentTask,
2788        descriptor: &fsverity_descriptor,
2789    ) -> Result<(), Errno>
2790    where
2791        L: LockEqualOrBefore<FileOpsCore>,
2792    {
2793        let locked = locked.cast_locked::<FileOpsCore>();
2794        self.ops().enable_fsverity(locked, self, current_task, descriptor)
2795    }
2796}
2797
2798impl std::fmt::Debug for FsNode {
2799    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2800        f.debug_struct("FsNode")
2801            .field("fs", &self.fs().name())
2802            .field("info", &*self.info())
2803            .field("ops_ty", &self.ops().type_name())
2804            .finish()
2805    }
2806}
2807
2808impl Releasable for FsNode {
2809    type Context<'a> = CurrentTaskAndLocked<'a>;
2810
2811    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
2812        let (locked, current_task) = context;
2813        if let Some(fs) = self.fs.upgrade() {
2814            fs.remove_node(&self);
2815        }
2816        if let Err(err) = self.ops.forget(
2817            locked.cast_locked::<FileOpsCore>(),
2818            current_task,
2819            self.info.into_inner(),
2820        ) {
2821            log_error!("Error on FsNodeOps::forget: {err:?}");
2822        }
2823    }
2824}
2825
2826fn check_access(
2827    fs_node: &FsNode,
2828    current_task: &CurrentTask,
2829    permission_flags: security::PermissionFlags,
2830    node_uid: uid_t,
2831    node_gid: gid_t,
2832    mode: FileMode,
2833) -> Result<(), Errno> {
2834    // Determine which of the access bits apply to the `current_task`.
2835    let (fsuid, is_in_group) = {
2836        let current_creds = current_task.current_creds();
2837        (current_creds.fsuid, current_creds.is_in_group(node_gid))
2838    };
2839    let granted = if fsuid == node_uid {
2840        mode.user_access()
2841    } else if is_in_group {
2842        mode.group_access()
2843    } else {
2844        mode.other_access()
2845    };
2846
2847    let access = permission_flags.as_access();
2848    if granted.contains(access) {
2849        return Ok(());
2850    }
2851
2852    // Callers with CAP_DAC_READ_SEARCH override can read files & directories, and traverse
2853    // directories to which they lack permission.
2854    let mut requested = access & !granted;
2855
2856    // If this check was triggered by `access()`, or a variant, then check for a `dontaudit`
2857    // statement for the `audit_access` permission for this caller & file.
2858    let have_dont_audit = OnceBool::new();
2859    let has_capability = move |current_task, capability| {
2860        let dont_audit = have_dont_audit.get_or_init(|| {
2861            permission_flags.contains(PermissionFlags::ACCESS)
2862                && security::has_dontaudit_access(current_task, fs_node)
2863        });
2864        if dont_audit {
2865            security::is_task_capable_noaudit(current_task, capability)
2866        } else {
2867            security::check_task_capable(current_task, capability).is_ok()
2868        }
2869    };
2870
2871    // CAP_DAC_READ_SEARCH allows bypass of read checks, and directory traverse (eXecute) checks.
2872    let dac_read_search_access =
2873        if mode.is_dir() { Access::READ | Access::EXEC } else { Access::READ };
2874    if dac_read_search_access.intersects(requested)
2875        && has_capability(current_task, CAP_DAC_READ_SEARCH)
2876    {
2877        requested.remove(dac_read_search_access);
2878    }
2879    if requested.is_empty() {
2880        return Ok(());
2881    }
2882
2883    // CAP_DAC_OVERRIDE allows bypass of all checks (though see the comment for file-execute).
2884    let mut dac_override_access = Access::READ | Access::WRITE;
2885    dac_override_access |= if mode.is_dir() {
2886        Access::EXEC
2887    } else {
2888        // File execute access checks may not be bypassed unless at least one executable bit is set.
2889        (mode.user_access() | mode.group_access() | mode.other_access()) & Access::EXEC
2890    };
2891    if dac_override_access.intersects(requested) && has_capability(current_task, CAP_DAC_OVERRIDE) {
2892        requested.remove(dac_override_access);
2893    }
2894    if requested.is_empty() {
2895        return Ok(());
2896    }
2897
2898    return error!(EACCES);
2899}
2900
2901#[cfg(test)]
2902mod tests {
2903    use super::*;
2904    use crate::device::mem::mem_device_init;
2905    use crate::testing::*;
2906    use crate::vfs::buffers::VecOutputBuffer;
2907    use starnix_uapi::auth::Credentials;
2908    use starnix_uapi::file_mode::mode;
2909
2910    #[::fuchsia::test]
2911    async fn open_device_file() {
2912        spawn_kernel_and_run(async |locked, current_task| {
2913            mem_device_init(locked, &*current_task).expect("mem_device_init");
2914
2915            // Create a device file that points to the `zero` device (which is automatically
2916            // registered in the kernel).
2917            current_task
2918                .fs()
2919                .root()
2920                .create_node(
2921                    locked,
2922                    &current_task,
2923                    "zero".into(),
2924                    mode!(IFCHR, 0o666),
2925                    DeviceType::ZERO,
2926                )
2927                .expect("create_node");
2928
2929            const CONTENT_LEN: usize = 10;
2930            let mut buffer = VecOutputBuffer::new(CONTENT_LEN);
2931
2932            // Read from the zero device.
2933            let device_file = current_task
2934                .open_file(locked, "zero".into(), OpenFlags::RDONLY)
2935                .expect("open device file");
2936            device_file.read(locked, &current_task, &mut buffer).expect("read from zero");
2937
2938            // Assert the contents.
2939            assert_eq!(&[0; CONTENT_LEN], buffer.data());
2940        })
2941        .await;
2942    }
2943
2944    #[::fuchsia::test]
2945    async fn node_info_is_reflected_in_stat() {
2946        spawn_kernel_and_run(async |locked, current_task| {
2947            // Create a node.
2948            let node = &current_task
2949                .fs()
2950                .root()
2951                .create_node(
2952                    locked,
2953                    &current_task,
2954                    "zero".into(),
2955                    FileMode::IFCHR,
2956                    DeviceType::ZERO,
2957                )
2958                .expect("create_node")
2959                .entry
2960                .node;
2961            node.update_info(|info| {
2962                info.mode = FileMode::IFSOCK;
2963                info.size = 1;
2964                info.blocks = 2;
2965                info.blksize = 4;
2966                info.uid = 9;
2967                info.gid = 10;
2968                info.link_count = 11;
2969                info.time_status_change = UtcInstant::from_nanos(1);
2970                info.time_access = UtcInstant::from_nanos(2);
2971                info.time_modify = UtcInstant::from_nanos(3);
2972                info.rdev = DeviceType::new(13, 13);
2973            });
2974            let stat = node.stat(locked, &current_task).expect("stat");
2975
2976            assert_eq!(stat.st_mode, FileMode::IFSOCK.bits());
2977            assert_eq!(stat.st_size, 1);
2978            assert_eq!(stat.st_blksize, 4);
2979            assert_eq!(stat.st_blocks, 2);
2980            assert_eq!(stat.st_uid, 9);
2981            assert_eq!(stat.st_gid, 10);
2982            assert_eq!(stat.st_nlink, 11);
2983            assert_eq!(stat.st_ctime, 0);
2984            assert_eq!(stat.st_ctime_nsec, 1);
2985            assert_eq!(stat.st_atime, 0);
2986            assert_eq!(stat.st_atime_nsec, 2);
2987            assert_eq!(stat.st_mtime, 0);
2988            assert_eq!(stat.st_mtime_nsec, 3);
2989            assert_eq!(stat.st_rdev, DeviceType::new(13, 13).bits());
2990        })
2991        .await;
2992    }
2993
2994    #[::fuchsia::test]
2995    fn test_flock_operation() {
2996        assert!(FlockOperation::from_flags(0).is_err());
2997        assert!(FlockOperation::from_flags(u32::MAX).is_err());
2998
2999        let operation1 = FlockOperation::from_flags(LOCK_SH).expect("from_flags");
3000        assert!(!operation1.is_unlock());
3001        assert!(!operation1.is_lock_exclusive());
3002        assert!(operation1.is_blocking());
3003
3004        let operation2 = FlockOperation::from_flags(LOCK_EX | LOCK_NB).expect("from_flags");
3005        assert!(!operation2.is_unlock());
3006        assert!(operation2.is_lock_exclusive());
3007        assert!(!operation2.is_blocking());
3008
3009        let operation3 = FlockOperation::from_flags(LOCK_UN).expect("from_flags");
3010        assert!(operation3.is_unlock());
3011        assert!(!operation3.is_lock_exclusive());
3012        assert!(operation3.is_blocking());
3013    }
3014
3015    #[::fuchsia::test]
3016    async fn test_check_access() {
3017        spawn_kernel_and_run(async |locked, current_task| {
3018            let mut creds = Credentials::with_ids(1, 2);
3019            creds.groups = vec![3, 4];
3020            current_task.set_creds(creds);
3021
3022            // Create a node.
3023            let node = &current_task
3024                .fs()
3025                .root()
3026                .create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceType::NONE)
3027                .expect("create_node")
3028                .entry
3029                .node;
3030            let check_access = |locked: &mut Locked<Unlocked>,
3031                                uid: uid_t,
3032                                gid: gid_t,
3033                                perm: u32,
3034                                access: Access| {
3035                node.update_info(|info| {
3036                    info.mode = mode!(IFREG, perm);
3037                    info.uid = uid;
3038                    info.gid = gid;
3039                });
3040                node.check_access(
3041                    locked,
3042                    &current_task,
3043                    &MountInfo::detached(),
3044                    access,
3045                    CheckAccessReason::InternalPermissionChecks,
3046                    security::Auditable::Location(std::panic::Location::caller()),
3047                )
3048            };
3049
3050            assert_eq!(check_access(locked, 0, 0, 0o700, Access::EXEC), error!(EACCES));
3051            assert_eq!(check_access(locked, 0, 0, 0o700, Access::READ), error!(EACCES));
3052            assert_eq!(check_access(locked, 0, 0, 0o700, Access::WRITE), error!(EACCES));
3053
3054            assert_eq!(check_access(locked, 0, 0, 0o070, Access::EXEC), error!(EACCES));
3055            assert_eq!(check_access(locked, 0, 0, 0o070, Access::READ), error!(EACCES));
3056            assert_eq!(check_access(locked, 0, 0, 0o070, Access::WRITE), error!(EACCES));
3057
3058            assert_eq!(check_access(locked, 0, 0, 0o007, Access::EXEC), Ok(()));
3059            assert_eq!(check_access(locked, 0, 0, 0o007, Access::READ), Ok(()));
3060            assert_eq!(check_access(locked, 0, 0, 0o007, Access::WRITE), Ok(()));
3061
3062            assert_eq!(check_access(locked, 1, 0, 0o700, Access::EXEC), Ok(()));
3063            assert_eq!(check_access(locked, 1, 0, 0o700, Access::READ), Ok(()));
3064            assert_eq!(check_access(locked, 1, 0, 0o700, Access::WRITE), Ok(()));
3065
3066            assert_eq!(check_access(locked, 1, 0, 0o100, Access::EXEC), Ok(()));
3067            assert_eq!(check_access(locked, 1, 0, 0o100, Access::READ), error!(EACCES));
3068            assert_eq!(check_access(locked, 1, 0, 0o100, Access::WRITE), error!(EACCES));
3069
3070            assert_eq!(check_access(locked, 1, 0, 0o200, Access::EXEC), error!(EACCES));
3071            assert_eq!(check_access(locked, 1, 0, 0o200, Access::READ), error!(EACCES));
3072            assert_eq!(check_access(locked, 1, 0, 0o200, Access::WRITE), Ok(()));
3073
3074            assert_eq!(check_access(locked, 1, 0, 0o400, Access::EXEC), error!(EACCES));
3075            assert_eq!(check_access(locked, 1, 0, 0o400, Access::READ), Ok(()));
3076            assert_eq!(check_access(locked, 1, 0, 0o400, Access::WRITE), error!(EACCES));
3077
3078            assert_eq!(check_access(locked, 0, 2, 0o700, Access::EXEC), error!(EACCES));
3079            assert_eq!(check_access(locked, 0, 2, 0o700, Access::READ), error!(EACCES));
3080            assert_eq!(check_access(locked, 0, 2, 0o700, Access::WRITE), error!(EACCES));
3081
3082            assert_eq!(check_access(locked, 0, 2, 0o070, Access::EXEC), Ok(()));
3083            assert_eq!(check_access(locked, 0, 2, 0o070, Access::READ), Ok(()));
3084            assert_eq!(check_access(locked, 0, 2, 0o070, Access::WRITE), Ok(()));
3085
3086            assert_eq!(check_access(locked, 0, 3, 0o070, Access::EXEC), Ok(()));
3087            assert_eq!(check_access(locked, 0, 3, 0o070, Access::READ), Ok(()));
3088            assert_eq!(check_access(locked, 0, 3, 0o070, Access::WRITE), Ok(()));
3089        })
3090        .await;
3091    }
3092
3093    #[::fuchsia::test]
3094    async fn set_security_xattr_fails_without_security_module_or_root() {
3095        spawn_kernel_and_run(async |locked, current_task| {
3096            let mut creds = Credentials::with_ids(1, 2);
3097            creds.groups = vec![3, 4];
3098            current_task.set_creds(creds);
3099
3100            // Create a node.
3101            let node = &current_task
3102                .fs()
3103                .root()
3104                .create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceType::NONE)
3105                .expect("create_node")
3106                .entry
3107                .node;
3108
3109            // Give read-write-execute access.
3110            node.update_info(|info| info.mode = mode!(IFREG, 0o777));
3111
3112            // Without a security module, and without CAP_SYS_ADMIN capabilities, setting the xattr
3113            // should fail.
3114            assert_eq!(
3115                node.set_xattr(
3116                    locked,
3117                    &current_task,
3118                    &MountInfo::detached(),
3119                    "security.name".into(),
3120                    "security_label".into(),
3121                    XattrOp::Create,
3122                ),
3123                error!(EPERM)
3124            );
3125        })
3126        .await;
3127    }
3128
3129    #[::fuchsia::test]
3130    async fn set_non_user_xattr_fails_without_security_module_or_root() {
3131        spawn_kernel_and_run(async |locked, current_task| {
3132            let mut creds = Credentials::with_ids(1, 2);
3133            creds.groups = vec![3, 4];
3134            current_task.set_creds(creds);
3135
3136            // Create a node.
3137            let node = &current_task
3138                .fs()
3139                .root()
3140                .create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceType::NONE)
3141                .expect("create_node")
3142                .entry
3143                .node;
3144
3145            // Give read-write-execute access.
3146            node.update_info(|info| info.mode = mode!(IFREG, 0o777));
3147
3148            // Without a security module, and without CAP_SYS_ADMIN capabilities, setting the xattr
3149            // should fail.
3150            assert_eq!(
3151                node.set_xattr(
3152                    locked,
3153                    &current_task,
3154                    &MountInfo::detached(),
3155                    "trusted.name".into(),
3156                    "some data".into(),
3157                    XattrOp::Create,
3158                ),
3159                error!(EPERM)
3160            );
3161        })
3162        .await;
3163    }
3164
3165    #[::fuchsia::test]
3166    async fn get_security_xattr_succeeds_without_read_access() {
3167        spawn_kernel_and_run(async |locked, current_task| {
3168            let mut creds = Credentials::with_ids(1, 2);
3169            creds.groups = vec![3, 4];
3170            current_task.set_creds(creds);
3171
3172            // Create a node.
3173            let node = &current_task
3174                .fs()
3175                .root()
3176                .create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceType::NONE)
3177                .expect("create_node")
3178                .entry
3179                .node;
3180
3181            // Only give read access to the root and give root access to the current task.
3182            node.update_info(|info| info.mode = mode!(IFREG, 0o100));
3183            current_task.set_creds(Credentials::with_ids(0, 0));
3184
3185            // Setting the label should succeed even without write access to the file.
3186            assert_eq!(
3187                node.set_xattr(
3188                    locked,
3189                    &current_task,
3190                    &MountInfo::detached(),
3191                    "security.name".into(),
3192                    "security_label".into(),
3193                    XattrOp::Create,
3194                ),
3195                Ok(())
3196            );
3197
3198            // Remove root access from the current task.
3199            current_task.set_creds(Credentials::with_ids(1, 1));
3200
3201            // Getting the label should succeed even without read access to the file.
3202            assert_eq!(
3203                node.get_xattr(
3204                    locked,
3205                    &current_task,
3206                    &MountInfo::detached(),
3207                    "security.name".into(),
3208                    4096
3209                ),
3210                Ok(ValueOrSize::Value("security_label".into()))
3211            );
3212        })
3213        .await;
3214    }
3215}