Skip to main content

starnix_core/vfs/
fs_node.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::device::DeviceMode;
6use crate::mm::PAGE_SIZE;
7use crate::security::{self, Auditable, PermissionFlags};
8use crate::signals::{SignalInfo, send_standard_signal};
9use crate::task::{CurrentTask, CurrentTaskAndLocked, WaitQueue, Waiter, register_delayed_release};
10use crate::time::utc;
11use crate::vfs::fsverity::FsVerityState;
12use crate::vfs::pipe::{Pipe, PipeHandle};
13use crate::vfs::rw_queue::{RwQueue, RwQueueReadGuard};
14use crate::vfs::socket::SocketHandle;
15use crate::vfs::{
16    DefaultDirEntryOps, DirEntryOps, FileObject, FileObjectState, FileOps, FileSystem,
17    FileSystemHandle, FileWriteGuardState, FsStr, FsString, MAX_LFS_FILESIZE, MountInfo,
18    NamespaceNode, OPathOps, RecordLockCommand, RecordLockOwner, RecordLocks, WeakFileHandle,
19    checked_add_offset_and_length, inotify,
20};
21use bitflags::bitflags;
22use fuchsia_runtime::UtcInstant;
23use linux_uapi::{XATTR_SECURITY_PREFIX, XATTR_SYSTEM_PREFIX, XATTR_TRUSTED_PREFIX};
24use once_cell::race::OnceBool;
25use smallvec::SmallVec;
26use starnix_crypt::EncryptionKeyId;
27use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
28use starnix_logging::{log_error, track_stub};
29use starnix_sync::{
30    BeforeFsNodeAppend, FileOpsCore, FsNodeAppend, LockBefore, LockEqualOrBefore, Locked, Mutex,
31    RwLock, RwLockReadGuard, Unlocked,
32};
33use starnix_types::ownership::{Releasable, ReleaseGuard};
34use starnix_types::time::{NANOS_PER_SECOND, timespec_from_time};
35use starnix_uapi::as_any::AsAny;
36use starnix_uapi::auth::{
37    CAP_CHOWN, CAP_DAC_OVERRIDE, CAP_DAC_READ_SEARCH, CAP_FOWNER, CAP_FSETID, CAP_MKNOD,
38    CAP_SYS_ADMIN, CAP_SYS_RESOURCE, FsCred, UserAndOrGroupId,
39};
40use starnix_uapi::device_id::DeviceId;
41use starnix_uapi::errors::{EACCES, ENOTSUP, EPERM, Errno};
42use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
43use starnix_uapi::inotify_mask::InotifyMask;
44use starnix_uapi::mount_flags::MountFlags;
45use starnix_uapi::open_flags::OpenFlags;
46use starnix_uapi::resource_limits::Resource;
47use starnix_uapi::seal_flags::SealFlags;
48use starnix_uapi::signals::SIGXFSZ;
49use starnix_uapi::{
50    FALLOC_FL_COLLAPSE_RANGE, FALLOC_FL_INSERT_RANGE, FALLOC_FL_KEEP_SIZE, FALLOC_FL_PUNCH_HOLE,
51    FALLOC_FL_UNSHARE_RANGE, FALLOC_FL_ZERO_RANGE, LOCK_EX, LOCK_NB, LOCK_SH, LOCK_UN,
52    STATX__RESERVED, STATX_ATIME, STATX_ATTR_VERITY, STATX_BASIC_STATS, STATX_BLOCKS, STATX_CTIME,
53    STATX_GID, STATX_INO, STATX_MTIME, STATX_NLINK, STATX_SIZE, STATX_UID, XATTR_USER_PREFIX,
54    errno, error, fsverity_descriptor, gid_t, ino_t, statx, statx_timestamp, timespec, uapi, uid_t,
55};
56use std::sync::atomic::Ordering;
57use std::sync::{Arc, OnceLock, Weak};
58use syncio::zxio_node_attr_has_t;
59
/// Whether a node may be linked into a directory.
///
/// The default is [`FsNodeLinkBehavior::Allowed`].
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq)]
pub enum FsNodeLinkBehavior {
    /// The node may be linked into a directory. This is the default.
    #[default]
    Allowed,
    /// The node must not be linked into a directory.
    Disallowed,
}
71
/// Evidence handed to `truncate`/`allocate` that the node's append lock is held.
pub enum AppendLockGuard<'a> {
    /// Owns a read guard on the `FsNodeAppend` queue.
    Read(RwQueueReadGuard<'a, FsNodeAppend>),
    /// Borrows a guard that some caller already holds.
    AlreadyLocked(&'a AppendLockGuard<'a>),
}
76
/// A way of obtaining an [`AppendLockGuard`] when starting from lock level `L`.
///
/// On success the implementation also returns a `Locked<FileOpsCore>` token for the
/// operations that follow.
pub trait AppendLockStrategy<L> {
    /// Helper method for acquiring append lock in `truncate`/`allocate`. Acquires the lock when it's not already acquired.
    fn lock<'a>(
        &'a self,
        locked: &'a mut Locked<L>,
        current_task: &CurrentTask,
        node: &'a FsNode,
    ) -> Result<(AppendLockGuard<'a>, &'a mut Locked<FileOpsCore>), Errno>;
}
86
87struct RealAppendLockStrategy {}
88
89impl AppendLockStrategy<BeforeFsNodeAppend> for RealAppendLockStrategy {
90    fn lock<'a>(
91        &'a self,
92        locked: &'a mut Locked<BeforeFsNodeAppend>,
93        current_task: &CurrentTask,
94        node: &'a FsNode,
95    ) -> Result<(AppendLockGuard<'a>, &'a mut Locked<FileOpsCore>), Errno> {
96        let (guard, new_locked) = node.ops().append_lock_read(locked, node, current_task)?;
97        Ok((AppendLockGuard::Read(guard), new_locked.cast_locked()))
98    }
99}
100
101pub struct AlreadyLockedAppendLockStrategy<'a> {
102    // Keep the reference to the guard, which will be returned in subsequent attempts to acquire this lock.
103    guard: &'a AppendLockGuard<'a>,
104}
105
106impl<'a> AlreadyLockedAppendLockStrategy<'a> {
107    pub fn new(guard: &'a AppendLockGuard<'a>) -> Self {
108        Self { guard }
109    }
110}
111
112impl AppendLockStrategy<FileOpsCore> for AlreadyLockedAppendLockStrategy<'_> {
113    fn lock<'a>(
114        &'a self,
115        locked: &'a mut Locked<FileOpsCore>,
116        _current_task: &CurrentTask,
117        _node: &'a FsNode,
118    ) -> Result<(AppendLockGuard<'a>, &'a mut Locked<FileOpsCore>), Errno> {
119        Ok((AppendLockGuard::AlreadyLocked(self.guard), locked.cast_locked::<FileOpsCore>()))
120    }
121}
122
/// A file system node: an inode number, the file-system-specific [`FsNodeOps`] that
/// implement its behavior, and the shared state (info, locks, security state) kept
/// alongside it.
pub struct FsNode {
    /// The inode number for this FsNode.
    pub ino: ino_t,

    /// The FsNodeOps for this FsNode.
    ///
    /// The FsNodeOps are implemented by the individual file systems to provide
    /// specific behaviors for this FsNode.
    ops: Box<dyn FsNodeOps>,

    /// The FileSystem that owns this FsNode's tree.
    ///
    /// Held weakly, so this reference alone does not keep the file system alive.
    fs: Weak<FileSystem>,

    /// A read/write queue (`RwQueue`) to synchronize append operations for this node.
    ///
    /// FileObjects writing with O_APPEND should grab a write() lock on this
    /// field to ensure they operate sequentially. FileObjects writing without
    /// O_APPEND should grab read() lock so that they can operate in parallel.
    pub append_lock: RwQueue<FsNodeAppend>,

    /// Mutable information about this node.
    ///
    /// This data is used to populate the uapi::stat structure.
    info: RwLock<FsNodeInfo>,

    /// Data associated with an FsNode that is rarely needed.
    ///
    /// Stored behind a `OnceLock<Box<..>>`, so it starts out unset.
    rare_data: OnceLock<Box<FsNodeRareData>>,

    /// Tracks lock state for this file.
    pub write_guard_state: Mutex<FileWriteGuardState>,

    /// Cached FsVerity state associated with this node.
    pub fsverity: Mutex<FsVerityState>,

    /// The security state associated with this node. Must always be acquired last
    /// relative to other `FsNode` locks.
    pub security_state: security::FsNodeState,
}
161
/// State that only some nodes ever need (FIFOs, bound sockets, advisory locks,
/// inotify watchers, ...), kept out of [`FsNode`] itself and stored in its
/// `rare_data` field.
#[derive(Default)]
struct FsNodeRareData {
    /// The pipe located at this node, if any.
    ///
    /// Used if, and only if, the node has a mode of FileMode::IFIFO.
    fifo: OnceLock<PipeHandle>,

    /// The UNIX domain socket bound to this node, if any.
    bound_socket: OnceLock<SocketHandle>,

    /// Advisory (`flock`) lock state for this node.
    ///
    /// No other lock on this object may be taken while this lock is held.
    flock_info: Mutex<FlockInfo>,

    /// Records locks associated with this node.
    record_locks: RecordLocks,

    /// Whether this node can be linked into a directory.
    ///
    /// Only set for nodes created with `O_TMPFILE`.
    link_behavior: OnceLock<FsNodeLinkBehavior>,

    /// Inotify watchers on this node. See inotify(7).
    watchers: inotify::InotifyWatchers,
}
188
189impl FsNodeRareData {
190    fn ensure_fifo(&self, current_task: &CurrentTask) -> &PipeHandle {
191        self.fifo.get_or_init(|| {
192            let mut default_pipe_capacity = (*PAGE_SIZE * 16) as usize;
193            if !security::is_task_capable_noaudit(current_task, CAP_SYS_RESOURCE) {
194                let kernel = current_task.kernel();
195                let max_size = kernel.system_limits.pipe_max_size.load(Ordering::Relaxed);
196                default_pipe_capacity = std::cmp::min(default_pipe_capacity, max_size);
197            }
198            Pipe::new(default_pipe_capacity)
199        })
200    }
201}
202
/// Release policy for [`FsNode`]: an uninhabited marker type that only exists to carry
/// the `ReleaserAction` implementation below.
pub enum FsNodeReleaserAction {}
impl ReleaserAction<FsNode> for FsNodeReleaserAction {
    /// Defers the actual release by handing the node to `register_delayed_release`.
    fn release(fs_node: ReleaseGuard<FsNode>) {
        register_delayed_release(fs_node);
    }
}
/// An [`FsNode`] wrapped so that it is released through [`FsNodeReleaserAction`].
pub type FsNodeReleaser = ObjectReleaser<FsNode, FsNodeReleaserAction>;
/// Shared, reference-counted handle to an [`FsNode`].
pub type FsNodeHandle = Arc<FsNodeReleaser>;
/// Weak counterpart of [`FsNodeHandle`].
pub type WeakFsNodeHandle = Weak<FsNodeReleaser>;
212
/// Mutable, stat-like information about a node (mode, ownership, sizes, timestamps).
///
/// `FsNode` keeps one of these behind a `RwLock` and uses it to populate the
/// `uapi::stat` structure.
#[derive(Debug, Default, Clone, PartialEq)]
pub struct FsNodeInfo {
    /// File mode bits. `chmod` only replaces the `FileMode::PERMISSIONS` part.
    pub mode: FileMode,
    /// Number of links; `FsNodeInfo::new` starts directories at 2 and everything else at 1.
    pub link_count: usize,
    /// Owning user id.
    pub uid: uid_t,
    /// Owning group id.
    pub gid: gid_t,
    pub rdev: DeviceId,
    pub size: usize,
    /// Block size; multiplied by `blocks` in `storage_size`.
    pub blksize: usize,
    /// Number of blocks; multiplied by `blksize` in `storage_size`.
    pub blocks: usize,
    pub time_status_change: UtcInstant,
    pub time_access: UtcInstant,
    pub time_modify: UtcInstant,
    pub casefold: bool,

    /// If this node is fscrypt encrypted, stores the id of the user wrapping key used to encrypt it.
    pub wrapping_key_id: Option<[u8; 16]>,

    /// Used to indicate to filesystems that manage timestamps that an access has occurred and to
    /// update the node's atime.
    /// This only impacts accesses within Starnix. Most Fuchsia programs are not expected to maintain
    /// access times. If the file handle is transferred out of Starnix, there may be inconsistencies.
    pub pending_time_access_update: bool,
}
237
238impl FsNodeInfo {
239    pub fn new(mode: FileMode, owner: FsCred) -> Self {
240        let now = utc::utc_now();
241        Self {
242            mode,
243            link_count: if mode.is_dir() { 2 } else { 1 },
244            uid: owner.uid,
245            gid: owner.gid,
246            blksize: DEFAULT_BYTES_PER_BLOCK,
247            time_status_change: now,
248            time_access: now,
249            time_modify: now,
250            ..Default::default()
251        }
252    }
253
254    pub fn storage_size(&self) -> usize {
255        self.blksize.saturating_mul(self.blocks)
256    }
257
258    pub fn chmod(&mut self, mode: FileMode) {
259        self.mode = (self.mode & !FileMode::PERMISSIONS) | (mode & FileMode::PERMISSIONS);
260    }
261
262    pub fn chown(&mut self, owner: Option<uid_t>, group: Option<gid_t>) {
263        if let Some(owner) = owner {
264            self.uid = owner;
265        }
266        if let Some(group) = group {
267            self.gid = group;
268        }
269        // Clear the setuid and setgid bits if the file is executable and a regular file.
270        if self.mode.is_reg() {
271            self.mode &= !FileMode::ISUID;
272            self.clear_sgid_bit();
273        }
274    }
275
276    fn clear_sgid_bit(&mut self) {
277        // If the group execute bit is not set, the setgid bit actually indicates mandatory
278        // locking and should not be cleared.
279        if self.mode.intersects(FileMode::IXGRP) {
280            self.mode &= !FileMode::ISGID;
281        }
282    }
283
284    fn clear_suid_and_sgid_bits(&mut self) {
285        self.mode &= !FileMode::ISUID;
286        self.clear_sgid_bit();
287    }
288
289    pub fn cred(&self) -> FsCred {
290        FsCred { uid: self.uid, gid: self.gid }
291    }
292
293    pub fn suid_and_sgid(
294        &self,
295        current_task: &CurrentTask,
296        fs_node: &FsNode,
297    ) -> Result<UserAndOrGroupId, Errno> {
298        let uid = self.mode.contains(FileMode::ISUID).then_some(self.uid);
299
300        // See <https://man7.org/linux/man-pages/man7/inode.7.html>:
301        //
302        //   For an executable file, the set-group-ID bit causes the
303        //   effective group ID of a process that executes the file to change
304        //   as described in execve(2).  For a file that does not have the
305        //   group execution bit (S_IXGRP) set, the set-group-ID bit indicates
306        //   mandatory file/record locking.
307        let gid = self.mode.contains(FileMode::ISGID | FileMode::IXGRP).then_some(self.gid);
308
309        let maybe_set_id = UserAndOrGroupId { uid, gid };
310        if maybe_set_id.is_some() {
311            // Check that uid and gid actually have execute access before
312            // returning them as the SUID or SGID.
313            check_access(
314                fs_node,
315                current_task,
316                security::PermissionFlags::EXEC,
317                self.uid,
318                self.gid,
319                self.mode,
320            )?;
321        }
322        Ok(maybe_set_id)
323    }
324}
325
326#[derive(Default)]
327struct FlockInfo {
328    /// Whether the node is currently locked. The meaning of the different values are:
329    /// - `None`: The node is not locked.
330    /// - `Some(false)`: The node is locked non exclusively.
331    /// - `Some(true)`: The node is locked exclusively.
332    locked_exclusive: Option<bool>,
333    /// The FileObject that hold the lock.
334    locking_handles: Vec<WeakFileHandle>,
335    /// The queue to notify process waiting on the lock.
336    wait_queue: WaitQueue,
337}
338
339impl FlockInfo {
340    /// Removes all file handle not holding `predicate` from the list of object holding the lock. If
341    /// this empties the list, unlocks the node and notifies all waiting processes.
342    pub fn retain<F>(&mut self, predicate: F)
343    where
344        F: Fn(&FileObject) -> bool,
345    {
346        if !self.locking_handles.is_empty() {
347            self.locking_handles
348                .retain(|w| if let Some(fh) = w.upgrade() { predicate(&fh) } else { false });
349            if self.locking_handles.is_empty() {
350                self.locked_exclusive = None;
351                self.wait_queue.notify_all();
352            }
353        }
354    }
355}
356
/// Default block size, in bytes, that `FsNodeInfo::new` stores in `FsNodeInfo::blksize`.
///
/// NOTE(review): on Linux it is `st_blocks`, not `st_blksize`, that is expressed in
/// 512-byte units; confirm how this constant is meant to relate to the two fields.
pub const DEFAULT_BYTES_PER_BLOCK: usize = 512;
359
360pub struct FlockOperation {
361    operation: u32,
362}
363
364impl FlockOperation {
365    pub fn from_flags(operation: u32) -> Result<Self, Errno> {
366        if operation & !(LOCK_SH | LOCK_EX | LOCK_UN | LOCK_NB) != 0 {
367            return error!(EINVAL);
368        }
369        if [LOCK_SH, LOCK_EX, LOCK_UN].iter().filter(|&&o| operation & o == o).count() != 1 {
370            return error!(EINVAL);
371        }
372        Ok(Self { operation })
373    }
374
375    pub fn is_unlock(&self) -> bool {
376        self.operation & LOCK_UN > 0
377    }
378
379    pub fn is_lock_exclusive(&self) -> bool {
380        self.operation & LOCK_EX > 0
381    }
382
383    pub fn is_blocking(&self) -> bool {
384        self.operation & LOCK_NB == 0
385    }
386}
387
388impl FileObject {
389    /// Advisory locking.
390    ///
391    /// See flock(2).
392    pub fn flock(
393        &self,
394        locked: &mut Locked<Unlocked>,
395        current_task: &CurrentTask,
396        operation: FlockOperation,
397    ) -> Result<(), Errno> {
398        if self.flags().contains(OpenFlags::PATH) {
399            return error!(EBADF);
400        }
401        loop {
402            let mut flock_info = self.name.entry.node.ensure_rare_data().flock_info.lock();
403            if operation.is_unlock() {
404                flock_info.retain(|fh| !std::ptr::eq(fh, self));
405                return Ok(());
406            }
407            // Operation is a locking operation.
408            // 1. File is not locked
409            if flock_info.locked_exclusive.is_none() {
410                flock_info.locked_exclusive = Some(operation.is_lock_exclusive());
411                flock_info.locking_handles.push(self.weak_handle.clone());
412                return Ok(());
413            }
414
415            let file_lock_is_exclusive = flock_info.locked_exclusive == Some(true);
416            let fd_has_lock = flock_info
417                .locking_handles
418                .iter()
419                .find_map(|w| {
420                    w.upgrade().and_then(|fh| {
421                        if std::ptr::eq(&fh as &FileObject, self) { Some(()) } else { None }
422                    })
423                })
424                .is_some();
425
426            // 2. File is locked, but fd already have a lock
427            if fd_has_lock {
428                if operation.is_lock_exclusive() == file_lock_is_exclusive {
429                    // Correct lock is already held, return.
430                    return Ok(());
431                } else {
432                    // Incorrect lock is held. Release the lock and loop back to try to reacquire
433                    // it. flock doesn't guarantee atomic lock type switching.
434                    flock_info.retain(|fh| !std::ptr::eq(fh, self));
435                    continue;
436                }
437            }
438
439            // 3. File is locked, and fd doesn't have a lock.
440            if !file_lock_is_exclusive && !operation.is_lock_exclusive() {
441                // The lock is not exclusive, let's grab it.
442                flock_info.locking_handles.push(self.weak_handle.clone());
443                return Ok(());
444            }
445
446            // 4. The operation cannot be done at this time.
447            if !operation.is_blocking() {
448                return error!(EAGAIN);
449            }
450
451            // Register a waiter to be notified when the lock is released. Release the lock on
452            // FlockInfo, and wait.
453            let waiter = Waiter::new();
454            flock_info.wait_queue.wait_async(&waiter);
455            std::mem::drop(flock_info);
456            waiter.wait(locked, current_task)?;
457        }
458    }
459}
460
461// The inner mod is required because bitflags cannot pass the attribute through to the single
462// variant, and attributes cannot be applied to macro invocations.
mod inner_flags {
    // Part of the code for the AT_STATX_SYNC_AS_STAT case that's produced by the macro triggers the
    // lint, but as a whole, the produced code is still correct.
    #![allow(clippy::bad_bit_mask)] // TODO(b/303500202) Remove once addressed in bitflags.
    use super::{bitflags, uapi};

    bitflags! {
        /// Typed set over the `uapi` `AT_*` / `STATX_*` bit constants listed below.
        ///
        /// NOTE(review): presumably the `flags` argument of `statx(2)` — confirm.
        /// NOTE(review): `STATX_ATTR_VERITY` is named like a statx attribute bit rather
        /// than an `AT_*` flag — confirm it belongs in this set.
        #[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
        pub struct StatxFlags: u32 {
            const AT_SYMLINK_NOFOLLOW = uapi::AT_SYMLINK_NOFOLLOW;
            const AT_EMPTY_PATH = uapi::AT_EMPTY_PATH;
            const AT_NO_AUTOMOUNT = uapi::AT_NO_AUTOMOUNT;
            const AT_STATX_SYNC_AS_STAT = uapi::AT_STATX_SYNC_AS_STAT;
            const AT_STATX_FORCE_SYNC = uapi::AT_STATX_FORCE_SYNC;
            const AT_STATX_DONT_SYNC = uapi::AT_STATX_DONT_SYNC;
            const STATX_ATTR_VERITY = uapi::STATX_ATTR_VERITY;
        }
    }
}
482
483pub use inner_flags::StatxFlags;
484
/// Distinguishes unlinking a directory from unlinking any other kind of node.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum UnlinkKind {
    /// Unlink a directory.
    Directory,

    /// Unlink a non-directory.
    NonDirectory,
}
493
/// What a symlink points at, as returned by `FsNodeOps::readlink`.
pub enum SymlinkTarget {
    /// The target expressed as a path string.
    Path(FsString),
    /// The target expressed directly as a namespace node.
    Node(NamespaceNode),
}
498
499#[derive(Clone, Copy, PartialEq, Eq)]
500pub enum XattrOp {
501    /// Set the value of the extended attribute regardless of whether it exists.
502    Set,
503    /// Create a new extended attribute. Fail if it already exists.
504    Create,
505    /// Replace the value of the extended attribute. Fail if it doesn't exist.
506    Replace,
507}
508
509impl XattrOp {
510    pub fn into_flags(self) -> u32 {
511        match self {
512            Self::Set => 0,
513            Self::Create => uapi::XATTR_CREATE,
514            Self::Replace => uapi::XATTR_REPLACE,
515        }
516    }
517}
518
/// Either a value, or the size required to contain it.
#[derive(Clone, Debug, PartialEq)]
pub enum ValueOrSize<T> {
    /// The value itself.
    Value(T),
    /// The size that would be needed to hold the value.
    Size(usize),
}

impl<T> ValueOrSize<T> {
    /// Transforms a contained value with `f`; a `Size` is passed through unchanged and
    /// `f` is not called.
    pub fn map<F, U>(self, f: F) -> ValueOrSize<U>
    where
        F: FnOnce(T) -> U,
    {
        match self {
            ValueOrSize::Value(inner) => ValueOrSize::Value(f(inner)),
            ValueOrSize::Size(required) => ValueOrSize::Size(required),
        }
    }

    /// Test-only accessor: returns the contained value and panics on a `Size`.
    #[cfg(test)]
    pub fn unwrap(self) -> T {
        let Self::Value(inner) = self else {
            panic!("Unwrap ValueOrSize that is a Size");
        };
        inner
    }
}

impl<T> From<T> for ValueOrSize<T> {
    /// Wraps `t` as `ValueOrSize::Value`.
    fn from(t: T) -> Self {
        ValueOrSize::Value(t)
    }
}
551
552#[derive(Copy, Clone, Eq, PartialEq, Debug)]
553pub enum FallocMode {
554    Allocate { keep_size: bool },
555    PunchHole,
556    Collapse,
557    Zero { keep_size: bool },
558    InsertRange,
559    UnshareRange,
560}
561
562impl FallocMode {
563    pub fn from_bits(mode: u32) -> Option<Self> {
564        // `fallocate()` allows only the following values for `mode`.
565        if mode == 0 {
566            Some(Self::Allocate { keep_size: false })
567        } else if mode == FALLOC_FL_KEEP_SIZE {
568            Some(Self::Allocate { keep_size: true })
569        } else if mode == FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE {
570            Some(Self::PunchHole)
571        } else if mode == FALLOC_FL_COLLAPSE_RANGE {
572            Some(Self::Collapse)
573        } else if mode == FALLOC_FL_ZERO_RANGE {
574            Some(Self::Zero { keep_size: false })
575        } else if mode == FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE {
576            Some(Self::Zero { keep_size: true })
577        } else if mode == FALLOC_FL_INSERT_RANGE {
578            Some(Self::InsertRange)
579        } else if mode == FALLOC_FL_UNSHARE_RANGE {
580            Some(Self::UnshareRange)
581        } else {
582            None
583        }
584    }
585}
586
/// The reason an access check is being performed; passed to
/// `FsNodeOps::check_access` alongside the requested permissions.
///
/// NOTE(review): the variant names suggest the operation on whose behalf the check
/// runs (access, chdir, chroot, exec, timestamp changes, internal checks) — confirm
/// against the call sites.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum CheckAccessReason {
    Access,
    Chdir,
    Chroot,
    Exec,
    ChangeTimestamps { now: bool },
    InternalPermissionChecks,
}
596
/// Vector type returned by `FsNodeOps::lookup_pipelined`: a `SmallVec` with inline
/// room for 8 elements.
pub type LookupVec<T> = SmallVec<[T; 8]>;
598
599pub trait FsNodeOps: Send + Sync + AsAny + 'static {
600    /// Delegate the access check to the node.
601    fn check_access(
602        &self,
603        _locked: &mut Locked<FileOpsCore>,
604        node: &FsNode,
605        current_task: &CurrentTask,
606        access: security::PermissionFlags,
607        info: &RwLock<FsNodeInfo>,
608        reason: CheckAccessReason,
609        audit_context: security::Auditable<'_>,
610    ) -> Result<(), Errno> {
611        node.default_check_access_impl(current_task, access, reason, info.read(), audit_context)
612    }
613
614    /// Build the [`DirEntryOps`] for a new [`DirEntry`] that will be associated
615    /// to this node.
616    fn create_dir_entry_ops(&self) -> Box<dyn DirEntryOps> {
617        Box::new(DefaultDirEntryOps)
618    }
619
620    /// Build the `FileOps` for the file associated to this node.
621    ///
622    /// The returned FileOps will be used to create a FileObject, which might
623    /// be assigned an FdNumber.
624    fn create_file_ops(
625        &self,
626        locked: &mut Locked<FileOpsCore>,
627        node: &FsNode,
628        _current_task: &CurrentTask,
629        flags: OpenFlags,
630    ) -> Result<Box<dyn FileOps>, Errno>;
631
    /// Find an existing child node named `name` and return it.
    ///
    /// The default implementation always fails with `ENOENT`.
    ///
    /// NOTE(review): older wording of this comment referred to a `child` parameter and
    /// an `initialize` step; neither appears in this signature.
    fn lookup(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        name: &FsStr,
    ) -> Result<FsNodeHandle, Errno> {
        // The default implementation here is suitable for filesystems that have permanent entries;
        // entries that already exist will get found in the cache and shouldn't get this far.
        error!(ENOENT, format!("looking for {name}"))
    }
647
648    /// Returns whether this node supports pipelined lookups.
649    fn has_lookup_pipelined(&self) -> bool {
650        false
651    }
652
    /// Find multiple children nodes in sequence.
    ///
    /// This can be used to pipeline lookups in filesystems that support it.
    ///
    /// # Panics
    ///
    /// The default implementation panics. Implementations that return `true` from
    /// `has_lookup_pipelined` are expected to override this method (presumably callers
    /// check `has_lookup_pipelined` first — confirm).
    fn lookup_pipelined(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _names: &[&FsStr],
    ) -> LookupVec<Result<FsNodeHandle, Errno>> {
        panic!("has_lookup_pipelined should be false");
    }
665
    /// Create and return the given child node.
    ///
    /// The `_mode` argument indicates what kind of child to create.
    ///
    /// This function is never called with FileMode::IFDIR. The mkdir function
    /// is used to create directories instead.
    fn mknod(
        &self,
        locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _mode: FileMode,
        _dev: DeviceId,
        _owner: FsCred,
    ) -> Result<FsNodeHandle, Errno>;
683
684    /// Create and return the given child node as a subdirectory.
685    fn mkdir(
686        &self,
687        locked: &mut Locked<FileOpsCore>,
688        _node: &FsNode,
689        _current_task: &CurrentTask,
690        _name: &FsStr,
691        _mode: FileMode,
692        _owner: FsCred,
693    ) -> Result<FsNodeHandle, Errno>;
694
695    /// Creates a symlink with the given `target` path.
696    fn create_symlink(
697        &self,
698        locked: &mut Locked<FileOpsCore>,
699        _node: &FsNode,
700        _current_task: &CurrentTask,
701        _name: &FsStr,
702        _target: &FsStr,
703        _owner: FsCred,
704    ) -> Result<FsNodeHandle, Errno>;
705
706    /// Creates an anonymous file.
707    ///
708    /// The FileMode::IFMT of the FileMode is always FileMode::IFREG.
709    ///
710    /// Used by O_TMPFILE.
711    fn create_tmpfile(
712        &self,
713        _node: &FsNode,
714        _current_task: &CurrentTask,
715        _mode: FileMode,
716        _owner: FsCred,
717    ) -> Result<FsNodeHandle, Errno> {
718        error!(EOPNOTSUPP)
719    }
720
721    /// Reads the symlink from this node.
722    fn readlink(
723        &self,
724        _locked: &mut Locked<FileOpsCore>,
725        _node: &FsNode,
726        _current_task: &CurrentTask,
727    ) -> Result<SymlinkTarget, Errno> {
728        error!(EINVAL)
729    }
730
731    /// Create a hard link with the given name to the given child.
732    fn link(
733        &self,
734        _locked: &mut Locked<FileOpsCore>,
735        _node: &FsNode,
736        _current_task: &CurrentTask,
737        _name: &FsStr,
738        _child: &FsNodeHandle,
739    ) -> Result<(), Errno> {
740        error!(EPERM)
741    }
742
    /// Remove the child with the given name, if the child exists.
    ///
    /// The node being removed is passed as `_child`.
    ///
    /// NOTE(review): older wording of this comment described an `UnlinkKind`
    /// parameter; this signature does not take one.
    fn unlink(
        &self,
        locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
        _child: &FsNodeHandle,
    ) -> Result<(), Errno>;
755
756    /// Acquire the necessary append lock for the operations that depend on them.
757    /// Should be done before calling `allocate` or `truncate` to avoid lock ordering issues.
758    fn append_lock_read<'a>(
759        &'a self,
760        locked: &'a mut Locked<BeforeFsNodeAppend>,
761        node: &'a FsNode,
762        current_task: &CurrentTask,
763    ) -> Result<(RwQueueReadGuard<'a, FsNodeAppend>, &'a mut Locked<FsNodeAppend>), Errno> {
764        return node.append_lock.read_and(locked, current_task);
765    }
766
767    /// Change the length of the file.
768    fn truncate(
769        &self,
770        _locked: &mut Locked<FileOpsCore>,
771        _guard: &AppendLockGuard<'_>,
772        _node: &FsNode,
773        _current_task: &CurrentTask,
774        _length: u64,
775    ) -> Result<(), Errno> {
776        error!(EINVAL)
777    }
778
779    /// Manipulate allocated disk space for the file.
780    fn allocate(
781        &self,
782        _locked: &mut Locked<FileOpsCore>,
783        _guard: &AppendLockGuard<'_>,
784        _node: &FsNode,
785        _current_task: &CurrentTask,
786        _mode: FallocMode,
787        _offset: u64,
788        _length: u64,
789    ) -> Result<(), Errno> {
790        error!(EINVAL)
791    }
792
793    /// Update the supplied info with initial state (e.g. size) for the node.
794    ///
795    /// FsNode calls this method when created, to allow the FsNodeOps to
796    /// set appropriate initial values in the FsNodeInfo.
797    fn initial_info(&self, _info: &mut FsNodeInfo) {}
798
799    /// Update node.info as needed.
800    ///
801    /// FsNode calls this method before converting the FsNodeInfo struct into
802    /// the uapi::stat struct to give the file system a chance to update this data
803    /// before it is used by clients.
804    ///
805    /// File systems that keep the FsNodeInfo up-to-date do not need to
806    /// override this function.
807    ///
808    /// Return a read guard for the updated information.
809    fn fetch_and_refresh_info<'a>(
810        &self,
811        _locked: &mut Locked<FileOpsCore>,
812        _node: &FsNode,
813        _current_task: &CurrentTask,
814        info: &'a RwLock<FsNodeInfo>,
815    ) -> Result<RwLockReadGuard<'a, FsNodeInfo>, Errno> {
816        Ok(info.read())
817    }
818
819    /// Syncs cached data to persistent storage.
820    fn sync(&self, _node: &FsNode, _current_task: &CurrentTask) -> Result<(), Errno> {
821        Ok(())
822    }
823
824    /// Update node attributes persistently.
825    fn update_attributes(
826        &self,
827        _locked: &mut Locked<FileOpsCore>,
828        _node: &FsNode,
829        _current_task: &CurrentTask,
830        _info: &FsNodeInfo,
831        _has: zxio_node_attr_has_t,
832    ) -> Result<(), Errno> {
833        Ok(())
834    }
835
836    /// Get an extended attribute on the node.
837    ///
838    /// An implementation can systematically return a value. Otherwise, if `max_size` is 0, it can
839    /// instead return the size of the attribute, and can return an ERANGE error if max_size is not
840    /// 0, and lesser than the required size.
841    fn get_xattr(
842        &self,
843        _locked: &mut Locked<FileOpsCore>,
844        _node: &FsNode,
845        _current_task: &CurrentTask,
846        _name: &FsStr,
847        _max_size: usize,
848    ) -> Result<ValueOrSize<FsString>, Errno> {
849        error!(ENOTSUP)
850    }
851
852    /// Set an extended attribute on the node.
853    fn set_xattr(
854        &self,
855        _locked: &mut Locked<FileOpsCore>,
856        _node: &FsNode,
857        _current_task: &CurrentTask,
858        _name: &FsStr,
859        _value: &FsStr,
860        _op: XattrOp,
861    ) -> Result<(), Errno> {
862        error!(ENOTSUP)
863    }
864
    /// Remove an extended attribute from the node.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn remove_xattr(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _name: &FsStr,
    ) -> Result<(), Errno> {
        error!(ENOTSUP)
    }
874
    /// List the extended attributes of the node.
    ///
    /// An implementation can systematically return a value. Otherwise, if `max_size` is 0, it can
    /// instead return the size of the 0 separated string needed to represent the value, and can
    /// return an ERANGE error if max_size is not 0, and lesser than the required size.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn list_xattrs(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _max_size: usize,
    ) -> Result<ValueOrSize<Vec<FsString>>, Errno> {
        error!(ENOTSUP)
    }
887
    /// Called when the FsNode is freed by the Kernel.
    ///
    /// Consumes the boxed ops object and receives the node's `FsNodeInfo` by value. The default
    /// implementation does nothing and returns `Ok(())`.
    fn forget(
        self: Box<Self>,
        _locked: &mut Locked<FileOpsCore>,
        _current_task: &CurrentTask,
        _info: FsNodeInfo,
    ) -> Result<(), Errno> {
        Ok(())
    }
897
898    ////////////////////
899    // FS-Verity operations
900
    /// Marks that FS-Verity is being built. Writes the fsverity descriptor and the merkle tree,
    /// the latter being computed by the filesystem.
    ///
    /// This should ensure there are no writable file handles. Returns EEXIST if the file was
    /// already fsverity-enabled. Returns EBUSY if this ioctl was already running on this file.
    ///
    /// The default implementation fails with ENOTSUP.
    fn enable_fsverity(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _descriptor: &fsverity_descriptor,
    ) -> Result<(), Errno> {
        error!(ENOTSUP)
    }
914
    /// Reads the fsverity descriptor if the node is fsverity-enabled; otherwise returns ENODATA.
    ///
    /// The default implementation fails with ENOTSUP.
    fn get_fsverity_descriptor(&self, _log_blocksize: u8) -> Result<fsverity_descriptor, Errno> {
        error!(ENOTSUP)
    }
919
    /// Returns a descriptive name for this node, suitable to report to userspace in situations
    /// where the node's path is unavailable (e.g. because it is anonymous, and has no path).
    /// If no name is returned then a default name of the form `<class>:[<node_id>]` will be used.
    ///
    /// The default implementation returns `None`.
    fn internal_name(&self, _node: &FsNode) -> Option<FsString> {
        None
    }
926
    /// The key used to identify this node in the file system's node cache.
    ///
    /// For many file systems, this will be the same as the inode number. However, some file
    /// systems, such as FUSE, sometimes use different `node_key` and inode numbers.
    ///
    /// The default implementation returns the node's inode number (`node.ino`).
    fn node_key(&self, node: &FsNode) -> ino_t {
        node.ino
    }
934
    /// Whether this node is private to the kernel/filesystem.
    ///
    /// The default implementation returns `false`.
    fn is_private(&self) -> bool {
        false
    }
939
940    /// Returns the size of the file.
941    fn get_size(
942        &self,
943        locked: &mut Locked<FileOpsCore>,
944        node: &FsNode,
945        current_task: &CurrentTask,
946    ) -> Result<usize, Errno> {
947        let info = node.fetch_and_refresh_info(locked, current_task)?;
948        Ok(info.size.try_into().map_err(|_| errno!(EINVAL))?)
949    }
950}
951
952impl<T> From<T> for Box<dyn FsNodeOps>
953where
954    T: FsNodeOps,
955{
956    fn from(ops: T) -> Box<dyn FsNodeOps> {
957        Box::new(ops)
958    }
959}
960
/// Implements [`FsNodeOps`] methods in a way that makes sense for symlinks.
/// You must implement [`FsNodeOps::readlink`].
///
/// Expands to the directory-only stubs from [`fs_node_impl_not_dir`] plus a `create_file_ops`
/// that asserts the node is a symlink and then panics, because symlink nodes are never expected
/// to be opened through this path.
///
/// All types in the expansion are written with fully qualified paths so that the macro does not
/// depend on what the call site happens to have imported.
#[macro_export]
macro_rules! fs_node_impl_symlink {
    () => {
        $crate::vfs::fs_node_impl_not_dir!();

        fn create_file_ops(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _flags: starnix_uapi::open_flags::OpenFlags,
        ) -> Result<Box<dyn $crate::vfs::FileOps>, starnix_uapi::errors::Errno> {
            assert!(node.is_lnk());
            unreachable!("Symlink nodes cannot be opened.");
        }
    };
}
980
/// Implements the mutating directory methods of [`FsNodeOps`] for a read-only directory.
///
/// Expands to:
/// - `check_access`, which fails with EROFS whenever WRITE access is requested and otherwise
///   defers to `FsNode::default_check_access_impl`;
/// - `mkdir`, `mknod`, `create_symlink`, `link` and `unlink`, each of which fails with EROFS and
///   records the offending entry name in the error context.
///
/// All types in the expansion are written with fully qualified paths so that the macro does not
/// depend on what the call site happens to have imported.
#[macro_export]
macro_rules! fs_node_impl_dir_readonly {
    () => {
        fn check_access(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            node: &$crate::vfs::FsNode,
            current_task: &$crate::task::CurrentTask,
            permission_flags: $crate::security::PermissionFlags,
            info: &starnix_sync::RwLock<$crate::vfs::FsNodeInfo>,
            reason: $crate::vfs::CheckAccessReason,
            audit_context: $crate::security::Auditable<'_>,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            // Writes are rejected up front; every other request goes through the default checks.
            let access = permission_flags.as_access();
            if access.contains(starnix_uapi::file_mode::Access::WRITE) {
                return starnix_uapi::error!(
                    EROFS,
                    format!("check_access failed: read-only directory")
                );
            }
            node.default_check_access_impl(
                current_task,
                permission_flags,
                reason,
                info.read(),
                audit_context,
            )
        }

        fn mkdir(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("mkdir failed: {:?}", name))
        }

        fn mknod(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _dev: starnix_uapi::device_id::DeviceId,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("mknod failed: {:?}", name))
        }

        fn create_symlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _target: &$crate::vfs::FsStr,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("symlink failed: {:?}", name))
        }

        fn link(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _child: &$crate::vfs::FsNodeHandle,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("link failed: {:?}", name))
        }

        fn unlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _child: &$crate::vfs::FsNodeHandle,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EROFS, format!("unlink failed: {:?}", name))
        }
    };
}
1070
/// Trait that objects can implement if they need to handle extended attribute storage. Allows
/// delegating extended attribute operations in [`FsNodeOps`] to another object.
///
/// Compared with the corresponding [`FsNodeOps`] methods, these take neither the node nor the
/// current task nor a size limit, and they return the value itself rather than a `ValueOrSize`.
///
/// See [`fs_node_impl_xattr_delegate`] for usage details.
pub trait XattrStorage {
    /// Delegate for [`FsNodeOps::get_xattr`]: returns the value stored under `name`.
    fn get_xattr(&self, locked: &mut Locked<FileOpsCore>, name: &FsStr) -> Result<FsString, Errno>;

    /// Delegate for [`FsNodeOps::set_xattr`]: stores `value` under `name` according to `op`.
    fn set_xattr(
        &self,
        locked: &mut Locked<FileOpsCore>,
        name: &FsStr,
        value: &FsStr,
        op: XattrOp,
    ) -> Result<(), Errno>;

    /// Delegate for [`FsNodeOps::remove_xattr`]: removes the attribute called `name`.
    fn remove_xattr(&self, locked: &mut Locked<FileOpsCore>, name: &FsStr) -> Result<(), Errno>;

    /// Delegate for [`FsNodeOps::list_xattrs`]: returns the complete list of attributes.
    fn list_xattrs(&self, locked: &mut Locked<FileOpsCore>) -> Result<Vec<FsString>, Errno>;
}
1094
/// Implements extended attribute ops for [`FsNodeOps`] by delegating to another object which
/// implements the [`XattrStorage`] trait or a similar interface. For example:
///
/// ```
/// struct Xattrs {}
///
/// impl XattrStorage for Xattrs {
///     // implement XattrStorage
/// }
///
/// struct Node {
///     xattrs: Xattrs
/// }
///
/// impl FsNodeOps for Node {
///     // Delegate extended attribute ops in FsNodeOps to self.xattrs
///     fs_node_impl_xattr_delegate!(self, self.xattrs);
///
///     // add other FsNodeOps impls here
/// }
/// ```
///
/// The generated `get_xattr` and `list_xattrs` ignore the size argument and always return the
/// full value obtained from the delegate, converted with `.into()`.
///
/// All types in the expansion are written with fully qualified paths so that the macro does not
/// depend on what the call site happens to have imported.
#[macro_export]
macro_rules! fs_node_impl_xattr_delegate {
    ($self:ident, $delegate:expr) => {
        fn get_xattr(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            _size: usize,
        ) -> Result<$crate::vfs::ValueOrSize<$crate::vfs::FsString>, starnix_uapi::errors::Errno> {
            Ok($delegate.get_xattr(locked, name)?.into())
        }

        fn set_xattr(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
            value: &$crate::vfs::FsStr,
            op: $crate::vfs::XattrOp,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            $delegate.set_xattr(locked, name, value, op)
        }

        fn remove_xattr(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            name: &$crate::vfs::FsStr,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            $delegate.remove_xattr(locked, name)
        }

        fn list_xattrs(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _size: usize,
        ) -> Result<$crate::vfs::ValueOrSize<Vec<$crate::vfs::FsString>>, starnix_uapi::errors::Errno> {
            Ok($delegate.list_xattrs(locked)?.into())
        }
    };
}
1163
/// Stubs out [`FsNodeOps`] methods that only apply to directories.
///
/// Expands to implementations of `lookup`, `mknod`, `mkdir`, `create_symlink` and `unlink` that
/// ignore their arguments and fail with ENOTDIR.
#[macro_export]
macro_rules! fs_node_impl_not_dir {
    () => {
        fn lookup(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }

        fn mknod(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _dev: starnix_uapi::device_id::DeviceId,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }

        fn mkdir(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
            _mode: starnix_uapi::file_mode::FileMode,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }

        fn create_symlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
            _target: &$crate::vfs::FsStr,
            _owner: starnix_uapi::auth::FsCred,
        ) -> Result<$crate::vfs::FsNodeHandle, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }

        fn unlink(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _node: &$crate::vfs::FsNode,
            _current_task: &$crate::task::CurrentTask,
            _name: &$crate::vfs::FsStr,
            _child: &$crate::vfs::FsNodeHandle,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ENOTDIR)
        }
    };
}
1227
/// Describes how a timestamp should be updated.
///
/// NOTE(review): the variant names suggest the `UTIME_NOW` / `UTIME_OMIT` / explicit-time cases
/// of `utimensat(2)`; confirm against the callers, which are outside this part of the file.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
pub enum TimeUpdateType {
    // Presumably: set the timestamp to the current time — TODO confirm.
    Now,
    // Presumably: leave the timestamp unchanged — TODO confirm.
    Omit,
    // Carries an explicit UTC instant.
    Time(UtcInstant),
}
1234
1235// Public re-export of macros allows them to be used like regular rust items.
1236pub use fs_node_impl_dir_readonly;
1237pub use fs_node_impl_not_dir;
1238pub use fs_node_impl_symlink;
1239pub use fs_node_impl_xattr_delegate;
1240
/// [`FsNodeOps`] implementation for nodes that are never opened through `create_file_ops`.
///
/// NOTE(review): presumably used for device, FIFO and socket nodes, which `FsNode::open` handles
/// without calling `create_file_ops` — confirm against the users of this type.
pub struct SpecialNode;

impl FsNodeOps for SpecialNode {
    // Directory-only operations fail with ENOTDIR.
    fs_node_impl_not_dir!();

    /// Panics: reaching this method is treated as a bug in the caller.
    fn create_file_ops(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _node: &FsNode,
        _current_task: &CurrentTask,
        _flags: OpenFlags,
    ) -> Result<Box<dyn FileOps>, Errno> {
        unreachable!("Special nodes cannot be opened.");
    }
}
1256
1257impl FsNode {
    /// Returns true if the `fs_node` is private to the `Kernel`/`FileSystem`, in which
    /// case both MAC and DAC checks should be skipped.
    ///
    /// The answer is supplied by the node's `FsNodeOps::is_private`.
    pub fn is_private(&self) -> bool {
        self.ops().is_private()
    }
1263
1264    /// Create a node without inserting it into the FileSystem node cache.
1265    ///
1266    /// This is usually not what you want!
1267    /// Only use if you're also using get_or_create_node, like ext4.
1268    pub fn new_uncached(
1269        ino: ino_t,
1270        ops: impl Into<Box<dyn FsNodeOps>>,
1271        fs: &FileSystemHandle,
1272        info: FsNodeInfo,
1273    ) -> FsNodeHandle {
1274        let ops = ops.into();
1275        FsNodeHandle::new(Self::new_internal(ino, ops, Arc::downgrade(fs), info).into())
1276    }
1277
1278    fn new_internal(
1279        ino: ino_t,
1280        ops: Box<dyn FsNodeOps>,
1281        fs: Weak<FileSystem>,
1282        info: FsNodeInfo,
1283    ) -> Self {
1284        // Allow the FsNodeOps to populate initial info.
1285        let info = {
1286            let mut info = info;
1287            ops.initial_info(&mut info);
1288            info
1289        };
1290
1291        // The linter will fail in non test mode as it will not see the lock check.
1292        #[allow(clippy::let_and_return)]
1293        {
1294            let result = Self {
1295                ino,
1296                ops,
1297                fs,
1298                info: RwLock::new(info),
1299                append_lock: Default::default(),
1300                rare_data: Default::default(),
1301                write_guard_state: Default::default(),
1302                fsverity: Mutex::new(FsVerityState::None),
1303                security_state: Default::default(),
1304            };
1305            #[cfg(any(test, debug_assertions))]
1306            {
1307                #[allow(
1308                    clippy::undocumented_unsafe_blocks,
1309                    reason = "Force documented unsafe blocks in Starnix"
1310                )]
1311                let locked = unsafe { Unlocked::new() };
1312                let _l1 = result.append_lock.read_for_lock_ordering(locked);
1313                let _l2 = result.info.read();
1314                let _l3 = result.write_guard_state.lock();
1315                let _l4 = result.fsverity.lock();
1316            }
1317            result
1318        }
1319    }
1320
    /// Returns a strong handle to the `FileSystem` this node belongs to.
    ///
    /// # Panics
    ///
    /// Panics if the weak reference can no longer be upgraded, i.e. the `FileSystem` has already
    /// been dropped.
    pub fn fs(&self) -> FileSystemHandle {
        self.fs.upgrade().expect("FileSystem did not live long enough")
    }
1324
1325    pub fn ops(&self) -> &dyn FsNodeOps {
1326        self.ops.as_ref()
1327    }
1328
1329    /// Returns an error if this node is encrypted and locked. Does not require
1330    /// fetch_and_refresh_info because FS_IOC_SET_ENCRYPTION_POLICY updates info and once a node is
1331    /// encrypted, it remains encrypted forever.
1332    pub fn fail_if_locked(&self, _current_task: &CurrentTask) -> Result<(), Errno> {
1333        let node_info = self.info();
1334        if let Some(wrapping_key_id) = node_info.wrapping_key_id {
1335            let crypt_service = self.fs().crypt_service().ok_or_else(|| errno!(ENOKEY))?;
1336            if !crypt_service.contains_key(EncryptionKeyId::from(wrapping_key_id)) {
1337                return error!(ENOKEY);
1338            }
1339        }
1340        Ok(())
1341    }
1342
1343    /// Returns the `FsNode`'s `FsNodeOps` as a `&T`, or `None` if the downcast fails.
1344    pub fn downcast_ops<T>(&self) -> Option<&T>
1345    where
1346        T: 'static,
1347    {
1348        self.ops().as_any().downcast_ref::<T>()
1349    }
1350
1351    pub fn on_file_closed(&self, file: &FileObjectState) {
1352        if let Some(rare_data) = self.rare_data.get() {
1353            let mut flock_info = rare_data.flock_info.lock();
1354            // This function will drop the flock from `file` because the `WeakFileHandle` for
1355            // `file` will no longer upgrade to an `FileHandle`.
1356            flock_info.retain(|_| true);
1357        }
1358        self.record_lock_release(RecordLockOwner::FileObject(file.id));
1359    }
1360
    /// Applies the record-lock command `cmd` with the lock description `flock` on behalf of
    /// `file`.
    ///
    /// The request is forwarded to the node's `record_locks`, reached through
    /// `ensure_rare_data`, and that call's result is returned unchanged.
    pub fn record_lock(
        &self,
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        file: &FileObject,
        cmd: RecordLockCommand,
        flock: uapi::flock,
    ) -> Result<Option<uapi::flock>, Errno> {
        self.ensure_rare_data().record_locks.lock(locked, current_task, file, cmd, flock)
    }
1371
    /// Release all record locks acquired by the given owner.
    ///
    /// Does nothing when the node has no rare data yet.
    pub fn record_lock_release(&self, owner: RecordLockOwner) {
        if let Some(rare_data) = self.rare_data.get() {
            rare_data.record_locks.release_locks(owner);
        }
    }
1378
    /// Returns the `DirEntryOps` supplied by this node's `FsNodeOps`.
    pub fn create_dir_entry_ops(&self) -> Box<dyn DirEntryOps> {
        self.ops().create_dir_entry_ops()
    }
1382
1383    pub fn create_file_ops<L>(
1384        &self,
1385        locked: &mut Locked<L>,
1386        current_task: &CurrentTask,
1387        flags: OpenFlags,
1388    ) -> Result<Box<dyn FileOps>, Errno>
1389    where
1390        L: LockEqualOrBefore<FileOpsCore>,
1391    {
1392        let locked = locked.cast_locked::<FileOpsCore>();
1393        self.ops().create_file_ops(locked, self, current_task, flags)
1394    }
1395
1396    pub fn open(
1397        &self,
1398        locked: &mut Locked<Unlocked>,
1399        current_task: &CurrentTask,
1400        namespace_node: &NamespaceNode,
1401        flags: OpenFlags,
1402        access_check: AccessCheck,
1403    ) -> Result<Box<dyn FileOps>, Errno> {
1404        // If O_PATH is set, there is no need to create a real FileOps because
1405        // most file operations are disabled.
1406        if flags.contains(OpenFlags::PATH) {
1407            return Ok(Box::new(OPathOps::new()));
1408        }
1409
1410        let access = access_check.resolve(flags);
1411        if access.is_nontrivial() {
1412            if flags.contains(OpenFlags::NOATIME) {
1413                self.check_o_noatime_allowed(current_task)?;
1414            }
1415
1416            // `flags` doesn't contain any information about the EXEC permission. Instead the syscalls
1417            // used to execute a file (`sys_execve` and `sys_execveat`) call `open()` with the EXEC
1418            // permission request in `access`.
1419            let mut permission_flags = PermissionFlags::from(access);
1420
1421            // The `APPEND` flag exists only in `flags`, to modify the behaviour of
1422            // `PermissionFlags::WRITE`
1423            if flags.contains(OpenFlags::APPEND) {
1424                permission_flags |= security::PermissionFlags::APPEND;
1425            }
1426
1427            // TODO: https://fxbug.dev/455782510 - Remove this once non-open() checks are fully
1428            // enforced.
1429            permission_flags |= security::PermissionFlags::FOR_OPEN;
1430
1431            self.check_access(
1432                locked,
1433                current_task,
1434                &namespace_node.mount,
1435                permission_flags,
1436                CheckAccessReason::InternalPermissionChecks,
1437                namespace_node,
1438            )?;
1439        }
1440
1441        let (mode, rdev) = {
1442            // Don't hold the info lock while calling into open_device or self.ops().
1443            // TODO: The mode and rdev are immutable and shouldn't require a lock to read.
1444            let info = self.info();
1445            (info.mode, info.rdev)
1446        };
1447
1448        match mode & FileMode::IFMT {
1449            FileMode::IFCHR => {
1450                if namespace_node.mount.flags().contains(MountFlags::NODEV) {
1451                    return error!(EACCES);
1452                }
1453                current_task.kernel().open_device(
1454                    locked,
1455                    current_task,
1456                    namespace_node,
1457                    flags,
1458                    rdev,
1459                    DeviceMode::Char,
1460                )
1461            }
1462            FileMode::IFBLK => {
1463                if namespace_node.mount.flags().contains(MountFlags::NODEV) {
1464                    return error!(EACCES);
1465                }
1466                current_task.kernel().open_device(
1467                    locked,
1468                    current_task,
1469                    namespace_node,
1470                    flags,
1471                    rdev,
1472                    DeviceMode::Block,
1473                )
1474            }
1475            FileMode::IFIFO => Pipe::open(locked, current_task, self.fifo(current_task), flags),
1476            // UNIX domain sockets can't be opened.
1477            FileMode::IFSOCK => error!(ENXIO),
1478            _ => self.create_file_ops(locked, current_task, flags),
1479        }
1480    }
1481
1482    pub fn lookup<L>(
1483        &self,
1484        locked: &mut Locked<L>,
1485        current_task: &CurrentTask,
1486        mount: &MountInfo,
1487        name: &FsStr,
1488    ) -> Result<FsNodeHandle, Errno>
1489    where
1490        L: LockEqualOrBefore<FileOpsCore>,
1491    {
1492        self.check_access(
1493            locked,
1494            current_task,
1495            mount,
1496            Access::EXEC,
1497            CheckAccessReason::InternalPermissionChecks,
1498            &[Auditable::Name(name), std::panic::Location::caller().into()],
1499        )?;
1500        let locked = locked.cast_locked::<FileOpsCore>();
1501        self.ops().lookup(locked, self, current_task, name)
1502    }
1503
    /// Creates a child node called `name` in this (directory) node.
    ///
    /// Steps, in order:
    /// 1. WRITE access to this node is checked.
    /// 2. A security hook is consulted depending on the file type in `mode`.
    /// 3. `mode` and `owner` are adjusted by `update_metadata_for_child`.
    /// 4. Directories are created with `FsNodeOps::mkdir`; everything else with
    ///    `FsNodeOps::mknod`, which additionally requires `CAP_MKNOD` for types other than
    ///    regular files, FIFOs and sockets.
    /// 5. The security state of the new node is initialised.
    ///
    /// # Panics
    ///
    /// Panics if `mode` carries no file type bits.
    pub fn create_node<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        mount: &MountInfo,
        name: &FsStr,
        mut mode: FileMode,
        dev: DeviceId,
        mut owner: FsCred,
    ) -> Result<FsNodeHandle, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        assert!(mode & FileMode::IFMT != FileMode::EMPTY, "mknod called without node type.");
        self.check_access(
            locked,
            current_task,
            mount,
            Access::WRITE,
            CheckAccessReason::InternalPermissionChecks,
            security::Auditable::Name(name),
        )?;
        if mode.is_reg() {
            security::check_fs_node_create_access(current_task, self, mode, name)?;
        } else if mode.is_dir() {
            // Even though the man page for mknod(2) says that mknod "cannot be used to create
            // directories" in starnix the mkdir syscall (`sys_mkdirat`) ends up calling
            // create_node.
            security::check_fs_node_mkdir_access(current_task, self, mode, name)?;
        // NOTE(review): as written, the mknod hook is skipped for character/block devices,
        // FIFOs and sockets and only runs for the remaining file types. That looks inverted
        // relative to what a mknod permission hook would normally cover — confirm whether the
        // negation is intended.
        } else if !matches!(
            mode.fmt(),
            FileMode::IFCHR | FileMode::IFBLK | FileMode::IFIFO | FileMode::IFSOCK
        ) {
            security::check_fs_node_mknod_access(current_task, self, mode, name, dev)?;
        }

        self.update_metadata_for_child(current_task, &mut mode, &mut owner);

        let new_node = if mode.is_dir() {
            let locked = locked.cast_locked::<FileOpsCore>();
            self.ops().mkdir(locked, self, current_task, name, mode, owner)?
        } else {
            // https://man7.org/linux/man-pages/man2/mknod.2.html says on error EPERM:
            //
            //   mode requested creation of something other than a regular
            //   file, FIFO (named pipe), or UNIX domain socket, and the
            //   caller is not privileged (Linux: does not have the
            //   CAP_MKNOD capability); also returned if the filesystem
            //   containing pathname does not support the type of node
            //   requested.
            if !matches!(mode.fmt(), FileMode::IFREG | FileMode::IFIFO | FileMode::IFSOCK) {
                security::check_task_capable(current_task, CAP_MKNOD)?;
            }
            let locked = locked.cast_locked::<FileOpsCore>();
            self.ops().mknod(locked, self, current_task, name, mode, dev, owner)?
        };

        // Let the LSM label the new node (and persist the label as an xattr where supported).
        self.init_new_node_security_on_create(locked, current_task, &new_node, name)?;

        Ok(new_node)
    }
1565
1566    pub fn create_symlink<L>(
1567        &self,
1568        locked: &mut Locked<L>,
1569        current_task: &CurrentTask,
1570        mount: &MountInfo,
1571        name: &FsStr,
1572        target: &FsStr,
1573        owner: FsCred,
1574    ) -> Result<FsNodeHandle, Errno>
1575    where
1576        L: LockEqualOrBefore<FileOpsCore>,
1577    {
1578        self.check_access(
1579            locked,
1580            current_task,
1581            mount,
1582            Access::WRITE,
1583            CheckAccessReason::InternalPermissionChecks,
1584            security::Auditable::Name(name),
1585        )?;
1586        security::check_fs_node_symlink_access(current_task, self, name, target)?;
1587
1588        let locked = locked.cast_locked::<FileOpsCore>();
1589        let new_node =
1590            self.ops().create_symlink(locked, self, current_task, name, target, owner)?;
1591
1592        self.init_new_node_security_on_create(locked, current_task, &new_node, name)?;
1593
1594        Ok(new_node)
1595    }
1596
1597    /// Requests that the LSM initialise a security label for the `new_node`, and optionally provide
1598    /// an extended attribute to write to the file to persist it.  If no LSM is enabled, no extended
1599    /// attribute returned, or if the filesystem does not support extended attributes, then the call
1600    /// returns success. All other failure modes return an `Errno` that should be early-returned.
1601    fn init_new_node_security_on_create<L>(
1602        &self,
1603        locked: &mut Locked<L>,
1604        current_task: &CurrentTask,
1605        new_node: &FsNode,
1606        name: &FsStr,
1607    ) -> Result<(), Errno>
1608    where
1609        L: LockEqualOrBefore<FileOpsCore>,
1610    {
1611        let locked = locked.cast_locked::<FileOpsCore>();
1612        security::fs_node_init_on_create(current_task, &new_node, self, name)?
1613            .map(|xattr| {
1614                match new_node.ops().set_xattr(
1615                    locked,
1616                    &new_node,
1617                    current_task,
1618                    xattr.name,
1619                    xattr.value.as_slice().into(),
1620                    XattrOp::Create,
1621                ) {
1622                    Err(e) => {
1623                        if e.code == ENOTSUP {
1624                            // This should only occur if a task has an "fscreate" context set, and
1625                            // creates a new file in a filesystem that does not support xattrs.
1626                            Ok(())
1627                        } else {
1628                            Err(e)
1629                        }
1630                    }
1631                    result => result,
1632                }
1633            })
1634            .unwrap_or_else(|| Ok(()))
1635    }
1636
1637    pub fn create_tmpfile<L>(
1638        &self,
1639        locked: &mut Locked<L>,
1640        current_task: &CurrentTask,
1641        mount: &MountInfo,
1642        mut mode: FileMode,
1643        mut owner: FsCred,
1644        link_behavior: FsNodeLinkBehavior,
1645    ) -> Result<FsNodeHandle, Errno>
1646    where
1647        L: LockEqualOrBefore<FileOpsCore>,
1648    {
1649        self.check_access(
1650            locked,
1651            current_task,
1652            mount,
1653            Access::WRITE,
1654            CheckAccessReason::InternalPermissionChecks,
1655            security::Auditable::Location(std::panic::Location::caller()),
1656        )?;
1657        self.update_metadata_for_child(current_task, &mut mode, &mut owner);
1658        let node = self.ops().create_tmpfile(self, current_task, mode, owner)?;
1659        self.init_new_node_security_on_create(locked, current_task, &node, "".into())?;
1660        if link_behavior == FsNodeLinkBehavior::Disallowed {
1661            node.ensure_rare_data().link_behavior.set(link_behavior).unwrap();
1662        }
1663        Ok(node)
1664    }
1665
1666    // This method does not attempt to update the atime of the node.
1667    // Use `NamespaceNode::readlink` which checks the mount flags and updates the atime accordingly.
1668    pub fn readlink<L>(
1669        &self,
1670        locked: &mut Locked<L>,
1671        current_task: &CurrentTask,
1672    ) -> Result<SymlinkTarget, Errno>
1673    where
1674        L: LockEqualOrBefore<FileOpsCore>,
1675    {
1676        // TODO: 378864856 - Is there a permission check here other than security checks?
1677        security::check_fs_node_read_link_access(current_task, self)?;
1678        self.ops().readlink(locked.cast_locked::<FileOpsCore>(), self, current_task)
1679    }
1680
1681    pub fn link<L>(
1682        &self,
1683        locked: &mut Locked<L>,
1684        current_task: &CurrentTask,
1685        mount: &MountInfo,
1686        name: &FsStr,
1687        child: &FsNodeHandle,
1688    ) -> Result<FsNodeHandle, Errno>
1689    where
1690        L: LockEqualOrBefore<FileOpsCore>,
1691    {
1692        self.check_access(
1693            locked,
1694            current_task,
1695            mount,
1696            Access::WRITE,
1697            CheckAccessReason::InternalPermissionChecks,
1698            security::Auditable::Location(std::panic::Location::caller()),
1699        )?;
1700
1701        if child.is_dir() {
1702            return error!(EPERM);
1703        }
1704
1705        if let Some(child_rare_data) = child.rare_data.get() {
1706            if matches!(child_rare_data.link_behavior.get(), Some(FsNodeLinkBehavior::Disallowed)) {
1707                return error!(ENOENT);
1708            }
1709        }
1710
1711        // Check that `current_task` has permission to create the hard link.
1712        //
1713        // See description of /proc/sys/fs/protected_hardlinks in
1714        // https://man7.org/linux/man-pages/man5/proc.5.html for details of the security
1715        // vulnerabilities.
1716        //
1717        let (child_uid, mode) = {
1718            let info = child.info();
1719            (info.uid, info.mode)
1720        };
1721        // Check that the the filesystem UID of the calling process (`current_task`) is the same as
1722        // the UID of the existing file. The check can be bypassed if the calling process has
1723        // `CAP_FOWNER` capability.
1724        if child_uid != current_task.current_creds().fsuid
1725            && !security::is_task_capable_noaudit(current_task, CAP_FOWNER)
1726        {
1727            // If current_task is not the user of the existing file, it needs to have read and write
1728            // access to the existing file.
1729            child
1730                .check_access(
1731                    locked,
1732                    current_task,
1733                    mount,
1734                    Access::READ | Access::WRITE,
1735                    CheckAccessReason::InternalPermissionChecks,
1736                    security::Auditable::Name(name),
1737                )
1738                .map_err(|e| {
1739                    // `check_access(..)` returns EACCES when the access rights doesn't match - change
1740                    // it to EPERM to match Linux standards.
1741                    if e == EACCES { errno!(EPERM) } else { e }
1742                })?;
1743            // There are also security issues that may arise when users link to setuid, setgid, or
1744            // special files.
1745            if mode.contains(FileMode::ISGID | FileMode::IXGRP) {
1746                return error!(EPERM);
1747            };
1748            if mode.contains(FileMode::ISUID) {
1749                return error!(EPERM);
1750            };
1751            if !mode.contains(FileMode::IFREG) {
1752                return error!(EPERM);
1753            };
1754        }
1755
1756        security::check_fs_node_link_access(current_task, self, child)?;
1757
1758        let locked = locked.cast_locked::<FileOpsCore>();
1759        self.ops().link(locked, self, current_task, name, child)?;
1760        Ok(child.clone())
1761    }
1762
1763    pub fn unlink<L>(
1764        &self,
1765        locked: &mut Locked<L>,
1766        current_task: &CurrentTask,
1767        mount: &MountInfo,
1768        name: &FsStr,
1769        child: &FsNodeHandle,
1770    ) -> Result<(), Errno>
1771    where
1772        L: LockEqualOrBefore<FileOpsCore>,
1773    {
1774        // The user must be able to search and write to the directory.
1775        self.check_access(
1776            locked,
1777            current_task,
1778            mount,
1779            Access::EXEC | Access::WRITE,
1780            CheckAccessReason::InternalPermissionChecks,
1781            security::Auditable::Name(name),
1782        )?;
1783        self.check_sticky_bit(current_task, child)?;
1784        if child.is_dir() {
1785            security::check_fs_node_rmdir_access(current_task, self, child, name)?;
1786        } else {
1787            security::check_fs_node_unlink_access(current_task, self, child, name)?;
1788        }
1789        let locked = locked.cast_locked::<FileOpsCore>();
1790        self.ops().unlink(locked, self, current_task, name, child)?;
1791        self.update_ctime_mtime();
1792        Ok(())
1793    }
1794
1795    pub fn truncate<L>(
1796        &self,
1797        locked: &mut Locked<L>,
1798        current_task: &CurrentTask,
1799        mount: &MountInfo,
1800        length: u64,
1801    ) -> Result<(), Errno>
1802    where
1803        L: LockEqualOrBefore<BeforeFsNodeAppend>,
1804    {
1805        self.truncate_with_strategy(locked, RealAppendLockStrategy {}, current_task, mount, length)
1806    }
1807
1808    pub fn truncate_with_strategy<L, M>(
1809        &self,
1810        locked: &mut Locked<L>,
1811        strategy: impl AppendLockStrategy<M>,
1812        current_task: &CurrentTask,
1813        mount: &MountInfo,
1814        length: u64,
1815    ) -> Result<(), Errno>
1816    where
1817        M: LockEqualOrBefore<FileOpsCore>,
1818        L: LockEqualOrBefore<M>,
1819    {
1820        if self.is_dir() {
1821            return error!(EISDIR);
1822        }
1823
1824        {
1825            let locked = locked.cast_locked::<M>();
1826            self.check_access(
1827                locked,
1828                current_task,
1829                mount,
1830                Access::WRITE,
1831                CheckAccessReason::InternalPermissionChecks,
1832                security::Auditable::Location(std::panic::Location::caller()),
1833            )?;
1834        }
1835
1836        self.truncate_common(locked, strategy, current_task, length)
1837    }
1838
1839    /// Avoid calling this method directly. You probably want to call `FileObject::ftruncate()`
1840    /// which will also perform all file-descriptor based verifications.
1841    pub fn ftruncate<L>(
1842        &self,
1843        locked: &mut Locked<L>,
1844        current_task: &CurrentTask,
1845        length: u64,
1846    ) -> Result<(), Errno>
1847    where
1848        L: LockEqualOrBefore<BeforeFsNodeAppend>,
1849    {
1850        if self.is_dir() {
1851            // When truncating a file descriptor, if the descriptor references a directory,
1852            // return EINVAL. This is different from the truncate() syscall which returns EISDIR.
1853            //
1854            // See https://man7.org/linux/man-pages/man2/ftruncate.2.html#ERRORS
1855            return error!(EINVAL);
1856        }
1857
1858        // For ftruncate, we do not need to check that the file node is writable.
1859        //
1860        // The file object that calls this method must verify that the file was opened
1861        // with write permissions.
1862        //
1863        // This matters because a file could be opened with O_CREAT + O_RDWR + 0444 mode.
1864        // The file descriptor returned from such an operation can be truncated, even
1865        // though the file was created with a read-only mode.
1866        //
1867        // See https://man7.org/linux/man-pages/man2/ftruncate.2.html#DESCRIPTION
1868        // which says:
1869        //
1870        // "With ftruncate(), the file must be open for writing; with truncate(),
1871        // the file must be writable."
1872
1873        self.truncate_common(locked, RealAppendLockStrategy {}, current_task, length)
1874    }
1875
1876    // Called by `truncate` and `ftruncate` above.
1877    fn truncate_common<L, M>(
1878        &self,
1879        locked: &mut Locked<L>,
1880        strategy: impl AppendLockStrategy<M>,
1881        current_task: &CurrentTask,
1882        length: u64,
1883    ) -> Result<(), Errno>
1884    where
1885        M: LockEqualOrBefore<FileOpsCore>,
1886        L: LockEqualOrBefore<M>,
1887    {
1888        if length > MAX_LFS_FILESIZE as u64 {
1889            return error!(EINVAL);
1890        }
1891        {
1892            let locked = locked.cast_locked::<M>().cast_locked::<FileOpsCore>();
1893            if length > current_task.thread_group().get_rlimit(locked, Resource::FSIZE) {
1894                send_standard_signal(locked, current_task, SignalInfo::kernel(SIGXFSZ));
1895                return error!(EFBIG);
1896            }
1897        }
1898        let locked = locked.cast_locked::<M>();
1899        self.clear_suid_and_sgid_bits(locked, current_task)?;
1900        // We have to take the append lock since otherwise it would be possible to truncate and for
1901        // an append to continue using the old size.
1902        let (guard, locked) = strategy.lock(locked, current_task, self)?;
1903        self.ops().truncate(locked, &guard, self, current_task, length)?;
1904        self.update_ctime_mtime();
1905        Ok(())
1906    }
1907
1908    /// Avoid calling this method directly. You probably want to call `FileObject::fallocate()`
1909    /// which will also perform additional verifications.
1910    pub fn fallocate<L>(
1911        &self,
1912        locked: &mut Locked<L>,
1913        current_task: &CurrentTask,
1914        mode: FallocMode,
1915        offset: u64,
1916        length: u64,
1917    ) -> Result<(), Errno>
1918    where
1919        L: LockBefore<BeforeFsNodeAppend>,
1920    {
1921        self.fallocate_with_strategy(
1922            locked,
1923            RealAppendLockStrategy {},
1924            current_task,
1925            mode,
1926            offset,
1927            length,
1928        )
1929    }
1930
1931    pub fn fallocate_with_strategy<L, M>(
1932        &self,
1933        locked: &mut Locked<L>,
1934        strategy: impl AppendLockStrategy<M>,
1935        current_task: &CurrentTask,
1936        mode: FallocMode,
1937        offset: u64,
1938        length: u64,
1939    ) -> Result<(), Errno>
1940    where
1941        M: LockEqualOrBefore<FileOpsCore>,
1942        L: LockEqualOrBefore<M>,
1943    {
1944        let allocate_size = checked_add_offset_and_length(offset as usize, length as usize)
1945            .map_err(|_| errno!(EFBIG))? as u64;
1946        {
1947            let locked = locked.cast_locked::<M>().cast_locked::<FileOpsCore>();
1948            if allocate_size > current_task.thread_group().get_rlimit(locked, Resource::FSIZE) {
1949                send_standard_signal(locked, current_task, SignalInfo::kernel(SIGXFSZ));
1950                return error!(EFBIG);
1951            }
1952        }
1953
1954        let locked = locked.cast_locked::<M>();
1955        self.clear_suid_and_sgid_bits(locked, current_task)?;
1956        let (guard, locked) = strategy.lock(locked, current_task, self)?;
1957        self.ops().allocate(locked, &guard, self, current_task, mode, offset, length)?;
1958        self.update_ctime_mtime();
1959        Ok(())
1960    }
1961
1962    fn update_metadata_for_child(
1963        &self,
1964        current_task: &CurrentTask,
1965        mode: &mut FileMode,
1966        owner: &mut FsCred,
1967    ) {
1968        // The setgid bit on a directory causes the gid to be inherited by new children and the
1969        // setgid bit to be inherited by new child directories. See SetgidDirTest in gvisor.
1970        {
1971            let self_info = self.info();
1972            if self_info.mode.contains(FileMode::ISGID) {
1973                owner.gid = self_info.gid;
1974                if mode.is_dir() {
1975                    *mode |= FileMode::ISGID;
1976                }
1977            }
1978        }
1979
1980        if !mode.is_dir() {
1981            // https://man7.org/linux/man-pages/man7/inode.7.html says:
1982            //
1983            //   For an executable file, the set-group-ID bit causes the
1984            //   effective group ID of a process that executes the file to change
1985            //   as described in execve(2).
1986            //
1987            // We need to check whether the current task has permission to create such a file.
1988            // See a similar check in `FsNode::chmod`.
1989            let current_creds = current_task.current_creds();
1990            if owner.gid != current_creds.fsgid
1991                && !current_creds.is_in_group(owner.gid)
1992                && !security::is_task_capable_noaudit(current_task, CAP_FOWNER)
1993            {
1994                *mode &= !FileMode::ISGID;
1995            }
1996        }
1997    }
1998
1999    /// Checks if O_NOATIME is allowed,
2000    pub fn check_o_noatime_allowed(&self, current_task: &CurrentTask) -> Result<(), Errno> {
2001        // Per open(2),
2002        //
2003        //   O_NOATIME (since Linux 2.6.8)
2004        //      ...
2005        //
2006        //      This flag can be employed only if one of the following
2007        //      conditions is true:
2008        //
2009        //      *  The effective UID of the process matches the owner UID
2010        //         of the file.
2011        //
2012        //      *  The calling process has the CAP_FOWNER capability in
2013        //         its user namespace and the owner UID of the file has a
2014        //         mapping in the namespace.
2015        if current_task.current_creds().fsuid != self.info().uid {
2016            security::check_task_capable(current_task, CAP_FOWNER)?;
2017        }
2018        Ok(())
2019    }
2020
2021    pub fn default_check_access_impl(
2022        &self,
2023        current_task: &CurrentTask,
2024        permission_flags: security::PermissionFlags,
2025        reason: CheckAccessReason,
2026        info: RwLockReadGuard<'_, FsNodeInfo>,
2027        audit_context: Auditable<'_>,
2028    ) -> Result<(), Errno> {
2029        let (node_uid, node_gid, mode) = (info.uid, info.gid, info.mode);
2030        std::mem::drop(info);
2031        if let CheckAccessReason::ChangeTimestamps { now } = reason {
2032            // To set the timestamps to the current time the caller must either have write access to
2033            // the file, be the file owner, or hold the CAP_DAC_OVERRIDE or CAP_FOWNER capability.
2034            // To set the timestamps to other values the caller must either be the file owner or hold
2035            // the CAP_FOWNER capability.
2036            if current_task.current_creds().fsuid == node_uid {
2037                return Ok(());
2038            }
2039            if now {
2040                if security::is_task_capable_noaudit(current_task, CAP_FOWNER) {
2041                    return Ok(());
2042                }
2043            } else {
2044                security::check_task_capable(current_task, CAP_FOWNER)?;
2045                return Ok(());
2046            }
2047        }
2048        check_access(self, current_task, permission_flags, node_uid, node_gid, mode)?;
2049        security::fs_node_permission(current_task, self, permission_flags, audit_context)
2050    }
2051
2052    /// Check whether the node can be accessed in the current context with the specified access
2053    /// flags (read, write, or exec). Accounts for capabilities and whether the current user is the
2054    /// owner or is in the file's group.
2055    pub fn check_access<'a, L>(
2056        &self,
2057        locked: &mut Locked<L>,
2058        current_task: &CurrentTask,
2059        mount: &MountInfo,
2060        access: impl Into<security::PermissionFlags>,
2061        reason: CheckAccessReason,
2062        audit_context: impl Into<security::Auditable<'a>>,
2063    ) -> Result<(), Errno>
2064    where
2065        L: LockEqualOrBefore<FileOpsCore>,
2066    {
2067        let mut permission_flags = access.into();
2068        if permission_flags.contains(security::PermissionFlags::WRITE) {
2069            mount.check_readonly_filesystem()?;
2070        }
2071        if permission_flags.contains(security::PermissionFlags::EXEC) && !self.is_dir() {
2072            mount.check_noexec_filesystem()?;
2073        }
2074        if reason == CheckAccessReason::Access {
2075            permission_flags |= PermissionFlags::ACCESS;
2076        }
2077        self.ops().check_access(
2078            locked.cast_locked::<FileOpsCore>(),
2079            self,
2080            current_task,
2081            permission_flags,
2082            &self.info,
2083            reason,
2084            audit_context.into(),
2085        )
2086    }
2087
2088    /// Check whether the stick bit, `S_ISVTX`, forbids the `current_task` from removing the given
2089    /// `child`. If this node has `S_ISVTX`, then either the child must be owned by the `fsuid` of
2090    /// `current_task` or `current_task` must have `CAP_FOWNER`.
2091    pub fn check_sticky_bit(
2092        &self,
2093        current_task: &CurrentTask,
2094        child: &FsNodeHandle,
2095    ) -> Result<(), Errno> {
2096        if self.info().mode.contains(FileMode::ISVTX)
2097            && child.info().uid != current_task.current_creds().fsuid
2098        {
2099            security::check_task_capable(current_task, CAP_FOWNER)?;
2100        }
2101        Ok(())
2102    }
2103
2104    pub fn fifo(&self, current_task: &CurrentTask) -> &PipeHandle {
2105        assert!(self.is_fifo());
2106        self.ensure_rare_data().ensure_fifo(current_task)
2107    }
2108
2109    /// Returns the UNIX domain socket bound to this node, if any.
2110    pub fn bound_socket(&self) -> Option<&SocketHandle> {
2111        if let Some(rare_data) = self.rare_data.get() { rare_data.bound_socket.get() } else { None }
2112    }
2113
2114    /// Register the provided socket as the UNIX domain socket bound to this node.
2115    ///
2116    /// It is a fatal error to call this method again if it has already been called on this node.
2117    pub fn set_bound_socket(&self, socket: SocketHandle) {
2118        assert!(self.ensure_rare_data().bound_socket.set(socket).is_ok());
2119    }
2120
2121    pub fn update_attributes<L, F>(
2122        &self,
2123        locked: &mut Locked<L>,
2124        current_task: &CurrentTask,
2125        mutator: F,
2126    ) -> Result<(), Errno>
2127    where
2128        L: LockEqualOrBefore<FileOpsCore>,
2129        F: FnOnce(&mut FsNodeInfo) -> Result<(), Errno>,
2130    {
2131        let mut info = self.info.write();
2132        let mut new_info = info.clone();
2133        mutator(&mut new_info)?;
2134
2135        let new_access = new_info.mode.user_access()
2136            | new_info.mode.group_access()
2137            | new_info.mode.other_access();
2138
2139        if new_access.intersects(Access::EXEC) {
2140            let write_guard_state = self.write_guard_state.lock();
2141            if let Ok(seals) = write_guard_state.get_seals() {
2142                if seals.contains(SealFlags::NO_EXEC) {
2143                    return error!(EPERM);
2144                }
2145            }
2146        }
2147
2148        // `mutator`s should not update the attribute change time, which is managed by this API.
2149        assert_eq!(info.time_status_change, new_info.time_status_change);
2150        if *info == new_info {
2151            return Ok(());
2152        }
2153        new_info.time_status_change = utc::utc_now();
2154
2155        let mut has = zxio_node_attr_has_t { ..Default::default() };
2156        has.modification_time = info.time_modify != new_info.time_modify;
2157        has.access_time = info.time_access != new_info.time_access;
2158        has.mode = info.mode != new_info.mode;
2159        has.uid = info.uid != new_info.uid;
2160        has.gid = info.gid != new_info.gid;
2161        has.rdev = info.rdev != new_info.rdev;
2162        has.casefold = info.casefold != new_info.casefold;
2163        has.wrapping_key_id = info.wrapping_key_id != new_info.wrapping_key_id;
2164
2165        security::check_fs_node_setattr_access(current_task, &self, &has)?;
2166
2167        // Call `update_attributes(..)` to persist the changes for the following fields.
2168        if has.modification_time
2169            || has.access_time
2170            || has.mode
2171            || has.uid
2172            || has.gid
2173            || has.rdev
2174            || has.casefold
2175            || has.wrapping_key_id
2176        {
2177            let locked = locked.cast_locked::<FileOpsCore>();
2178            self.ops().update_attributes(locked, self, current_task, &new_info, has)?;
2179        }
2180
2181        *info = new_info;
2182        Ok(())
2183    }
2184
2185    /// Set the permissions on this FsNode to the given values.
2186    ///
2187    /// Does not change the IFMT of the node.
2188    pub fn chmod<L>(
2189        &self,
2190        locked: &mut Locked<L>,
2191        current_task: &CurrentTask,
2192        mount: &MountInfo,
2193        mut mode: FileMode,
2194    ) -> Result<(), Errno>
2195    where
2196        L: LockEqualOrBefore<FileOpsCore>,
2197    {
2198        mount.check_readonly_filesystem()?;
2199        self.update_attributes(locked, current_task, |info| {
2200            let current_creds = current_task.current_creds();
2201            if info.uid != current_creds.euid {
2202                security::check_task_capable(current_task, CAP_FOWNER)?;
2203            } else if info.gid != current_creds.egid
2204                && !current_creds.is_in_group(info.gid)
2205                && mode.intersects(FileMode::ISGID)
2206                && !security::is_task_capable_noaudit(current_task, CAP_FOWNER)
2207            {
2208                mode &= !FileMode::ISGID;
2209            }
2210            info.chmod(mode);
2211            Ok(())
2212        })
2213    }
2214
2215    /// Sets the owner and/or group on this FsNode.
2216    pub fn chown<L>(
2217        &self,
2218        locked: &mut Locked<L>,
2219        current_task: &CurrentTask,
2220        mount: &MountInfo,
2221        owner: Option<uid_t>,
2222        group: Option<gid_t>,
2223    ) -> Result<(), Errno>
2224    where
2225        L: LockEqualOrBefore<FileOpsCore>,
2226    {
2227        mount.check_readonly_filesystem()?;
2228        self.update_attributes(locked, current_task, |info| {
2229            if security::is_task_capable_noaudit(current_task, CAP_CHOWN) {
2230                info.chown(owner, group);
2231                return Ok(());
2232            }
2233
2234            // Nobody can change the owner.
2235            if let Some(uid) = owner {
2236                if info.uid != uid {
2237                    return error!(EPERM);
2238                }
2239            }
2240
2241            let (euid, is_in_group) = {
2242                let current_creds = current_task.current_creds();
2243                (current_creds.euid, group.map(|gid| current_creds.is_in_group(gid)))
2244            };
2245
2246            // The owner can change the group.
2247            if info.uid == euid {
2248                // To a group that it belongs.
2249                if let Some(is_in_group) = is_in_group {
2250                    if !is_in_group {
2251                        return error!(EPERM);
2252                    }
2253                }
2254                info.chown(None, group);
2255                return Ok(());
2256            }
2257
2258            // Any other user can call chown(file, -1, -1)
2259            if owner.is_some() || group.is_some() {
2260                return error!(EPERM);
2261            }
2262
2263            // But not on set-user-ID or set-group-ID files.
2264            // If we were to chown them, they would drop the set-ID bit.
2265            if info.mode.is_reg()
2266                && (info.mode.contains(FileMode::ISUID)
2267                    || info.mode.contains(FileMode::ISGID | FileMode::IXGRP))
2268            {
2269                return error!(EPERM);
2270            }
2271
2272            info.chown(None, None);
2273            Ok(())
2274        })
2275    }
2276
2277    /// Forcefully change the owner and group of this node.
2278    ///
2279    /// # Safety
2280    ///
2281    /// This function skips all the security checks and just updates the owner and group. Also, does
2282    /// not check if the filesystem is read-only and does not update the attribute change time.
2283    ///
2284    /// This function is used to set the owner and group of /proc/pid to the credentials of the
2285    /// current task. Please consider carefully whether you want to use this function for another
2286    /// purpose.
2287    pub unsafe fn force_chown(&self, creds: FsCred) {
2288        self.update_info(|info| {
2289            info.chown(Some(creds.uid), Some(creds.gid));
2290        });
2291    }
2292
2293    /// Whether this node is a regular file.
2294    pub fn is_reg(&self) -> bool {
2295        self.info().mode.is_reg()
2296    }
2297
2298    /// Whether this node is a directory.
2299    pub fn is_dir(&self) -> bool {
2300        self.info().mode.is_dir()
2301    }
2302
2303    /// Whether this node is a socket.
2304    pub fn is_sock(&self) -> bool {
2305        self.info().mode.is_sock()
2306    }
2307
2308    /// Whether this node is a FIFO.
2309    pub fn is_fifo(&self) -> bool {
2310        self.info().mode.is_fifo()
2311    }
2312
2313    /// Whether this node is a symbolic link.
2314    pub fn is_lnk(&self) -> bool {
2315        self.info().mode.is_lnk()
2316    }
2317
2318    pub fn dev(&self) -> DeviceId {
2319        self.fs().dev_id
2320    }
2321
2322    pub fn stat<L>(
2323        &self,
2324        locked: &mut Locked<L>,
2325        current_task: &CurrentTask,
2326    ) -> Result<uapi::stat, Errno>
2327    where
2328        L: LockEqualOrBefore<FileOpsCore>,
2329    {
2330        security::check_fs_node_getattr_access(current_task, self)?;
2331
2332        let info = self.fetch_and_refresh_info(locked, current_task)?;
2333
2334        let time_to_kernel_timespec_pair = |t| {
2335            let timespec { tv_sec, tv_nsec } = timespec_from_time(t);
2336            let time = tv_sec.try_into().map_err(|_| errno!(EINVAL))?;
2337            let time_nsec = tv_nsec.try_into().map_err(|_| errno!(EINVAL))?;
2338            Ok((time, time_nsec))
2339        };
2340
2341        let (st_atime, st_atime_nsec) = time_to_kernel_timespec_pair(info.time_access)?;
2342        let (st_mtime, st_mtime_nsec) = time_to_kernel_timespec_pair(info.time_modify)?;
2343        let (st_ctime, st_ctime_nsec) = time_to_kernel_timespec_pair(info.time_status_change)?;
2344
2345        Ok(uapi::stat {
2346            st_dev: self.dev().bits(),
2347            st_ino: self.ino,
2348            st_nlink: info.link_count.try_into().map_err(|_| errno!(EINVAL))?,
2349            st_mode: info.mode.bits(),
2350            st_uid: info.uid,
2351            st_gid: info.gid,
2352            st_rdev: info.rdev.bits(),
2353            st_size: info.size.try_into().map_err(|_| errno!(EINVAL))?,
2354            st_blksize: info.blksize.try_into().map_err(|_| errno!(EINVAL))?,
2355            st_blocks: info.blocks.try_into().map_err(|_| errno!(EINVAL))?,
2356            st_atime,
2357            st_atime_nsec,
2358            st_mtime,
2359            st_mtime_nsec,
2360            st_ctime,
2361            st_ctime_nsec,
2362            ..Default::default()
2363        })
2364    }
2365
2366    /// Returns the current size of the file.  This is inherently racy, so any caller that
2367    /// might want to use the value returned should hold their own locks if necessary.  For
2368    /// example, if using the value here to implement append (which is the case at the time
2369    /// of writing this comment), locks must be held to prevent the file size being changed
2370    /// concurrently.
2371    // TODO(https://fxbug.dev/454730248): This is probably the wrong way to implement O_APPEND.
2372    pub fn get_size<L>(
2373        &self,
2374        locked: &mut Locked<L>,
2375        current_task: &CurrentTask,
2376    ) -> Result<usize, Errno>
2377    where
2378        L: LockEqualOrBefore<FileOpsCore>,
2379    {
2380        self.ops().get_size(locked.cast_locked::<FileOpsCore>(), self, current_task)
2381    }
2382
2383    fn statx_timestamp_from_time(time: UtcInstant) -> statx_timestamp {
2384        let nanos = time.into_nanos();
2385        statx_timestamp {
2386            tv_sec: nanos / NANOS_PER_SECOND,
2387            tv_nsec: (nanos % NANOS_PER_SECOND) as u32,
2388            ..Default::default()
2389        }
2390    }
2391
2392    pub fn statx<L>(
2393        &self,
2394        locked: &mut Locked<L>,
2395        current_task: &CurrentTask,
2396        flags: StatxFlags,
2397        mask: u32,
2398    ) -> Result<statx, Errno>
2399    where
2400        L: LockEqualOrBefore<FileOpsCore>,
2401    {
2402        security::check_fs_node_getattr_access(current_task, self)?;
2403
2404        // Ignore mask for now and fill in all of the fields.
2405        let info = if flags.contains(StatxFlags::AT_STATX_DONT_SYNC) {
2406            self.info()
2407        } else {
2408            self.fetch_and_refresh_info(locked, current_task)?
2409        };
2410        if mask & STATX__RESERVED == STATX__RESERVED {
2411            return error!(EINVAL);
2412        }
2413
2414        track_stub!(TODO("https://fxbug.dev/302594110"), "statx attributes");
2415        let stx_mnt_id = 0;
2416        let mut stx_attributes = 0;
2417        let stx_attributes_mask = STATX_ATTR_VERITY as u64;
2418
2419        if matches!(*self.fsverity.lock(), FsVerityState::FsVerity) {
2420            stx_attributes |= STATX_ATTR_VERITY as u64;
2421        }
2422
2423        Ok(statx {
2424            stx_mask: STATX_NLINK
2425                | STATX_UID
2426                | STATX_GID
2427                | STATX_ATIME
2428                | STATX_MTIME
2429                | STATX_CTIME
2430                | STATX_INO
2431                | STATX_SIZE
2432                | STATX_BLOCKS
2433                | STATX_BASIC_STATS,
2434            stx_blksize: info.blksize.try_into().map_err(|_| errno!(EINVAL))?,
2435            stx_attributes,
2436            stx_nlink: info.link_count.try_into().map_err(|_| errno!(EINVAL))?,
2437            stx_uid: info.uid,
2438            stx_gid: info.gid,
2439            stx_mode: info.mode.bits().try_into().map_err(|_| errno!(EINVAL))?,
2440            stx_ino: self.ino,
2441            stx_size: info.size.try_into().map_err(|_| errno!(EINVAL))?,
2442            stx_blocks: info.blocks.try_into().map_err(|_| errno!(EINVAL))?,
2443            stx_attributes_mask,
2444            stx_ctime: Self::statx_timestamp_from_time(info.time_status_change),
2445            stx_mtime: Self::statx_timestamp_from_time(info.time_modify),
2446            stx_atime: Self::statx_timestamp_from_time(info.time_access),
2447
2448            stx_rdev_major: info.rdev.major(),
2449            stx_rdev_minor: info.rdev.minor(),
2450
2451            stx_dev_major: self.fs().dev_id.major(),
2452            stx_dev_minor: self.fs().dev_id.minor(),
2453            stx_mnt_id,
2454            ..Default::default()
2455        })
2456    }
2457
2458    /// Checks whether `current_task` has capabilities required for the specified `access` to the
2459    /// extended attribute `name`.
2460    fn check_xattr_access<L>(
2461        &self,
2462        locked: &mut Locked<L>,
2463        current_task: &CurrentTask,
2464        mount: &MountInfo,
2465        name: &FsStr,
2466        access: Access,
2467    ) -> Result<(), Errno>
2468    where
2469        L: LockEqualOrBefore<FileOpsCore>,
2470    {
2471        assert!(access == Access::READ || access == Access::WRITE);
2472
2473        let enodata_if_read =
2474            |e: Errno| if access == Access::READ && e.code == EPERM { errno!(ENODATA) } else { e };
2475
2476        // man xattr(7) describes the different access checks applied to each extended attribute
2477        // namespace.
2478        if name.starts_with(XATTR_USER_PREFIX.to_bytes()) {
2479            {
2480                let info = self.info();
2481                if !info.mode.is_reg() && !info.mode.is_dir() {
2482                    return Err(enodata_if_read(errno!(EPERM)));
2483                }
2484            }
2485
2486            // TODO: https://fxbug.dev/460734830 - Perform capability check(s) if file has sticky
2487            // bit set.
2488
2489            self.check_access(
2490                locked,
2491                current_task,
2492                mount,
2493                access,
2494                CheckAccessReason::InternalPermissionChecks,
2495                security::Auditable::Name(name),
2496            )?;
2497        } else if name.starts_with(XATTR_TRUSTED_PREFIX.to_bytes()) {
2498            // Trusted extended attributes require `CAP_SYS_ADMIN` to read or write.
2499            security::check_task_capable(current_task, CAP_SYS_ADMIN).map_err(enodata_if_read)?;
2500        } else if name.starts_with(XATTR_SYSTEM_PREFIX.to_bytes()) {
2501            // System extended attributes have attribute-specific access policy.
2502            // TODO: https://fxbug.dev/460734830 -  Revise how system extended attributes are
2503            // access-controlled.
2504            security::check_task_capable(current_task, CAP_SYS_ADMIN).map_err(enodata_if_read)?;
2505        } else if name.starts_with(XATTR_SECURITY_PREFIX.to_bytes()) {
2506            if access == Access::WRITE {
2507                // Writes require `CAP_SYS_ADMIN`, unless the LSM owning `name` specifies to skip.
2508                if !security::fs_node_xattr_skipcap(current_task, name) {
2509                    security::check_task_capable(current_task, CAP_SYS_ADMIN)
2510                        .map_err(enodata_if_read)?;
2511                }
2512            }
2513        } else {
2514            panic!("Unknown extended attribute prefix: {}", name);
2515        }
2516        Ok(())
2517    }
2518
2519    pub fn get_xattr<L>(
2520        &self,
2521        locked: &mut Locked<L>,
2522        current_task: &CurrentTask,
2523        mount: &MountInfo,
2524        name: &FsStr,
2525        max_size: usize,
2526    ) -> Result<ValueOrSize<FsString>, Errno>
2527    where
2528        L: LockEqualOrBefore<FileOpsCore>,
2529    {
2530        // Perform discretionary capability & access checks appropriate to the xattr prefix.
2531        self.check_xattr_access(locked, current_task, mount, name, Access::READ)?;
2532
2533        // LSM access checks must be performed after discretionary checks.
2534        security::check_fs_node_getxattr_access(current_task, self, name)?;
2535
2536        if name.starts_with(XATTR_SECURITY_PREFIX.to_bytes()) {
2537            // If the attribute is in the security.* domain then allow the LSM to handle the
2538            // request, or to delegate to `FsNodeOps::get_xattr()`.
2539            security::fs_node_getsecurity(locked, current_task, self, name, max_size)
2540        } else {
2541            // If the attribute is outside security.*, delegate the read to the `FsNodeOps`.
2542            self.ops().get_xattr(
2543                locked.cast_locked::<FileOpsCore>(),
2544                self,
2545                current_task,
2546                name,
2547                max_size,
2548            )
2549        }
2550    }
2551
2552    pub fn set_xattr<L>(
2553        &self,
2554        locked: &mut Locked<L>,
2555        current_task: &CurrentTask,
2556        mount: &MountInfo,
2557        name: &FsStr,
2558        value: &FsStr,
2559        op: XattrOp,
2560    ) -> Result<(), Errno>
2561    where
2562        L: LockEqualOrBefore<FileOpsCore>,
2563    {
2564        // Perform discretionary capability & access checks appropriate to the xattr prefix.
2565        self.check_xattr_access(locked, current_task, mount, name, Access::WRITE)?;
2566
2567        // LSM access checks must be performed after discretionary checks.
2568        security::check_fs_node_setxattr_access(current_task, self, name, value, op)?;
2569
2570        if name.starts_with(XATTR_SECURITY_PREFIX.to_bytes()) {
2571            // If the attribute is in the security.* domain then allow the LSM to handle the
2572            // request, or to delegate to `FsNodeOps::set_xattr()`.
2573            security::fs_node_setsecurity(locked, current_task, self, name, value, op)
2574        } else {
2575            // If the attribute is outside security.*, delegate the read to the `FsNodeOps`.
2576            self.ops().set_xattr(
2577                locked.cast_locked::<FileOpsCore>(),
2578                self,
2579                current_task,
2580                name,
2581                value,
2582                op,
2583            )
2584        }
2585    }
2586
2587    pub fn remove_xattr<L>(
2588        &self,
2589        locked: &mut Locked<L>,
2590        current_task: &CurrentTask,
2591        mount: &MountInfo,
2592        name: &FsStr,
2593    ) -> Result<(), Errno>
2594    where
2595        L: LockEqualOrBefore<FileOpsCore>,
2596    {
2597        // Perform discretionary capability & access checks appropriate to the xattr prefix.
2598        self.check_xattr_access(locked, current_task, mount, name, Access::WRITE)?;
2599
2600        // LSM access checks must be performed after discretionary checks.
2601        security::check_fs_node_removexattr_access(current_task, self, name)?;
2602        self.ops().remove_xattr(locked.cast_locked::<FileOpsCore>(), self, current_task, name)
2603    }
2604
2605    pub fn list_xattrs<L>(
2606        &self,
2607        locked: &mut Locked<L>,
2608        current_task: &CurrentTask,
2609        max_size: usize,
2610    ) -> Result<ValueOrSize<Vec<FsString>>, Errno>
2611    where
2612        L: LockEqualOrBefore<FileOpsCore>,
2613    {
2614        security::check_fs_node_listxattr_access(current_task, self)?;
2615        Ok(self
2616            .ops()
2617            .list_xattrs(locked.cast_locked::<FileOpsCore>(), self, current_task, max_size)?
2618            .map(|mut v| {
2619                // Extended attributes may be listed even if the caller would not be able to read
2620                // (or modify) the attribute's value.
2621                // trusted.* attributes are only accessible with CAP_SYS_ADMIN and are omitted by
2622                // `listxattr()` unless the caller holds CAP_SYS_ADMIN.
2623                if !security::is_task_capable_noaudit(current_task, CAP_SYS_ADMIN) {
2624                    v.retain(|name| !name.starts_with(XATTR_TRUSTED_PREFIX.to_bytes()));
2625                }
2626                v
2627            }))
2628    }
2629
2630    /// Returns current `FsNodeInfo`.
2631    pub fn info(&self) -> RwLockReadGuard<'_, FsNodeInfo> {
2632        self.info.read()
2633    }
2634
2635    /// Refreshes the `FsNodeInfo` if necessary and returns a read guard.
2636    pub fn fetch_and_refresh_info<L>(
2637        &self,
2638        locked: &mut Locked<L>,
2639        current_task: &CurrentTask,
2640    ) -> Result<RwLockReadGuard<'_, FsNodeInfo>, Errno>
2641    where
2642        L: LockEqualOrBefore<FileOpsCore>,
2643    {
2644        self.ops().fetch_and_refresh_info(
2645            locked.cast_locked::<FileOpsCore>(),
2646            self,
2647            current_task,
2648            &self.info,
2649        )
2650    }
2651
2652    pub fn update_info<F, T>(&self, mutator: F) -> T
2653    where
2654        F: FnOnce(&mut FsNodeInfo) -> T,
2655    {
2656        let mut info = self.info.write();
2657        mutator(&mut info)
2658    }
2659
2660    /// Clear the SUID and SGID bits unless the `current_task` has `CAP_FSETID`
2661    pub fn clear_suid_and_sgid_bits<L>(
2662        &self,
2663        locked: &mut Locked<L>,
2664        current_task: &CurrentTask,
2665    ) -> Result<(), Errno>
2666    where
2667        L: LockEqualOrBefore<FileOpsCore>,
2668    {
2669        if !security::is_task_capable_noaudit(current_task, CAP_FSETID) {
2670            self.update_attributes(locked, current_task, |info| {
2671                info.clear_suid_and_sgid_bits();
2672                Ok(())
2673            })?;
2674        }
2675        Ok(())
2676    }
2677
2678    /// Update the ctime and mtime of a file to now.
2679    pub fn update_ctime_mtime(&self) {
2680        if self.fs().manages_timestamps() {
2681            return;
2682        }
2683        self.update_info(|info| {
2684            let now = utc::utc_now();
2685            info.time_status_change = now;
2686            info.time_modify = now;
2687        });
2688    }
2689
2690    /// Update the ctime of a file to now.
2691    pub fn update_ctime(&self) {
2692        if self.fs().manages_timestamps() {
2693            return;
2694        }
2695        self.update_info(|info| {
2696            let now = utc::utc_now();
2697            info.time_status_change = now;
2698        });
2699    }
2700
2701    /// Update the atime and mtime if the `current_task` has write access, is the file owner, or
2702    /// holds either the CAP_DAC_OVERRIDE or CAP_FOWNER capability.
2703    pub fn update_atime_mtime<L>(
2704        &self,
2705        locked: &mut Locked<L>,
2706        current_task: &CurrentTask,
2707        mount: &MountInfo,
2708        atime: TimeUpdateType,
2709        mtime: TimeUpdateType,
2710    ) -> Result<(), Errno>
2711    where
2712        L: LockEqualOrBefore<FileOpsCore>,
2713    {
2714        // If the filesystem is read-only, this always fail.
2715        mount.check_readonly_filesystem()?;
2716
2717        let now = matches!((atime, mtime), (TimeUpdateType::Now, TimeUpdateType::Now));
2718        self.check_access(
2719            locked,
2720            current_task,
2721            mount,
2722            Access::WRITE,
2723            CheckAccessReason::ChangeTimestamps { now },
2724            security::Auditable::Location(std::panic::Location::caller()),
2725        )?;
2726
2727        if !matches!((atime, mtime), (TimeUpdateType::Omit, TimeUpdateType::Omit)) {
2728            // This function is called by `utimes(..)` which will update the access and
2729            // modification time. We need to call `update_attributes()` to update the mtime of
2730            // filesystems that manages file timestamps.
2731            self.update_attributes(locked, current_task, |info| {
2732                let now = utc::utc_now();
2733                let get_time = |time: TimeUpdateType| match time {
2734                    TimeUpdateType::Now => Some(now),
2735                    TimeUpdateType::Time(t) => Some(t),
2736                    TimeUpdateType::Omit => None,
2737                };
2738                if let Some(time) = get_time(atime) {
2739                    info.time_access = time;
2740                }
2741                if let Some(time) = get_time(mtime) {
2742                    info.time_modify = time;
2743                }
2744                Ok(())
2745            })?;
2746        }
2747        Ok(())
2748    }
2749
2750    /// Returns a string describing this `FsNode` in the format used by "/proc/../fd" for anonymous
2751    /// file descriptors. By default this is in the form:
2752    ///   <class>:[<node_id>]
2753    /// though `FsNodeOps` may customize this as required.
2754    pub fn internal_name(&self) -> FsString {
2755        if let Some(name) = self.ops().internal_name(self) {
2756            return name;
2757        };
2758        let class = if self.is_sock() {
2759            "socket"
2760        } else if self.is_fifo() {
2761            "pipe"
2762        } else {
2763            "file"
2764        };
2765        format!("{}:[{}]", class, self.ino).into()
2766    }
2767
2768    /// The key used to identify this node in the file system's node cache.
2769    ///
2770    /// For many file systems, this will be the same as the inode number. However, some file
2771    /// systems, such as FUSE, sometimes use different `node_key` and inode numbers.
2772    pub fn node_key(&self) -> ino_t {
2773        self.ops().node_key(self)
2774    }
2775
2776    fn ensure_rare_data(&self) -> &FsNodeRareData {
2777        self.rare_data.get_or_init(|| Box::new(FsNodeRareData::default()))
2778    }
2779
2780    /// Returns the set of watchers for this node.
2781    ///
2782    /// Only call this function if you require this node to actually store a list of watchers. If
2783    /// you just wish to notify any watchers that might exist, please use `notify` instead.
2784    pub fn ensure_watchers(&self) -> &inotify::InotifyWatchers {
2785        &self.ensure_rare_data().watchers
2786    }
2787
2788    /// Notify the watchers of the given event.
2789    pub fn notify(
2790        &self,
2791        event_mask: InotifyMask,
2792        cookie: u32,
2793        name: &FsStr,
2794        mode: FileMode,
2795        is_dead: bool,
2796    ) {
2797        if let Some(rare_data) = self.rare_data.get() {
2798            rare_data.watchers.notify(event_mask, cookie, name, mode, is_dead);
2799        }
2800    }
2801
2802    /// Calls through to the filesystem to enable fs-verity on this file.
2803    pub fn enable_fsverity<L>(
2804        &self,
2805        locked: &mut Locked<L>,
2806        current_task: &CurrentTask,
2807        descriptor: &fsverity_descriptor,
2808    ) -> Result<(), Errno>
2809    where
2810        L: LockEqualOrBefore<FileOpsCore>,
2811    {
2812        let locked = locked.cast_locked::<FileOpsCore>();
2813        self.ops().enable_fsverity(locked, self, current_task, descriptor)
2814    }
2815}
2816
2817impl std::fmt::Debug for FsNode {
2818    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
2819        f.debug_struct("FsNode")
2820            .field("fs", &self.fs().name())
2821            .field("info", &*self.info())
2822            .field("ops_ty", &self.ops().type_name())
2823            .finish()
2824    }
2825}
2826
2827impl Releasable for FsNode {
2828    type Context<'a> = CurrentTaskAndLocked<'a>;
2829
2830    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
2831        let (locked, current_task) = context;
2832        if let Some(fs) = self.fs.upgrade() {
2833            fs.remove_node(&self);
2834        }
2835        if let Err(err) = self.ops.forget(
2836            locked.cast_locked::<FileOpsCore>(),
2837            current_task,
2838            self.info.into_inner(),
2839        ) {
2840            log_error!("Error on FsNodeOps::forget: {err:?}");
2841        }
2842    }
2843}
2844
2845fn check_access(
2846    fs_node: &FsNode,
2847    current_task: &CurrentTask,
2848    permission_flags: security::PermissionFlags,
2849    node_uid: uid_t,
2850    node_gid: gid_t,
2851    mode: FileMode,
2852) -> Result<(), Errno> {
2853    // Determine which of the access bits apply to the `current_task`.
2854    let (fsuid, is_in_group) = {
2855        let current_creds = current_task.current_creds();
2856        (current_creds.fsuid, current_creds.is_in_group(node_gid))
2857    };
2858    let granted = if fsuid == node_uid {
2859        mode.user_access()
2860    } else if is_in_group {
2861        mode.group_access()
2862    } else {
2863        mode.other_access()
2864    };
2865
2866    let access = permission_flags.as_access();
2867    if granted.contains(access) {
2868        return Ok(());
2869    }
2870
2871    // Callers with CAP_DAC_READ_SEARCH override can read files & directories, and traverse
2872    // directories to which they lack permission.
2873    let mut requested = access & !granted;
2874
2875    // If this check was triggered by `access()`, or a variant, then check for a `dontaudit`
2876    // statement for the `audit_access` permission for this caller & file.
2877    let have_dont_audit = OnceBool::new();
2878    let has_capability = move |current_task, capability| {
2879        let dont_audit = have_dont_audit.get_or_init(|| {
2880            permission_flags.contains(PermissionFlags::ACCESS)
2881                && security::has_dontaudit_access(current_task, fs_node)
2882        });
2883        if dont_audit {
2884            security::is_task_capable_noaudit(current_task, capability)
2885        } else {
2886            security::check_task_capable(current_task, capability).is_ok()
2887        }
2888    };
2889
2890    // CAP_DAC_READ_SEARCH allows bypass of read checks, and directory traverse (eXecute) checks.
2891    let dac_read_search_access =
2892        if mode.is_dir() { Access::READ | Access::EXEC } else { Access::READ };
2893    if dac_read_search_access.intersects(requested)
2894        && has_capability(current_task, CAP_DAC_READ_SEARCH)
2895    {
2896        requested.remove(dac_read_search_access);
2897    }
2898    if requested.is_empty() {
2899        return Ok(());
2900    }
2901
2902    // CAP_DAC_OVERRIDE allows bypass of all checks (though see the comment for file-execute).
2903    let mut dac_override_access = Access::READ | Access::WRITE;
2904    dac_override_access |= if mode.is_dir() {
2905        Access::EXEC
2906    } else {
2907        // File execute access checks may not be bypassed unless at least one executable bit is set.
2908        (mode.user_access() | mode.group_access() | mode.other_access()) & Access::EXEC
2909    };
2910    if dac_override_access.intersects(requested) && has_capability(current_task, CAP_DAC_OVERRIDE) {
2911        requested.remove(dac_override_access);
2912    }
2913    if requested.is_empty() {
2914        return Ok(());
2915    }
2916
2917    return error!(EACCES);
2918}
2919
2920#[cfg(test)]
2921mod tests {
2922    use super::*;
2923    use crate::device::mem::mem_device_init;
2924    use crate::testing::*;
2925    use crate::vfs::buffers::VecOutputBuffer;
2926    use starnix_uapi::auth::Credentials;
2927    use starnix_uapi::file_mode::mode;
2928
2929    #[::fuchsia::test]
2930    async fn open_device_file() {
2931        spawn_kernel_and_run(async |locked, current_task| {
2932            mem_device_init(locked, &*current_task).expect("mem_device_init");
2933
2934            // Create a device file that points to the `zero` device (which is automatically
2935            // registered in the kernel).
2936            current_task
2937                .fs()
2938                .root()
2939                .create_node(
2940                    locked,
2941                    &current_task,
2942                    "zero".into(),
2943                    mode!(IFCHR, 0o666),
2944                    DeviceId::ZERO,
2945                )
2946                .expect("create_node");
2947
2948            const CONTENT_LEN: usize = 10;
2949            let mut buffer = VecOutputBuffer::new(CONTENT_LEN);
2950
2951            // Read from the zero device.
2952            let device_file = current_task
2953                .open_file(locked, "zero".into(), OpenFlags::RDONLY)
2954                .expect("open device file");
2955            device_file.read(locked, &current_task, &mut buffer).expect("read from zero");
2956
2957            // Assert the contents.
2958            assert_eq!(&[0; CONTENT_LEN], buffer.data());
2959        })
2960        .await;
2961    }
2962
2963    #[::fuchsia::test]
2964    async fn node_info_is_reflected_in_stat() {
2965        spawn_kernel_and_run(async |locked, current_task| {
2966            // Create a node.
2967            let node = &current_task
2968                .fs()
2969                .root()
2970                .create_node(locked, &current_task, "zero".into(), FileMode::IFCHR, DeviceId::ZERO)
2971                .expect("create_node")
2972                .entry
2973                .node;
2974            node.update_info(|info| {
2975                info.mode = FileMode::IFSOCK;
2976                info.size = 1;
2977                info.blocks = 2;
2978                info.blksize = 4;
2979                info.uid = 9;
2980                info.gid = 10;
2981                info.link_count = 11;
2982                info.time_status_change = UtcInstant::from_nanos(1);
2983                info.time_access = UtcInstant::from_nanos(2);
2984                info.time_modify = UtcInstant::from_nanos(3);
2985                info.rdev = DeviceId::new(13, 13);
2986            });
2987            let stat = node.stat(locked, &current_task).expect("stat");
2988
2989            assert_eq!(stat.st_mode, FileMode::IFSOCK.bits());
2990            assert_eq!(stat.st_size, 1);
2991            assert_eq!(stat.st_blksize, 4);
2992            assert_eq!(stat.st_blocks, 2);
2993            assert_eq!(stat.st_uid, 9);
2994            assert_eq!(stat.st_gid, 10);
2995            assert_eq!(stat.st_nlink, 11);
2996            assert_eq!(stat.st_ctime, 0);
2997            assert_eq!(stat.st_ctime_nsec, 1);
2998            assert_eq!(stat.st_atime, 0);
2999            assert_eq!(stat.st_atime_nsec, 2);
3000            assert_eq!(stat.st_mtime, 0);
3001            assert_eq!(stat.st_mtime_nsec, 3);
3002            assert_eq!(stat.st_rdev, DeviceId::new(13, 13).bits());
3003        })
3004        .await;
3005    }
3006
3007    #[::fuchsia::test]
3008    fn test_flock_operation() {
3009        assert!(FlockOperation::from_flags(0).is_err());
3010        assert!(FlockOperation::from_flags(u32::MAX).is_err());
3011
3012        let operation1 = FlockOperation::from_flags(LOCK_SH).expect("from_flags");
3013        assert!(!operation1.is_unlock());
3014        assert!(!operation1.is_lock_exclusive());
3015        assert!(operation1.is_blocking());
3016
3017        let operation2 = FlockOperation::from_flags(LOCK_EX | LOCK_NB).expect("from_flags");
3018        assert!(!operation2.is_unlock());
3019        assert!(operation2.is_lock_exclusive());
3020        assert!(!operation2.is_blocking());
3021
3022        let operation3 = FlockOperation::from_flags(LOCK_UN).expect("from_flags");
3023        assert!(operation3.is_unlock());
3024        assert!(!operation3.is_lock_exclusive());
3025        assert!(operation3.is_blocking());
3026    }
3027
3028    #[::fuchsia::test]
3029    async fn test_check_access() {
3030        spawn_kernel_and_run(async |locked, current_task| {
3031            let mut creds = Credentials::with_ids(1, 2);
3032            creds.groups = vec![3, 4];
3033            current_task.set_creds(creds);
3034
3035            // Create a node.
3036            let node = &current_task
3037                .fs()
3038                .root()
3039                .create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceId::NONE)
3040                .expect("create_node")
3041                .entry
3042                .node;
3043            let check_access = |locked: &mut Locked<Unlocked>,
3044                                uid: uid_t,
3045                                gid: gid_t,
3046                                perm: u32,
3047                                access: Access| {
3048                node.update_info(|info| {
3049                    info.mode = mode!(IFREG, perm);
3050                    info.uid = uid;
3051                    info.gid = gid;
3052                });
3053                node.check_access(
3054                    locked,
3055                    &current_task,
3056                    &MountInfo::detached(),
3057                    access,
3058                    CheckAccessReason::InternalPermissionChecks,
3059                    security::Auditable::Location(std::panic::Location::caller()),
3060                )
3061            };
3062
3063            assert_eq!(check_access(locked, 0, 0, 0o700, Access::EXEC), error!(EACCES));
3064            assert_eq!(check_access(locked, 0, 0, 0o700, Access::READ), error!(EACCES));
3065            assert_eq!(check_access(locked, 0, 0, 0o700, Access::WRITE), error!(EACCES));
3066
3067            assert_eq!(check_access(locked, 0, 0, 0o070, Access::EXEC), error!(EACCES));
3068            assert_eq!(check_access(locked, 0, 0, 0o070, Access::READ), error!(EACCES));
3069            assert_eq!(check_access(locked, 0, 0, 0o070, Access::WRITE), error!(EACCES));
3070
3071            assert_eq!(check_access(locked, 0, 0, 0o007, Access::EXEC), Ok(()));
3072            assert_eq!(check_access(locked, 0, 0, 0o007, Access::READ), Ok(()));
3073            assert_eq!(check_access(locked, 0, 0, 0o007, Access::WRITE), Ok(()));
3074
3075            assert_eq!(check_access(locked, 1, 0, 0o700, Access::EXEC), Ok(()));
3076            assert_eq!(check_access(locked, 1, 0, 0o700, Access::READ), Ok(()));
3077            assert_eq!(check_access(locked, 1, 0, 0o700, Access::WRITE), Ok(()));
3078
3079            assert_eq!(check_access(locked, 1, 0, 0o100, Access::EXEC), Ok(()));
3080            assert_eq!(check_access(locked, 1, 0, 0o100, Access::READ), error!(EACCES));
3081            assert_eq!(check_access(locked, 1, 0, 0o100, Access::WRITE), error!(EACCES));
3082
3083            assert_eq!(check_access(locked, 1, 0, 0o200, Access::EXEC), error!(EACCES));
3084            assert_eq!(check_access(locked, 1, 0, 0o200, Access::READ), error!(EACCES));
3085            assert_eq!(check_access(locked, 1, 0, 0o200, Access::WRITE), Ok(()));
3086
3087            assert_eq!(check_access(locked, 1, 0, 0o400, Access::EXEC), error!(EACCES));
3088            assert_eq!(check_access(locked, 1, 0, 0o400, Access::READ), Ok(()));
3089            assert_eq!(check_access(locked, 1, 0, 0o400, Access::WRITE), error!(EACCES));
3090
3091            assert_eq!(check_access(locked, 0, 2, 0o700, Access::EXEC), error!(EACCES));
3092            assert_eq!(check_access(locked, 0, 2, 0o700, Access::READ), error!(EACCES));
3093            assert_eq!(check_access(locked, 0, 2, 0o700, Access::WRITE), error!(EACCES));
3094
3095            assert_eq!(check_access(locked, 0, 2, 0o070, Access::EXEC), Ok(()));
3096            assert_eq!(check_access(locked, 0, 2, 0o070, Access::READ), Ok(()));
3097            assert_eq!(check_access(locked, 0, 2, 0o070, Access::WRITE), Ok(()));
3098
3099            assert_eq!(check_access(locked, 0, 3, 0o070, Access::EXEC), Ok(()));
3100            assert_eq!(check_access(locked, 0, 3, 0o070, Access::READ), Ok(()));
3101            assert_eq!(check_access(locked, 0, 3, 0o070, Access::WRITE), Ok(()));
3102        })
3103        .await;
3104    }
3105
3106    #[::fuchsia::test]
3107    async fn set_security_xattr_fails_without_security_module_or_root() {
3108        spawn_kernel_and_run(async |locked, current_task| {
3109            let mut creds = Credentials::with_ids(1, 2);
3110            creds.groups = vec![3, 4];
3111            current_task.set_creds(creds);
3112
3113            // Create a node.
3114            let node = &current_task
3115                .fs()
3116                .root()
3117                .create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceId::NONE)
3118                .expect("create_node")
3119                .entry
3120                .node;
3121
3122            // Give read-write-execute access.
3123            node.update_info(|info| info.mode = mode!(IFREG, 0o777));
3124
3125            // Without a security module, and without CAP_SYS_ADMIN capabilities, setting the xattr
3126            // should fail.
3127            assert_eq!(
3128                node.set_xattr(
3129                    locked,
3130                    &current_task,
3131                    &MountInfo::detached(),
3132                    "security.name".into(),
3133                    "security_label".into(),
3134                    XattrOp::Create,
3135                ),
3136                error!(EPERM)
3137            );
3138        })
3139        .await;
3140    }
3141
3142    #[::fuchsia::test]
3143    async fn set_non_user_xattr_fails_without_security_module_or_root() {
3144        spawn_kernel_and_run(async |locked, current_task| {
3145            let mut creds = Credentials::with_ids(1, 2);
3146            creds.groups = vec![3, 4];
3147            current_task.set_creds(creds);
3148
3149            // Create a node.
3150            let node = &current_task
3151                .fs()
3152                .root()
3153                .create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceId::NONE)
3154                .expect("create_node")
3155                .entry
3156                .node;
3157
3158            // Give read-write-execute access.
3159            node.update_info(|info| info.mode = mode!(IFREG, 0o777));
3160
3161            // Without a security module, and without CAP_SYS_ADMIN capabilities, setting the xattr
3162            // should fail.
3163            assert_eq!(
3164                node.set_xattr(
3165                    locked,
3166                    &current_task,
3167                    &MountInfo::detached(),
3168                    "trusted.name".into(),
3169                    "some data".into(),
3170                    XattrOp::Create,
3171                ),
3172                error!(EPERM)
3173            );
3174        })
3175        .await;
3176    }
3177
3178    #[::fuchsia::test]
3179    async fn get_security_xattr_succeeds_without_read_access() {
3180        spawn_kernel_and_run(async |locked, current_task| {
3181            let mut creds = Credentials::with_ids(1, 2);
3182            creds.groups = vec![3, 4];
3183            current_task.set_creds(creds);
3184
3185            // Create a node.
3186            let node = &current_task
3187                .fs()
3188                .root()
3189                .create_node(locked, &current_task, "foo".into(), FileMode::IFREG, DeviceId::NONE)
3190                .expect("create_node")
3191                .entry
3192                .node;
3193
3194            // Only give read access to the root and give root access to the current task.
3195            node.update_info(|info| info.mode = mode!(IFREG, 0o100));
3196            current_task.set_creds(Credentials::with_ids(0, 0));
3197
3198            // Setting the label should succeed even without write access to the file.
3199            assert_eq!(
3200                node.set_xattr(
3201                    locked,
3202                    &current_task,
3203                    &MountInfo::detached(),
3204                    "security.name".into(),
3205                    "security_label".into(),
3206                    XattrOp::Create,
3207                ),
3208                Ok(())
3209            );
3210
3211            // Remove root access from the current task.
3212            current_task.set_creds(Credentials::with_ids(1, 1));
3213
3214            // Getting the label should succeed even without read access to the file.
3215            assert_eq!(
3216                node.get_xattr(
3217                    locked,
3218                    &current_task,
3219                    &MountInfo::detached(),
3220                    "security.name".into(),
3221                    4096
3222                ),
3223                Ok(ValueOrSize::Value("security_label".into()))
3224            );
3225        })
3226        .await;
3227    }
3228}