Skip to main content

starnix_core/vfs/
file_object.rs

// Copyright 2021 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5use crate::mm::memory::MemoryObject;
6use crate::mm::{DesiredAddress, MappingName, MappingOptions, MemoryAccessorExt, ProtectionFlags};
7use crate::power::OnWakeOps;
8use crate::security;
9use crate::task::{
10    CurrentTask, CurrentTaskAndLocked, EventHandler, Task, ThreadGroupKey, WaitCallback,
11    WaitCanceler, Waiter, register_delayed_release,
12};
13use crate::vfs::buffers::{InputBuffer, OutputBuffer};
14use crate::vfs::file_server::serve_file;
15use crate::vfs::fsverity::{
16    FsVerityState, {self},
17};
18use crate::vfs::{
19    ActiveNamespaceNode, DirentSink, EpollFileObject, EpollKey, FallocMode, FdTableId,
20    FileSystemHandle, FileWriteGuardMode, FsNodeHandle, FsString, NamespaceNode, RecordLockCommand,
21    RecordLockOwner,
22};
23use starnix_crypt::EncryptionKeyId;
24use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
25use starnix_rcu::RcuAtomic;
26use starnix_types::ownership::ReleaseGuard;
27use starnix_uapi::mount_flags::MountFlags;
28use starnix_uapi::user_address::ArchSpecific;
29
30use fidl::HandleBased;
31use fidl::endpoints::ProtocolMarker as _;
32use linux_uapi::{FSCRYPT_MODE_AES_256_CTS, FSCRYPT_MODE_AES_256_XTS};
33use starnix_logging::{
34    CATEGORY_STARNIX_MM, impossible_error, log_error, trace_duration, track_stub,
35};
36use starnix_sync::{
37    BeforeFsNodeAppend, FileOpsCore, LockBefore, LockEqualOrBefore, Locked, Mutex, Unlocked,
38};
39use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
40use starnix_types::math::round_up_to_system_page_size;
41use starnix_types::ownership::Releasable;
42use starnix_uapi::arc_key::WeakKey;
43use starnix_uapi::as_any::AsAny;
44use starnix_uapi::auth::{CAP_FOWNER, CAP_SYS_RAWIO};
45use starnix_uapi::errors::{EAGAIN, ETIMEDOUT, Errno};
46use starnix_uapi::file_lease::FileLeaseType;
47use starnix_uapi::file_mode::Access;
48use starnix_uapi::inotify_mask::InotifyMask;
49use starnix_uapi::open_flags::{AtomicOpenFlags, OpenFlags};
50use starnix_uapi::seal_flags::SealFlags;
51use starnix_uapi::user_address::{UserAddress, UserRef};
52use starnix_uapi::vfs::FdEvents;
53use starnix_uapi::{
54    FIBMAP, FIGETBSZ, FIONBIO, FIONREAD, FIOQSIZE, FS_CASEFOLD_FL, FS_IOC_ADD_ENCRYPTION_KEY,
55    FS_IOC_ENABLE_VERITY, FS_IOC_FSGETXATTR, FS_IOC_FSSETXATTR, FS_IOC_MEASURE_VERITY,
56    FS_IOC_READ_VERITY_METADATA, FS_IOC_REMOVE_ENCRYPTION_KEY, FS_IOC_SET_ENCRYPTION_POLICY,
57    FS_VERITY_FL, FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, FSCRYPT_POLICY_V2, SEEK_CUR, SEEK_DATA,
58    SEEK_END, SEEK_HOLE, SEEK_SET, TCGETS, errno, error, fscrypt_add_key_arg, fscrypt_identifier,
59    fsxattr, off_t, pid_t, uapi,
60};
61use std::collections::HashMap;
62use std::fmt;
63use std::ops::Deref;
64use std::sync::atomic::Ordering;
65use std::sync::{Arc, Weak};
66
/// Largest file size (and end offset) accepted by the helpers in this file: `2^63 - 1`.
pub const MAX_LFS_FILESIZE: usize = (1 << 63) - 1;
68
69pub fn checked_add_offset_and_length(offset: usize, length: usize) -> Result<usize, Errno> {
70    let end = offset.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
71    if end > MAX_LFS_FILESIZE {
72        return error!(EINVAL);
73    }
74    Ok(end)
75}
76
77#[derive(Debug)]
78pub enum SeekTarget {
79    /// Seek to the given offset relative to the start of the file.
80    Set(off_t),
81    /// Seek to the given offset relative to the current position.
82    Cur(off_t),
83    /// Seek to the given offset relative to the end of the file.
84    End(off_t),
85    /// Seek for the first data after the given offset,
86    Data(off_t),
87    /// Seek for the first hole after the given offset,
88    Hole(off_t),
89}
90
91impl SeekTarget {
92    pub fn from_raw(whence: u32, offset: off_t) -> Result<SeekTarget, Errno> {
93        match whence {
94            SEEK_SET => Ok(SeekTarget::Set(offset)),
95            SEEK_CUR => Ok(SeekTarget::Cur(offset)),
96            SEEK_END => Ok(SeekTarget::End(offset)),
97            SEEK_DATA => Ok(SeekTarget::Data(offset)),
98            SEEK_HOLE => Ok(SeekTarget::Hole(offset)),
99            _ => error!(EINVAL),
100        }
101    }
102
103    pub fn whence(&self) -> u32 {
104        match self {
105            Self::Set(_) => SEEK_SET,
106            Self::Cur(_) => SEEK_CUR,
107            Self::End(_) => SEEK_END,
108            Self::Data(_) => SEEK_DATA,
109            Self::Hole(_) => SEEK_HOLE,
110        }
111    }
112
113    pub fn offset(&self) -> off_t {
114        match self {
115            Self::Set(off)
116            | Self::Cur(off)
117            | Self::End(off)
118            | Self::Data(off)
119            | Self::Hole(off) => *off,
120        }
121    }
122}
123
124/// Corresponds to struct file_operations in Linux, plus any filesystem-specific data.
125pub trait FileOps: Send + Sync + AsAny + 'static {
126    /// Called when the FileObject is opened/created
127    fn open(
128        &self,
129        _locked: &mut Locked<FileOpsCore>,
130        _file: &FileObject,
131        _current_task: &CurrentTask,
132    ) -> Result<(), Errno> {
133        Ok(())
134    }
135
136    /// Called when the FileObject is destroyed.
137    fn close(
138        self: Box<Self>,
139        _locked: &mut Locked<FileOpsCore>,
140        _file: &FileObjectState,
141        _current_task: &CurrentTask,
142    ) {
143    }
144
145    /// Called every time close() is called on this file, even if the file is not ready to be
146    /// released.
147    fn flush(
148        &self,
149        _locked: &mut Locked<FileOpsCore>,
150        _file: &FileObject,
151        _current_task: &CurrentTask,
152    ) {
153    }
154
155    /// Returns whether the file has meaningful seek offsets. Returning `false` is only
156    /// optimization and will makes `FileObject` never hold the offset lock when calling `read` and
157    /// `write`.
158    fn has_persistent_offsets(&self) -> bool {
159        self.is_seekable()
160    }
161
162    /// Returns whether the file is seekable.
163    fn is_seekable(&self) -> bool;
164
165    /// Returns true if `write()` operations on the file will update the seek offset.
166    fn writes_update_seek_offset(&self) -> bool {
167        self.has_persistent_offsets()
168    }
169
170    /// Read from the file at an offset. If the file does not have persistent offsets (either
171    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
172    /// Returns the number of bytes read.
173    fn read(
174        &self,
175        locked: &mut Locked<FileOpsCore>,
176        file: &FileObject,
177        current_task: &CurrentTask,
178        offset: usize,
179        data: &mut dyn OutputBuffer,
180    ) -> Result<usize, Errno>;
181
182    /// Write to the file with an offset. If the file does not have persistent offsets (either
183    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
184    /// Returns the number of bytes written.
185    fn write(
186        &self,
187        locked: &mut Locked<FileOpsCore>,
188        file: &FileObject,
189        current_task: &CurrentTask,
190        offset: usize,
191        data: &mut dyn InputBuffer,
192    ) -> Result<usize, Errno>;
193
194    /// Adjust the `current_offset` if the file is seekable.
195    fn seek(
196        &self,
197        locked: &mut Locked<FileOpsCore>,
198        file: &FileObject,
199        current_task: &CurrentTask,
200        current_offset: off_t,
201        target: SeekTarget,
202    ) -> Result<off_t, Errno>;
203
204    /// Syncs cached state associated with the file descriptor to persistent storage.
205    ///
206    /// The method blocks until the synchronization is complete.
207    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
208        file.node().ops().sync(file.node(), current_task)
209    }
210
211    /// Syncs cached data, and only enough metadata to retrieve said data, to persistent storage.
212    ///
213    /// The method blocks until the synchronization is complete.
214    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
215        // TODO(https://fxbug.dev/297305634) make a default macro once data can be done separately
216        self.sync(file, current_task)
217    }
218
219    /// Returns a VMO representing this file. At least the requested protection flags must
220    /// be set on the VMO. Reading or writing the VMO must read or write the file. If this is not
221    /// possible given the requested protection, an error must be returned.
222    /// The `length` is a hint for the desired size of the VMO. The returned VMO may be larger or
223    /// smaller than the requested length.
224    /// This method is typically called by [`Self::mmap`].
225    fn get_memory(
226        &self,
227        _locked: &mut Locked<FileOpsCore>,
228        _file: &FileObject,
229        _current_task: &CurrentTask,
230        _length: Option<usize>,
231        _prot: ProtectionFlags,
232    ) -> Result<Arc<MemoryObject>, Errno> {
233        error!(ENODEV)
234    }
235
236    /// Responds to an mmap call. The default implementation calls [`Self::get_memory`] to get a VMO
237    /// and then maps it with [`crate::mm::MemoryManager::map`].
238    /// Only implement this trait method if your file needs to control mapping, or record where
239    /// a VMO gets mapped.
240    fn mmap(
241        &self,
242        locked: &mut Locked<FileOpsCore>,
243        file: &FileObject,
244        current_task: &CurrentTask,
245        addr: DesiredAddress,
246        memory_offset: u64,
247        length: usize,
248        prot_flags: ProtectionFlags,
249        options: MappingOptions,
250        filename: NamespaceNode,
251    ) -> Result<UserAddress, Errno> {
252        trace_duration!(CATEGORY_STARNIX_MM, "FileOpsDefaultMmap");
253        let min_memory_size = (memory_offset as usize)
254            .checked_add(round_up_to_system_page_size(length)?)
255            .ok_or_else(|| errno!(EINVAL))?;
256        let mut memory = if options.contains(MappingOptions::SHARED) {
257            trace_duration!(CATEGORY_STARNIX_MM, "GetSharedVmo");
258            self.get_memory(locked, file, current_task, Some(min_memory_size), prot_flags)?
259        } else {
260            trace_duration!(CATEGORY_STARNIX_MM, "GetPrivateVmo");
261            // TODO(tbodt): Use PRIVATE_CLONE to have the filesystem server do the clone for us.
262            let base_prot_flags = (prot_flags | ProtectionFlags::READ) - ProtectionFlags::WRITE;
263            let memory = self.get_memory(
264                locked,
265                file,
266                current_task,
267                Some(min_memory_size),
268                base_prot_flags,
269            )?;
270            let mut clone_flags = zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE;
271            if !prot_flags.contains(ProtectionFlags::WRITE) {
272                clone_flags |= zx::VmoChildOptions::NO_WRITE;
273            }
274            trace_duration!(CATEGORY_STARNIX_MM, "CreatePrivateChildVmo");
275            Arc::new(
276                memory.create_child(clone_flags, 0, memory.get_size()).map_err(impossible_error)?,
277            )
278        };
279
280        // Write guard is necessary only for shared mappings. Note that this doesn't depend on
281        // `prot_flags` since these can be changed later with `mprotect()`.
282        let file_write_guard = if options.contains(MappingOptions::SHARED) && file.can_write() {
283            let node = &file.name.entry.node;
284            let state = node.write_guard_state.lock();
285
286            // `F_SEAL_FUTURE_WRITE` should allow `mmap(PROT_READ)`, but block
287            // `mprotect(PROT_WRITE)`. This is different from `F_SEAL_WRITE`, which blocks
288            // `mmap(PROT_READ)`. To handle this case correctly remove `WRITE` right from the
289            // VMO handle to ensure `mprotect(PROT_WRITE)` fails.
290            let seals = state.get_seals().unwrap_or(SealFlags::empty());
291            if seals.contains(SealFlags::FUTURE_WRITE)
292                && !seals.contains(SealFlags::WRITE)
293                && !prot_flags.contains(ProtectionFlags::WRITE)
294            {
295                let mut new_rights = zx::Rights::VMO_DEFAULT - zx::Rights::WRITE;
296                if prot_flags.contains(ProtectionFlags::EXEC) {
297                    new_rights |= zx::Rights::EXECUTE;
298                }
299                memory = Arc::new(memory.duplicate_handle(new_rights).map_err(impossible_error)?);
300
301                None
302            } else {
303                Some(FileWriteGuardMode::WriteMapping)
304            }
305        } else {
306            None
307        };
308
309        current_task.mm()?.map_memory(
310            addr,
311            memory,
312            memory_offset,
313            length,
314            prot_flags,
315            file.max_access_for_memory_mapping(),
316            options,
317            MappingName::File(filename.into_mapping(file_write_guard)?),
318        )
319    }
320
321    /// Respond to a `getdents` or `getdents64` calls.
322    ///
323    /// The `file.offset` lock will be held while entering this method. The implementation must look
324    /// at `sink.offset()` to read the current offset into the file.
325    fn readdir(
326        &self,
327        _locked: &mut Locked<FileOpsCore>,
328        _file: &FileObject,
329        _current_task: &CurrentTask,
330        _sink: &mut dyn DirentSink,
331    ) -> Result<(), Errno> {
332        error!(ENOTDIR)
333    }
334
335    /// Establish a one-shot, edge-triggered, asynchronous wait for the given FdEvents for the
336    /// given file and task. Returns `None` if this file does not support blocking waits.
337    ///
338    /// Active events are not considered. This is similar to the semantics of the
339    /// ZX_WAIT_ASYNC_EDGE flag on zx_wait_async. To avoid missing events, the caller must call
340    /// query_events after calling this.
341    ///
342    /// If your file does not support blocking waits, leave this as the default implementation.
343    fn wait_async(
344        &self,
345        _locked: &mut Locked<FileOpsCore>,
346        _file: &FileObject,
347        _current_task: &CurrentTask,
348        _waiter: &Waiter,
349        _events: FdEvents,
350        _handler: EventHandler,
351    ) -> Option<WaitCanceler> {
352        None
353    }
354
355    /// The events currently active on this file.
356    ///
357    /// If this function returns `POLLIN` or `POLLOUT`, then FileObject will
358    /// add `POLLRDNORM` and `POLLWRNORM`, respective, which are equivalent in
359    /// the Linux UAPI.
360    ///
361    /// See https://linux.die.net/man/2/poll
362    fn query_events(
363        &self,
364        _locked: &mut Locked<FileOpsCore>,
365        _file: &FileObject,
366        _current_task: &CurrentTask,
367    ) -> Result<FdEvents, Errno> {
368        Ok(FdEvents::POLLIN | FdEvents::POLLOUT)
369    }
370
371    fn ioctl(
372        &self,
373        locked: &mut Locked<Unlocked>,
374        file: &FileObject,
375        current_task: &CurrentTask,
376        request: u32,
377        arg: SyscallArg,
378    ) -> Result<SyscallResult, Errno> {
379        default_ioctl(file, locked, current_task, request, arg)
380    }
381
382    fn fcntl(
383        &self,
384        _file: &FileObject,
385        _current_task: &CurrentTask,
386        cmd: u32,
387        _arg: u64,
388    ) -> Result<SyscallResult, Errno> {
389        default_fcntl(cmd)
390    }
391
392    /// Return a handle that allows access to this file descritor through the zxio protocols.
393    ///
394    /// If None is returned, the file will act as if it was a fd to `/dev/null`.
395    fn to_handle(
396        &self,
397        file: &FileObject,
398        current_task: &CurrentTask,
399    ) -> Result<Option<zx::NullableHandle>, Errno> {
400        serve_file(current_task, file, current_task.current_creds().clone())
401            .map(|c| Some(c.0.into_handle().into()))
402    }
403
404    // Return a vector of handles. This is used in situations where there is more than one handle
405    // associated with this file descriptor.
406    //
407    // In Fuchsia, there is an expectation that there is a 1:1 mapping between a file descriptor and
408    // a handle. In general, we do not want to violate that rule. This function is intended to used
409    // in very limited circumstances (compatibility with Linux and Binder), where we need to violate
410    // rule.
411    //
412    // Specifically, we are using this to implement SyncFiles correctly, where a single SyncFile can
413    // represent multiple SyncPoints. Each SyncPoint contains a zx::Counter.
414    //
415    // If you chose to implement this function, to_handle() should return an error. You must also be
416    // aware that if these handles are passed to Fuchsia over Binder, they will be represented as
417    // single file descriptor, and you should use the composite_fd library to manage that file
418    // descriptor.
419    fn get_handles(
420        &self,
421        _file: &FileObject,
422        _current_task: &CurrentTask,
423    ) -> Result<Vec<zx::NullableHandle>, Errno> {
424        error!(ENOTSUP)
425    }
426
427    /// Returns the associated pid_t.
428    ///
429    /// Used by pidfd and `/proc/<pid>`. Unlikely to be used by other files.
430    fn as_thread_group_key(&self, _file: &FileObject) -> Result<ThreadGroupKey, Errno> {
431        error!(EBADF)
432    }
433
434    fn readahead(
435        &self,
436        _file: &FileObject,
437        _current_task: &CurrentTask,
438        _offset: usize,
439        _length: usize,
440    ) -> Result<(), Errno> {
441        error!(EINVAL)
442    }
443
444    /// Extra information that is included in the /proc/<pid>/fdfino/<fd> entry.
445    fn extra_fdinfo(
446        &self,
447        _locked: &mut Locked<FileOpsCore>,
448        _file: &FileHandle,
449        _current_task: &CurrentTask,
450    ) -> Option<FsString> {
451        None
452    }
453}
454
455/// Marker trait for implementation of FileOps that do not need to implement `close` and can
456/// then pass a wrapper object as the `FileOps` implementation.
457pub trait CloseFreeSafe {}
458impl<T: FileOps + CloseFreeSafe, P: Deref<Target = T> + Send + Sync + 'static> FileOps for P {
459    fn close(
460        self: Box<Self>,
461        _locked: &mut Locked<FileOpsCore>,
462        _file: &FileObjectState,
463        _current_task: &CurrentTask,
464    ) {
465        // This method cannot be delegated. T being `CloseFreeSafe` this is fine.
466    }
467
468    fn flush(
469        &self,
470        locked: &mut Locked<FileOpsCore>,
471        file: &FileObject,
472        current_task: &CurrentTask,
473    ) {
474        self.deref().flush(locked, file, current_task)
475    }
476
477    fn has_persistent_offsets(&self) -> bool {
478        self.deref().has_persistent_offsets()
479    }
480
481    fn writes_update_seek_offset(&self) -> bool {
482        self.deref().writes_update_seek_offset()
483    }
484
485    fn is_seekable(&self) -> bool {
486        self.deref().is_seekable()
487    }
488
489    fn read(
490        &self,
491        locked: &mut Locked<FileOpsCore>,
492        file: &FileObject,
493        current_task: &CurrentTask,
494        offset: usize,
495        data: &mut dyn OutputBuffer,
496    ) -> Result<usize, Errno> {
497        self.deref().read(locked, file, current_task, offset, data)
498    }
499
500    fn write(
501        &self,
502        locked: &mut Locked<FileOpsCore>,
503        file: &FileObject,
504        current_task: &CurrentTask,
505        offset: usize,
506        data: &mut dyn InputBuffer,
507    ) -> Result<usize, Errno> {
508        self.deref().write(locked, file, current_task, offset, data)
509    }
510
511    fn seek(
512        &self,
513        locked: &mut Locked<FileOpsCore>,
514        file: &FileObject,
515        current_task: &CurrentTask,
516        current_offset: off_t,
517        target: SeekTarget,
518    ) -> Result<off_t, Errno> {
519        self.deref().seek(locked, file, current_task, current_offset, target)
520    }
521
522    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
523        self.deref().sync(file, current_task)
524    }
525
526    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
527        self.deref().data_sync(file, current_task)
528    }
529
530    fn get_memory(
531        &self,
532        locked: &mut Locked<FileOpsCore>,
533        file: &FileObject,
534        current_task: &CurrentTask,
535        length: Option<usize>,
536        prot: ProtectionFlags,
537    ) -> Result<Arc<MemoryObject>, Errno> {
538        self.deref().get_memory(locked, file, current_task, length, prot)
539    }
540
541    fn mmap(
542        &self,
543        locked: &mut Locked<FileOpsCore>,
544        file: &FileObject,
545        current_task: &CurrentTask,
546        addr: DesiredAddress,
547        memory_offset: u64,
548        length: usize,
549        prot_flags: ProtectionFlags,
550        options: MappingOptions,
551        filename: NamespaceNode,
552    ) -> Result<UserAddress, Errno> {
553        self.deref().mmap(
554            locked,
555            file,
556            current_task,
557            addr,
558            memory_offset,
559            length,
560            prot_flags,
561            options,
562            filename,
563        )
564    }
565
566    fn readdir(
567        &self,
568        locked: &mut Locked<FileOpsCore>,
569        file: &FileObject,
570        current_task: &CurrentTask,
571        sink: &mut dyn DirentSink,
572    ) -> Result<(), Errno> {
573        self.deref().readdir(locked, file, current_task, sink)
574    }
575
576    fn wait_async(
577        &self,
578        locked: &mut Locked<FileOpsCore>,
579        file: &FileObject,
580        current_task: &CurrentTask,
581        waiter: &Waiter,
582        events: FdEvents,
583        handler: EventHandler,
584    ) -> Option<WaitCanceler> {
585        self.deref().wait_async(locked, file, current_task, waiter, events, handler)
586    }
587
588    fn query_events(
589        &self,
590        locked: &mut Locked<FileOpsCore>,
591        file: &FileObject,
592        current_task: &CurrentTask,
593    ) -> Result<FdEvents, Errno> {
594        self.deref().query_events(locked, file, current_task)
595    }
596
597    fn ioctl(
598        &self,
599        locked: &mut Locked<Unlocked>,
600        file: &FileObject,
601        current_task: &CurrentTask,
602        request: u32,
603        arg: SyscallArg,
604    ) -> Result<SyscallResult, Errno> {
605        self.deref().ioctl(locked, file, current_task, request, arg)
606    }
607
608    fn fcntl(
609        &self,
610        file: &FileObject,
611        current_task: &CurrentTask,
612        cmd: u32,
613        arg: u64,
614    ) -> Result<SyscallResult, Errno> {
615        self.deref().fcntl(file, current_task, cmd, arg)
616    }
617
618    fn to_handle(
619        &self,
620        file: &FileObject,
621        current_task: &CurrentTask,
622    ) -> Result<Option<zx::NullableHandle>, Errno> {
623        self.deref().to_handle(file, current_task)
624    }
625
626    fn get_handles(
627        &self,
628        file: &FileObject,
629        current_task: &CurrentTask,
630    ) -> Result<Vec<zx::NullableHandle>, Errno> {
631        self.deref().get_handles(file, current_task)
632    }
633
634    fn as_thread_group_key(&self, file: &FileObject) -> Result<ThreadGroupKey, Errno> {
635        self.deref().as_thread_group_key(file)
636    }
637
638    fn readahead(
639        &self,
640        file: &FileObject,
641        current_task: &CurrentTask,
642        offset: usize,
643        length: usize,
644    ) -> Result<(), Errno> {
645        self.deref().readahead(file, current_task, offset, length)
646    }
647
648    fn extra_fdinfo(
649        &self,
650        locked: &mut Locked<FileOpsCore>,
651        file: &FileHandle,
652        current_task: &CurrentTask,
653    ) -> Option<FsString> {
654        self.deref().extra_fdinfo(locked, file, current_task)
655    }
656}
657
658pub fn default_eof_offset<L>(
659    locked: &mut Locked<L>,
660    file: &FileObject,
661    current_task: &CurrentTask,
662) -> Result<off_t, Errno>
663where
664    L: LockEqualOrBefore<FileOpsCore>,
665{
666    Ok(file.node().get_size(locked, current_task)? as off_t)
667}
668
669/// Implement the seek method for a file. The computation from the end of the file must be provided
670/// through a callback.
671///
672/// Errors if the calculated offset is invalid.
673///
674/// - `current_offset`: The current position
675/// - `target`: The location to seek to.
676/// - `compute_end`: Compute the new offset from the end. Return an error if the operation is not
677///    supported.
678pub fn default_seek<F>(
679    current_offset: off_t,
680    target: SeekTarget,
681    compute_end: F,
682) -> Result<off_t, Errno>
683where
684    F: FnOnce() -> Result<off_t, Errno>,
685{
686    let new_offset = match target {
687        SeekTarget::Set(offset) => Some(offset),
688        SeekTarget::Cur(offset) => current_offset.checked_add(offset),
689        SeekTarget::End(offset) => compute_end()?.checked_add(offset),
690        SeekTarget::Data(offset) => {
691            let eof = compute_end().unwrap_or(off_t::MAX);
692            if offset >= eof {
693                return error!(ENXIO);
694            }
695            Some(offset)
696        }
697        SeekTarget::Hole(offset) => {
698            let eof = compute_end()?;
699            if offset >= eof {
700                return error!(ENXIO);
701            }
702            Some(eof)
703        }
704    }
705    .ok_or_else(|| errno!(EINVAL))?;
706
707    if new_offset < 0 {
708        return error!(EINVAL);
709    }
710
711    Ok(new_offset)
712}
713
714/// Implement the seek method for a file without an upper bound on the resulting offset.
715///
716/// This is useful for files without a defined size.
717///
718/// Errors if the calculated offset is invalid.
719///
720/// - `current_offset`: The current position
721/// - `target`: The location to seek to.
722pub fn unbounded_seek(current_offset: off_t, target: SeekTarget) -> Result<off_t, Errno> {
723    default_seek(current_offset, target, || Ok(MAX_LFS_FILESIZE as off_t))
724}
725
/// Implements [`FileOps::read`], [`FileOps::write`] and [`FileOps::seek`] by forwarding each
/// call to `$delegate`, and reports the file as seekable.
///
/// `$self` is used as the method receiver, so callers pass the `self` token; this lets
/// `$delegate` be an expression that refers to `self` (for example a field of the implementor).
#[macro_export]
macro_rules! fileops_impl_delegate_read_write_and_seek {
    ($self:ident, $delegate:expr) => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.read(locked, file, current_task, offset, data)
        }

        fn write(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.write(locked, file, current_task, offset, data)
        }

        fn seek(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $delegate.seek(locked, file, current_task, current_offset, target)
        }
    };
}
767
/// Implements [`FileOps::is_seekable`] and [`FileOps::seek`] for seekable files.
///
/// Seeking goes through `default_seek`, with the end of the file computed by
/// `default_eof_offset`.
#[macro_export]
macro_rules! fileops_impl_seekable {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // Evaluated lazily: only the targets that need the end of the file invoke it.
            let end_of_file = || $crate::vfs::default_eof_offset(locked, file, current_task);
            $crate::vfs::default_seek(current_offset, target, end_of_file)
        }
    };
}
790
/// Implements [`FileOps::is_seekable`] and [`FileOps::seek`] for non-seekable files.
///
/// The file reports itself as not seekable and every seek request fails with `ESPIPE`.
#[macro_export]
macro_rules! fileops_impl_nonseekable {
    () => {
        fn is_seekable(&self) -> bool {
            false
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // All arguments are ignored: seeking is rejected unconditionally.
            starnix_uapi::error!(ESPIPE)
        }
    };
}
811
/// Implements the seek-related [`FileOps`] methods for files that ignore seeking operations and
/// always read/write at offset 0.
///
/// The file reports itself as seekable but without persistent offsets, and every seek request
/// succeeds with offset 0.
#[macro_export]
macro_rules! fileops_impl_seekless {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn has_persistent_offsets(&self) -> bool {
            false
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // Whatever the target, the resulting position is the start of the file.
            Ok(0)
        }
    };
}
837
/// Implements [`FileOps::read`] and [`FileOps::write`] for files that carry no data: both
/// operations fail with `EINVAL`.
#[macro_export]
macro_rules! fileops_impl_dataless {
    () => {
        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }

        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }
    };
}
864
/// Implements the [`FileOps`] methods that behave the same for every directory: the file is
/// seekable, and `read` and `write` fail with `EISDIR`.
///
/// You must still implement [`FileOps::seek`] and [`FileOps::readdir`] yourself.
#[macro_export]
macro_rules! fileops_impl_directory {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }
    };
}
897
/// Implements [`FileOps::seek`] by handing the current offset and the seek target to
/// `$crate::vfs::unbounded_seek` and returning its result unchanged.
#[macro_export]
macro_rules! fileops_impl_unbounded_seek {
    () => {
        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            offset: starnix_uapi::off_t,
            whence: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $crate::vfs::unbounded_seek(offset, whence)
        }
    };
}
913
/// Implements [`FileOps::sync`] as a no-op: it succeeds without doing anything for regular
/// files and directories, and fails with `EINVAL` for every other kind of node.
#[macro_export]
macro_rules! fileops_impl_noop_sync {
    () => {
        fn sync(
            &self,
            file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            let node = file.node();
            if node.is_reg() || node.is_dir() {
                Ok(())
            } else {
                starnix_uapi::error!(EINVAL)
            }
        }
    };
}
929
930// Public re-export of macros allows them to be used like regular rust items.
931
932pub use fileops_impl_dataless;
933pub use fileops_impl_delegate_read_write_and_seek;
934pub use fileops_impl_directory;
935pub use fileops_impl_nonseekable;
936pub use fileops_impl_noop_sync;
937pub use fileops_impl_seekable;
938pub use fileops_impl_seekless;
939pub use fileops_impl_unbounded_seek;
/// Size, in bytes, of an AES-256 key (256 bits).
pub const AES256_KEY_SIZE: usize = 32;
941
942pub fn canonicalize_ioctl_request(current_task: &CurrentTask, request: u32) -> u32 {
943    if current_task.is_arch32() {
944        match request {
945            uapi::arch32::FS_IOC_GETFLAGS => uapi::FS_IOC_GETFLAGS,
946            uapi::arch32::FS_IOC_SETFLAGS => uapi::FS_IOC_SETFLAGS,
947            _ => request,
948        }
949    } else {
950        request
951    }
952}
953
/// Handles a set of generic ioctl requests against `file`.
///
/// The request number is first passed through [`canonicalize_ioctl_request`]. Requests that
/// are not recognized here are recorded with `track_stub!` and fail with `ENOTTY`.
///
/// `arg` is interpreted per request: for most requests it is a user-space address that is
/// read from or written to through `current_task`.
pub fn default_ioctl(
    file: &FileObject,
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    request: u32,
    arg: SyscallArg,
) -> Result<SyscallResult, Errno> {
    match canonicalize_ioctl_request(current_task, request) {
        TCGETS => error!(ENOTTY),
        FIGETBSZ => {
            // Only regular files and directories report a block size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let blocksize = file.node().stat(locked, current_task)?.st_blksize;
            current_task.write_object(arg.into(), &blocksize)?;
            Ok(SUCCESS)
        }
        FIONBIO => {
            // The user-supplied i32 selects whether NONBLOCK is set (non-zero) or cleared (0).
            let arg_ref = UserAddress::from(arg).into();
            let arg: i32 = current_task.read_object(arg_ref)?;
            let val = if arg == 0 {
                // Clear the NONBLOCK flag
                OpenFlags::empty()
            } else {
                // Set the NONBLOCK flag
                OpenFlags::NONBLOCK
            };
            file.update_file_flags(val, OpenFlags::NONBLOCK);
            Ok(SUCCESS)
        }
        FIOQSIZE => {
            // Only regular files and directories report a size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let size = file.node().stat(locked, current_task)?.st_size;
            current_task.write_object(arg.into(), &size)?;
            Ok(SUCCESS)
        }
        FIONREAD => {
            track_stub!(TODO("https://fxbug.dev/322874897"), "FIONREAD");
            if !file.name.entry.node.is_reg() {
                return error!(ENOTTY);
            }

            // Any failure to refresh the node info is reported as EINVAL.
            let size = file
                .name
                .entry
                .node
                .fetch_and_refresh_info(locked, current_task)
                .map_err(|_| errno!(EINVAL))?
                .size;
            let offset = usize::try_from(file.offset.read()).map_err(|_| errno!(EINVAL))?;
            // Bytes between the current offset and the end of the file, saturated at i32::MAX;
            // 0 when the offset is already past the end.
            let remaining =
                if size < offset { 0 } else { i32::try_from(size - offset).unwrap_or(i32::MAX) };
            current_task.write_object(arg.into(), &remaining)?;
            Ok(SUCCESS)
        }
        FS_IOC_FSGETXATTR => {
            // Stub: always reports a default-initialized `fsxattr`.
            track_stub!(TODO("https://fxbug.dev/322875209"), "FS_IOC_FSGETXATTR");
            let arg = UserAddress::from(arg).into();
            current_task.write_object(arg, &fsxattr::default())?;
            Ok(SUCCESS)
        }
        FS_IOC_FSSETXATTR => {
            // Stub: the argument is read (so a bad address still fails) and then discarded.
            track_stub!(TODO("https://fxbug.dev/322875271"), "FS_IOC_FSSETXATTR");
            let arg = UserAddress::from(arg).into();
            let _: fsxattr = current_task.read_object(arg)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_GETFLAGS => {
            // Only the fs-verity and casefold flags are reported.
            track_stub!(TODO("https://fxbug.dev/322874935"), "FS_IOC_GETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let mut flags: u32 = 0;
            if matches!(*file.node().fsverity.lock(), FsVerityState::FsVerity) {
                flags |= FS_VERITY_FL;
            }
            if file.node().info().casefold {
                flags |= FS_CASEFOLD_FL;
            }
            current_task.write_object(arg, &flags)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_SETFLAGS => {
            // Only the casefold flag is applied; all other bits are ignored.
            track_stub!(TODO("https://fxbug.dev/322875367"), "FS_IOC_SETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let flags: u32 = current_task.read_object(arg)?;
            file.node().update_attributes(locked, current_task, |info| {
                info.casefold = flags & FS_CASEFOLD_FL != 0;
                Ok(())
            })?;
            Ok(SUCCESS)
        }
        FS_IOC_ENABLE_VERITY => {
            Ok(fsverity::ioctl::enable(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_MEASURE_VERITY => {
            Ok(fsverity::ioctl::measure(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_READ_VERITY_METADATA => {
            Ok(fsverity::ioctl::read_metadata(current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_ADD_ENCRYPTION_KEY => {
            let fscrypt_add_key_ref = UserRef::<fscrypt_add_key_arg>::from(arg);
            // The raw key bytes are read from the address immediately following the
            // `fscrypt_add_key_arg` structure.
            let key_ref_addr = fscrypt_add_key_ref.next()?.addr();
            let mut fscrypt_add_key_arg = current_task.read_object(fscrypt_add_key_ref.clone())?;
            if fscrypt_add_key_arg.key_id != 0 {
                track_stub!(TODO("https://fxbug.dev/375649227"), "non-zero key ids");
                return error!(ENOTSUP);
            }
            if fscrypt_add_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            let key = current_task
                .read_memory_to_vec(key_ref_addr, fscrypt_add_key_arg.raw_size as usize)?;
            let user_id = current_task.current_creds().uid;

            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let key_identifier = crypt_service.add_wrapping_key(&key, user_id)?;
            // Report the identifier assigned by the crypt service back to the caller.
            fscrypt_add_key_arg.key_spec.u.identifier =
                fscrypt_identifier { value: key_identifier, ..Default::default() };
            current_task.write_object(fscrypt_add_key_ref, &fscrypt_add_key_arg)?;
            Ok(SUCCESS)
        }
        FS_IOC_SET_ENCRYPTION_POLICY => {
            let fscrypt_policy_ref = UserRef::<uapi::fscrypt_policy_v2>::from(arg);
            let policy = current_task.read_object(fscrypt_policy_ref)?;
            if policy.version as u32 != FSCRYPT_POLICY_V2 {
                track_stub!(TODO("https://fxbug.dev/375649656"), "fscrypt policy v1");
                return error!(ENOTSUP);
            }
            // Unsupported flags and encryption modes are only recorded; they do not fail the
            // request.
            if policy.flags != 0 {
                track_stub!(
                    TODO("https://fxbug.dev/375700939"),
                    "fscrypt policy flags",
                    policy.flags
                );
            }
            if policy.contents_encryption_mode as u32 != FSCRYPT_MODE_AES_256_XTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.contents_encryption_mode
                );
            }
            if policy.filenames_encryption_mode as u32 != FSCRYPT_MODE_AES_256_CTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.filenames_encryption_mode
                );
            }
            // A caller that does not own the node needs CAP_FOWNER; a failed capability check
            // is reported as EACCES.
            let user_id = current_task.current_creds().uid;
            if user_id != file.node().info().uid {
                security::check_task_capable(current_task, CAP_FOWNER)
                    .map_err(|_| errno!(EACCES))?;
            }

            // The calling user must be among the users registered for the key.
            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            if let Some(users) =
                crypt_service.get_users_for_key(EncryptionKeyId::from(policy.master_key_identifier))
            {
                if !users.contains(&user_id) {
                    return error!(ENOKEY);
                }
            } else {
                track_stub!(
                    TODO("https://fxbug.dev/375067633"),
                    "users with CAP_FOWNER can set encryption policies with unadded keys"
                );
                return error!(ENOKEY);
            }

            // A node that already has a wrapping key only accepts the same key again; a
            // different key fails with EEXIST.
            let attributes = file.node().fetch_and_refresh_info(locked, current_task)?;
            if let Some(wrapping_key_id) = &attributes.wrapping_key_id {
                if wrapping_key_id != &policy.master_key_identifier {
                    return error!(EEXIST);
                }
            } else {
                // Don't deadlock! update_attributes will also lock the attributes.
                std::mem::drop(attributes);
                file.node().update_attributes(locked, current_task, |info| {
                    info.wrapping_key_id = Some(policy.master_key_identifier);
                    Ok(())
                })?;
            }
            Ok(SUCCESS)
        }
        FS_IOC_REMOVE_ENCRYPTION_KEY => {
            let fscrypt_remove_key_arg_ref = UserRef::<uapi::fscrypt_remove_key_arg>::from(arg);
            let fscrypt_remove_key_arg = current_task.read_object(fscrypt_remove_key_arg_ref)?;
            if fscrypt_remove_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let user_id = current_task.current_creds().uid;
            // NOTE(review): this reads the `identifier` member of the `key_spec.u` union. The
            // `type_` check above is presumably what establishes that member as the active
            // one; confirm and record it as a SAFETY comment.
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let identifier = unsafe { fscrypt_remove_key_arg.key_spec.u.identifier.value };
            crypt_service.forget_wrapping_key(identifier, user_id)?;
            Ok(SUCCESS)
        }
        linux_uapi::FICLONE | linux_uapi::FICLONERANGE | linux_uapi::FIDEDUPERANGE => {
            error!(EOPNOTSUPP)
        }
        _ => {
            // Note: the original (non-canonicalized) request number is what gets recorded.
            track_stub!(TODO("https://fxbug.dev/322874917"), "ioctl fallthrough", request);
            error!(ENOTTY)
        }
    }
}
1174
1175pub fn default_fcntl(cmd: u32) -> Result<SyscallResult, Errno> {
1176    track_stub!(TODO("https://fxbug.dev/322875704"), "default fcntl", cmd);
1177    error!(EINVAL)
1178}
1179
/// Stateless file operations whose `FileOps` implementation rejects data, seek, mapping,
/// directory-listing and ioctl operations with `EBADF`.
pub struct OPathOps {}

impl OPathOps {
    /// Creates a new `OPathOps`.
    pub fn new() -> Self {
        Self {}
    }
}
1187
1188impl FileOps for OPathOps {
1189    fileops_impl_noop_sync!();
1190
1191    fn has_persistent_offsets(&self) -> bool {
1192        false
1193    }
1194    fn is_seekable(&self) -> bool {
1195        true
1196    }
1197    fn read(
1198        &self,
1199        _locked: &mut Locked<FileOpsCore>,
1200        _file: &FileObject,
1201        _current_task: &CurrentTask,
1202        _offset: usize,
1203        _data: &mut dyn OutputBuffer,
1204    ) -> Result<usize, Errno> {
1205        error!(EBADF)
1206    }
1207    fn write(
1208        &self,
1209        _locked: &mut Locked<FileOpsCore>,
1210        _file: &FileObject,
1211        _current_task: &CurrentTask,
1212        _offset: usize,
1213        _data: &mut dyn InputBuffer,
1214    ) -> Result<usize, Errno> {
1215        error!(EBADF)
1216    }
1217    fn seek(
1218        &self,
1219        _locked: &mut Locked<FileOpsCore>,
1220        _file: &FileObject,
1221        _current_task: &CurrentTask,
1222        _current_offset: off_t,
1223        _target: SeekTarget,
1224    ) -> Result<off_t, Errno> {
1225        error!(EBADF)
1226    }
1227    fn get_memory(
1228        &self,
1229        _locked: &mut Locked<FileOpsCore>,
1230        _file: &FileObject,
1231        _current_task: &CurrentTask,
1232        _length: Option<usize>,
1233        _prot: ProtectionFlags,
1234    ) -> Result<Arc<MemoryObject>, Errno> {
1235        error!(EBADF)
1236    }
1237    fn readdir(
1238        &self,
1239        _locked: &mut Locked<FileOpsCore>,
1240        _file: &FileObject,
1241        _current_task: &CurrentTask,
1242        _sink: &mut dyn DirentSink,
1243    ) -> Result<(), Errno> {
1244        error!(EBADF)
1245    }
1246
1247    fn ioctl(
1248        &self,
1249        _locked: &mut Locked<Unlocked>,
1250        _file: &FileObject,
1251        _current_task: &CurrentTask,
1252        _request: u32,
1253        _arg: SyscallArg,
1254    ) -> Result<SyscallResult, Errno> {
1255        error!(EBADF)
1256    }
1257}
1258
1259pub struct ProxyFileOps(pub FileHandle);
1260
1261impl FileOps for ProxyFileOps {
1262    // `close` is not delegated because the last reference to a `ProxyFileOps` is not
1263    // necessarily the last reference of the proxied file. If this is the case, the
1264    // releaser will handle it.
1265    // These don't take &FileObject making it too hard to handle them properly in the macro
1266    fn has_persistent_offsets(&self) -> bool {
1267        self.0.ops().has_persistent_offsets()
1268    }
1269    fn writes_update_seek_offset(&self) -> bool {
1270        self.0.ops().writes_update_seek_offset()
1271    }
1272    fn is_seekable(&self) -> bool {
1273        self.0.ops().is_seekable()
1274    }
1275    // These take &mut Locked<L> as a second argument
1276    fn flush(
1277        &self,
1278        locked: &mut Locked<FileOpsCore>,
1279        _file: &FileObject,
1280        current_task: &CurrentTask,
1281    ) {
1282        self.0.ops().flush(locked, &self.0, current_task);
1283    }
1284    fn wait_async(
1285        &self,
1286        locked: &mut Locked<FileOpsCore>,
1287        _file: &FileObject,
1288        current_task: &CurrentTask,
1289        waiter: &Waiter,
1290        events: FdEvents,
1291        handler: EventHandler,
1292    ) -> Option<WaitCanceler> {
1293        self.0.ops().wait_async(locked, &self.0, current_task, waiter, events, handler)
1294    }
1295    fn query_events(
1296        &self,
1297        locked: &mut Locked<FileOpsCore>,
1298        _file: &FileObject,
1299        current_task: &CurrentTask,
1300    ) -> Result<FdEvents, Errno> {
1301        self.0.ops().query_events(locked, &self.0, current_task)
1302    }
1303    fn read(
1304        &self,
1305        locked: &mut Locked<FileOpsCore>,
1306        _file: &FileObject,
1307        current_task: &CurrentTask,
1308        offset: usize,
1309        data: &mut dyn OutputBuffer,
1310    ) -> Result<usize, Errno> {
1311        self.0.ops().read(locked, &self.0, current_task, offset, data)
1312    }
1313    fn write(
1314        &self,
1315        locked: &mut Locked<FileOpsCore>,
1316        _file: &FileObject,
1317        current_task: &CurrentTask,
1318        offset: usize,
1319        data: &mut dyn InputBuffer,
1320    ) -> Result<usize, Errno> {
1321        self.0.ops().write(locked, &self.0, current_task, offset, data)
1322    }
1323    fn ioctl(
1324        &self,
1325        locked: &mut Locked<Unlocked>,
1326        _file: &FileObject,
1327        current_task: &CurrentTask,
1328        request: u32,
1329        arg: SyscallArg,
1330    ) -> Result<SyscallResult, Errno> {
1331        self.0.ops().ioctl(locked, &self.0, current_task, request, arg)
1332    }
1333    fn fcntl(
1334        &self,
1335        _file: &FileObject,
1336        current_task: &CurrentTask,
1337        cmd: u32,
1338        arg: u64,
1339    ) -> Result<SyscallResult, Errno> {
1340        self.0.ops().fcntl(&self.0, current_task, cmd, arg)
1341    }
1342    fn readdir(
1343        &self,
1344        locked: &mut Locked<FileOpsCore>,
1345        _file: &FileObject,
1346        current_task: &CurrentTask,
1347        sink: &mut dyn DirentSink,
1348    ) -> Result<(), Errno> {
1349        self.0.ops().readdir(locked, &self.0, current_task, sink)
1350    }
1351    fn sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1352        self.0.ops().sync(&self.0, current_task)
1353    }
1354    fn data_sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1355        self.0.ops().sync(&self.0, current_task)
1356    }
1357    fn get_memory(
1358        &self,
1359        locked: &mut Locked<FileOpsCore>,
1360        _file: &FileObject,
1361        current_task: &CurrentTask,
1362        length: Option<usize>,
1363        prot: ProtectionFlags,
1364    ) -> Result<Arc<MemoryObject>, Errno> {
1365        self.0.ops.get_memory(locked, &self.0, current_task, length, prot)
1366    }
1367    fn mmap(
1368        &self,
1369        locked: &mut Locked<FileOpsCore>,
1370        _file: &FileObject,
1371        current_task: &CurrentTask,
1372        addr: DesiredAddress,
1373        memory_offset: u64,
1374        length: usize,
1375        prot_flags: ProtectionFlags,
1376        options: MappingOptions,
1377        filename: NamespaceNode,
1378    ) -> Result<UserAddress, Errno> {
1379        self.0.ops.mmap(
1380            locked,
1381            &self.0,
1382            current_task,
1383            addr,
1384            memory_offset,
1385            length,
1386            prot_flags,
1387            options,
1388            filename,
1389        )
1390    }
1391    fn seek(
1392        &self,
1393        locked: &mut Locked<FileOpsCore>,
1394        _file: &FileObject,
1395        current_task: &CurrentTask,
1396        offset: off_t,
1397        target: SeekTarget,
1398    ) -> Result<off_t, Errno> {
1399        self.0.ops.seek(locked, &self.0, current_task, offset, target)
1400    }
1401}
1402
1403#[derive(Debug, Default, Copy, Clone)]
1404pub enum FileAsyncOwner {
1405    #[default]
1406    Unowned,
1407    Thread(pid_t),
1408    Process(pid_t),
1409    ProcessGroup(pid_t),
1410}
1411
1412impl FileAsyncOwner {
1413    pub fn validate(self, current_task: &CurrentTask) -> Result<(), Errno> {
1414        match self {
1415            FileAsyncOwner::Unowned => (),
1416            FileAsyncOwner::Thread(id) | FileAsyncOwner::Process(id) => {
1417                Task::from_weak(&current_task.get_task(id))?;
1418            }
1419            FileAsyncOwner::ProcessGroup(pgid) => {
1420                current_task
1421                    .kernel()
1422                    .pids
1423                    .read()
1424                    .get_process_group(pgid)
1425                    .ok_or_else(|| errno!(ESRCH))?;
1426            }
1427        }
1428        Ok(())
1429    }
1430}
1431
1432#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
1433pub struct FileObjectId(u64);
1434
1435impl FileObjectId {
1436    pub fn as_epoll_key(&self) -> EpollKey {
1437        self.0 as EpollKey
1438    }
1439}
1440
/// A session with a file object.
///
/// Each time a client calls open(), we create a new FileObject from the
/// underlying FsNode that receives the open(). This object contains the state
/// that is specific to this session, whereas the underlying FsNode contains
/// the state that is shared between all the sessions.
pub struct FileObject {
    /// The operations implementation backing this file.
    ops: Box<dyn FileOps>,
    /// Per-session state; exposed directly on `FileObject` through its `Deref` impl.
    state: FileObjectState,
}
1451
1452impl std::ops::Deref for FileObject {
1453    type Target = FileObjectState;
1454    fn deref(&self) -> &Self::Target {
1455        &self.state
1456    }
1457}
1458
/// The state of a `FileObject` other than its `FileOps` implementation.
pub struct FileObjectState {
    /// Weak reference to the `FileHandle` of this `FileObject`. This allows to retrieve the
    /// `FileHandle` from a `FileObject`.
    pub weak_handle: WeakFileHandle,

    /// A unique identifier for this file object.
    pub id: FileObjectId,

    /// The NamespaceNode associated with this FileObject.
    ///
    /// Represents the name the process used to open this file.
    pub name: ActiveNamespaceNode,

    /// The file system of the node this file was opened on.
    pub fs: FileSystemHandle,

    /// The current offset of this file; starts at 0.
    pub offset: RcuAtomic<off_t>,

    /// The open flags of this file, stored without `CREAT`.
    flags: AtomicOpenFlags,

    async_owner: Mutex<FileAsyncOwner>,

    /// Tracks which `EpollFileObject`s added this `FileObject` as the control file, as a map
    /// from file handle key to a weak reference to the file.
    epoll_files: Mutex<HashMap<FileHandleKey, WeakFileHandle>>,

    /// See fcntl F_SETLEASE and F_GETLEASE.
    lease: Mutex<FileLeaseType>,

    // This extra reference to the FsNode should not be needed, but it is needed to make
    // Inotify.ExcludeUnlinkInodeEvents pass.
    // It is only populated when the file is opened with write access.
    _mysterious_node: Option<FsNodeHandle>,

    /// Opaque security state associated with this file object.
    pub security_state: security::FileObjectState,
}
1494
1495pub enum FileObjectReleaserAction {}
1496impl ReleaserAction<FileObject> for FileObjectReleaserAction {
1497    fn release(file_object: ReleaseGuard<FileObject>) {
1498        register_delayed_release(file_object);
1499    }
1500}
/// A `FileObject` wrapped in an `ObjectReleaser` that uses `FileObjectReleaserAction`.
pub type FileReleaser = ObjectReleaser<FileObject, FileObjectReleaserAction>;
/// Shared, reference-counted handle to a file.
pub type FileHandle = Arc<FileReleaser>;
/// Weak counterpart of `FileHandle`.
pub type WeakFileHandle = Weak<FileReleaser>;
/// Key type built on `WeakKey` for the file's releaser (used as the `epoll_files` map key).
pub type FileHandleKey = WeakKey<FileReleaser>;
1505
1506impl FileObjectState {
1507    /// The FsNode from which this FileObject was created.
1508    pub fn node(&self) -> &FsNodeHandle {
1509        &self.name.entry.node
1510    }
1511
1512    pub fn flags(&self) -> OpenFlags {
1513        self.flags.load(Ordering::Relaxed)
1514    }
1515
1516    pub fn can_read(&self) -> bool {
1517        self.flags.load(Ordering::Relaxed).can_read()
1518    }
1519
1520    pub fn can_write(&self) -> bool {
1521        self.flags.load(Ordering::Relaxed).can_write()
1522    }
1523
1524    /// Returns false if the file is not allowed to be executed.
1525    pub fn can_exec(&self) -> bool {
1526        let mounted_no_exec = self.name.to_passive().mount.flags().contains(MountFlags::NOEXEC);
1527        let no_exec_seal = self
1528            .node()
1529            .write_guard_state
1530            .lock()
1531            .get_seals()
1532            .map(|seals| seals.contains(SealFlags::NO_EXEC))
1533            .unwrap_or(false);
1534        !(mounted_no_exec || no_exec_seal)
1535    }
1536
1537    // Notifies watchers on the current node and its parent about an event.
1538    pub fn notify(&self, event_mask: InotifyMask) {
1539        self.name.notify(event_mask)
1540    }
1541}
1542
1543impl FileObject {
1544    /// Create a FileObject that is not mounted in a namespace.
1545    ///
1546    /// In particular, this will create a new unrooted entries. This should not be used on
1547    /// file system with persistent entries, as the created entry will be out of sync with the one
1548    /// from the file system.
1549    ///
1550    /// The returned FileObject does not have a name.
1551    pub fn new_anonymous<L>(
1552        locked: &mut Locked<L>,
1553        current_task: &CurrentTask,
1554        ops: Box<dyn FileOps>,
1555        node: FsNodeHandle,
1556        flags: OpenFlags,
1557    ) -> FileHandle
1558    where
1559        L: LockEqualOrBefore<FileOpsCore>,
1560    {
1561        assert!(!node.fs().has_permanent_entries());
1562        Self::new(
1563            locked,
1564            current_task,
1565            ops,
1566            NamespaceNode::new_anonymous_unrooted(current_task, node),
1567            flags,
1568        )
1569        .expect("Failed to create anonymous FileObject")
1570    }
1571
    /// Create a FileObject with an associated NamespaceNode.
    ///
    /// This function is not typically called directly. Instead, consider
    /// calling NamespaceNode::open.
    ///
    /// Returns an error if the file is opened for writing and the node's write guard cannot
    /// be acquired, or if the `open` hook of `ops` fails.
    pub fn new<L>(
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        ops: Box<dyn FileOps>,
        name: NamespaceNode,
        flags: OpenFlags,
    ) -> Result<FileHandle, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        // Writable files take the node's write guard in `WriteFile` mode and keep an extra
        // reference to the node.
        let _mysterious_node = if flags.can_write() {
            name.entry.node.write_guard_state.lock().acquire(FileWriteGuardMode::WriteFile)?;
            Some(name.entry.node.clone())
        } else {
            None
        };
        let fs = name.entry.node.fs();
        let id = FileObjectId(current_task.kernel.next_file_object_id.next());
        let security_state = security::file_alloc_security(current_task);
        // `new_cyclic` lets the object store a weak reference to its own handle.
        let file = FileHandle::new_cyclic(|weak_handle| {
            Self {
                ops,
                state: FileObjectState {
                    weak_handle: weak_handle.clone(),
                    id,
                    name: name.into_active(),
                    fs,
                    offset: RcuAtomic::new(0),
                    // `CREAT` is not kept in the stored flags.
                    flags: AtomicOpenFlags::new(flags - OpenFlags::CREAT),
                    async_owner: Default::default(),
                    epoll_files: Default::default(),
                    lease: Default::default(),
                    _mysterious_node,
                    security_state,
                },
            }
            .into()
        });
        // The OPEN notification is sent before the `open` hook runs, so it is emitted even
        // when the hook below fails.
        file.notify(InotifyMask::OPEN);

        file.ops().open(locked.cast_locked::<FileOpsCore>(), &file, current_task)?;
        Ok(file)
    }
1619
1620    pub fn max_access_for_memory_mapping(&self) -> Access {
1621        let mut access = Access::EXIST;
1622        if self.can_exec() {
1623            access |= Access::EXEC;
1624        }
1625        let flags = self.flags.load(Ordering::Relaxed);
1626        if flags.can_read() {
1627            access |= Access::READ;
1628        }
1629        if flags.can_write() {
1630            access |= Access::WRITE;
1631        }
1632        access
1633    }
1634
1635    pub fn ops(&self) -> &dyn FileOps {
1636        self.ops.as_ref()
1637    }
1638
1639    pub fn ops_type_name(&self) -> &'static str {
1640        self.ops().type_name()
1641    }
1642
1643    pub fn is_non_blocking(&self) -> bool {
1644        self.flags().contains(OpenFlags::NONBLOCK)
1645    }
1646
    /// Common implementation for blocking operations.
    ///
    /// This function is used to implement the blocking operations for file objects. FileOps
    /// implementations should call this function to handle the blocking logic.
    ///
    /// The `op` parameter is a function that implements the non-blocking version of the operation.
    /// The function is called once without registering a waiter in case no wait is needed. If the
    /// operation returns EAGAIN and the file object is non-blocking (and was not opened with
    /// `DIRECTORY`), the function returns EAGAIN.
    ///
    /// If the operation returns EAGAIN and the file object is blocking, the function will block
    /// until the given events are triggered. At that time, the operation is retried. Notice that
    /// the `op` function can be called multiple times before the operation completes.
    ///
    /// The `deadline` parameter is the deadline for the operation; `None` waits without a
    /// deadline. If the wait times out (ETIMEDOUT), the function returns EAGAIN. Any other
    /// error from the wait is returned unchanged.
    pub fn blocking_op<L, T, Op>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        events: FdEvents,
        deadline: Option<zx::MonotonicInstant>,
        mut op: Op,
    ) -> Result<T, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
        Op: FnMut(&mut Locked<L>) -> Result<T, Errno>,
    {
        // Don't return EAGAIN for directories. This can happen because glibc always opens a
        // directory with O_NONBLOCK.
        let can_return_eagain = self.flags().contains(OpenFlags::NONBLOCK)
            && !self.flags().contains(OpenFlags::DIRECTORY);
        // Run the operation a first time without registering a waiter in case no wait is needed.
        match op(locked) {
            Err(errno) if errno == EAGAIN && !can_return_eagain => {}
            result => return result,
        }

        let waiter = Waiter::new();
        loop {
            // Register the waiter before running the operation to prevent a race.
            self.wait_async(locked, current_task, &waiter, events, WaitCallback::none());
            match op(locked) {
                Err(e) if e == EAGAIN => {}
                result => return result,
            }
            let locked = locked.cast_locked::<FileOpsCore>();
            // A timed-out wait is surfaced to the caller as EAGAIN rather than ETIMEDOUT.
            waiter
                .wait_until(
                    locked,
                    current_task,
                    deadline.unwrap_or(zx::MonotonicInstant::INFINITE),
                )
                .map_err(|e| if e == ETIMEDOUT { errno!(EAGAIN) } else { e })?;
        }
    }
1702
1703    pub fn is_seekable(&self) -> bool {
1704        self.ops().is_seekable()
1705    }
1706
1707    pub fn has_persistent_offsets(&self) -> bool {
1708        self.ops().has_persistent_offsets()
1709    }
1710
1711    /// Common implementation for `read` and `read_at`.
1712    fn read_internal<R>(&self, current_task: &CurrentTask, read: R) -> Result<usize, Errno>
1713    where
1714        R: FnOnce() -> Result<usize, Errno>,
1715    {
1716        security::file_permission(current_task, self, security::PermissionFlags::READ)?;
1717
1718        if !self.can_read() {
1719            return error!(EBADF);
1720        }
1721        let bytes_read = read()?;
1722
1723        // TODO(steveaustin) - omit updating time_access to allow info to be immutable
1724        // and thus allow simultaneous reads.
1725        self.update_atime();
1726        if bytes_read > 0 {
1727            self.notify(InotifyMask::ACCESS);
1728        }
1729
1730        Ok(bytes_read)
1731    }
1732
1733    pub fn read<L>(
1734        &self,
1735        locked: &mut Locked<L>,
1736        current_task: &CurrentTask,
1737        data: &mut dyn OutputBuffer,
1738    ) -> Result<usize, Errno>
1739    where
1740        L: LockEqualOrBefore<FileOpsCore>,
1741    {
1742        self.read_internal(current_task, || {
1743            let locked = locked.cast_locked::<FileOpsCore>();
1744            if !self.ops().has_persistent_offsets() {
1745                if data.available() > MAX_LFS_FILESIZE {
1746                    return error!(EINVAL);
1747                }
1748                return self.ops.read(locked, self, current_task, 0, data);
1749            }
1750
1751            let mut offset_guard = self.offset.copy();
1752            let offset = *offset_guard as usize;
1753            checked_add_offset_and_length(offset, data.available())?;
1754            let read = self.ops.read(locked, self, current_task, offset, data)?;
1755            *offset_guard += read as off_t;
1756            offset_guard.update();
1757            Ok(read)
1758        })
1759    }
1760
1761    pub fn read_at<L>(
1762        &self,
1763        locked: &mut Locked<L>,
1764        current_task: &CurrentTask,
1765        offset: usize,
1766        data: &mut dyn OutputBuffer,
1767    ) -> Result<usize, Errno>
1768    where
1769        L: LockEqualOrBefore<FileOpsCore>,
1770    {
1771        if !self.ops().is_seekable() {
1772            return error!(ESPIPE);
1773        }
1774        checked_add_offset_and_length(offset, data.available())?;
1775        let locked = locked.cast_locked::<FileOpsCore>();
1776        self.read_internal(current_task, || self.ops.read(locked, self, current_task, offset, data))
1777    }
1778
1779    /// Common checks before calling ops().write.
1780    fn write_common<L>(
1781        &self,
1782        locked: &mut Locked<L>,
1783        current_task: &CurrentTask,
1784        offset: usize,
1785        data: &mut dyn InputBuffer,
1786    ) -> Result<usize, Errno>
1787    where
1788        L: LockEqualOrBefore<FileOpsCore>,
1789    {
1790        security::file_permission(current_task, self, security::PermissionFlags::WRITE)?;
1791
1792        // We need to cap the size of `data` to prevent us from growing the file too large,
1793        // according to <https://man7.org/linux/man-pages/man2/write.2.html>:
1794        //
1795        //   The number of bytes written may be less than count if, for example, there is
1796        //   insufficient space on the underlying physical medium, or the RLIMIT_FSIZE resource
1797        //   limit is encountered (see setrlimit(2)),
1798        checked_add_offset_and_length(offset, data.available())?;
1799        let locked = locked.cast_locked::<FileOpsCore>();
1800        self.ops().write(locked, self, current_task, offset, data)
1801    }
1802
1803    /// Common wrapper work for `write` and `write_at`.
1804    fn write_fn<W, L>(
1805        &self,
1806        locked: &mut Locked<L>,
1807        current_task: &CurrentTask,
1808        write: W,
1809    ) -> Result<usize, Errno>
1810    where
1811        L: LockEqualOrBefore<FileOpsCore>,
1812        W: FnOnce(&mut Locked<L>) -> Result<usize, Errno>,
1813    {
1814        if !self.can_write() {
1815            return error!(EBADF);
1816        }
1817        self.node().clear_suid_and_sgid_bits(locked, current_task)?;
1818        let bytes_written = write(locked)?;
1819        self.node().update_ctime_mtime();
1820
1821        if bytes_written > 0 {
1822            self.notify(InotifyMask::MODIFY);
1823        }
1824
1825        Ok(bytes_written)
1826    }
1827
    /// Writes `data` at the file's stored offset.
    ///
    /// The write runs inside `write_fn`, so its writability check and bookkeeping apply. If the
    /// ops do not keep a persistent offset, the data is written at offset 0 and the stored offset
    /// is not consulted. Otherwise:
    ///
    /// - with `O_APPEND`, the node's append lock is taken through `append_lock_write`, the offset
    ///   is moved to the end of the file with `SeekTarget::End(0)`, and the write happens there;
    /// - without `O_APPEND`, the append lock is taken through `append_lock_read` and the write
    ///   happens at the stored offset.
    ///
    /// The offset is advanced by the number of bytes written only when the ops report
    /// `writes_update_seek_offset()`.
    ///
    /// Returns the number of bytes written.
    pub fn write<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        self.write_fn(locked, current_task, |locked| {
            if !self.ops().has_persistent_offsets() {
                return self.write_common(locked, current_task, 0, data);
            }
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            // This shadows the closure's `locked` parameter with a freshly created `Unlocked`
            // token, which is what allows the append lock to be taken below.
            let locked = unsafe { Unlocked::new() };
            let mut offset = self.offset.copy();
            let bytes_written = if self.flags().contains(OpenFlags::APPEND) {
                let (_guard, locked) = self.node().ops().append_lock_write(
                    locked.cast_locked::<BeforeFsNodeAppend>(),
                    self.node(),
                    current_task,
                )?;
                // Reposition to the end of the file while `_guard` is held, then write there.
                *offset = self.ops().seek(
                    locked.cast_locked::<FileOpsCore>(),
                    self,
                    current_task,
                    *offset,
                    SeekTarget::End(0),
                )?;
                self.write_common(locked, current_task, *offset as usize, data)
            } else {
                let (_guard, locked) = self.node().ops().append_lock_read(
                    locked.cast_locked::<BeforeFsNodeAppend>(),
                    self.node(),
                    current_task,
                )?;
                self.write_common(locked, current_task, *offset as usize, data)
            // A failed write returns from the closure here, before `offset.update()` is reached.
            }?;
            if self.ops().writes_update_seek_offset() {
                *offset += bytes_written as off_t;
            }
            // NOTE(review): presumably this publishes the modified copy back to `self.offset` —
            // confirm against the offset type.
            offset.update();
            Ok(bytes_written)
        })
    }
1878
    /// Writes `data` at the caller-supplied `offset`.
    ///
    /// Fails with `ESPIPE` when the ops are not seekable. The write runs inside `write_fn`, so
    /// its writability check and bookkeeping apply. This method itself does not read or update
    /// `self.offset`.
    ///
    /// With `O_APPEND` the node's append lock is taken for writing and `offset` is replaced by the
    /// result of `default_eof_offset`, so the data lands at the end of the file (see the LTP
    /// note below). Without `O_APPEND` the append lock is taken for reading and the write happens
    /// at `offset`.
    ///
    /// Returns the number of bytes written.
    pub fn write_at<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        mut offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        if !self.ops().is_seekable() {
            return error!(ESPIPE);
        }
        self.write_fn(locked, current_task, |_locked| {
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            // The closure's own lock token (`_locked`) is ignored; a freshly created `Unlocked`
            // token is used so that the append lock can be taken below.
            let locked = unsafe { Unlocked::new() };
            if self.flags().contains(OpenFlags::APPEND) {
                let (_guard, locked) = self.node().append_lock.write_and(locked, current_task)?;
                // According to LTP test pwrite04:
                //
                //   POSIX requires that opening a file with the O_APPEND flag should have no effect on the
                //   location at which pwrite() writes data. However, on Linux, if a file is opened with
                //   O_APPEND, pwrite() appends data to the end of the file, regardless of the value of offset.
                //
                // NOTE(review): seekability was already checked at the top of this function, so
                // this condition looks like it is always true here — confirm before simplifying.
                if self.ops().is_seekable() {
                    // The caller's offset is still validated even though it is replaced next.
                    checked_add_offset_and_length(offset, data.available())?;
                    offset = default_eof_offset(locked, self, current_task)? as usize;
                }
                self.write_common(locked, current_task, offset, data)
            } else {
                let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;
                self.write_common(locked, current_task, offset, data)
            }
        })
    }
1918
1919    pub fn seek<L>(
1920        &self,
1921        locked: &mut Locked<L>,
1922        current_task: &CurrentTask,
1923        target: SeekTarget,
1924    ) -> Result<off_t, Errno>
1925    where
1926        L: LockEqualOrBefore<FileOpsCore>,
1927    {
1928        let locked = locked.cast_locked::<FileOpsCore>();
1929        let locked = locked;
1930
1931        if !self.ops().is_seekable() {
1932            return error!(ESPIPE);
1933        }
1934
1935        if !self.ops().has_persistent_offsets() {
1936            return self.ops().seek(locked, self, current_task, 0, target);
1937        }
1938
1939        let mut offset_guard = self.offset.copy();
1940        let new_offset = self.ops().seek(locked, self, current_task, *offset_guard, target)?;
1941        *offset_guard = new_offset;
1942        offset_guard.update();
1943        Ok(new_offset)
1944    }
1945
1946    pub fn sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1947        self.ops().sync(self, current_task)
1948    }
1949
1950    pub fn data_sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1951        self.ops().data_sync(self, current_task)
1952    }
1953
1954    pub fn get_memory<L>(
1955        &self,
1956        locked: &mut Locked<L>,
1957        current_task: &CurrentTask,
1958        length: Option<usize>,
1959        prot: ProtectionFlags,
1960    ) -> Result<Arc<MemoryObject>, Errno>
1961    where
1962        L: LockEqualOrBefore<FileOpsCore>,
1963    {
1964        if prot.contains(ProtectionFlags::READ) && !self.can_read() {
1965            return error!(EACCES);
1966        }
1967        if prot.contains(ProtectionFlags::WRITE) && !self.can_write() {
1968            return error!(EACCES);
1969        }
1970        if prot.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1971            return error!(EPERM);
1972        }
1973        self.ops().get_memory(locked.cast_locked::<FileOpsCore>(), self, current_task, length, prot)
1974    }
1975
1976    pub fn mmap<L>(
1977        &self,
1978        locked: &mut Locked<L>,
1979        current_task: &CurrentTask,
1980        addr: DesiredAddress,
1981        memory_offset: u64,
1982        length: usize,
1983        prot_flags: ProtectionFlags,
1984        options: MappingOptions,
1985        filename: NamespaceNode,
1986    ) -> Result<UserAddress, Errno>
1987    where
1988        L: LockEqualOrBefore<FileOpsCore>,
1989    {
1990        let locked = locked.cast_locked::<FileOpsCore>();
1991        if !self.can_read() {
1992            return error!(EACCES);
1993        }
1994        if prot_flags.contains(ProtectionFlags::WRITE)
1995            && !self.can_write()
1996            && options.contains(MappingOptions::SHARED)
1997        {
1998            return error!(EACCES);
1999        }
2000        if prot_flags.contains(ProtectionFlags::EXEC) && !self.can_exec() {
2001            return error!(EPERM);
2002        }
2003        self.ops().mmap(
2004            locked,
2005            self,
2006            current_task,
2007            addr,
2008            memory_offset,
2009            length,
2010            prot_flags,
2011            options,
2012            filename,
2013        )
2014    }
2015
2016    pub fn readdir<L>(
2017        &self,
2018        locked: &mut Locked<L>,
2019        current_task: &CurrentTask,
2020        sink: &mut dyn DirentSink,
2021    ) -> Result<(), Errno>
2022    where
2023        L: LockEqualOrBefore<FileOpsCore>,
2024    {
2025        let locked = locked.cast_locked::<FileOpsCore>();
2026        if self.name.entry.is_dead() {
2027            return error!(ENOENT);
2028        }
2029
2030        self.ops().readdir(locked, self, current_task, sink)?;
2031        self.update_atime();
2032        self.notify(InotifyMask::ACCESS);
2033        Ok(())
2034    }
2035
2036    pub fn ioctl(
2037        &self,
2038        locked: &mut Locked<Unlocked>,
2039        current_task: &CurrentTask,
2040        request: u32,
2041        arg: SyscallArg,
2042    ) -> Result<SyscallResult, Errno> {
2043        security::check_file_ioctl_access(current_task, &self, request)?;
2044
2045        if request == FIBMAP {
2046            security::check_task_capable(current_task, CAP_SYS_RAWIO)?;
2047
2048            // TODO: https://fxbug.dev/404795644 - eliminate this phoney response when the SELinux
2049            // Test Suite no longer requires it.
2050            if current_task.kernel().features.selinux_test_suite {
2051                let phoney_block = 0xbadf000du32;
2052                current_task.write_object(arg.into(), &phoney_block)?;
2053                return Ok(SUCCESS);
2054            }
2055        }
2056
2057        self.ops().ioctl(locked, self, current_task, request, arg)
2058    }
2059
2060    pub fn fcntl(
2061        &self,
2062        current_task: &CurrentTask,
2063        cmd: u32,
2064        arg: u64,
2065    ) -> Result<SyscallResult, Errno> {
2066        self.ops().fcntl(self, current_task, cmd, arg)
2067    }
2068
2069    pub fn ftruncate<L>(
2070        &self,
2071        locked: &mut Locked<L>,
2072        current_task: &CurrentTask,
2073        length: u64,
2074    ) -> Result<(), Errno>
2075    where
2076        L: LockBefore<BeforeFsNodeAppend>,
2077    {
2078        // The file must be opened with write permissions. Otherwise
2079        // truncating it is forbidden.
2080        if !self.can_write() {
2081            return error!(EINVAL);
2082        }
2083        self.node().ftruncate(locked, current_task, length)?;
2084        self.name.entry.notify_ignoring_excl_unlink(InotifyMask::MODIFY);
2085        Ok(())
2086    }
2087
2088    pub fn fallocate<L>(
2089        &self,
2090        locked: &mut Locked<L>,
2091        current_task: &CurrentTask,
2092        mode: FallocMode,
2093        offset: u64,
2094        length: u64,
2095    ) -> Result<(), Errno>
2096    where
2097        L: LockBefore<BeforeFsNodeAppend>,
2098    {
2099        // If the file is a pipe or FIFO, ESPIPE is returned.
2100        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2101        if self.node().is_fifo() {
2102            return error!(ESPIPE);
2103        }
2104
2105        // Must be a regular file or directory.
2106        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2107        if !self.node().is_dir() && !self.node().is_reg() {
2108            return error!(ENODEV);
2109        }
2110
2111        // The file must be opened with write permissions. Otherwise operation is forbidden.
2112        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2113        if !self.can_write() {
2114            return error!(EBADF);
2115        }
2116
2117        self.node().fallocate(locked, current_task, mode, offset, length)?;
2118        self.notify(InotifyMask::MODIFY);
2119        Ok(())
2120    }
2121
2122    pub fn to_handle(
2123        &self,
2124        current_task: &CurrentTask,
2125    ) -> Result<Option<zx::NullableHandle>, Errno> {
2126        self.ops().to_handle(self, current_task)
2127    }
2128
2129    pub fn get_handles(
2130        &self,
2131        current_task: &CurrentTask,
2132    ) -> Result<Vec<zx::NullableHandle>, Errno> {
2133        self.ops().get_handles(self, current_task)
2134    }
2135
2136    pub fn as_thread_group_key(&self) -> Result<ThreadGroupKey, Errno> {
2137        self.ops().as_thread_group_key(self)
2138    }
2139
2140    /// Update the file flags.
2141    ///
2142    /// Writes the bits in `value` that are set in `mask` into the file flags.
2143    ///
2144    /// Does not provide any synchronization.
2145    pub fn update_file_flags(&self, value: OpenFlags, mask: OpenFlags) {
2146        self.flags.update(value, mask, Ordering::Relaxed, Ordering::Relaxed);
2147    }
2148
2149    /// Get the async owner of this file.
2150    ///
2151    /// See fcntl(F_GETOWN)
2152    pub fn get_async_owner(&self) -> FileAsyncOwner {
2153        *self.async_owner.lock()
2154    }
2155
2156    /// Set the async owner of this file.
2157    ///
2158    /// See fcntl(F_SETOWN)
2159    pub fn set_async_owner(&self, owner: FileAsyncOwner) {
2160        *self.async_owner.lock() = owner;
2161    }
2162
2163    /// See fcntl(F_GETLEASE)
2164    pub fn get_lease(&self, _current_task: &CurrentTask) -> FileLeaseType {
2165        *self.lease.lock()
2166    }
2167
2168    /// See fcntl(F_SETLEASE)
2169    pub fn set_lease(
2170        &self,
2171        _current_task: &CurrentTask,
2172        lease: FileLeaseType,
2173    ) -> Result<(), Errno> {
2174        if !self.node().is_reg() {
2175            return error!(EINVAL);
2176        }
2177        if lease == FileLeaseType::Read && self.can_write() {
2178            return error!(EAGAIN);
2179        }
2180        *self.lease.lock() = lease;
2181        Ok(())
2182    }
2183
2184    /// Wait on the specified events and call the EventHandler when ready
2185    pub fn wait_async<L>(
2186        &self,
2187        locked: &mut Locked<L>,
2188        current_task: &CurrentTask,
2189        waiter: &Waiter,
2190        events: FdEvents,
2191        handler: EventHandler,
2192    ) -> Option<WaitCanceler>
2193    where
2194        L: LockEqualOrBefore<FileOpsCore>,
2195    {
2196        self.ops().wait_async(
2197            locked.cast_locked::<FileOpsCore>(),
2198            self,
2199            current_task,
2200            waiter,
2201            events,
2202            handler,
2203        )
2204    }
2205
2206    /// The events currently active on this file.
2207    pub fn query_events<L>(
2208        &self,
2209        locked: &mut Locked<L>,
2210        current_task: &CurrentTask,
2211    ) -> Result<FdEvents, Errno>
2212    where
2213        L: LockEqualOrBefore<FileOpsCore>,
2214    {
2215        self.ops()
2216            .query_events(locked.cast_locked::<FileOpsCore>(), self, current_task)
2217            .map(FdEvents::add_equivalent_fd_events)
2218    }
2219
2220    pub fn record_lock(
2221        &self,
2222        locked: &mut Locked<Unlocked>,
2223        current_task: &CurrentTask,
2224        cmd: RecordLockCommand,
2225        flock: uapi::flock,
2226    ) -> Result<Option<uapi::flock>, Errno> {
2227        self.node().record_lock(locked, current_task, self, cmd, flock)
2228    }
2229
2230    pub fn flush<L>(&self, locked: &mut Locked<L>, current_task: &CurrentTask, id: FdTableId)
2231    where
2232        L: LockEqualOrBefore<FileOpsCore>,
2233    {
2234        self.name.entry.node.record_lock_release(RecordLockOwner::FdTable(id));
2235        self.ops().flush(locked.cast_locked::<FileOpsCore>(), self, current_task)
2236    }
2237
2238    fn update_atime(&self) {
2239        if !self.flags().contains(OpenFlags::NOATIME) {
2240            self.name.update_atime();
2241        }
2242    }
2243
2244    pub fn readahead(
2245        &self,
2246        current_task: &CurrentTask,
2247        offset: usize,
2248        length: usize,
2249    ) -> Result<(), Errno> {
2250        // readfile() fails with EBADF if the file was not open for read.
2251        if !self.can_read() {
2252            return error!(EBADF);
2253        }
2254        checked_add_offset_and_length(offset, length)?;
2255        self.ops().readahead(self, current_task, offset, length)
2256    }
2257
2258    pub fn extra_fdinfo(
2259        &self,
2260        locked: &mut Locked<FileOpsCore>,
2261        current_task: &CurrentTask,
2262    ) -> Option<FsString> {
2263        let file = self.weak_handle.upgrade()?;
2264        self.ops().extra_fdinfo(locked, &file, current_task)
2265    }
2266
2267    /// Register the fd number of an `EpollFileObject` that listens to events from this
2268    /// `FileObject`.
2269    pub fn register_epfd(&self, file: &FileHandle) {
2270        self.epoll_files.lock().insert(WeakKey::from(file), file.weak_handle.clone());
2271    }
2272
2273    pub fn unregister_epfd(&self, file: &FileHandle) {
2274        self.epoll_files.lock().remove(&WeakKey::from(file));
2275    }
2276}
2277
impl Releasable for FileObject {
    type Context<'a> = CurrentTaskAndLocked<'a>;

    /// Tears down the file object, consuming it.
    ///
    /// In order: calls `delete` for this file on every registered epoll file that is still
    /// alive, releases the node's `WriteFile` write guard if the file was writable, calls the
    /// ops' `close`, informs the node through `on_file_closed`, and finally emits a
    /// `CLOSE_WRITE` or `CLOSE_NOWRITE` inotify event depending on writability.
    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
        let (locked, current_task) = context;
        // Release all wake leases associated with this file in the corresponding `WaitObject`
        // of each registered epfd.
        for (_, file) in self.epoll_files.lock().drain() {
            // Epoll files that have already gone away, or entries that are not epoll files, are
            // skipped.
            if let Some(file) = file.upgrade() {
                if let Some(epoll_object) = file.downcast_file::<EpollFileObject>() {
                    // The result of `delete` is deliberately discarded.
                    let _ = epoll_object.delete(current_task, &self);
                }
            }
        }

        if self.can_write() {
            self.name.entry.node.write_guard_state.lock().release(FileWriteGuardMode::WriteFile);
        }

        let locked = locked.cast_locked::<FileOpsCore>();
        // Move the ops and the state out of `self`; from here on only `state` is used.
        let ops = self.ops;
        let state = self.state;
        ops.close(locked, &state, current_task);
        state.name.entry.node.on_file_closed(&state);
        let event =
            if state.can_write() { InotifyMask::CLOSE_WRITE } else { InotifyMask::CLOSE_NOWRITE };
        state.notify(event);
    }
}
2307
2308impl fmt::Debug for FileObject {
2309    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2310        f.debug_struct("FileObject")
2311            .field("name", &self.name)
2312            .field("fs", &self.fs.name())
2313            .field("offset", &self.offset)
2314            .field("flags", &self.flags)
2315            .field("ops_ty", &self.ops().type_name())
2316            .finish()
2317    }
2318}
2319
impl OnWakeOps for FileReleaser {
    /// No-op wake callback: both arguments are ignored and nothing is done.
    fn on_wake(&self, _current_task: &CurrentTask, _baton_lease: &zx::NullableHandle) {}
}
2323
/// A FileObject with the type of its FileOps known. Dereferencing it returns the FileOps.
///
/// Both fields are shared references with the same lifetime `'a`, so the value is a cheap
/// view over an existing `FileObject`.
pub struct DowncastedFile<'a, Ops> {
    /// The file object that was downcast.
    file: &'a FileObject,
    /// The file's ops, viewed as the concrete type `Ops`.
    ops: &'a Ops,
}
// `Copy` and `Clone` are written by hand rather than derived: a derive would add `Ops: Copy` /
// `Ops: Clone` bounds, whereas these impls hold for every `Ops` because only references are
// copied.
impl<'a, Ops> Copy for DowncastedFile<'a, Ops> {}
impl<'a, Ops> Clone for DowncastedFile<'a, Ops> {
    /// Returns a bitwise copy of the view; neither the file nor the ops is cloned.
    fn clone(&self) -> Self {
        *self
    }
}
2335
2336impl<'a, Ops> DowncastedFile<'a, Ops> {
2337    pub fn file(&self) -> &'a FileObject {
2338        self.file
2339    }
2340}
2341
2342impl<'a, Ops> Deref for DowncastedFile<'a, Ops> {
2343    type Target = &'a Ops;
2344    fn deref(&self) -> &Self::Target {
2345        &self.ops
2346    }
2347}
2348
2349impl FileObject {
2350    /// Returns the `FileObject`'s `FileOps` as a `DowncastedFile<T>`, or `None` if the downcast
2351    /// fails.
2352    ///
2353    /// This is useful for syscalls that only operate on a certain type of file.
2354    pub fn downcast_file<'a, T>(&'a self) -> Option<DowncastedFile<'a, T>>
2355    where
2356        T: 'static,
2357    {
2358        let ops = self.ops().as_any().downcast_ref::<T>()?;
2359        Some(DowncastedFile { file: self, ops })
2360    }
2361}
2362
2363/// Invokes the specified one-way `method` on the `proxy` and waits until the `proxy`'s underlying
2364/// channel has been closed by the peer.
2365///
2366/// This is used in `close()` implementations when the `FileOps` wraps a FIDL resource that provides
2367/// a one-way API to request teardown, and acknowledges completion of teardown by closing the FIDL
2368/// channel, to ensure that the `close()` call does not return until the FIDL server has actually
2369/// processed the teardown request.
2370pub fn call_fidl_and_await_close<P, M>(method: M, proxy: &P)
2371where
2372    P: fidl::endpoints::SynchronousProxy,
2373    M: FnOnce(&P) -> Result<(), fidl::Error>,
2374{
2375    if let Err(e) = method(proxy) {
2376        log_error!("call_fidl_and_await_close: call {} failed: {e:?}", P::Protocol::DEBUG_NAME);
2377        return;
2378    }
2379    let channel = proxy.as_channel();
2380    let result = channel.wait_one(zx::Signals::CHANNEL_PEER_CLOSED, zx::MonotonicInstant::INFINITE);
2381    if let Err(status) = result.to_result() {
2382        log_error!(
2383            "call_fidl_and_await_close: wait_one {} failed: {status:?}",
2384            P::Protocol::DEBUG_NAME
2385        );
2386    }
2387}
2388
#[cfg(test)]
mod tests {
    use crate::fs::tmpfs::TmpFs;
    use crate::task::CurrentTask;
    use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
    use crate::testing::*;
    use crate::vfs::MountInfo;
    use crate::vfs::buffers::{VecInputBuffer, VecOutputBuffer};
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::auth::FsCred;
    use starnix_uapi::device_id::DeviceId;
    use starnix_uapi::file_mode::FileMode;
    use starnix_uapi::open_flags::OpenFlags;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use zerocopy::{FromBytes, IntoBytes, LE, U64};

    /// Races appending writes against truncation while a third party reads the file.
    ///
    /// One spawned thread appends the little-endian `u64` values `0..2000` to a tmpfs file
    /// opened with `O_APPEND`, another keeps truncating the file to zero length until the writer
    /// is done, and the test body repeatedly reads the start of the file and checks that the
    /// values it sees are strictly increasing.
    #[::fuchsia::test]
    async fn test_append_truncate_race() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Set up a regular file named "test" in a fresh tmpfs and open it for append.
            let kernel = current_task.kernel();
            let root_fs = TmpFs::new_fs(locked, &kernel);
            let mount = MountInfo::detached();
            let root_node = Arc::clone(root_fs.root());
            let file = root_node
                .create_entry(
                    locked,
                    &current_task,
                    &mount,
                    "test".into(),
                    |locked, dir, mount, name| {
                        dir.create_node(
                            locked,
                            &current_task,
                            mount,
                            name,
                            FileMode::IFREG | FileMode::ALLOW_ALL,
                            DeviceId::NONE,
                            FsCred::root(),
                        )
                    },
                )
                .expect("create_node failed");
            let file_handle = file
                .open_anonymous(locked, &current_task, OpenFlags::APPEND | OpenFlags::RDWR)
                .expect("open failed");
            // Set by the writer once all values have been appended; stops the other two loops.
            let done = Arc::new(AtomicBool::new(false));

            // Writer thread: appends 2000 consecutive values, then signals completion.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                for i in 0..2000 {
                    fh.write(
                        locked,
                        current_task,
                        &mut VecInputBuffer::new(U64::<LE>::new(i).as_bytes()),
                    )
                    .expect("write failed");
                }
                done_clone.store(true, Ordering::SeqCst);
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (write_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // Truncator thread: keeps emptying the file until the writer has finished.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                while !done_clone.load(Ordering::SeqCst) {
                    fh.ftruncate(locked, current_task, 0).expect("truncate failed");
                }
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (truncate_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // If we read from the file, we should always find an increasing sequence. If there are
            // races, then we might unexpectedly see zeroes.
            while !done.load(Ordering::SeqCst) {
                let mut buffer = VecOutputBuffer::new(4096);
                let amount = file_handle
                    .read_at(locked, &current_task, 0, &mut buffer)
                    .expect("read failed");
                let mut last = None;
                // Only the bytes actually read are inspected, one 8-byte value at a time; a
                // trailing partial chunk is ignored by `chunks_exact`.
                let buffer = &Vec::from(buffer)[..amount];
                for i in
                    buffer.chunks_exact(8).map(|chunk| U64::<LE>::read_from_bytes(chunk).unwrap())
                {
                    if let Some(last) = last {
                        assert!(i.get() > last, "buffer: {:?}", buffer);
                    }
                    last = Some(i.get());
                }
            }

            // Collect both thread results so that a failure in either thread fails the test.
            let _ = write_thread().unwrap();
            let _ = truncate_thread().unwrap();
        })
        .await;
    }
}