Skip to main content

starnix_core/vfs/
file_object.rs

// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::memory::MemoryObject;
6use crate::mm::{DesiredAddress, MappingName, MappingOptions, MemoryAccessorExt, ProtectionFlags};
7use crate::power::{OnWakeOps, WakeupSourceOrigin};
8use crate::security;
9use crate::task::{
10    CurrentTask, CurrentTaskAndLocked, EventHandler, Task, ThreadGroupKey, WaitCallback,
11    WaitCanceler, Waiter, register_delayed_release,
12};
13use crate::vfs::buffers::{InputBuffer, OutputBuffer};
14use crate::vfs::file_server::serve_file;
15use crate::vfs::fsverity::{
16    FsVerityState, {self},
17};
18use crate::vfs::{
19    ActiveNamespaceNode, DirentSink, EpollFileObject, EpollKey, FallocMode, FdTableId,
20    FileSystemHandle, FileWriteGuardMode, FsNodeHandle, FsString, NamespaceNode, RecordLockCommand,
21    RecordLockOwner, wakeup_source_name_for_epoll,
22};
23use starnix_crypt::EncryptionKeyId;
24use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
25use starnix_types::ownership::ReleaseGuard;
26use starnix_uapi::mount_flags::MountFlags;
27use starnix_uapi::user_address::ArchSpecific;
28
29use fidl::HandleBased;
30use linux_uapi::{FSCRYPT_MODE_AES_256_CTS, FSCRYPT_MODE_AES_256_XTS};
31use starnix_logging::{CATEGORY_STARNIX_MM, impossible_error, trace_duration, track_stub};
32use starnix_sync::{
33    BeforeFsNodeAppend, FileOpsCore, LockBefore, LockEqualOrBefore, Locked, Mutex, Unlocked,
34};
35use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
36use starnix_types::math::round_up_to_system_page_size;
37use starnix_types::ownership::Releasable;
38use starnix_uapi::arc_key::WeakKey;
39use starnix_uapi::as_any::AsAny;
40use starnix_uapi::auth::{CAP_FOWNER, CAP_SYS_RAWIO};
41use starnix_uapi::errors::{EAGAIN, ETIMEDOUT, Errno};
42use starnix_uapi::file_lease::FileLeaseType;
43use starnix_uapi::file_mode::Access;
44use starnix_uapi::inotify_mask::InotifyMask;
45use starnix_uapi::open_flags::{AtomicOpenFlags, OpenFlags};
46use starnix_uapi::seal_flags::SealFlags;
47use starnix_uapi::user_address::{UserAddress, UserRef};
48use starnix_uapi::vfs::FdEvents;
49use starnix_uapi::{
50    FIBMAP, FIGETBSZ, FIONBIO, FIONREAD, FIOQSIZE, FS_CASEFOLD_FL, FS_IOC_ADD_ENCRYPTION_KEY,
51    FS_IOC_ENABLE_VERITY, FS_IOC_FSGETXATTR, FS_IOC_FSSETXATTR, FS_IOC_MEASURE_VERITY,
52    FS_IOC_READ_VERITY_METADATA, FS_IOC_REMOVE_ENCRYPTION_KEY, FS_IOC_SET_ENCRYPTION_POLICY,
53    FS_VERITY_FL, FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, FSCRYPT_POLICY_V2, SEEK_CUR, SEEK_DATA,
54    SEEK_END, SEEK_HOLE, SEEK_SET, TCGETS, errno, error, fscrypt_add_key_arg, fscrypt_identifier,
55    fsxattr, off_t, pid_t, uapi,
56};
57use std::collections::HashMap;
58use std::fmt;
59use std::ops::Deref;
60use std::sync::atomic::Ordering;
61use std::sync::{Arc, Weak};
62
/// Largest end offset accepted by [`checked_add_offset_and_length`], and the end-of-file position
/// that [`unbounded_seek`] reports. Numerically equal to `i64::MAX`.
pub const MAX_LFS_FILESIZE: usize = 0x7fff_ffff_ffff_ffff;
64
65pub fn checked_add_offset_and_length(offset: usize, length: usize) -> Result<usize, Errno> {
66    let end = offset.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
67    if end > MAX_LFS_FILESIZE {
68        return error!(EINVAL);
69    }
70    Ok(end)
71}
72
/// Destination of a seek operation: a `whence` mode paired with its offset argument.
#[derive(Debug)]
pub enum SeekTarget {
    /// Seek to the given offset relative to the start of the file.
    Set(off_t),
    /// Seek to the given offset relative to the current position.
    Cur(off_t),
    /// Seek to the given offset relative to the end of the file.
    End(off_t),
    /// Seek for the first data at or after the given offset.
    Data(off_t),
    /// Seek for the first hole at or after the given offset.
    Hole(off_t),
}
86
87impl SeekTarget {
88    pub fn from_raw(whence: u32, offset: off_t) -> Result<SeekTarget, Errno> {
89        match whence {
90            SEEK_SET => Ok(SeekTarget::Set(offset)),
91            SEEK_CUR => Ok(SeekTarget::Cur(offset)),
92            SEEK_END => Ok(SeekTarget::End(offset)),
93            SEEK_DATA => Ok(SeekTarget::Data(offset)),
94            SEEK_HOLE => Ok(SeekTarget::Hole(offset)),
95            _ => error!(EINVAL),
96        }
97    }
98
99    pub fn whence(&self) -> u32 {
100        match self {
101            Self::Set(_) => SEEK_SET,
102            Self::Cur(_) => SEEK_CUR,
103            Self::End(_) => SEEK_END,
104            Self::Data(_) => SEEK_DATA,
105            Self::Hole(_) => SEEK_HOLE,
106        }
107    }
108
109    pub fn offset(&self) -> off_t {
110        match self {
111            Self::Set(off)
112            | Self::Cur(off)
113            | Self::End(off)
114            | Self::Data(off)
115            | Self::Hole(off) => *off,
116        }
117    }
118}
119
/// Corresponds to struct file_operations in Linux, plus any filesystem-specific data.
///
/// Only [`Self::is_seekable`], [`Self::read`], [`Self::write`] and [`Self::seek`] are required.
/// Every other method comes with a default implementation, described on the method itself.
pub trait FileOps: Send + Sync + AsAny + 'static {
    /// Called when the FileObject is opened/created.
    ///
    /// The default implementation does nothing and reports success.
    fn open(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<(), Errno> {
        Ok(())
    }

    /// Called when the FileObject is destroyed.
    ///
    /// Takes ownership of the boxed ops. The default implementation does nothing.
    fn close(
        self: Box<Self>,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObjectState,
        _current_task: &CurrentTask,
    ) {
    }

    /// Called every time close() is called on this file, even if the file is not ready to be
    /// released.
    ///
    /// The default implementation does nothing.
    fn flush(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) {
    }

    /// Returns whether the file has meaningful seek offsets. Returning `false` is only an
    /// optimization and will make `FileObject` never hold the offset lock when calling `read` and
    /// `write`.
    ///
    /// Defaults to the value of [`Self::is_seekable`].
    fn has_persistent_offsets(&self) -> bool {
        self.is_seekable()
    }

    /// Returns whether the file is seekable.
    fn is_seekable(&self) -> bool;

    /// Returns true if `write()` operations on the file will update the seek offset.
    ///
    /// Defaults to the value of [`Self::has_persistent_offsets`].
    fn writes_update_seek_offset(&self) -> bool {
        self.has_persistent_offsets()
    }

    /// Read from the file at an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes read.
    fn read(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno>;

    /// Write to the file with an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes written.
    fn write(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>;

    /// Adjust the `current_offset` if the file is seekable.
    ///
    /// Returns the new offset.
    fn seek(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        current_offset: off_t,
        target: SeekTarget,
    ) -> Result<off_t, Errno>;

    /// Syncs cached state associated with the file descriptor to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    ///
    /// The default implementation forwards to the `sync` operation of the file's node.
    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
        file.node().ops().sync(file.node(), current_task)
    }

    /// Syncs cached data, and only enough metadata to retrieve said data, to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    ///
    /// The default implementation performs a full [`Self::sync`].
    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
        // TODO(https://fxbug.dev/297305634) make a default macro once data can be done separately
        self.sync(file, current_task)
    }

    /// Returns a VMO representing this file. At least the requested protection flags must
    /// be set on the VMO. Reading or writing the VMO must read or write the file. If this is not
    /// possible given the requested protection, an error must be returned.
    /// The `length` is a hint for the desired size of the VMO. The returned VMO may be larger or
    /// smaller than the requested length.
    /// This method is typically called by [`Self::mmap`].
    ///
    /// The default implementation fails with `ENODEV`.
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(ENODEV)
    }

    /// Responds to an mmap call. The default implementation calls [`Self::get_memory`] to get a VMO
    /// and then maps it with [`crate::mm::MemoryManager::map`].
    /// Only implement this trait method if your file needs to control mapping, or record where
    /// a VMO gets mapped.
    ///
    /// Returns the address at which the memory was mapped. Fails with `EINVAL` when
    /// `memory_offset` plus the page-rounded `length` overflows.
    fn mmap(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        addr: DesiredAddress,
        memory_offset: u64,
        length: usize,
        prot_flags: ProtectionFlags,
        options: MappingOptions,
        filename: NamespaceNode,
    ) -> Result<UserAddress, Errno> {
        trace_duration!(CATEGORY_STARNIX_MM, "FileOpsDefaultMmap");
        // Size hint passed to `get_memory`: the offset plus the page-rounded length, i.e. the end
        // of the range being mapped.
        let min_memory_size = (memory_offset as usize)
            .checked_add(round_up_to_system_page_size(length)?)
            .ok_or_else(|| errno!(EINVAL))?;
        let mut memory = if options.contains(MappingOptions::SHARED) {
            trace_duration!(CATEGORY_STARNIX_MM, "GetSharedVmo");
            // Shared mappings use the file's memory object directly, with the requested
            // protection.
            self.get_memory(locked, file, current_task, Some(min_memory_size), prot_flags)?
        } else {
            trace_duration!(CATEGORY_STARNIX_MM, "GetPrivateVmo");
            // TODO(tbodt): Use PRIVATE_CLONE to have the filesystem server do the clone for us.
            // The file's memory is requested readable and never writable; the mapping itself is
            // backed by a child created below.
            let base_prot_flags = (prot_flags | ProtectionFlags::READ) - ProtectionFlags::WRITE;
            let memory = self.get_memory(
                locked,
                file,
                current_task,
                Some(min_memory_size),
                base_prot_flags,
            )?;
            let mut clone_flags = zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE;
            if !prot_flags.contains(ProtectionFlags::WRITE) {
                clone_flags |= zx::VmoChildOptions::NO_WRITE;
            }
            trace_duration!(CATEGORY_STARNIX_MM, "CreatePrivateChildVmo");
            // The child spans the whole parent (offset 0, parent's size).
            Arc::new(
                memory.create_child(clone_flags, 0, memory.get_size()).map_err(impossible_error)?,
            )
        };

        // Write guard is necessary only for shared mappings. Note that this doesn't depend on
        // `prot_flags` since these can be changed later with `mprotect()`.
        let file_write_guard = if options.contains(MappingOptions::SHARED) && file.can_write() {
            let node = &file.name.entry.node;
            let state = node.write_guard_state.lock();

            // `F_SEAL_FUTURE_WRITE` should allow `mmap(PROT_READ)`, but block
            // `mprotect(PROT_WRITE)`. This is different from `F_SEAL_WRITE`, which blocks
            // `mmap(PROT_READ)`. To handle this case correctly remove `WRITE` right from the
            // VMO handle to ensure `mprotect(PROT_WRITE)` fails.
            let seals = state.get_seals().unwrap_or(SealFlags::empty());
            if seals.contains(SealFlags::FUTURE_WRITE)
                && !seals.contains(SealFlags::WRITE)
                && !prot_flags.contains(ProtectionFlags::WRITE)
            {
                let mut new_rights = zx::Rights::VMO_DEFAULT - zx::Rights::WRITE;
                if prot_flags.contains(ProtectionFlags::EXEC) {
                    new_rights |= zx::Rights::EXECUTE;
                }
                memory = Arc::new(memory.duplicate_handle(new_rights).map_err(impossible_error)?);

                // The handle can no longer be written through, so no write guard is taken.
                None
            } else {
                Some(FileWriteGuardMode::WriteMapping)
            }
        } else {
            None
        };

        current_task.mm()?.map_memory(
            addr,
            memory,
            memory_offset,
            length,
            prot_flags,
            file.max_access_for_memory_mapping(),
            options,
            MappingName::File(filename.into_mapping(file_write_guard)?),
        )
    }

    /// Respond to `getdents` or `getdents64` calls.
    ///
    /// The `file.offset` lock will be held while entering this method. The implementation must look
    /// at `sink.offset()` to read the current offset into the file.
    ///
    /// The default implementation fails with `ENOTDIR`.
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(ENOTDIR)
    }

    /// Establish a one-shot, edge-triggered, asynchronous wait for the given FdEvents for the
    /// given file and task. Returns `None` if this file does not support blocking waits.
    ///
    /// Active events are not considered. This is similar to the semantics of the
    /// ZX_WAIT_ASYNC_EDGE flag on zx_wait_async. To avoid missing events, the caller must call
    /// query_events after calling this.
    ///
    /// If your file does not support blocking waits, leave this as the default implementation.
    fn wait_async(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _waiter: &Waiter,
        _events: FdEvents,
        _handler: EventHandler,
    ) -> Option<WaitCanceler> {
        None
    }

    /// The events currently active on this file.
    ///
    /// If this function returns `POLLIN` or `POLLOUT`, then FileObject will
    /// add `POLLRDNORM` and `POLLWRNORM`, respectively, which are equivalent in
    /// the Linux UAPI.
    ///
    /// The default implementation reports both `POLLIN` and `POLLOUT`.
    ///
    /// See https://linux.die.net/man/2/poll
    fn query_events(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<FdEvents, Errno> {
        Ok(FdEvents::POLLIN | FdEvents::POLLOUT)
    }

    /// Handles an ioctl `request` with argument `arg` issued on this file.
    ///
    /// The default implementation forwards to `default_ioctl`.
    fn ioctl(
        &self,
        locked: &mut Locked<Unlocked>,
        file: &FileObject,
        current_task: &CurrentTask,
        request: u32,
        arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        default_ioctl(file, locked, current_task, request, arg)
    }

    /// Handles an fcntl command `cmd` issued on this file.
    ///
    /// The default implementation forwards to `default_fcntl`, which is only given `cmd`.
    fn fcntl(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        cmd: u32,
        _arg: u64,
    ) -> Result<SyscallResult, Errno> {
        default_fcntl(cmd)
    }

    /// Return a handle that allows access to this file descriptor through the zxio protocols.
    ///
    /// If None is returned, the file will act as if it was a fd to `/dev/null`.
    ///
    /// The default implementation serves the file with `serve_file`, using a clone of the current
    /// task's credentials, and returns the resulting handle.
    fn to_handle(
        &self,
        file: &FileObject,
        current_task: &CurrentTask,
    ) -> Result<Option<zx::NullableHandle>, Errno> {
        serve_file(current_task, file, current_task.current_creds().clone())
            .map(|c| Some(c.0.into_handle().into()))
    }

    /// Return a vector of handles. This is used in situations where there is more than one handle
    /// associated with this file descriptor.
    ///
    /// In Fuchsia, there is an expectation that there is a 1:1 mapping between a file descriptor
    /// and a handle. In general, we do not want to violate that rule. This function is intended to
    /// be used in very limited circumstances (compatibility with Linux and Binder), where we need
    /// to violate that rule.
    ///
    /// Specifically, we are using this to implement SyncFiles correctly, where a single SyncFile
    /// can represent multiple SyncPoints. Each SyncPoint contains a zx::Counter.
    ///
    /// If you choose to implement this function, to_handle() should return an error. You must also
    /// be aware that if these handles are passed to Fuchsia over Binder, they will be represented
    /// as a single file descriptor, and you should use the composite_fd library to manage that
    /// file descriptor.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn get_handles(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<Vec<zx::NullableHandle>, Errno> {
        error!(ENOTSUP)
    }

    /// Returns the thread group key associated with this file.
    ///
    /// Used by pidfd and `/proc/<pid>`. Unlikely to be used by other files.
    ///
    /// The default implementation fails with `EBADF`.
    fn as_thread_group_key(&self, _file: &FileObject) -> Result<ThreadGroupKey, Errno> {
        error!(EBADF)
    }

    /// Handles a readahead request for `_length` bytes starting at `_offset`.
    ///
    /// NOTE(review): presumably backs the `readahead` syscall; confirm against the caller.
    ///
    /// The default implementation fails with `EINVAL`.
    fn readahead(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _length: usize,
    ) -> Result<(), Errno> {
        error!(EINVAL)
    }

    /// Extra information that is included in the /proc/<pid>/fdinfo/<fd> entry.
    ///
    /// The default implementation contributes nothing.
    fn extra_fdinfo(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileHandle,
        _current_task: &CurrentTask,
    ) -> Option<FsString> {
        None
    }
}
450
451/// Marker trait for implementation of FileOps that do not need to implement `close` and can
452/// then pass a wrapper object as the `FileOps` implementation.
453pub trait CloseFreeSafe {}
454impl<T: FileOps + CloseFreeSafe, P: Deref<Target = T> + Send + Sync + 'static> FileOps for P {
455    fn close(
456        self: Box<Self>,
457        _locked: &mut Locked<FileOpsCore>,
458        _file: &FileObjectState,
459        _current_task: &CurrentTask,
460    ) {
461        // This method cannot be delegated. T being `CloseFreeSafe` this is fine.
462    }
463
464    fn flush(
465        &self,
466        locked: &mut Locked<FileOpsCore>,
467        file: &FileObject,
468        current_task: &CurrentTask,
469    ) {
470        self.deref().flush(locked, file, current_task)
471    }
472
473    fn has_persistent_offsets(&self) -> bool {
474        self.deref().has_persistent_offsets()
475    }
476
477    fn writes_update_seek_offset(&self) -> bool {
478        self.deref().writes_update_seek_offset()
479    }
480
481    fn is_seekable(&self) -> bool {
482        self.deref().is_seekable()
483    }
484
485    fn read(
486        &self,
487        locked: &mut Locked<FileOpsCore>,
488        file: &FileObject,
489        current_task: &CurrentTask,
490        offset: usize,
491        data: &mut dyn OutputBuffer,
492    ) -> Result<usize, Errno> {
493        self.deref().read(locked, file, current_task, offset, data)
494    }
495
496    fn write(
497        &self,
498        locked: &mut Locked<FileOpsCore>,
499        file: &FileObject,
500        current_task: &CurrentTask,
501        offset: usize,
502        data: &mut dyn InputBuffer,
503    ) -> Result<usize, Errno> {
504        self.deref().write(locked, file, current_task, offset, data)
505    }
506
507    fn seek(
508        &self,
509        locked: &mut Locked<FileOpsCore>,
510        file: &FileObject,
511        current_task: &CurrentTask,
512        current_offset: off_t,
513        target: SeekTarget,
514    ) -> Result<off_t, Errno> {
515        self.deref().seek(locked, file, current_task, current_offset, target)
516    }
517
518    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
519        self.deref().sync(file, current_task)
520    }
521
522    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
523        self.deref().data_sync(file, current_task)
524    }
525
526    fn get_memory(
527        &self,
528        locked: &mut Locked<FileOpsCore>,
529        file: &FileObject,
530        current_task: &CurrentTask,
531        length: Option<usize>,
532        prot: ProtectionFlags,
533    ) -> Result<Arc<MemoryObject>, Errno> {
534        self.deref().get_memory(locked, file, current_task, length, prot)
535    }
536
537    fn mmap(
538        &self,
539        locked: &mut Locked<FileOpsCore>,
540        file: &FileObject,
541        current_task: &CurrentTask,
542        addr: DesiredAddress,
543        memory_offset: u64,
544        length: usize,
545        prot_flags: ProtectionFlags,
546        options: MappingOptions,
547        filename: NamespaceNode,
548    ) -> Result<UserAddress, Errno> {
549        self.deref().mmap(
550            locked,
551            file,
552            current_task,
553            addr,
554            memory_offset,
555            length,
556            prot_flags,
557            options,
558            filename,
559        )
560    }
561
562    fn readdir(
563        &self,
564        locked: &mut Locked<FileOpsCore>,
565        file: &FileObject,
566        current_task: &CurrentTask,
567        sink: &mut dyn DirentSink,
568    ) -> Result<(), Errno> {
569        self.deref().readdir(locked, file, current_task, sink)
570    }
571
572    fn wait_async(
573        &self,
574        locked: &mut Locked<FileOpsCore>,
575        file: &FileObject,
576        current_task: &CurrentTask,
577        waiter: &Waiter,
578        events: FdEvents,
579        handler: EventHandler,
580    ) -> Option<WaitCanceler> {
581        self.deref().wait_async(locked, file, current_task, waiter, events, handler)
582    }
583
584    fn query_events(
585        &self,
586        locked: &mut Locked<FileOpsCore>,
587        file: &FileObject,
588        current_task: &CurrentTask,
589    ) -> Result<FdEvents, Errno> {
590        self.deref().query_events(locked, file, current_task)
591    }
592
593    fn ioctl(
594        &self,
595        locked: &mut Locked<Unlocked>,
596        file: &FileObject,
597        current_task: &CurrentTask,
598        request: u32,
599        arg: SyscallArg,
600    ) -> Result<SyscallResult, Errno> {
601        self.deref().ioctl(locked, file, current_task, request, arg)
602    }
603
604    fn fcntl(
605        &self,
606        file: &FileObject,
607        current_task: &CurrentTask,
608        cmd: u32,
609        arg: u64,
610    ) -> Result<SyscallResult, Errno> {
611        self.deref().fcntl(file, current_task, cmd, arg)
612    }
613
614    fn to_handle(
615        &self,
616        file: &FileObject,
617        current_task: &CurrentTask,
618    ) -> Result<Option<zx::NullableHandle>, Errno> {
619        self.deref().to_handle(file, current_task)
620    }
621
622    fn get_handles(
623        &self,
624        file: &FileObject,
625        current_task: &CurrentTask,
626    ) -> Result<Vec<zx::NullableHandle>, Errno> {
627        self.deref().get_handles(file, current_task)
628    }
629
630    fn as_thread_group_key(&self, file: &FileObject) -> Result<ThreadGroupKey, Errno> {
631        self.deref().as_thread_group_key(file)
632    }
633
634    fn readahead(
635        &self,
636        file: &FileObject,
637        current_task: &CurrentTask,
638        offset: usize,
639        length: usize,
640    ) -> Result<(), Errno> {
641        self.deref().readahead(file, current_task, offset, length)
642    }
643
644    fn extra_fdinfo(
645        &self,
646        locked: &mut Locked<FileOpsCore>,
647        file: &FileHandle,
648        current_task: &CurrentTask,
649    ) -> Option<FsString> {
650        self.deref().extra_fdinfo(locked, file, current_task)
651    }
652}
653
654pub fn default_eof_offset<L>(
655    locked: &mut Locked<L>,
656    file: &FileObject,
657    current_task: &CurrentTask,
658) -> Result<off_t, Errno>
659where
660    L: LockEqualOrBefore<FileOpsCore>,
661{
662    Ok(file.node().get_size(locked, current_task)? as off_t)
663}
664
665/// Implement the seek method for a file. The computation from the end of the file must be provided
666/// through a callback.
667///
668/// Errors if the calculated offset is invalid.
669///
670/// - `current_offset`: The current position
671/// - `target`: The location to seek to.
672/// - `compute_end`: Compute the new offset from the end. Return an error if the operation is not
673///    supported.
674pub fn default_seek<F>(
675    current_offset: off_t,
676    target: SeekTarget,
677    compute_end: F,
678) -> Result<off_t, Errno>
679where
680    F: FnOnce() -> Result<off_t, Errno>,
681{
682    let new_offset = match target {
683        SeekTarget::Set(offset) => Some(offset),
684        SeekTarget::Cur(offset) => current_offset.checked_add(offset),
685        SeekTarget::End(offset) => compute_end()?.checked_add(offset),
686        SeekTarget::Data(offset) => {
687            let eof = compute_end().unwrap_or(off_t::MAX);
688            if offset >= eof {
689                return error!(ENXIO);
690            }
691            Some(offset)
692        }
693        SeekTarget::Hole(offset) => {
694            let eof = compute_end()?;
695            if offset >= eof {
696                return error!(ENXIO);
697            }
698            Some(eof)
699        }
700    }
701    .ok_or_else(|| errno!(EINVAL))?;
702
703    if new_offset < 0 {
704        return error!(EINVAL);
705    }
706
707    Ok(new_offset)
708}
709
710/// Implement the seek method for a file without an upper bound on the resulting offset.
711///
712/// This is useful for files without a defined size.
713///
714/// Errors if the calculated offset is invalid.
715///
716/// - `current_offset`: The current position
717/// - `target`: The location to seek to.
718pub fn unbounded_seek(current_offset: off_t, target: SeekTarget) -> Result<off_t, Errno> {
719    default_seek(current_offset, target, || Ok(MAX_LFS_FILESIZE as off_t))
720}
721
/// Implements [`FileOps::is_seekable`], [`FileOps::read`], [`FileOps::write`] and
/// [`FileOps::seek`] by forwarding to another `FileOps`.
///
/// - `$self`: pass `self`. It becomes the receiver of the generated methods, so `$delegate` can
///   refer to it.
/// - `$delegate`: expression evaluating to the object that `read`, `write` and `seek` are
///   forwarded to, with the arguments passed through unchanged.
///
/// The generated `is_seekable` always returns `true`.
#[macro_export]
macro_rules! fileops_impl_delegate_read_write_and_seek {
    ($self:ident, $delegate:expr) => {
        fn is_seekable(&self) -> bool {
            true
        }

        // `FileObject` is spelled through `$crate` so that the macro does not rely on the type
        // being imported at the expansion site.
        fn read(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.read(locked, file, current_task, offset, data)
        }

        fn write(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.write(locked, file, current_task, offset, data)
        }

        fn seek(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $delegate.seek(locked, file, current_task, current_offset, target)
        }
    };
}
763
/// Implements [`FileOps::is_seekable`] and [`FileOps::seek`] for seekable files.
///
/// The generated `seek` runs `default_seek`, with the end of the file obtained from
/// `default_eof_offset`.
#[macro_export]
macro_rules! fileops_impl_seekable {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // Evaluated lazily: `default_seek` only asks for the end of file when it needs it.
            let eof_offset = || $crate::vfs::default_eof_offset(locked, file, current_task);
            $crate::vfs::default_seek(current_offset, target, eof_offset)
        }
    };
}
786
/// Implements [`FileOps`] methods in a way that makes sense for non-seekable files.
///
/// The generated `is_seekable` returns `false` and the generated `seek` always fails with
/// `ESPIPE`.
#[macro_export]
macro_rules! fileops_impl_nonseekable {
    () => {
        fn is_seekable(&self) -> bool {
            false
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ESPIPE)
        }
    };
}
807
/// Implements [`FileOps::seek`] methods in a way that makes sense for files that ignore
/// seeking operations and always read/write at offset 0.
///
/// The generated methods report the file as seekable but without persistent offsets, and `seek`
/// succeeds with offset 0 whatever the target.
#[macro_export]
macro_rules! fileops_impl_seekless {
    () => {
        fn has_persistent_offsets(&self) -> bool {
            false
        }

        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            Ok(0)
        }
    };
}
833
/// Implements [`FileOps::read`] and [`FileOps::write`] for files that carry no data: both
/// generated methods always fail with `EINVAL`.
#[macro_export]
macro_rules! fileops_impl_dataless {
    () => {
        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }
    };
}
860
/// Implements [`FileOps`] methods in a way that makes sense for directories. You must implement
/// [`FileOps::seek`] and [`FileOps::readdir`].
///
/// The generated methods report the file as seekable, and make both `read` and `write` fail with
/// `EISDIR`.
#[macro_export]
macro_rules! fileops_impl_directory {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }

        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }
    };
}
893
/// Implements [`FileOps::seek`] by handing the current offset and the seek target straight to
/// `$crate::vfs::unbounded_seek`.
#[macro_export]
macro_rules! fileops_impl_unbounded_seek {
    () => {
        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            from_offset: starnix_uapi::off_t,
            seek_target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $crate::vfs::unbounded_seek(from_offset, seek_target)
        }
    };
}
909
/// Implements [`FileOps::sync`] as a no-op.
///
/// The generated method succeeds without doing anything when the file's node is a regular file
/// or a directory, and fails with `EINVAL` for every other node type.
#[macro_export]
macro_rules! fileops_impl_noop_sync {
    () => {
        fn sync(
            &self,
            file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            let node = file.node();
            if node.is_reg() || node.is_dir() {
                Ok(())
            } else {
                starnix_uapi::error!(EINVAL)
            }
        }
    };
}
925
926// Public re-export of macros allows them to be used like regular rust items.
927
928pub use {
929    fileops_impl_dataless, fileops_impl_delegate_read_write_and_seek, fileops_impl_directory,
930    fileops_impl_nonseekable, fileops_impl_noop_sync, fileops_impl_seekable, fileops_impl_seekless,
931    fileops_impl_unbounded_seek,
932};
/// Size in bytes of an AES-256 key (256 bits).
pub const AES256_KEY_SIZE: usize = 256 / 8;
934
935pub fn canonicalize_ioctl_request(current_task: &CurrentTask, request: u32) -> u32 {
936    if current_task.is_arch32() {
937        match request {
938            uapi::arch32::FS_IOC_GETFLAGS => uapi::FS_IOC_GETFLAGS,
939            uapi::arch32::FS_IOC_SETFLAGS => uapi::FS_IOC_SETFLAGS,
940            _ => request,
941        }
942    } else {
943        request
944    }
945}
946
947pub fn default_ioctl(
948    file: &FileObject,
949    locked: &mut Locked<Unlocked>,
950    current_task: &CurrentTask,
951    request: u32,
952    arg: SyscallArg,
953) -> Result<SyscallResult, Errno> {
954    match canonicalize_ioctl_request(current_task, request) {
955        TCGETS => error!(ENOTTY),
956        FIGETBSZ => {
957            let node = file.node();
958            let supported_file = node.is_reg() || node.is_dir();
959            if !supported_file {
960                return error!(ENOTTY);
961            }
962
963            let blocksize = file.node().stat(locked, current_task)?.st_blksize;
964            current_task.write_object(arg.into(), &blocksize)?;
965            Ok(SUCCESS)
966        }
967        FIONBIO => {
968            let arg_ref = UserAddress::from(arg).into();
969            let arg: i32 = current_task.read_object(arg_ref)?;
970            let val = if arg == 0 {
971                // Clear the NONBLOCK flag
972                OpenFlags::empty()
973            } else {
974                // Set the NONBLOCK flag
975                OpenFlags::NONBLOCK
976            };
977            file.update_file_flags(val, OpenFlags::NONBLOCK);
978            Ok(SUCCESS)
979        }
980        FIOQSIZE => {
981            let node = file.node();
982            let supported_file = node.is_reg() || node.is_dir();
983            if !supported_file {
984                return error!(ENOTTY);
985            }
986
987            let size = file.node().stat(locked, current_task)?.st_size;
988            current_task.write_object(arg.into(), &size)?;
989            Ok(SUCCESS)
990        }
991        FIONREAD => {
992            track_stub!(TODO("https://fxbug.dev/322874897"), "FIONREAD");
993            if !file.name.entry.node.is_reg() {
994                return error!(ENOTTY);
995            }
996
997            let size = file
998                .name
999                .entry
1000                .node
1001                .fetch_and_refresh_info(locked, current_task)
1002                .map_err(|_| errno!(EINVAL))?
1003                .size;
1004            let offset = usize::try_from(*file.offset.lock()).map_err(|_| errno!(EINVAL))?;
1005            let remaining =
1006                if size < offset { 0 } else { i32::try_from(size - offset).unwrap_or(i32::MAX) };
1007            current_task.write_object(arg.into(), &remaining)?;
1008            Ok(SUCCESS)
1009        }
1010        FS_IOC_FSGETXATTR => {
1011            track_stub!(TODO("https://fxbug.dev/322875209"), "FS_IOC_FSGETXATTR");
1012            let arg = UserAddress::from(arg).into();
1013            current_task.write_object(arg, &fsxattr::default())?;
1014            Ok(SUCCESS)
1015        }
1016        FS_IOC_FSSETXATTR => {
1017            track_stub!(TODO("https://fxbug.dev/322875271"), "FS_IOC_FSSETXATTR");
1018            let arg = UserAddress::from(arg).into();
1019            let _: fsxattr = current_task.read_object(arg)?;
1020            Ok(SUCCESS)
1021        }
1022        uapi::FS_IOC_GETFLAGS => {
1023            track_stub!(TODO("https://fxbug.dev/322874935"), "FS_IOC_GETFLAGS");
1024            let arg = UserRef::<u32>::from(arg);
1025            let mut flags: u32 = 0;
1026            if matches!(*file.node().fsverity.lock(), FsVerityState::FsVerity) {
1027                flags |= FS_VERITY_FL;
1028            }
1029            if file.node().info().casefold {
1030                flags |= FS_CASEFOLD_FL;
1031            }
1032            current_task.write_object(arg, &flags)?;
1033            Ok(SUCCESS)
1034        }
1035        uapi::FS_IOC_SETFLAGS => {
1036            track_stub!(TODO("https://fxbug.dev/322875367"), "FS_IOC_SETFLAGS");
1037            let arg = UserRef::<u32>::from(arg);
1038            let flags: u32 = current_task.read_object(arg)?;
1039            file.node().update_attributes(locked, current_task, |info| {
1040                info.casefold = flags & FS_CASEFOLD_FL != 0;
1041                Ok(())
1042            })?;
1043            Ok(SUCCESS)
1044        }
1045        FS_IOC_ENABLE_VERITY => {
1046            Ok(fsverity::ioctl::enable(locked, current_task, UserAddress::from(arg).into(), file)?)
1047        }
1048        FS_IOC_MEASURE_VERITY => {
1049            Ok(fsverity::ioctl::measure(locked, current_task, UserAddress::from(arg).into(), file)?)
1050        }
1051        FS_IOC_READ_VERITY_METADATA => {
1052            Ok(fsverity::ioctl::read_metadata(current_task, UserAddress::from(arg).into(), file)?)
1053        }
1054        FS_IOC_ADD_ENCRYPTION_KEY => {
1055            let fscrypt_add_key_ref = UserRef::<fscrypt_add_key_arg>::from(arg);
1056            let key_ref_addr = fscrypt_add_key_ref.next()?.addr();
1057            let mut fscrypt_add_key_arg = current_task.read_object(fscrypt_add_key_ref.clone())?;
1058            if fscrypt_add_key_arg.key_id != 0 {
1059                track_stub!(TODO("https://fxbug.dev/375649227"), "non-zero key ids");
1060                return error!(ENOTSUP);
1061            }
1062            if fscrypt_add_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
1063                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
1064                return error!(ENOTSUP);
1065            }
1066            let key = current_task
1067                .read_memory_to_vec(key_ref_addr, fscrypt_add_key_arg.raw_size as usize)?;
1068            let user_id = current_task.current_creds().uid;
1069
1070            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
1071            let key_identifier = crypt_service.add_wrapping_key(&key, user_id)?;
1072            fscrypt_add_key_arg.key_spec.u.identifier =
1073                fscrypt_identifier { value: key_identifier, ..Default::default() };
1074            current_task.write_object(fscrypt_add_key_ref, &fscrypt_add_key_arg)?;
1075            Ok(SUCCESS)
1076        }
1077        FS_IOC_SET_ENCRYPTION_POLICY => {
1078            let fscrypt_policy_ref = UserRef::<uapi::fscrypt_policy_v2>::from(arg);
1079            let policy = current_task.read_object(fscrypt_policy_ref)?;
1080            if policy.version as u32 != FSCRYPT_POLICY_V2 {
1081                track_stub!(TODO("https://fxbug.dev/375649656"), "fscrypt policy v1");
1082                return error!(ENOTSUP);
1083            }
1084            if policy.flags != 0 {
1085                track_stub!(
1086                    TODO("https://fxbug.dev/375700939"),
1087                    "fscrypt policy flags",
1088                    policy.flags
1089                );
1090            }
1091            if policy.contents_encryption_mode as u32 != FSCRYPT_MODE_AES_256_XTS {
1092                track_stub!(
1093                    TODO("https://fxbug.dev/375684057"),
1094                    "fscrypt encryption modes",
1095                    policy.contents_encryption_mode
1096                );
1097            }
1098            if policy.filenames_encryption_mode as u32 != FSCRYPT_MODE_AES_256_CTS {
1099                track_stub!(
1100                    TODO("https://fxbug.dev/375684057"),
1101                    "fscrypt encryption modes",
1102                    policy.filenames_encryption_mode
1103                );
1104            }
1105            let user_id = current_task.current_creds().uid;
1106            if user_id != file.node().info().uid {
1107                security::check_task_capable(current_task, CAP_FOWNER)
1108                    .map_err(|_| errno!(EACCES))?;
1109            }
1110
1111            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
1112            if let Some(users) =
1113                crypt_service.get_users_for_key(EncryptionKeyId::from(policy.master_key_identifier))
1114            {
1115                if !users.contains(&user_id) {
1116                    return error!(ENOKEY);
1117                }
1118            } else {
1119                track_stub!(
1120                    TODO("https://fxbug.dev/375067633"),
1121                    "users with CAP_FOWNER can set encryption policies with unadded keys"
1122                );
1123                return error!(ENOKEY);
1124            }
1125
1126            let attributes = file.node().fetch_and_refresh_info(locked, current_task)?;
1127            if let Some(wrapping_key_id) = &attributes.wrapping_key_id {
1128                if wrapping_key_id != &policy.master_key_identifier {
1129                    return error!(EEXIST);
1130                }
1131            } else {
1132                // Don't deadlock! update_attributes will also lock the attributes.
1133                std::mem::drop(attributes);
1134                file.node().update_attributes(locked, current_task, |info| {
1135                    info.wrapping_key_id = Some(policy.master_key_identifier);
1136                    Ok(())
1137                })?;
1138            }
1139            Ok(SUCCESS)
1140        }
1141        FS_IOC_REMOVE_ENCRYPTION_KEY => {
1142            let fscrypt_remove_key_arg_ref = UserRef::<uapi::fscrypt_remove_key_arg>::from(arg);
1143            let fscrypt_remove_key_arg = current_task.read_object(fscrypt_remove_key_arg_ref)?;
1144            if fscrypt_remove_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
1145                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
1146                return error!(ENOTSUP);
1147            }
1148            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
1149            let user_id = current_task.current_creds().uid;
1150            #[allow(
1151                clippy::undocumented_unsafe_blocks,
1152                reason = "Force documented unsafe blocks in Starnix"
1153            )]
1154            let identifier = unsafe { fscrypt_remove_key_arg.key_spec.u.identifier.value };
1155            crypt_service.forget_wrapping_key(identifier, user_id)?;
1156            Ok(SUCCESS)
1157        }
1158        _ => {
1159            track_stub!(TODO("https://fxbug.dev/322874917"), "ioctl fallthrough", request);
1160            error!(ENOTTY)
1161        }
1162    }
1163}
1164
/// Fallback fcntl handler: records the command `cmd` through `track_stub!` and fails with
/// `EINVAL`.
pub fn default_fcntl(cmd: u32) -> Result<SyscallResult, Errno> {
    track_stub!(TODO("https://fxbug.dev/322875704"), "default fcntl", cmd);
    error!(EINVAL)
}
1169
/// Stateless [`FileOps`] implementation whose read, write, seek, memory, readdir and ioctl
/// operations all fail with `EBADF`.
///
/// NOTE(review): presumably backs files opened with `O_PATH` - confirm against the call sites.
pub struct OPathOps {}

impl OPathOps {
    /// Creates a new `OPathOps`.
    pub fn new() -> Self {
        Self {}
    }
}
1177
1178impl FileOps for OPathOps {
1179    fileops_impl_noop_sync!();
1180
1181    fn has_persistent_offsets(&self) -> bool {
1182        false
1183    }
1184    fn is_seekable(&self) -> bool {
1185        true
1186    }
1187    fn read(
1188        &self,
1189        _locked: &mut Locked<FileOpsCore>,
1190        _file: &FileObject,
1191        _current_task: &CurrentTask,
1192        _offset: usize,
1193        _data: &mut dyn OutputBuffer,
1194    ) -> Result<usize, Errno> {
1195        error!(EBADF)
1196    }
1197    fn write(
1198        &self,
1199        _locked: &mut Locked<FileOpsCore>,
1200        _file: &FileObject,
1201        _current_task: &CurrentTask,
1202        _offset: usize,
1203        _data: &mut dyn InputBuffer,
1204    ) -> Result<usize, Errno> {
1205        error!(EBADF)
1206    }
1207    fn seek(
1208        &self,
1209        _locked: &mut Locked<FileOpsCore>,
1210        _file: &FileObject,
1211        _current_task: &CurrentTask,
1212        _current_offset: off_t,
1213        _target: SeekTarget,
1214    ) -> Result<off_t, Errno> {
1215        error!(EBADF)
1216    }
1217    fn get_memory(
1218        &self,
1219        _locked: &mut Locked<FileOpsCore>,
1220        _file: &FileObject,
1221        _current_task: &CurrentTask,
1222        _length: Option<usize>,
1223        _prot: ProtectionFlags,
1224    ) -> Result<Arc<MemoryObject>, Errno> {
1225        error!(EBADF)
1226    }
1227    fn readdir(
1228        &self,
1229        _locked: &mut Locked<FileOpsCore>,
1230        _file: &FileObject,
1231        _current_task: &CurrentTask,
1232        _sink: &mut dyn DirentSink,
1233    ) -> Result<(), Errno> {
1234        error!(EBADF)
1235    }
1236
1237    fn ioctl(
1238        &self,
1239        _locked: &mut Locked<Unlocked>,
1240        _file: &FileObject,
1241        _current_task: &CurrentTask,
1242        _request: u32,
1243        _arg: SyscallArg,
1244    ) -> Result<SyscallResult, Errno> {
1245        error!(EBADF)
1246    }
1247}
1248
1249pub struct ProxyFileOps(pub FileHandle);
1250
1251impl FileOps for ProxyFileOps {
1252    // `close` is not delegated because the last reference to a `ProxyFileOps` is not
1253    // necessarily the last reference of the proxied file. If this is the case, the
1254    // releaser will handle it.
1255    // These don't take &FileObject making it too hard to handle them properly in the macro
1256    fn has_persistent_offsets(&self) -> bool {
1257        self.0.ops().has_persistent_offsets()
1258    }
1259    fn writes_update_seek_offset(&self) -> bool {
1260        self.0.ops().writes_update_seek_offset()
1261    }
1262    fn is_seekable(&self) -> bool {
1263        self.0.ops().is_seekable()
1264    }
1265    // These take &mut Locked<L> as a second argument
1266    fn flush(
1267        &self,
1268        locked: &mut Locked<FileOpsCore>,
1269        _file: &FileObject,
1270        current_task: &CurrentTask,
1271    ) {
1272        self.0.ops().flush(locked, &self.0, current_task);
1273    }
1274    fn wait_async(
1275        &self,
1276        locked: &mut Locked<FileOpsCore>,
1277        _file: &FileObject,
1278        current_task: &CurrentTask,
1279        waiter: &Waiter,
1280        events: FdEvents,
1281        handler: EventHandler,
1282    ) -> Option<WaitCanceler> {
1283        self.0.ops().wait_async(locked, &self.0, current_task, waiter, events, handler)
1284    }
1285    fn query_events(
1286        &self,
1287        locked: &mut Locked<FileOpsCore>,
1288        _file: &FileObject,
1289        current_task: &CurrentTask,
1290    ) -> Result<FdEvents, Errno> {
1291        self.0.ops().query_events(locked, &self.0, current_task)
1292    }
1293    fn read(
1294        &self,
1295        locked: &mut Locked<FileOpsCore>,
1296        _file: &FileObject,
1297        current_task: &CurrentTask,
1298        offset: usize,
1299        data: &mut dyn OutputBuffer,
1300    ) -> Result<usize, Errno> {
1301        self.0.ops().read(locked, &self.0, current_task, offset, data)
1302    }
1303    fn write(
1304        &self,
1305        locked: &mut Locked<FileOpsCore>,
1306        _file: &FileObject,
1307        current_task: &CurrentTask,
1308        offset: usize,
1309        data: &mut dyn InputBuffer,
1310    ) -> Result<usize, Errno> {
1311        self.0.ops().write(locked, &self.0, current_task, offset, data)
1312    }
1313    fn ioctl(
1314        &self,
1315        locked: &mut Locked<Unlocked>,
1316        _file: &FileObject,
1317        current_task: &CurrentTask,
1318        request: u32,
1319        arg: SyscallArg,
1320    ) -> Result<SyscallResult, Errno> {
1321        self.0.ops().ioctl(locked, &self.0, current_task, request, arg)
1322    }
1323    fn fcntl(
1324        &self,
1325        _file: &FileObject,
1326        current_task: &CurrentTask,
1327        cmd: u32,
1328        arg: u64,
1329    ) -> Result<SyscallResult, Errno> {
1330        self.0.ops().fcntl(&self.0, current_task, cmd, arg)
1331    }
1332    fn readdir(
1333        &self,
1334        locked: &mut Locked<FileOpsCore>,
1335        _file: &FileObject,
1336        current_task: &CurrentTask,
1337        sink: &mut dyn DirentSink,
1338    ) -> Result<(), Errno> {
1339        self.0.ops().readdir(locked, &self.0, current_task, sink)
1340    }
1341    fn sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1342        self.0.ops().sync(&self.0, current_task)
1343    }
1344    fn data_sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1345        self.0.ops().sync(&self.0, current_task)
1346    }
1347    fn get_memory(
1348        &self,
1349        locked: &mut Locked<FileOpsCore>,
1350        _file: &FileObject,
1351        current_task: &CurrentTask,
1352        length: Option<usize>,
1353        prot: ProtectionFlags,
1354    ) -> Result<Arc<MemoryObject>, Errno> {
1355        self.0.ops.get_memory(locked, &self.0, current_task, length, prot)
1356    }
1357    fn mmap(
1358        &self,
1359        locked: &mut Locked<FileOpsCore>,
1360        _file: &FileObject,
1361        current_task: &CurrentTask,
1362        addr: DesiredAddress,
1363        memory_offset: u64,
1364        length: usize,
1365        prot_flags: ProtectionFlags,
1366        options: MappingOptions,
1367        filename: NamespaceNode,
1368    ) -> Result<UserAddress, Errno> {
1369        self.0.ops.mmap(
1370            locked,
1371            &self.0,
1372            current_task,
1373            addr,
1374            memory_offset,
1375            length,
1376            prot_flags,
1377            options,
1378            filename,
1379        )
1380    }
1381    fn seek(
1382        &self,
1383        locked: &mut Locked<FileOpsCore>,
1384        _file: &FileObject,
1385        current_task: &CurrentTask,
1386        offset: off_t,
1387        target: SeekTarget,
1388    ) -> Result<off_t, Errno> {
1389        self.0.ops.seek(locked, &self.0, current_task, offset, target)
1390    }
1391}
1392
1393#[derive(Debug, Default, Copy, Clone)]
1394pub enum FileAsyncOwner {
1395    #[default]
1396    Unowned,
1397    Thread(pid_t),
1398    Process(pid_t),
1399    ProcessGroup(pid_t),
1400}
1401
1402impl FileAsyncOwner {
1403    pub fn validate(self, current_task: &CurrentTask) -> Result<(), Errno> {
1404        match self {
1405            FileAsyncOwner::Unowned => (),
1406            FileAsyncOwner::Thread(id) | FileAsyncOwner::Process(id) => {
1407                Task::from_weak(&current_task.get_task(id))?;
1408            }
1409            FileAsyncOwner::ProcessGroup(pgid) => {
1410                current_task
1411                    .kernel()
1412                    .pids
1413                    .read()
1414                    .get_process_group(pgid)
1415                    .ok_or_else(|| errno!(ESRCH))?;
1416            }
1417        }
1418        Ok(())
1419    }
1420}
1421
1422#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
1423pub struct FileObjectId(u64);
1424
1425impl FileObjectId {
1426    pub fn as_epoll_key(&self) -> EpollKey {
1427        self.0 as EpollKey
1428    }
1429}
1430
/// A session with a file object.
///
/// Each time a client calls open(), we create a new FileObject from the
/// underlying FsNode that receives the open(). This object contains the state
/// that is specific to this session whereas the underlying FsNode contains
/// the state that is shared between all the sessions.
///
/// The per-session state lives in [`FileObjectState`], which `FileObject` dereferences to.
pub struct FileObject {
    /// The operations that implement this file's behaviour.
    ops: Box<dyn FileOps>,
    /// The per-session state, exposed through `Deref`.
    state: FileObjectState,
}
1441
1442impl std::ops::Deref for FileObject {
1443    type Target = FileObjectState;
1444    fn deref(&self) -> &Self::Target {
1445        &self.state
1446    }
1447}
1448
/// The per-session state of a [`FileObject`]: everything except the `FileOps` implementation.
pub struct FileObjectState {
    /// Weak reference to the `FileHandle` of this `FileObject`. This allows to retrieve the
    /// `FileHandle` from a `FileObject`.
    pub weak_handle: WeakFileHandle,

    /// A unique identifier for this file object.
    pub id: FileObjectId,

    /// The NamespaceNode associated with this FileObject.
    ///
    /// Represents the name the process used to open this file.
    pub name: ActiveNamespaceNode,

    /// The file system of the node this file was opened on.
    pub fs: FileSystemHandle,

    /// The current file offset. Starts at 0.
    pub offset: Mutex<off_t>,

    /// The open flags of this file, stored without `OpenFlags::CREAT`.
    flags: AtomicOpenFlags,

    /// The asynchronous owner of this file; defaults to `FileAsyncOwner::Unowned`.
    async_owner: Mutex<FileAsyncOwner>,

    /// The `EpollFileObject`s that added this `FileObject` as a control file, keyed by
    /// `FileHandleKey`.
    epoll_files: Mutex<HashMap<FileHandleKey, WeakFileHandle>>,

    /// See fcntl F_SETLEASE and F_GETLEASE.
    lease: Mutex<FileLeaseType>,

    // This extra reference to the FsNode should not be needed, but it is needed to make
    // Inotify.ExcludeUnlinkInodeEvents pass.
    _mysterious_node: Option<FsNodeHandle>,

    /// Opaque security state associated with this file object.
    pub security_state: security::FileObjectState,
}
1484
/// Release action for [`FileObject`]. The enum has no variants and is only used as a type
/// parameter of [`FileReleaser`].
pub enum FileObjectReleaserAction {}
impl ReleaserAction<FileObject> for FileObjectReleaserAction {
    /// Hands the file object over to `register_delayed_release` rather than releasing it here.
    fn release(file_object: ReleaseGuard<FileObject>) {
        register_delayed_release(file_object);
    }
}
/// An `ObjectReleaser` around a [`FileObject`] that uses [`FileObjectReleaserAction`].
pub type FileReleaser = ObjectReleaser<FileObject, FileObjectReleaserAction>;
/// Shared, reference-counted handle to a [`FileObject`].
pub type FileHandle = Arc<FileReleaser>;
/// Weak counterpart of [`FileHandle`].
pub type WeakFileHandle = Weak<FileReleaser>;
/// `WeakKey` over a [`FileReleaser`]; the key type of `FileObjectState::epoll_files`.
pub type FileHandleKey = WeakKey<FileReleaser>;
1495
1496impl FileObjectState {
1497    /// The FsNode from which this FileObject was created.
1498    pub fn node(&self) -> &FsNodeHandle {
1499        &self.name.entry.node
1500    }
1501
1502    pub fn flags(&self) -> OpenFlags {
1503        self.flags.load(Ordering::Relaxed)
1504    }
1505
1506    pub fn can_read(&self) -> bool {
1507        self.flags.load(Ordering::Relaxed).can_read()
1508    }
1509
1510    pub fn can_write(&self) -> bool {
1511        self.flags.load(Ordering::Relaxed).can_write()
1512    }
1513
1514    /// Returns false if the file is not allowed to be executed.
1515    pub fn can_exec(&self) -> bool {
1516        let mounted_no_exec = self.name.to_passive().mount.flags().contains(MountFlags::NOEXEC);
1517        let no_exec_seal = self
1518            .node()
1519            .write_guard_state
1520            .lock()
1521            .get_seals()
1522            .map(|seals| seals.contains(SealFlags::NO_EXEC))
1523            .unwrap_or(false);
1524        !(mounted_no_exec || no_exec_seal)
1525    }
1526
1527    // Notifies watchers on the current node and its parent about an event.
1528    pub fn notify(&self, event_mask: InotifyMask) {
1529        self.name.notify(event_mask)
1530    }
1531}
1532
1533impl FileObject {
1534    /// Create a FileObject that is not mounted in a namespace.
1535    ///
1536    /// In particular, this will create a new unrooted entries. This should not be used on
1537    /// file system with persistent entries, as the created entry will be out of sync with the one
1538    /// from the file system.
1539    ///
1540    /// The returned FileObject does not have a name.
1541    pub fn new_anonymous<L>(
1542        locked: &mut Locked<L>,
1543        current_task: &CurrentTask,
1544        ops: Box<dyn FileOps>,
1545        node: FsNodeHandle,
1546        flags: OpenFlags,
1547    ) -> FileHandle
1548    where
1549        L: LockEqualOrBefore<FileOpsCore>,
1550    {
1551        assert!(!node.fs().has_permanent_entries());
1552        Self::new(
1553            locked,
1554            current_task,
1555            ops,
1556            NamespaceNode::new_anonymous_unrooted(current_task, node),
1557            flags,
1558        )
1559        .expect("Failed to create anonymous FileObject")
1560    }
1561
    /// Create a FileObject with an associated NamespaceNode.
    ///
    /// This function is not typically called directly. Instead, consider
    /// calling NamespaceNode::open.
    ///
    /// Fails if the file is opened for writing and the node's write guard cannot be acquired,
    /// or if the `open` hook of `ops` fails.
    pub fn new<L>(
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        ops: Box<dyn FileOps>,
        name: NamespaceNode,
        flags: OpenFlags,
    ) -> Result<FileHandle, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        // Writable files acquire the node's write guard up front and keep an extra reference to
        // the node (see the comment on `_mysterious_node`).
        let _mysterious_node = if flags.can_write() {
            name.entry.node.write_guard_state.lock().acquire(FileWriteGuardMode::WriteFile)?;
            Some(name.entry.node.clone())
        } else {
            None
        };
        let fs = name.entry.node.fs();
        let id = FileObjectId(current_task.kernel.next_file_object_id.next());
        let security_state = security::file_alloc_security(current_task);
        // `new_cyclic` lets the state store a weak reference to the handle being created.
        let file = FileHandle::new_cyclic(|weak_handle| {
            Self {
                ops,
                state: FileObjectState {
                    weak_handle: weak_handle.clone(),
                    id,
                    name: name.into_active(),
                    fs,
                    offset: Mutex::new(0),
                    // CREAT is removed from the flags that are stored.
                    flags: AtomicOpenFlags::new(flags - OpenFlags::CREAT),
                    async_owner: Default::default(),
                    epoll_files: Default::default(),
                    lease: Default::default(),
                    _mysterious_node,
                    security_state,
                },
            }
            .into()
        });
        // The OPEN notification is sent before the ops' `open` hook runs, so it is emitted even
        // if that hook then fails.
        file.notify(InotifyMask::OPEN);

        file.ops().open(locked.cast_locked::<FileOpsCore>(), &file, current_task)?;
        Ok(file)
    }
1609
1610    pub fn max_access_for_memory_mapping(&self) -> Access {
1611        let mut access = Access::EXIST;
1612        if self.can_exec() {
1613            access |= Access::EXEC;
1614        }
1615        let flags = self.flags.load(Ordering::Relaxed);
1616        if flags.can_read() {
1617            access |= Access::READ;
1618        }
1619        if flags.can_write() {
1620            access |= Access::WRITE;
1621        }
1622        access
1623    }
1624
1625    pub fn ops(&self) -> &dyn FileOps {
1626        self.ops.as_ref()
1627    }
1628
    /// Returns the `type_name()` reported by this file's [`FileOps`] implementation.
    pub fn ops_type_name(&self) -> &'static str {
        self.ops().type_name()
    }
1632
    /// Whether `OpenFlags::NONBLOCK` is currently set on this file.
    pub fn is_non_blocking(&self) -> bool {
        self.flags().contains(OpenFlags::NONBLOCK)
    }
1636
    /// Common implementation for blocking operations.
    ///
    /// This function is used to implement the blocking operations for file objects. FileOps
    /// implementations should call this function to handle the blocking logic.
    ///
    /// The `op` parameter is a function that implements the non-blocking version of the operation.
    /// The function is called once without registering a waiter in case no wait is needed. If the
    /// operation returns EAGAIN and the file object is non-blocking, the function returns EAGAIN.
    /// Files opened with `OpenFlags::DIRECTORY` are treated as blocking here even when
    /// `NONBLOCK` is set.
    ///
    /// If the operation returns EAGAIN and the file object is blocking, the function will block
    /// until the given events are triggered. At that time, the operation is retried. Notice that
    /// the `op` function can be called multiple times before the operation completes.
    ///
    /// The `deadline` parameter is the deadline for the operation; `None` means wait
    /// indefinitely. If the wait times out, the waiter's ETIMEDOUT is translated to EAGAIN,
    /// which is the error the caller sees. Any other error from the wait is returned unchanged.
    pub fn blocking_op<L, T, Op>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        events: FdEvents,
        deadline: Option<zx::MonotonicInstant>,
        mut op: Op,
    ) -> Result<T, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
        Op: FnMut(&mut Locked<L>) -> Result<T, Errno>,
    {
        // Don't return EAGAIN for directories. This can happen because glibc always opens a
        // directory with O_NONBLOCK.
        let can_return_eagain = self.flags().contains(OpenFlags::NONBLOCK)
            && !self.flags().contains(OpenFlags::DIRECTORY);
        // Run the operation a first time without registering a waiter in case no wait is needed.
        match op(locked) {
            // Only an EAGAIN that must not be surfaced falls through to the blocking loop.
            Err(errno) if errno == EAGAIN && !can_return_eagain => {}
            result => return result,
        }

        let waiter = Waiter::new();
        loop {
            // Register the waiter before running the operation to prevent a race.
            self.wait_async(locked, current_task, &waiter, events, WaitCallback::none());
            match op(locked) {
                Err(e) if e == EAGAIN => {}
                result => return result,
            }
            let locked = locked.cast_locked::<FileOpsCore>();
            waiter
                .wait_until(
                    locked,
                    current_task,
                    deadline.unwrap_or(zx::MonotonicInstant::INFINITE),
                )
                // A timed-out wait is reported to the caller as EAGAIN.
                .map_err(|e| if e == ETIMEDOUT { errno!(EAGAIN) } else { e })?;
        }
    }
1692
    /// Whether this file's [`FileOps`] implementation reports the file as seekable.
    pub fn is_seekable(&self) -> bool {
        self.ops().is_seekable()
    }
1696
    /// Whether this file's [`FileOps`] implementation reports persistent offsets.
    pub fn has_persistent_offsets(&self) -> bool {
        self.ops().has_persistent_offsets()
    }
1700
1701    /// Common implementation for `read` and `read_at`.
1702    fn read_internal<R>(&self, current_task: &CurrentTask, read: R) -> Result<usize, Errno>
1703    where
1704        R: FnOnce() -> Result<usize, Errno>,
1705    {
1706        security::file_permission(current_task, self, security::PermissionFlags::READ)?;
1707
1708        if !self.can_read() {
1709            return error!(EBADF);
1710        }
1711        let bytes_read = read()?;
1712
1713        // TODO(steveaustin) - omit updating time_access to allow info to be immutable
1714        // and thus allow simultaneous reads.
1715        self.update_atime();
1716        if bytes_read > 0 {
1717            self.notify(InotifyMask::ACCESS);
1718        }
1719
1720        Ok(bytes_read)
1721    }
1722
1723    pub fn read<L>(
1724        &self,
1725        locked: &mut Locked<L>,
1726        current_task: &CurrentTask,
1727        data: &mut dyn OutputBuffer,
1728    ) -> Result<usize, Errno>
1729    where
1730        L: LockEqualOrBefore<FileOpsCore>,
1731    {
1732        self.read_internal(current_task, || {
1733            let locked = locked.cast_locked::<FileOpsCore>();
1734            if !self.ops().has_persistent_offsets() {
1735                if data.available() > MAX_LFS_FILESIZE {
1736                    return error!(EINVAL);
1737                }
1738                return self.ops.read(locked, self, current_task, 0, data);
1739            }
1740
1741            let mut offset_guard = self.offset.lock();
1742            let offset = *offset_guard as usize;
1743            checked_add_offset_and_length(offset, data.available())?;
1744            let read = self.ops.read(locked, self, current_task, offset, data)?;
1745            *offset_guard += read as off_t;
1746            Ok(read)
1747        })
1748    }
1749
1750    pub fn read_at<L>(
1751        &self,
1752        locked: &mut Locked<L>,
1753        current_task: &CurrentTask,
1754        offset: usize,
1755        data: &mut dyn OutputBuffer,
1756    ) -> Result<usize, Errno>
1757    where
1758        L: LockEqualOrBefore<FileOpsCore>,
1759    {
1760        if !self.ops().is_seekable() {
1761            return error!(ESPIPE);
1762        }
1763        checked_add_offset_and_length(offset, data.available())?;
1764        let locked = locked.cast_locked::<FileOpsCore>();
1765        self.read_internal(current_task, || self.ops.read(locked, self, current_task, offset, data))
1766    }
1767
1768    /// Common checks before calling ops().write.
1769    fn write_common<L>(
1770        &self,
1771        locked: &mut Locked<L>,
1772        current_task: &CurrentTask,
1773        offset: usize,
1774        data: &mut dyn InputBuffer,
1775    ) -> Result<usize, Errno>
1776    where
1777        L: LockEqualOrBefore<FileOpsCore>,
1778    {
1779        security::file_permission(current_task, self, security::PermissionFlags::WRITE)?;
1780
1781        // We need to cap the size of `data` to prevent us from growing the file too large,
1782        // according to <https://man7.org/linux/man-pages/man2/write.2.html>:
1783        //
1784        //   The number of bytes written may be less than count if, for example, there is
1785        //   insufficient space on the underlying physical medium, or the RLIMIT_FSIZE resource
1786        //   limit is encountered (see setrlimit(2)),
1787        checked_add_offset_and_length(offset, data.available())?;
1788        let locked = locked.cast_locked::<FileOpsCore>();
1789        self.ops().write(locked, self, current_task, offset, data)
1790    }
1791
1792    /// Common wrapper work for `write` and `write_at`.
1793    fn write_fn<W, L>(
1794        &self,
1795        locked: &mut Locked<L>,
1796        current_task: &CurrentTask,
1797        write: W,
1798    ) -> Result<usize, Errno>
1799    where
1800        L: LockEqualOrBefore<FileOpsCore>,
1801        W: FnOnce(&mut Locked<L>) -> Result<usize, Errno>,
1802    {
1803        if !self.can_write() {
1804            return error!(EBADF);
1805        }
1806        self.node().clear_suid_and_sgid_bits(locked, current_task)?;
1807        let bytes_written = write(locked)?;
1808        self.node().update_ctime_mtime();
1809
1810        if bytes_written > 0 {
1811            self.notify(InotifyMask::MODIFY);
1812        }
1813
1814        Ok(bytes_written)
1815    }
1816
    /// Writes `data` at the file's stored offset, or at end-of-file when the file was opened
    /// with `OpenFlags::APPEND`.
    ///
    /// Ops without persistent offsets are written at offset 0 and the stored offset is not
    /// consulted. Otherwise the offset lock is held for the whole operation:
    ///
    /// - in append mode the node's append lock is taken for writing and the stored offset is
    ///   first moved to the end of the file via `ops().seek(.., SeekTarget::End(0))`;
    /// - in non-append mode the append lock is taken for reading.
    ///
    /// The stored offset advances by the number of bytes written only if the ops report
    /// `writes_update_seek_offset()`.
    pub fn write<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        self.write_fn(locked, current_task, |locked| {
            // Ops without persistent offsets ignore the stored offset entirely.
            if !self.ops().has_persistent_offsets() {
                return self.write_common(locked, current_task, 0, data);
            }
            // NOTE(review): the block below manufactures a fresh `Unlocked` token that shadows
            // the caller-provided `locked`, so the append lock is acquired outside the lock
            // ordering carried by the caller. The TODO below tracks removing this.
            //
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let locked = unsafe { Unlocked::new() };
            // Held until the closure returns, so the offset read, the write and the offset
            // update below happen under one acquisition of the offset lock.
            let mut offset = self.offset.lock();
            let bytes_written = if self.flags().contains(OpenFlags::APPEND) {
                // The write-mode append lock guard lives for the seek-to-end and the write.
                let (_guard, locked) = self.node().append_lock.write_and(locked, current_task)?;
                *offset = self.ops().seek(
                    locked.cast_locked::<FileOpsCore>(),
                    self,
                    current_task,
                    *offset,
                    SeekTarget::End(0),
                )?;
                self.write_common(locked, current_task, *offset as usize, data)
            } else {
                let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;
                self.write_common(locked, current_task, *offset as usize, data)
            }?;
            if self.ops().writes_update_seek_offset() {
                *offset += bytes_written as off_t;
            }
            Ok(bytes_written)
        })
    }
1858
    /// Writes `data` at an explicit `offset` without touching the file's stored offset.
    ///
    /// Fails with `ESPIPE` when the ops are not seekable. The node's append lock is held for
    /// reading during the write. If the file was opened with `OpenFlags::APPEND`, the
    /// caller's `offset` is still validated but is then replaced by the end-of-file offset
    /// returned by `default_eof_offset`, matching the Linux behaviour described below.
    pub fn write_at<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        mut offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        if !self.ops().is_seekable() {
            return error!(ESPIPE);
        }
        self.write_fn(locked, current_task, |_locked| {
            // NOTE(review): the `locked` passed in by `write_fn` is ignored; a fresh `Unlocked`
            // token is manufactured instead so that the append lock can be acquired. The TODO
            // below tracks removing this.
            //
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let locked = unsafe { Unlocked::new() };
            let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;

            // According to LTP test pwrite04:
            //
            //   POSIX requires that opening a file with the O_APPEND flag should have no effect on the
            //   location at which pwrite() writes data. However, on Linux, if a file is opened with
            //   O_APPEND, pwrite() appends data to the end of the file, regardless of the value of offset.
            if self.flags().contains(OpenFlags::APPEND) && self.ops().is_seekable() {
                // The caller-supplied offset is validated before it is discarded, so an
                // invalid offset still produces an error in append mode.
                checked_add_offset_and_length(offset, data.available())?;
                offset = default_eof_offset(locked, self, current_task)? as usize;
            }

            self.write_common(locked, current_task, offset, data)
        })
    }
1895
1896    pub fn seek<L>(
1897        &self,
1898        locked: &mut Locked<L>,
1899        current_task: &CurrentTask,
1900        target: SeekTarget,
1901    ) -> Result<off_t, Errno>
1902    where
1903        L: LockEqualOrBefore<FileOpsCore>,
1904    {
1905        let locked = locked.cast_locked::<FileOpsCore>();
1906        let locked = locked;
1907
1908        if !self.ops().is_seekable() {
1909            return error!(ESPIPE);
1910        }
1911
1912        if !self.ops().has_persistent_offsets() {
1913            return self.ops().seek(locked, self, current_task, 0, target);
1914        }
1915
1916        let mut offset_guard = self.offset.lock();
1917        let new_offset = self.ops().seek(locked, self, current_task, *offset_guard, target)?;
1918        *offset_guard = new_offset;
1919        Ok(new_offset)
1920    }
1921
    /// Delegates to the `sync` operation of the file's ops and returns its result.
    pub fn sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
        self.ops().sync(self, current_task)
    }
1925
    /// Delegates to the `data_sync` operation of the file's ops and returns its result.
    pub fn data_sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
        self.ops().data_sync(self, current_task)
    }
1929
1930    pub fn get_memory<L>(
1931        &self,
1932        locked: &mut Locked<L>,
1933        current_task: &CurrentTask,
1934        length: Option<usize>,
1935        prot: ProtectionFlags,
1936    ) -> Result<Arc<MemoryObject>, Errno>
1937    where
1938        L: LockEqualOrBefore<FileOpsCore>,
1939    {
1940        if prot.contains(ProtectionFlags::READ) && !self.can_read() {
1941            return error!(EACCES);
1942        }
1943        if prot.contains(ProtectionFlags::WRITE) && !self.can_write() {
1944            return error!(EACCES);
1945        }
1946        if prot.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1947            return error!(EPERM);
1948        }
1949        self.ops().get_memory(locked.cast_locked::<FileOpsCore>(), self, current_task, length, prot)
1950    }
1951
1952    pub fn mmap<L>(
1953        &self,
1954        locked: &mut Locked<L>,
1955        current_task: &CurrentTask,
1956        addr: DesiredAddress,
1957        memory_offset: u64,
1958        length: usize,
1959        prot_flags: ProtectionFlags,
1960        options: MappingOptions,
1961        filename: NamespaceNode,
1962    ) -> Result<UserAddress, Errno>
1963    where
1964        L: LockEqualOrBefore<FileOpsCore>,
1965    {
1966        let locked = locked.cast_locked::<FileOpsCore>();
1967        if !self.can_read() {
1968            return error!(EACCES);
1969        }
1970        if prot_flags.contains(ProtectionFlags::WRITE)
1971            && !self.can_write()
1972            && options.contains(MappingOptions::SHARED)
1973        {
1974            return error!(EACCES);
1975        }
1976        if prot_flags.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1977            return error!(EPERM);
1978        }
1979        self.ops().mmap(
1980            locked,
1981            self,
1982            current_task,
1983            addr,
1984            memory_offset,
1985            length,
1986            prot_flags,
1987            options,
1988            filename,
1989        )
1990    }
1991
1992    pub fn readdir<L>(
1993        &self,
1994        locked: &mut Locked<L>,
1995        current_task: &CurrentTask,
1996        sink: &mut dyn DirentSink,
1997    ) -> Result<(), Errno>
1998    where
1999        L: LockEqualOrBefore<FileOpsCore>,
2000    {
2001        let locked = locked.cast_locked::<FileOpsCore>();
2002        if self.name.entry.is_dead() {
2003            return error!(ENOENT);
2004        }
2005
2006        self.ops().readdir(locked, self, current_task, sink)?;
2007        self.update_atime();
2008        self.notify(InotifyMask::ACCESS);
2009        Ok(())
2010    }
2011
2012    pub fn ioctl(
2013        &self,
2014        locked: &mut Locked<Unlocked>,
2015        current_task: &CurrentTask,
2016        request: u32,
2017        arg: SyscallArg,
2018    ) -> Result<SyscallResult, Errno> {
2019        security::check_file_ioctl_access(current_task, &self, request)?;
2020
2021        if request == FIBMAP {
2022            security::check_task_capable(current_task, CAP_SYS_RAWIO)?;
2023
2024            // TODO: https://fxbug.dev/404795644 - eliminate this phoney response when the SELinux
2025            // Test Suite no longer requires it.
2026            if current_task.kernel().features.selinux_test_suite {
2027                let phoney_block = 0xbadf000du32;
2028                current_task.write_object(arg.into(), &phoney_block)?;
2029                return Ok(SUCCESS);
2030            }
2031        }
2032
2033        self.ops().ioctl(locked, self, current_task, request, arg)
2034    }
2035
    /// Forwards an fcntl command (`cmd` with its raw argument `arg`) to the file's ops and
    /// returns their result.
    pub fn fcntl(
        &self,
        current_task: &CurrentTask,
        cmd: u32,
        arg: u64,
    ) -> Result<SyscallResult, Errno> {
        self.ops().fcntl(self, current_task, cmd, arg)
    }
2044
2045    pub fn ftruncate<L>(
2046        &self,
2047        locked: &mut Locked<L>,
2048        current_task: &CurrentTask,
2049        length: u64,
2050    ) -> Result<(), Errno>
2051    where
2052        L: LockBefore<BeforeFsNodeAppend>,
2053    {
2054        // The file must be opened with write permissions. Otherwise
2055        // truncating it is forbidden.
2056        if !self.can_write() {
2057            return error!(EINVAL);
2058        }
2059        self.node().ftruncate(locked, current_task, length)?;
2060        self.name.entry.notify_ignoring_excl_unlink(InotifyMask::MODIFY);
2061        Ok(())
2062    }
2063
2064    pub fn fallocate<L>(
2065        &self,
2066        locked: &mut Locked<L>,
2067        current_task: &CurrentTask,
2068        mode: FallocMode,
2069        offset: u64,
2070        length: u64,
2071    ) -> Result<(), Errno>
2072    where
2073        L: LockBefore<BeforeFsNodeAppend>,
2074    {
2075        // If the file is a pipe or FIFO, ESPIPE is returned.
2076        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2077        if self.node().is_fifo() {
2078            return error!(ESPIPE);
2079        }
2080
2081        // Must be a regular file or directory.
2082        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2083        if !self.node().is_dir() && !self.node().is_reg() {
2084            return error!(ENODEV);
2085        }
2086
2087        // The file must be opened with write permissions. Otherwise operation is forbidden.
2088        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2089        if !self.can_write() {
2090            return error!(EBADF);
2091        }
2092
2093        self.node().fallocate(locked, current_task, mode, offset, length)?;
2094        self.notify(InotifyMask::MODIFY);
2095        Ok(())
2096    }
2097
    /// Asks the file's ops for a Zircon handle representing this file.
    ///
    /// The `Option` and any error come straight from the ops' `to_handle`.
    pub fn to_handle(
        &self,
        current_task: &CurrentTask,
    ) -> Result<Option<zx::NullableHandle>, Errno> {
        self.ops().to_handle(self, current_task)
    }
2104
    /// Asks the file's ops for the Zircon handles associated with this file and returns
    /// their result unchanged.
    pub fn get_handles(
        &self,
        current_task: &CurrentTask,
    ) -> Result<Vec<zx::NullableHandle>, Errno> {
        self.ops().get_handles(self, current_task)
    }
2111
    /// Asks the file's ops for the `ThreadGroupKey` this file refers to.
    ///
    /// NOTE(review): presumably only ops that represent a process return `Ok` here;
    /// confirm against the `FileOps` implementations.
    pub fn as_thread_group_key(&self) -> Result<ThreadGroupKey, Errno> {
        self.ops().as_thread_group_key(self)
    }
2115
    /// Update the file flags.
    ///
    /// Writes the bits in `value` that are set in `mask` into the file flags.
    ///
    /// Does not provide any synchronization: the update is performed with
    /// `Ordering::Relaxed` for both orderings passed to `flags.update`.
    pub fn update_file_flags(&self, value: OpenFlags, mask: OpenFlags) {
        self.flags.update(value, mask, Ordering::Relaxed, Ordering::Relaxed);
    }
2124
    /// Get the async owner of this file.
    ///
    /// Returns a copy of the value stored under the `async_owner` lock.
    ///
    /// See fcntl(F_GETOWN)
    pub fn get_async_owner(&self) -> FileAsyncOwner {
        *self.async_owner.lock()
    }
2131
    /// Set the async owner of this file.
    ///
    /// Replaces the value stored under the `async_owner` lock with `owner`.
    ///
    /// See fcntl(F_SETOWN)
    pub fn set_async_owner(&self, owner: FileAsyncOwner) {
        *self.async_owner.lock() = owner;
    }
2138
    /// Returns the lease type currently recorded on this file object.
    ///
    /// `_current_task` is not used.
    ///
    /// See fcntl(F_GETLEASE)
    pub fn get_lease(&self, _current_task: &CurrentTask) -> FileLeaseType {
        *self.lease.lock()
    }
2143
2144    /// See fcntl(F_SETLEASE)
2145    pub fn set_lease(
2146        &self,
2147        _current_task: &CurrentTask,
2148        lease: FileLeaseType,
2149    ) -> Result<(), Errno> {
2150        if !self.node().is_reg() {
2151            return error!(EINVAL);
2152        }
2153        if lease == FileLeaseType::Read && self.can_write() {
2154            return error!(EAGAIN);
2155        }
2156        *self.lease.lock() = lease;
2157        Ok(())
2158    }
2159
    /// Wait on the specified events and call the EventHandler when ready
    ///
    /// Registration is delegated to the file's ops; the `Option<WaitCanceler>` they return
    /// is passed back unchanged.
    pub fn wait_async<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        waiter: &Waiter,
        events: FdEvents,
        handler: EventHandler,
    ) -> Option<WaitCanceler>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        self.ops().wait_async(
            locked.cast_locked::<FileOpsCore>(),
            self,
            current_task,
            waiter,
            events,
            handler,
        )
    }
2181
    /// The events currently active on this file.
    ///
    /// The set reported by the file's ops is passed through
    /// `FdEvents::add_equivalent_fd_events` before being returned; errors from the ops are
    /// propagated unchanged.
    pub fn query_events<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
    ) -> Result<FdEvents, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        self.ops()
            .query_events(locked.cast_locked::<FileOpsCore>(), self, current_task)
            .map(FdEvents::add_equivalent_fd_events)
    }
2195
    /// Applies the record-lock command `cmd` described by `flock` by delegating to the
    /// file's node, passing this file object along, and returns the node's result.
    pub fn record_lock(
        &self,
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        cmd: RecordLockCommand,
        flock: uapi::flock,
    ) -> Result<Option<uapi::flock>, Errno> {
        self.node().record_lock(locked, current_task, self, cmd, flock)
    }
2205
2206    pub fn flush<L>(&self, locked: &mut Locked<L>, current_task: &CurrentTask, id: FdTableId)
2207    where
2208        L: LockEqualOrBefore<FileOpsCore>,
2209    {
2210        self.name.entry.node.record_lock_release(RecordLockOwner::FdTable(id));
2211        self.ops().flush(locked.cast_locked::<FileOpsCore>(), self, current_task)
2212    }
2213
2214    fn update_atime(&self) {
2215        if !self.flags().contains(OpenFlags::NOATIME) {
2216            self.name.update_atime();
2217        }
2218    }
2219
2220    pub fn readahead(
2221        &self,
2222        current_task: &CurrentTask,
2223        offset: usize,
2224        length: usize,
2225    ) -> Result<(), Errno> {
2226        // readfile() fails with EBADF if the file was not open for read.
2227        if !self.can_read() {
2228            return error!(EBADF);
2229        }
2230        checked_add_offset_and_length(offset, length)?;
2231        self.ops().readahead(self, current_task, offset, length)
2232    }
2233
    /// Returns any extra fdinfo text supplied by the file's ops.
    ///
    /// The ops are given a strong handle obtained by upgrading `weak_handle`; if the upgrade
    /// fails, `None` is returned without consulting the ops.
    pub fn extra_fdinfo(
        &self,
        locked: &mut Locked<FileOpsCore>,
        current_task: &CurrentTask,
    ) -> Option<FsString> {
        let file = self.weak_handle.upgrade()?;
        self.ops().extra_fdinfo(locked, &file, current_task)
    }
2242
    /// Register an `EpollFileObject` that listens to events from this `FileObject`.
    ///
    /// `file` is recorded in `epoll_files` under a `WeakKey` derived from it, and only its
    /// weak handle is stored.
    pub fn register_epfd(&self, file: &FileHandle) {
        self.epoll_files.lock().insert(WeakKey::from(file), file.weak_handle.clone());
    }
2248
    /// Removes the `epoll_files` entry for `file`, if one exists.
    ///
    /// The entry is looked up by the same `WeakKey` that `register_epfd` inserts.
    pub fn unregister_epfd(&self, file: &FileHandle) {
        self.epoll_files.lock().remove(&WeakKey::from(file));
    }
2252}
2253
impl Releasable for FileObject {
    type Context<'a> = CurrentTaskAndLocked<'a>;

    /// Tears down the file object when it is released.
    ///
    /// In order: detaches the file from every still-alive epoll file registered in
    /// `epoll_files` (deactivating the corresponding wakeup source), releases the node's
    /// write guard if the file was open for writing, calls `close` on the ops, tells the
    /// node the file was closed, and finally emits `CLOSE_WRITE` or `CLOSE_NOWRITE`.
    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
        let (locked, current_task) = context;
        // Release all wake leases associated with this file in the corresponding `WaitObject`
        // of each registered epfd.
        for (_, file) in self.epoll_files.lock().drain() {
            // Entries are weak; epoll files that have already gone away are skipped.
            if let Some(file) = file.upgrade() {
                if let Some(epoll_object) = file.downcast_file::<EpollFileObject>() {
                    current_task.kernel().suspend_resume_manager.deactivate_wakeup_source(
                        &WakeupSourceOrigin::Epoll(wakeup_source_name_for_epoll(
                            current_task,
                            self.id.as_epoll_key(),
                        )),
                    );
                    // Best effort: the result of removing this file from the epoll object is
                    // deliberately ignored.
                    let _ = epoll_object.delete(&self);
                }
            }
        }

        if self.can_write() {
            self.name.entry.node.write_guard_state.lock().release(FileWriteGuardMode::WriteFile);
        }

        let locked = locked.cast_locked::<FileOpsCore>();
        // `self` is taken apart here: the ops are moved out so that they can be closed
        // against the remaining state, which is used for everything after this point.
        let ops = self.ops;
        let state = self.state;
        ops.close(locked, &state, current_task);
        state.name.entry.node.on_file_closed(&state);
        let event =
            if state.can_write() { InotifyMask::CLOSE_WRITE } else { InotifyMask::CLOSE_NOWRITE };
        state.notify(event);
    }
}
2289
2290impl fmt::Debug for FileObject {
2291    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2292        f.debug_struct("FileObject")
2293            .field("name", &self.name)
2294            .field("fs", &self.fs.name())
2295            .field("offset", &self.offset)
2296            .field("flags", &self.flags)
2297            .field("ops_ty", &self.ops().type_name())
2298            .finish()
2299    }
2300}
2301
impl OnWakeOps for FileReleaser {
    /// Intentionally does nothing: both arguments are ignored.
    fn on_wake(&self, _current_task: &CurrentTask, _baton_lease: &zx::NullableHandle) {}
}
2305
/// A FileObject with the type of its FileOps known. Dereferencing it returns the FileOps.
pub struct DowncastedFile<'a, Ops> {
    /// The file object the ops were obtained from.
    file: &'a FileObject,
    /// The file's ops, already downcast to the concrete type `Ops`.
    ops: &'a Ops,
}
// `Copy` and `Clone` are implemented by hand: a derive would add `Ops: Copy` / `Ops: Clone`
// bounds, which are unnecessary because only references are stored.
impl<'a, Ops> Copy for DowncastedFile<'a, Ops> {}
impl<'a, Ops> Clone for DowncastedFile<'a, Ops> {
    fn clone(&self) -> Self {
        *self
    }
}
2317
impl<'a, Ops> DowncastedFile<'a, Ops> {
    /// Returns the underlying `FileObject`, borrowed for the full `'a` lifetime.
    pub fn file(&self) -> &'a FileObject {
        self.file
    }
}
2323
impl<'a, Ops> Deref for DowncastedFile<'a, Ops> {
    // The target is the reference `&'a Ops` rather than `Ops`, so dereferencing yields a
    // value that still carries the `'a` lifetime.
    type Target = &'a Ops;
    /// Returns the stored reference to the downcast ops.
    fn deref(&self) -> &Self::Target {
        &self.ops
    }
}
2330
2331impl FileObject {
2332    /// Returns the `FileObject`'s `FileOps` as a `DowncastedFile<T>`, or `None` if the downcast
2333    /// fails.
2334    ///
2335    /// This is useful for syscalls that only operate on a certain type of file.
2336    pub fn downcast_file<'a, T>(&'a self) -> Option<DowncastedFile<'a, T>>
2337    where
2338        T: 'static,
2339    {
2340        let ops = self.ops().as_any().downcast_ref::<T>()?;
2341        Some(DowncastedFile { file: self, ops })
2342    }
2343}
2344
#[cfg(test)]
mod tests {
    use crate::fs::tmpfs::TmpFs;
    use crate::task::CurrentTask;
    use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
    use crate::testing::*;
    use crate::vfs::MountInfo;
    use crate::vfs::buffers::{VecInputBuffer, VecOutputBuffer};
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::auth::FsCred;
    use starnix_uapi::device_type::DeviceType;
    use starnix_uapi::file_mode::FileMode;
    use starnix_uapi::open_flags::OpenFlags;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use zerocopy::{FromBytes, IntoBytes, LE, U64};

    /// Stress test for the interaction between `O_APPEND` writes and `ftruncate`.
    ///
    /// One kernel thread appends the little-endian `u64` values `0..2000` to a tmpfs file
    /// while another repeatedly truncates the file to zero length. The test body reads
    /// the file from offset 0 in the meantime and asserts that the values it sees are strictly
    /// increasing.
    #[::fuchsia::test]
    async fn test_append_truncate_race() {
        spawn_kernel_and_run(async |locked, current_task| {
            let kernel = current_task.kernel();
            let root_fs = TmpFs::new_fs(locked, &kernel);
            let mount = MountInfo::detached();
            let root_node = Arc::clone(root_fs.root());
            // Create a regular file named "test" in the tmpfs root.
            let file = root_node
                .create_entry(
                    locked,
                    &current_task,
                    &mount,
                    "test".into(),
                    |locked, dir, mount, name| {
                        dir.create_node(
                            locked,
                            &current_task,
                            mount,
                            name,
                            FileMode::IFREG | FileMode::ALLOW_ALL,
                            DeviceType::NONE,
                            FsCred::root(),
                        )
                    },
                )
                .expect("create_node failed");
            // A single handle, opened in append mode, is shared by all three participants.
            let file_handle = file
                .open_anonymous(locked, &current_task, OpenFlags::APPEND | OpenFlags::RDWR)
                .expect("open failed");
            // Set by the writer once all values have been appended; stops the other loops.
            let done = Arc::new(AtomicBool::new(false));

            // Writer thread: appends one 8-byte value per `write` call.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                for i in 0..2000 {
                    fh.write(
                        locked,
                        current_task,
                        &mut VecInputBuffer::new(U64::<LE>::new(i).as_bytes()),
                    )
                    .expect("write failed");
                }
                done_clone.store(true, Ordering::SeqCst);
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (write_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // Truncater thread: keeps truncating the file to length 0 until the writer is done.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                while !done_clone.load(Ordering::SeqCst) {
                    fh.ftruncate(locked, current_task, 0).expect("truncate failed");
                }
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (truncate_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // If we read from the file, we should always find an increasing sequence. If there are
            // races, then we might unexpectedly see zeroes.
            while !done.load(Ordering::SeqCst) {
                let mut buffer = VecOutputBuffer::new(4096);
                let amount = file_handle
                    .read_at(locked, &current_task, 0, &mut buffer)
                    .expect("read failed");
                let mut last = None;
                // Only the first `amount` bytes were produced by this read.
                let buffer = &Vec::from(buffer)[..amount];
                // Decode complete 8-byte values; a trailing partial value is ignored.
                for i in
                    buffer.chunks_exact(8).map(|chunk| U64::<LE>::read_from_bytes(chunk).unwrap())
                {
                    if let Some(last) = last {
                        assert!(i.get() > last, "buffer: {:?}", buffer);
                    }
                    last = Some(i.get());
                }
            }

            // Collect the results of both spawned closures; `unwrap` panics on a failed one.
            let _ = write_thread().unwrap();
            let _ = truncate_thread().unwrap();
        })
        .await;
    }
}