starnix_core/vfs/
file_object.rs

// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::memory::MemoryObject;
6use crate::mm::{DesiredAddress, MappingName, MappingOptions, MemoryAccessorExt, ProtectionFlags};
7use crate::power::{OnWakeOps, WakeupSourceOrigin};
8use crate::security;
9use crate::task::{
10    CurrentTask, CurrentTaskAndLocked, EventHandler, Task, ThreadGroupKey, WaitCallback,
11    WaitCanceler, Waiter, register_delayed_release,
12};
13use crate::vfs::buffers::{InputBuffer, OutputBuffer};
14use crate::vfs::file_server::serve_file;
15use crate::vfs::fsverity::{
16    FsVerityState, {self},
17};
18use crate::vfs::{
19    ActiveNamespaceNode, DirentSink, EpollFileObject, EpollKey, FallocMode, FdTableId,
20    FileSystemHandle, FileWriteGuardMode, FsNodeHandle, FsString, NamespaceNode, RecordLockCommand,
21    RecordLockOwner, wakeup_source_name_for_epoll,
22};
23use starnix_crypt::EncryptionKeyId;
24use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
25use starnix_types::ownership::ReleaseGuard;
26use starnix_uapi::mount_flags::MountFlags;
27use starnix_uapi::user_address::ArchSpecific;
28
29use fidl::HandleBased;
30use linux_uapi::{FSCRYPT_MODE_AES_256_CTS, FSCRYPT_MODE_AES_256_XTS};
31use starnix_logging::{CATEGORY_STARNIX_MM, impossible_error, trace_duration, track_stub};
32use starnix_sync::{
33    BeforeFsNodeAppend, FileOpsCore, LockBefore, LockEqualOrBefore, Locked, Mutex, Unlocked,
34};
35use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
36use starnix_types::math::round_up_to_system_page_size;
37use starnix_types::ownership::Releasable;
38use starnix_uapi::arc_key::WeakKey;
39use starnix_uapi::as_any::AsAny;
40use starnix_uapi::auth::{CAP_FOWNER, CAP_SYS_RAWIO};
41use starnix_uapi::errors::{EAGAIN, ETIMEDOUT, Errno};
42use starnix_uapi::file_lease::FileLeaseType;
43use starnix_uapi::file_mode::Access;
44use starnix_uapi::inotify_mask::InotifyMask;
45use starnix_uapi::open_flags::OpenFlags;
46use starnix_uapi::seal_flags::SealFlags;
47use starnix_uapi::user_address::{UserAddress, UserRef};
48use starnix_uapi::vfs::FdEvents;
49use starnix_uapi::{
50    FIBMAP, FIGETBSZ, FIONBIO, FIONREAD, FIOQSIZE, FS_CASEFOLD_FL, FS_IOC_ADD_ENCRYPTION_KEY,
51    FS_IOC_ENABLE_VERITY, FS_IOC_FSGETXATTR, FS_IOC_FSSETXATTR, FS_IOC_MEASURE_VERITY,
52    FS_IOC_READ_VERITY_METADATA, FS_IOC_REMOVE_ENCRYPTION_KEY, FS_IOC_SET_ENCRYPTION_POLICY,
53    FS_VERITY_FL, FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, FSCRYPT_POLICY_V2, SEEK_CUR, SEEK_DATA,
54    SEEK_END, SEEK_HOLE, SEEK_SET, TCGETS, errno, error, fscrypt_add_key_arg, fscrypt_identifier,
55    fsxattr, off_t, pid_t, uapi,
56};
57use std::collections::HashMap;
58use std::fmt;
59use std::ops::Deref;
60use std::sync::{Arc, Weak};
61
/// Largest end offset accepted by [`checked_add_offset_and_length`]. The value equals
/// `i64::MAX` expressed as a `usize`.
pub const MAX_LFS_FILESIZE: usize = 0x7fff_ffff_ffff_ffff;
63
64pub fn checked_add_offset_and_length(offset: usize, length: usize) -> Result<usize, Errno> {
65    let end = offset.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
66    if end > MAX_LFS_FILESIZE {
67        return error!(EINVAL);
68    }
69    Ok(end)
70}
71
/// The destination of a seek operation: one variant per supported `whence` value, each carrying
/// the raw offset that was supplied with it.
#[derive(Debug)]
pub enum SeekTarget {
    /// Seek to the given offset relative to the start of the file.
    Set(off_t),
    /// Seek to the given offset relative to the current position.
    Cur(off_t),
    /// Seek to the given offset relative to the end of the file.
    End(off_t),
    /// Seek for the first data after the given offset.
    Data(off_t),
    /// Seek for the first hole after the given offset.
    Hole(off_t),
}
85
86impl SeekTarget {
87    pub fn from_raw(whence: u32, offset: off_t) -> Result<SeekTarget, Errno> {
88        match whence {
89            SEEK_SET => Ok(SeekTarget::Set(offset)),
90            SEEK_CUR => Ok(SeekTarget::Cur(offset)),
91            SEEK_END => Ok(SeekTarget::End(offset)),
92            SEEK_DATA => Ok(SeekTarget::Data(offset)),
93            SEEK_HOLE => Ok(SeekTarget::Hole(offset)),
94            _ => error!(EINVAL),
95        }
96    }
97
98    pub fn whence(&self) -> u32 {
99        match self {
100            Self::Set(_) => SEEK_SET,
101            Self::Cur(_) => SEEK_CUR,
102            Self::End(_) => SEEK_END,
103            Self::Data(_) => SEEK_DATA,
104            Self::Hole(_) => SEEK_HOLE,
105        }
106    }
107
108    pub fn offset(&self) -> off_t {
109        match self {
110            Self::Set(off)
111            | Self::Cur(off)
112            | Self::End(off)
113            | Self::Data(off)
114            | Self::Hole(off) => *off,
115        }
116    }
117}
118
/// Corresponds to struct file_operations in Linux, plus any filesystem-specific data.
///
/// Implementors must provide `is_seekable`, `read`, `write`, `seek` and `sync`; every other
/// method has a default implementation defined below.
pub trait FileOps: Send + Sync + AsAny + 'static {
    /// Called when the FileObject is opened/created.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn open(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<(), Errno> {
        Ok(())
    }

    /// Called when the FileObject is destroyed.
    ///
    /// The default implementation does nothing.
    fn close(
        self: Box<Self>,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObjectState,
        _current_task: &CurrentTask,
    ) {
    }

    /// Called every time close() is called on this file, even if the file is not ready to be
    /// released.
    ///
    /// The default implementation does nothing.
    fn flush(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) {
    }

    /// Returns whether the file has meaningful seek offsets. Returning `false` is only an
    /// optimization and makes `FileObject` never hold the offset lock when calling `read` and
    /// `write`.
    ///
    /// Defaults to the value of [`Self::is_seekable`].
    fn has_persistent_offsets(&self) -> bool {
        self.is_seekable()
    }

    /// Returns whether the file is seekable.
    fn is_seekable(&self) -> bool;

    /// Returns true if `write()` operations on the file will update the seek offset.
    ///
    /// Defaults to the value of [`Self::has_persistent_offsets`].
    fn writes_update_seek_offset(&self) -> bool {
        self.has_persistent_offsets()
    }

    /// Read from the file at an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes read.
    fn read(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno>;

    /// Write to the file with an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes written.
    fn write(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>;

    /// Adjust the `current_offset` if the file is seekable.
    ///
    /// Returns the new offset.
    fn seek(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        current_offset: off_t,
        target: SeekTarget,
    ) -> Result<off_t, Errno>;

    /// Syncs cached state associated with the file descriptor to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    fn sync(&self, file: &FileObject, _current_task: &CurrentTask) -> Result<(), Errno>;

    /// Syncs cached data, and only enough metadata to retrieve said data, to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    ///
    /// The default implementation performs a full [`Self::sync`].
    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
        // TODO(https://fxbug.dev/297305634) make a default macro once data can be done separately
        self.sync(file, current_task)
    }

    /// Returns a VMO representing this file. At least the requested protection flags must
    /// be set on the VMO. Reading or writing the VMO must read or write the file. If this is not
    /// possible given the requested protection, an error must be returned.
    /// The `length` is a hint for the desired size of the VMO. The returned VMO may be larger or
    /// smaller than the requested length.
    /// This method is typically called by [`Self::mmap`].
    ///
    /// The default implementation fails with `ENODEV`.
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(ENODEV)
    }

    /// Responds to an mmap call. The default implementation calls [`Self::get_memory`] to get a VMO
    /// and then maps it with [`crate::mm::MemoryManager::map`].
    /// Only implement this trait method if your file needs to control mapping, or record where
    /// a VMO gets mapped.
    ///
    /// Returns the address at which the memory was mapped.
    fn mmap(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        addr: DesiredAddress,
        memory_offset: u64,
        length: usize,
        prot_flags: ProtectionFlags,
        options: MappingOptions,
        filename: NamespaceNode,
    ) -> Result<UserAddress, Errno> {
        trace_duration!(CATEGORY_STARNIX_MM, "FileOpsDefaultMmap");
        // Size hint passed to `get_memory`: the mapping offset plus the page-rounded length.
        // Overflow of that sum is reported as `EINVAL`.
        let min_memory_size = (memory_offset as usize)
            .checked_add(round_up_to_system_page_size(length)?)
            .ok_or_else(|| errno!(EINVAL))?;
        let mut memory = if options.contains(MappingOptions::SHARED) {
            trace_duration!(CATEGORY_STARNIX_MM, "GetSharedVmo");
            self.get_memory(locked, file, current_task, Some(min_memory_size), prot_flags)?
        } else {
            trace_duration!(CATEGORY_STARNIX_MM, "GetPrivateVmo");
            // TODO(tbodt): Use PRIVATE_CLONE to have the filesystem server do the clone for us.
            // The backing memory is requested readable and not writable; the private mapping
            // operates on a child created from it below.
            let base_prot_flags = (prot_flags | ProtectionFlags::READ) - ProtectionFlags::WRITE;
            let memory = self.get_memory(
                locked,
                file,
                current_task,
                Some(min_memory_size),
                base_prot_flags,
            )?;
            let mut clone_flags = zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE;
            if !prot_flags.contains(ProtectionFlags::WRITE) {
                clone_flags |= zx::VmoChildOptions::NO_WRITE;
            }
            trace_duration!(CATEGORY_STARNIX_MM, "CreatePrivateChildVmo");
            Arc::new(
                memory.create_child(clone_flags, 0, memory.get_size()).map_err(impossible_error)?,
            )
        };

        // Write guard is necessary only for shared mappings. Note that this doesn't depend on
        // `prot_flags` since these can be changed later with `mprotect()`.
        let file_write_guard = if options.contains(MappingOptions::SHARED) && file.can_write() {
            let node = &file.name.entry.node;
            let state = node.write_guard_state.lock();

            // `F_SEAL_FUTURE_WRITE` should allow `mmap(PROT_READ)`, but block
            // `mprotect(PROT_WRITE)`. This is different from `F_SEAL_WRITE`, which blocks
            // `mmap(PROT_READ)`. To handle this case correctly remove `WRITE` right from the
            // VMO handle to ensure `mprotect(PROT_WRITE)` fails.
            let seals = state.get_seals().unwrap_or(SealFlags::empty());
            if seals.contains(SealFlags::FUTURE_WRITE)
                && !seals.contains(SealFlags::WRITE)
                && !prot_flags.contains(ProtectionFlags::WRITE)
            {
                let mut new_rights = zx::Rights::VMO_DEFAULT - zx::Rights::WRITE;
                if prot_flags.contains(ProtectionFlags::EXEC) {
                    new_rights |= zx::Rights::EXECUTE;
                }
                memory = Arc::new(memory.duplicate_handle(new_rights).map_err(impossible_error)?);

                None
            } else {
                Some(FileWriteGuardMode::WriteMapping)
            }
        } else {
            None
        };

        current_task.mm()?.map_memory(
            addr,
            memory,
            memory_offset,
            length,
            prot_flags,
            file.max_access_for_memory_mapping(),
            options,
            MappingName::File(filename.into_mapping(file_write_guard)?),
        )
    }

    /// Respond to a `getdents` or `getdents64` call.
    ///
    /// The `file.offset` lock will be held while entering this method. The implementation must look
    /// at `sink.offset()` to read the current offset into the file.
    ///
    /// The default implementation fails with `ENOTDIR`.
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(ENOTDIR)
    }

    /// Establish a one-shot, edge-triggered, asynchronous wait for the given FdEvents for the
    /// given file and task. Returns `None` if this file does not support blocking waits.
    ///
    /// Active events are not considered. This is similar to the semantics of the
    /// ZX_WAIT_ASYNC_EDGE flag on zx_wait_async. To avoid missing events, the caller must call
    /// query_events after calling this.
    ///
    /// If your file does not support blocking waits, leave this as the default implementation.
    fn wait_async(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _waiter: &Waiter,
        _events: FdEvents,
        _handler: EventHandler,
    ) -> Option<WaitCanceler> {
        None
    }

    /// The events currently active on this file.
    ///
    /// If this function returns `POLLIN` or `POLLOUT`, then FileObject will
    /// add `POLLRDNORM` and `POLLWRNORM`, respectively, which are equivalent in
    /// the Linux UAPI.
    ///
    /// The default implementation reports `POLLIN | POLLOUT`.
    ///
    /// See https://linux.die.net/man/2/poll
    fn query_events(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<FdEvents, Errno> {
        Ok(FdEvents::POLLIN | FdEvents::POLLOUT)
    }

    /// Handles an ioctl `request` with argument `arg` issued on this file.
    ///
    /// The default implementation forwards to `default_ioctl`.
    fn ioctl(
        &self,
        locked: &mut Locked<Unlocked>,
        file: &FileObject,
        current_task: &CurrentTask,
        request: u32,
        arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        default_ioctl(file, locked, current_task, request, arg)
    }

    /// Handles the fcntl command `cmd` issued on this file.
    ///
    /// The default implementation forwards to `default_fcntl`, passing only `cmd`.
    fn fcntl(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        cmd: u32,
        _arg: u64,
    ) -> Result<SyscallResult, Errno> {
        default_fcntl(cmd)
    }

    /// Return a handle that allows access to this file descriptor through the zxio protocols.
    ///
    /// If None is returned, the file will act as if it was a fd to `/dev/null`.
    ///
    /// The default implementation calls `serve_file` with the current task's
    /// `full_current_creds()` and converts the first element of its result into a handle.
    fn to_handle(
        &self,
        file: &FileObject,
        current_task: &CurrentTask,
    ) -> Result<Option<zx::NullableHandle>, Errno> {
        serve_file(current_task, file, current_task.full_current_creds())
            .map(|c| Some(c.0.into_handle().into()))
    }

    /// Return a vector of handles. This is used in situations where there is more than one handle
    /// associated with this file descriptor.
    ///
    /// In Fuchsia, there is an expectation that there is a 1:1 mapping between a file descriptor
    /// and a handle. In general, we do not want to violate that rule. This function is intended to
    /// be used in very limited circumstances (compatibility with Linux and Binder), where we need
    /// to violate that rule.
    ///
    /// Specifically, we are using this to implement SyncFiles correctly, where a single SyncFile
    /// can represent multiple SyncPoints. Each SyncPoint contains a zx::Counter.
    ///
    /// If you choose to implement this function, to_handle() should return an error. You must also
    /// be aware that if these handles are passed to Fuchsia over Binder, they will be represented
    /// as a single file descriptor, and you should use the composite_fd library to manage that
    /// file descriptor.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn get_handles(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<Vec<zx::NullableHandle>, Errno> {
        error!(ENOTSUP)
    }

    /// Returns the `ThreadGroupKey` associated with this file.
    ///
    /// Used by pidfd and `/proc/<pid>`. Unlikely to be used by other files.
    ///
    /// The default implementation fails with `EBADF`.
    fn as_thread_group_key(&self, _file: &FileObject) -> Result<ThreadGroupKey, Errno> {
        error!(EBADF)
    }

    /// Handles a readahead request for `_length` bytes starting at `_offset`.
    ///
    /// The default implementation fails with `EINVAL`.
    fn readahead(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _length: usize,
    ) -> Result<(), Errno> {
        error!(EINVAL)
    }

    /// Extra information that is included in the /proc/<pid>/fdinfo/<fd> entry.
    ///
    /// The default implementation returns `None`.
    fn extra_fdinfo(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileHandle,
        _current_task: &CurrentTask,
    ) -> Option<FsString> {
        None
    }
}
447
/// Marker trait for implementations of `FileOps` that do not need to implement `close` and can
/// then pass a wrapper object as the `FileOps` implementation.
///
/// Any `Send + Sync + 'static` type that dereferences to such an implementation gets a `FileOps`
/// implementation through the blanket impl that follows; that impl's `close` does nothing.
pub trait CloseFreeSafe {}
451impl<T: FileOps + CloseFreeSafe, P: Deref<Target = T> + Send + Sync + 'static> FileOps for P {
452    fn close(
453        self: Box<Self>,
454        _locked: &mut Locked<FileOpsCore>,
455        _file: &FileObjectState,
456        _current_task: &CurrentTask,
457    ) {
458        // This method cannot be delegated. T being `CloseFreeSafe` this is fine.
459    }
460
461    fn flush(
462        &self,
463        locked: &mut Locked<FileOpsCore>,
464        file: &FileObject,
465        current_task: &CurrentTask,
466    ) {
467        self.deref().flush(locked, file, current_task)
468    }
469
470    fn has_persistent_offsets(&self) -> bool {
471        self.deref().has_persistent_offsets()
472    }
473
474    fn writes_update_seek_offset(&self) -> bool {
475        self.deref().writes_update_seek_offset()
476    }
477
478    fn is_seekable(&self) -> bool {
479        self.deref().is_seekable()
480    }
481
482    fn read(
483        &self,
484        locked: &mut Locked<FileOpsCore>,
485        file: &FileObject,
486        current_task: &CurrentTask,
487        offset: usize,
488        data: &mut dyn OutputBuffer,
489    ) -> Result<usize, Errno> {
490        self.deref().read(locked, file, current_task, offset, data)
491    }
492
493    fn write(
494        &self,
495        locked: &mut Locked<FileOpsCore>,
496        file: &FileObject,
497        current_task: &CurrentTask,
498        offset: usize,
499        data: &mut dyn InputBuffer,
500    ) -> Result<usize, Errno> {
501        self.deref().write(locked, file, current_task, offset, data)
502    }
503
504    fn seek(
505        &self,
506        locked: &mut Locked<FileOpsCore>,
507        file: &FileObject,
508        current_task: &CurrentTask,
509        current_offset: off_t,
510        target: SeekTarget,
511    ) -> Result<off_t, Errno> {
512        self.deref().seek(locked, file, current_task, current_offset, target)
513    }
514
515    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
516        self.deref().sync(file, current_task)
517    }
518
519    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
520        self.deref().data_sync(file, current_task)
521    }
522
523    fn get_memory(
524        &self,
525        locked: &mut Locked<FileOpsCore>,
526        file: &FileObject,
527        current_task: &CurrentTask,
528        length: Option<usize>,
529        prot: ProtectionFlags,
530    ) -> Result<Arc<MemoryObject>, Errno> {
531        self.deref().get_memory(locked, file, current_task, length, prot)
532    }
533
534    fn mmap(
535        &self,
536        locked: &mut Locked<FileOpsCore>,
537        file: &FileObject,
538        current_task: &CurrentTask,
539        addr: DesiredAddress,
540        memory_offset: u64,
541        length: usize,
542        prot_flags: ProtectionFlags,
543        options: MappingOptions,
544        filename: NamespaceNode,
545    ) -> Result<UserAddress, Errno> {
546        self.deref().mmap(
547            locked,
548            file,
549            current_task,
550            addr,
551            memory_offset,
552            length,
553            prot_flags,
554            options,
555            filename,
556        )
557    }
558
559    fn readdir(
560        &self,
561        locked: &mut Locked<FileOpsCore>,
562        file: &FileObject,
563        current_task: &CurrentTask,
564        sink: &mut dyn DirentSink,
565    ) -> Result<(), Errno> {
566        self.deref().readdir(locked, file, current_task, sink)
567    }
568
569    fn wait_async(
570        &self,
571        locked: &mut Locked<FileOpsCore>,
572        file: &FileObject,
573        current_task: &CurrentTask,
574        waiter: &Waiter,
575        events: FdEvents,
576        handler: EventHandler,
577    ) -> Option<WaitCanceler> {
578        self.deref().wait_async(locked, file, current_task, waiter, events, handler)
579    }
580
581    fn query_events(
582        &self,
583        locked: &mut Locked<FileOpsCore>,
584        file: &FileObject,
585        current_task: &CurrentTask,
586    ) -> Result<FdEvents, Errno> {
587        self.deref().query_events(locked, file, current_task)
588    }
589
590    fn ioctl(
591        &self,
592        locked: &mut Locked<Unlocked>,
593        file: &FileObject,
594        current_task: &CurrentTask,
595        request: u32,
596        arg: SyscallArg,
597    ) -> Result<SyscallResult, Errno> {
598        self.deref().ioctl(locked, file, current_task, request, arg)
599    }
600
601    fn fcntl(
602        &self,
603        file: &FileObject,
604        current_task: &CurrentTask,
605        cmd: u32,
606        arg: u64,
607    ) -> Result<SyscallResult, Errno> {
608        self.deref().fcntl(file, current_task, cmd, arg)
609    }
610
611    fn to_handle(
612        &self,
613        file: &FileObject,
614        current_task: &CurrentTask,
615    ) -> Result<Option<zx::NullableHandle>, Errno> {
616        self.deref().to_handle(file, current_task)
617    }
618
619    fn get_handles(
620        &self,
621        file: &FileObject,
622        current_task: &CurrentTask,
623    ) -> Result<Vec<zx::NullableHandle>, Errno> {
624        self.deref().get_handles(file, current_task)
625    }
626
627    fn as_thread_group_key(&self, file: &FileObject) -> Result<ThreadGroupKey, Errno> {
628        self.deref().as_thread_group_key(file)
629    }
630
631    fn readahead(
632        &self,
633        file: &FileObject,
634        current_task: &CurrentTask,
635        offset: usize,
636        length: usize,
637    ) -> Result<(), Errno> {
638        self.deref().readahead(file, current_task, offset, length)
639    }
640
641    fn extra_fdinfo(
642        &self,
643        locked: &mut Locked<FileOpsCore>,
644        file: &FileHandle,
645        current_task: &CurrentTask,
646    ) -> Option<FsString> {
647        self.deref().extra_fdinfo(locked, file, current_task)
648    }
649}
650
651pub fn default_eof_offset<L>(
652    locked: &mut Locked<L>,
653    file: &FileObject,
654    current_task: &CurrentTask,
655) -> Result<off_t, Errno>
656where
657    L: LockEqualOrBefore<FileOpsCore>,
658{
659    Ok(file.node().get_size(locked, current_task)? as off_t)
660}
661
662/// Implement the seek method for a file. The computation from the end of the file must be provided
663/// through a callback.
664///
665/// Errors if the calculated offset is invalid.
666///
667/// - `current_offset`: The current position
668/// - `target`: The location to seek to.
669/// - `compute_end`: Compute the new offset from the end. Return an error if the operation is not
670///    supported.
671pub fn default_seek<F>(
672    current_offset: off_t,
673    target: SeekTarget,
674    compute_end: F,
675) -> Result<off_t, Errno>
676where
677    F: FnOnce() -> Result<off_t, Errno>,
678{
679    let new_offset = match target {
680        SeekTarget::Set(offset) => Some(offset),
681        SeekTarget::Cur(offset) => current_offset.checked_add(offset),
682        SeekTarget::End(offset) => compute_end()?.checked_add(offset),
683        SeekTarget::Data(offset) => {
684            let eof = compute_end().unwrap_or(off_t::MAX);
685            if offset >= eof {
686                return error!(ENXIO);
687            }
688            Some(offset)
689        }
690        SeekTarget::Hole(offset) => {
691            let eof = compute_end()?;
692            if offset >= eof {
693                return error!(ENXIO);
694            }
695            Some(eof)
696        }
697    }
698    .ok_or_else(|| errno!(EINVAL))?;
699
700    if new_offset < 0 {
701        return error!(EINVAL);
702    }
703
704    Ok(new_offset)
705}
706
707/// Implement the seek method for a file without an upper bound on the resulting offset.
708///
709/// This is useful for files without a defined size.
710///
711/// Errors if the calculated offset is invalid.
712///
713/// - `current_offset`: The current position
714/// - `target`: The location to seek to.
715pub fn unbounded_seek(current_offset: off_t, target: SeekTarget) -> Result<off_t, Errno> {
716    default_seek(current_offset, target, || Ok(MAX_LFS_FILESIZE as off_t))
717}
718
/// Implements [`FileOps::is_seekable`], [`FileOps::read`] and [`FileOps::seek`] by forwarding
/// `read` and `seek` to another object.
///
/// - `$self`: the identifier used for the method receiver (typically `self`), so that
///   `$delegate` can refer to it.
/// - `$delegate`: an expression evaluating to the object whose `read` and `seek` are called.
///
/// The generated `is_seekable` always returns `true`.
#[macro_export]
macro_rules! fileops_impl_delegate_read_and_seek {
    ($self:ident, $delegate:expr) => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            // Fully qualified like in the sibling macros, so that the expansion does not depend
            // on `FileObject` being imported at the call site.
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.read(locked, file, current_task, offset, data)
        }

        fn seek(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $delegate.seek(locked, file, current_task, current_offset, target)
        }
    };
}
749
/// Implements [`FileOps::is_seekable`] and [`FileOps::seek`] for regular seekable files.
///
/// The generated `seek` uses `default_seek`, with the end of the file computed by
/// `default_eof_offset`.
#[macro_export]
macro_rules! fileops_impl_seekable {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // Only evaluated by `default_seek` for the targets that need the file size.
            let eof_offset = || $crate::vfs::default_eof_offset(locked, file, current_task);
            $crate::vfs::default_seek(current_offset, target, eof_offset)
        }
    };
}
772
/// Implements [`FileOps`] methods in a way that makes sense for non-seekable files.
///
/// The generated `is_seekable` returns `false` and the generated `seek` always fails with
/// `ESPIPE`.
#[macro_export]
macro_rules! fileops_impl_nonseekable {
    () => {
        fn is_seekable(&self) -> bool {
            false
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ESPIPE)
        }
    };
}
793
/// Implements [`FileOps::seek`] methods in a way that makes sense for files that ignore
/// seeking operations and always read/write at offset 0.
///
/// The generated `is_seekable` returns `true`, `has_persistent_offsets` returns `false`, and
/// `seek` succeeds with offset 0 whatever the target is.
#[macro_export]
macro_rules! fileops_impl_seekless {
    () => {
        fn has_persistent_offsets(&self) -> bool {
            false
        }

        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            Ok(0)
        }
    };
}
819
/// Implements [`FileOps::write`] and [`FileOps::read`] for files that carry no data: both
/// generated methods always fail with `EINVAL`.
#[macro_export]
macro_rules! fileops_impl_dataless {
    () => {
        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }
    };
}
846
/// Implements [`FileOps`] methods in a way that makes sense for directories. You must implement
/// [`FileOps::seek`] and [`FileOps::readdir`].
///
/// The generated `is_seekable` returns `true`; the generated `read` and `write` always fail
/// with `EISDIR`.
#[macro_export]
macro_rules! fileops_impl_directory {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }

        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }
    };
}
879
/// Implements [`FileOps::seek`] by forwarding the current offset and the seek target to
/// `$crate::vfs::unbounded_seek`, without consulting the file itself.
#[macro_export]
macro_rules! fileops_impl_unbounded_seek {
    () => {
        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            offset: starnix_uapi::off_t,
            seek_target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $crate::vfs::unbounded_seek(offset, seek_target)
        }
    };
}
895
/// Implements [`FileOps::sync`] as a no-op.
///
/// The generated method succeeds without doing any work when the file's node is a regular
/// file or a directory, and fails with `EINVAL` for every other kind of node.
#[macro_export]
macro_rules! fileops_impl_noop_sync {
    () => {
        fn sync(
            &self,
            file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            let node = file.node();
            if node.is_reg() || node.is_dir() {
                Ok(())
            } else {
                starnix_uapi::error!(EINVAL)
            }
        }
    };
}
911
912// Public re-export of macros allows them to be used like regular rust items.
913
914pub use {
915    fileops_impl_dataless, fileops_impl_delegate_read_and_seek, fileops_impl_directory,
916    fileops_impl_nonseekable, fileops_impl_noop_sync, fileops_impl_seekable, fileops_impl_seekless,
917    fileops_impl_unbounded_seek,
918};
/// Size, in bytes, of an AES-256 key (256 bits).
pub const AES256_KEY_SIZE: usize = 32;
920
921pub fn canonicalize_ioctl_request(current_task: &CurrentTask, request: u32) -> u32 {
922    if current_task.is_arch32() {
923        match request {
924            uapi::arch32::FS_IOC_GETFLAGS => uapi::FS_IOC_GETFLAGS,
925            uapi::arch32::FS_IOC_SETFLAGS => uapi::FS_IOC_SETFLAGS,
926            _ => request,
927        }
928    } else {
929        request
930    }
931}
932
/// Fallback `ioctl` handler shared by file implementations.
///
/// The request number is first passed through [`canonicalize_ioctl_request`] so that arch32
/// variants of the flag ioctls are handled by the same arms as the native ones. Requests that
/// are not recognized here are recorded with `track_stub!` and fail with `ENOTTY`.
///
/// `arg` is interpreted per request, usually as a user address to read from or write to in
/// `current_task`'s address space.
pub fn default_ioctl(
    file: &FileObject,
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    request: u32,
    arg: SyscallArg,
) -> Result<SyscallResult, Errno> {
    match canonicalize_ioctl_request(current_task, request) {
        TCGETS => error!(ENOTTY),
        FIGETBSZ => {
            // Only regular files and directories report a block size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let blocksize = file.node().stat(locked, current_task)?.st_blksize;
            current_task.write_object(arg.into(), &blocksize)?;
            Ok(SUCCESS)
        }
        FIONBIO => {
            // The argument points at an i32: zero clears O_NONBLOCK, anything else sets it.
            let arg_ref = UserAddress::from(arg).into();
            let arg: i32 = current_task.read_object(arg_ref)?;
            let val = if arg == 0 {
                // Clear the NONBLOCK flag
                OpenFlags::empty()
            } else {
                // Set the NONBLOCK flag
                OpenFlags::NONBLOCK
            };
            file.update_file_flags(val, OpenFlags::NONBLOCK);
            Ok(SUCCESS)
        }
        FIOQSIZE => {
            // Only regular files and directories report a size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let size = file.node().stat(locked, current_task)?.st_size;
            current_task.write_object(arg.into(), &size)?;
            Ok(SUCCESS)
        }
        FIONREAD => {
            track_stub!(TODO("https://fxbug.dev/322874897"), "FIONREAD");
            if !file.name.entry.node.is_reg() {
                return error!(ENOTTY);
            }

            // Any failure to refresh the node info is reported as EINVAL.
            let size = file
                .name
                .entry
                .node
                .fetch_and_refresh_info(locked, current_task)
                .map_err(|_| errno!(EINVAL))?
                .size;
            let offset = usize::try_from(*file.offset.lock()).map_err(|_| errno!(EINVAL))?;
            // Bytes left between the current offset and the end of the file: zero when the
            // offset is already past the end, saturated at i32::MAX when it does not fit.
            let remaining =
                if size < offset { 0 } else { i32::try_from(size - offset).unwrap_or(i32::MAX) };
            current_task.write_object(arg.into(), &remaining)?;
            Ok(SUCCESS)
        }
        FS_IOC_FSGETXATTR => {
            // Stub: always reports a default-initialized `fsxattr`.
            track_stub!(TODO("https://fxbug.dev/322875209"), "FS_IOC_FSGETXATTR");
            let arg = UserAddress::from(arg).into();
            current_task.write_object(arg, &fsxattr::default())?;
            Ok(SUCCESS)
        }
        FS_IOC_FSSETXATTR => {
            // Stub: the `fsxattr` is read (so a bad address still fails) and then discarded.
            track_stub!(TODO("https://fxbug.dev/322875271"), "FS_IOC_FSSETXATTR");
            let arg = UserAddress::from(arg).into();
            let _: fsxattr = current_task.read_object(arg)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_GETFLAGS => {
            // Only the fs-verity and casefold flags are reported.
            track_stub!(TODO("https://fxbug.dev/322874935"), "FS_IOC_GETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let mut flags: u32 = 0;
            if matches!(*file.node().fsverity.lock(), FsVerityState::FsVerity) {
                flags |= FS_VERITY_FL;
            }
            if file.node().info().casefold {
                flags |= FS_CASEFOLD_FL;
            }
            current_task.write_object(arg, &flags)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_SETFLAGS => {
            // Only the casefold flag is honored; every other bit is ignored.
            track_stub!(TODO("https://fxbug.dev/322875367"), "FS_IOC_SETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let flags: u32 = current_task.read_object(arg)?;
            file.node().update_attributes(locked, current_task, |info| {
                info.casefold = flags & FS_CASEFOLD_FL != 0;
                Ok(())
            })?;
            Ok(SUCCESS)
        }
        FS_IOC_ENABLE_VERITY => {
            Ok(fsverity::ioctl::enable(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_MEASURE_VERITY => {
            Ok(fsverity::ioctl::measure(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_READ_VERITY_METADATA => {
            Ok(fsverity::ioctl::read_metadata(current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_ADD_ENCRYPTION_KEY => {
            let fscrypt_add_key_ref = UserRef::<fscrypt_add_key_arg>::from(arg);
            // The raw key bytes are read from the address immediately following the struct.
            let key_ref_addr = fscrypt_add_key_ref.next()?.addr();
            let mut fscrypt_add_key_arg = current_task.read_object(fscrypt_add_key_ref.clone())?;
            if fscrypt_add_key_arg.key_id != 0 {
                track_stub!(TODO("https://fxbug.dev/375649227"), "non-zero key ids");
                return error!(ENOTSUP);
            }
            if fscrypt_add_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            // NOTE(review): `raw_size` comes straight from userspace and is not range-checked
            // here; confirm whether `read_memory_to_vec` bounds the allocation.
            let key = current_task
                .read_memory_to_vec(key_ref_addr, fscrypt_add_key_arg.raw_size as usize)?;
            let user_id = current_task.current_creds().uid;

            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let key_identifier = crypt_service.add_wrapping_key(&key, user_id)?;
            // Hand the identifier computed by the crypt service back to the caller.
            fscrypt_add_key_arg.key_spec.u.identifier =
                fscrypt_identifier { value: key_identifier, ..Default::default() };
            current_task.write_object(fscrypt_add_key_ref, &fscrypt_add_key_arg)?;
            Ok(SUCCESS)
        }
        FS_IOC_SET_ENCRYPTION_POLICY => {
            let fscrypt_policy_ref = UserRef::<uapi::fscrypt_policy_v2>::from(arg);
            let policy = current_task.read_object(fscrypt_policy_ref)?;
            if policy.version as u32 != FSCRYPT_POLICY_V2 {
                track_stub!(TODO("https://fxbug.dev/375649656"), "fscrypt policy v1");
                return error!(ENOTSUP);
            }
            // Unsupported flags and encryption modes are only recorded; they do not fail the
            // request.
            if policy.flags != 0 {
                track_stub!(
                    TODO("https://fxbug.dev/375700939"),
                    "fscrypt policy flags",
                    policy.flags
                );
            }
            if policy.contents_encryption_mode as u32 != FSCRYPT_MODE_AES_256_XTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.contents_encryption_mode
                );
            }
            if policy.filenames_encryption_mode as u32 != FSCRYPT_MODE_AES_256_CTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.filenames_encryption_mode
                );
            }
            // A caller that does not own the node needs CAP_FOWNER; a failed capability check
            // is reported as EACCES.
            let user_id = current_task.current_creds().uid;
            if user_id != file.node().info().uid {
                security::check_task_capable(current_task, CAP_FOWNER)
                    .map_err(|_| errno!(EACCES))?;
            }

            // The master key must be known to the crypt service and must have been added by
            // the calling user.
            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            if let Some(users) =
                crypt_service.get_users_for_key(EncryptionKeyId::from(policy.master_key_identifier))
            {
                if !users.contains(&user_id) {
                    return error!(ENOKEY);
                }
            } else {
                track_stub!(
                    TODO("https://fxbug.dev/375067633"),
                    "users with CAP_FOWNER can set encryption policies with unadded keys"
                );
                return error!(ENOKEY);
            }

            // Setting the same key again succeeds; a different key on an already-encrypted
            // node fails with EEXIST.
            let attributes = file.node().fetch_and_refresh_info(locked, current_task)?;
            if let Some(wrapping_key_id) = &attributes.wrapping_key_id {
                if wrapping_key_id != &policy.master_key_identifier {
                    return error!(EEXIST);
                }
            } else {
                // Don't deadlock! update_attributes will also lock the attributes.
                std::mem::drop(attributes);
                file.node().update_attributes(locked, current_task, |info| {
                    info.wrapping_key_id = Some(policy.master_key_identifier);
                    Ok(())
                })?;
            }
            Ok(SUCCESS)
        }
        FS_IOC_REMOVE_ENCRYPTION_KEY => {
            let fscrypt_remove_key_arg_ref = UserRef::<uapi::fscrypt_remove_key_arg>::from(arg);
            let fscrypt_remove_key_arg = current_task.read_object(fscrypt_remove_key_arg_ref)?;
            if fscrypt_remove_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let user_id = current_task.current_creds().uid;
            // NOTE(review): the union read below is presumably sound because `type_` was just
            // checked to be FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, making `identifier` the active
            // member. Confirm, and turn this into a proper SAFETY comment.
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let identifier = unsafe { fscrypt_remove_key_arg.key_spec.u.identifier.value };
            crypt_service.forget_wrapping_key(identifier, user_id)?;
            Ok(SUCCESS)
        }
        _ => {
            track_stub!(TODO("https://fxbug.dev/322874917"), "ioctl fallthrough", request);
            error!(ENOTTY)
        }
    }
}
1150
1151pub fn default_fcntl(cmd: u32) -> Result<SyscallResult, Errno> {
1152    track_stub!(TODO("https://fxbug.dev/322875704"), "default fcntl", cmd);
1153    error!(EINVAL)
1154}
1155
/// Stateless [`FileOps`] implementation whose data operations all fail with `EBADF`.
///
/// Presumably backs files opened with `O_PATH` (inferred from the name; confirm with callers).
pub struct OPathOps {}

impl OPathOps {
    /// Creates a new, stateless `OPathOps`.
    pub fn new() -> Self {
        Self {}
    }
}
1163
1164impl FileOps for OPathOps {
1165    fileops_impl_noop_sync!();
1166
1167    fn has_persistent_offsets(&self) -> bool {
1168        false
1169    }
1170    fn is_seekable(&self) -> bool {
1171        true
1172    }
1173    fn read(
1174        &self,
1175        _locked: &mut Locked<FileOpsCore>,
1176        _file: &FileObject,
1177        _current_task: &CurrentTask,
1178        _offset: usize,
1179        _data: &mut dyn OutputBuffer,
1180    ) -> Result<usize, Errno> {
1181        error!(EBADF)
1182    }
1183    fn write(
1184        &self,
1185        _locked: &mut Locked<FileOpsCore>,
1186        _file: &FileObject,
1187        _current_task: &CurrentTask,
1188        _offset: usize,
1189        _data: &mut dyn InputBuffer,
1190    ) -> Result<usize, Errno> {
1191        error!(EBADF)
1192    }
1193    fn seek(
1194        &self,
1195        _locked: &mut Locked<FileOpsCore>,
1196        _file: &FileObject,
1197        _current_task: &CurrentTask,
1198        _current_offset: off_t,
1199        _target: SeekTarget,
1200    ) -> Result<off_t, Errno> {
1201        error!(EBADF)
1202    }
1203    fn get_memory(
1204        &self,
1205        _locked: &mut Locked<FileOpsCore>,
1206        _file: &FileObject,
1207        _current_task: &CurrentTask,
1208        _length: Option<usize>,
1209        _prot: ProtectionFlags,
1210    ) -> Result<Arc<MemoryObject>, Errno> {
1211        error!(EBADF)
1212    }
1213    fn readdir(
1214        &self,
1215        _locked: &mut Locked<FileOpsCore>,
1216        _file: &FileObject,
1217        _current_task: &CurrentTask,
1218        _sink: &mut dyn DirentSink,
1219    ) -> Result<(), Errno> {
1220        error!(EBADF)
1221    }
1222
1223    fn ioctl(
1224        &self,
1225        _locked: &mut Locked<Unlocked>,
1226        _file: &FileObject,
1227        _current_task: &CurrentTask,
1228        _request: u32,
1229        _arg: SyscallArg,
1230    ) -> Result<SyscallResult, Errno> {
1231        error!(EBADF)
1232    }
1233}
1234
1235pub struct ProxyFileOps(pub FileHandle);
1236
1237impl FileOps for ProxyFileOps {
1238    // `close` is not delegated because the last reference to a `ProxyFileOps` is not
1239    // necessarily the last reference of the proxied file. If this is the case, the
1240    // releaser will handle it.
1241    // These don't take &FileObject making it too hard to handle them properly in the macro
1242    fn has_persistent_offsets(&self) -> bool {
1243        self.0.ops().has_persistent_offsets()
1244    }
1245    fn writes_update_seek_offset(&self) -> bool {
1246        self.0.ops().writes_update_seek_offset()
1247    }
1248    fn is_seekable(&self) -> bool {
1249        self.0.ops().is_seekable()
1250    }
1251    // These take &mut Locked<L> as a second argument
1252    fn flush(
1253        &self,
1254        locked: &mut Locked<FileOpsCore>,
1255        _file: &FileObject,
1256        current_task: &CurrentTask,
1257    ) {
1258        self.0.ops().flush(locked, &self.0, current_task);
1259    }
1260    fn wait_async(
1261        &self,
1262        locked: &mut Locked<FileOpsCore>,
1263        _file: &FileObject,
1264        current_task: &CurrentTask,
1265        waiter: &Waiter,
1266        events: FdEvents,
1267        handler: EventHandler,
1268    ) -> Option<WaitCanceler> {
1269        self.0.ops().wait_async(locked, &self.0, current_task, waiter, events, handler)
1270    }
1271    fn query_events(
1272        &self,
1273        locked: &mut Locked<FileOpsCore>,
1274        _file: &FileObject,
1275        current_task: &CurrentTask,
1276    ) -> Result<FdEvents, Errno> {
1277        self.0.ops().query_events(locked, &self.0, current_task)
1278    }
1279    fn read(
1280        &self,
1281        locked: &mut Locked<FileOpsCore>,
1282        _file: &FileObject,
1283        current_task: &CurrentTask,
1284        offset: usize,
1285        data: &mut dyn OutputBuffer,
1286    ) -> Result<usize, Errno> {
1287        self.0.ops().read(locked, &self.0, current_task, offset, data)
1288    }
1289    fn write(
1290        &self,
1291        locked: &mut Locked<FileOpsCore>,
1292        _file: &FileObject,
1293        current_task: &CurrentTask,
1294        offset: usize,
1295        data: &mut dyn InputBuffer,
1296    ) -> Result<usize, Errno> {
1297        self.0.ops().write(locked, &self.0, current_task, offset, data)
1298    }
1299    fn ioctl(
1300        &self,
1301        locked: &mut Locked<Unlocked>,
1302        _file: &FileObject,
1303        current_task: &CurrentTask,
1304        request: u32,
1305        arg: SyscallArg,
1306    ) -> Result<SyscallResult, Errno> {
1307        self.0.ops().ioctl(locked, &self.0, current_task, request, arg)
1308    }
1309    fn fcntl(
1310        &self,
1311        _file: &FileObject,
1312        current_task: &CurrentTask,
1313        cmd: u32,
1314        arg: u64,
1315    ) -> Result<SyscallResult, Errno> {
1316        self.0.ops().fcntl(&self.0, current_task, cmd, arg)
1317    }
1318    fn readdir(
1319        &self,
1320        locked: &mut Locked<FileOpsCore>,
1321        _file: &FileObject,
1322        current_task: &CurrentTask,
1323        sink: &mut dyn DirentSink,
1324    ) -> Result<(), Errno> {
1325        self.0.ops().readdir(locked, &self.0, current_task, sink)
1326    }
1327    fn sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1328        self.0.ops().sync(&self.0, current_task)
1329    }
1330    fn data_sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1331        self.0.ops().sync(&self.0, current_task)
1332    }
1333    fn get_memory(
1334        &self,
1335        locked: &mut Locked<FileOpsCore>,
1336        _file: &FileObject,
1337        current_task: &CurrentTask,
1338        length: Option<usize>,
1339        prot: ProtectionFlags,
1340    ) -> Result<Arc<MemoryObject>, Errno> {
1341        self.0.ops.get_memory(locked, &self.0, current_task, length, prot)
1342    }
1343    fn mmap(
1344        &self,
1345        locked: &mut Locked<FileOpsCore>,
1346        _file: &FileObject,
1347        current_task: &CurrentTask,
1348        addr: DesiredAddress,
1349        memory_offset: u64,
1350        length: usize,
1351        prot_flags: ProtectionFlags,
1352        options: MappingOptions,
1353        filename: NamespaceNode,
1354    ) -> Result<UserAddress, Errno> {
1355        self.0.ops.mmap(
1356            locked,
1357            &self.0,
1358            current_task,
1359            addr,
1360            memory_offset,
1361            length,
1362            prot_flags,
1363            options,
1364            filename,
1365        )
1366    }
1367    fn seek(
1368        &self,
1369        locked: &mut Locked<FileOpsCore>,
1370        _file: &FileObject,
1371        current_task: &CurrentTask,
1372        offset: off_t,
1373        target: SeekTarget,
1374    ) -> Result<off_t, Errno> {
1375        self.0.ops.seek(locked, &self.0, current_task, offset, target)
1376    }
1377}
1378
1379#[derive(Debug, Default, Copy, Clone)]
1380pub enum FileAsyncOwner {
1381    #[default]
1382    Unowned,
1383    Thread(pid_t),
1384    Process(pid_t),
1385    ProcessGroup(pid_t),
1386}
1387
1388impl FileAsyncOwner {
1389    pub fn validate(self, current_task: &CurrentTask) -> Result<(), Errno> {
1390        match self {
1391            FileAsyncOwner::Unowned => (),
1392            FileAsyncOwner::Thread(id) | FileAsyncOwner::Process(id) => {
1393                Task::from_weak(&current_task.get_task(id))?;
1394            }
1395            FileAsyncOwner::ProcessGroup(pgid) => {
1396                current_task
1397                    .kernel()
1398                    .pids
1399                    .read()
1400                    .get_process_group(pgid)
1401                    .ok_or_else(|| errno!(ESRCH))?;
1402            }
1403        }
1404        Ok(())
1405    }
1406}
1407
1408#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
1409pub struct FileObjectId(u64);
1410
1411impl FileObjectId {
1412    pub fn as_epoll_key(&self) -> EpollKey {
1413        self.0 as EpollKey
1414    }
1415}
1416
/// A session with a file object.
///
/// Each time a client calls open(), we create a new FileObject from the
/// underlying FsNode that receives the open(). This object contains the state
/// that is specific to this session whereas the underlying FsNode contains
/// the state that is shared between all the sessions.
pub struct FileObject {
    /// The operations implementing this file's behavior.
    ops: Box<dyn FileOps>,
    /// The per-session state, exposed through `Deref`.
    state: FileObjectState,
}

/// Lets the fields and methods of [`FileObjectState`] be used directly on a `FileObject`.
impl std::ops::Deref for FileObject {
    type Target = FileObjectState;
    fn deref(&self) -> &Self::Target {
        &self.state
    }
}
1434
/// The state of a [`FileObject`] other than its `FileOps` implementation.
pub struct FileObjectState {
    /// Weak reference to the `FileHandle` of this `FileObject`. This allows to retrieve the
    /// `FileHandle` from a `FileObject`.
    pub weak_handle: WeakFileHandle,

    /// A unique identifier for this file object.
    pub id: FileObjectId,

    /// The NamespaceNode associated with this FileObject.
    ///
    /// Represents the name the process used to open this file.
    pub name: ActiveNamespaceNode,

    /// The file system of the node this file was opened on.
    pub fs: FileSystemHandle,

    /// The current file offset.
    pub offset: Mutex<off_t>,

    /// The open flags of this file. `CREAT` is stripped when the file object is created.
    flags: Mutex<OpenFlags>,

    /// The owner designated for asynchronous notifications on this file.
    async_owner: Mutex<FileAsyncOwner>,

    /// The epoll files, keyed by file handle, that tracks which `EpollFileObject`s add this
    /// `FileObject` as the control file.
    epoll_files: Mutex<HashMap<FileHandleKey, WeakFileHandle>>,

    /// See fcntl F_SETLEASE and F_GETLEASE.
    lease: Mutex<FileLeaseType>,

    // This extra reference to the FsNode should not be needed, but it is needed to make
    // Inotify.ExcludeUnlinkInodeEvents pass.
    // It is only populated when the file is opened for writing.
    _mysterious_node: Option<FsNodeHandle>,

    /// Opaque security state associated with this file object.
    pub security_state: security::FileObjectState,
}
1470
/// Release action for [`FileObject`]: rather than releasing the object in place, it hands the
/// object to `register_delayed_release`.
pub enum FileObjectReleaserAction {}
impl ReleaserAction<FileObject> for FileObjectReleaserAction {
    fn release(file_object: ReleaseGuard<FileObject>) {
        register_delayed_release(file_object);
    }
}
/// A `FileObject` wrapped in the releaser that applies [`FileObjectReleaserAction`].
pub type FileReleaser = ObjectReleaser<FileObject, FileObjectReleaserAction>;
/// Strong, shared reference to an open file.
pub type FileHandle = Arc<FileReleaser>;
/// Weak counterpart of [`FileHandle`].
pub type WeakFileHandle = Weak<FileReleaser>;
/// Key type built from a weak file reference, used e.g. to key `epoll_files`.
pub type FileHandleKey = WeakKey<FileReleaser>;
1481
1482impl FileObjectState {
1483    /// The FsNode from which this FileObject was created.
1484    pub fn node(&self) -> &FsNodeHandle {
1485        &self.name.entry.node
1486    }
1487
1488    pub fn flags(&self) -> OpenFlags {
1489        *self.flags.lock()
1490    }
1491
1492    pub fn can_read(&self) -> bool {
1493        // TODO: Consider caching the access mode outside of this lock
1494        // because it cannot change.
1495        self.flags.lock().can_read()
1496    }
1497
1498    pub fn can_write(&self) -> bool {
1499        // TODO: Consider caching the access mode outside of this lock
1500        // because it cannot change.
1501        self.flags.lock().can_write()
1502    }
1503
1504    /// Returns false if the file is not allowed to be executed.
1505    pub fn can_exec(&self) -> bool {
1506        let mounted_no_exec = self.name.to_passive().mount.flags().contains(MountFlags::NOEXEC);
1507        let no_exec_seal = self
1508            .node()
1509            .write_guard_state
1510            .lock()
1511            .get_seals()
1512            .map(|seals| seals.contains(SealFlags::NO_EXEC))
1513            .unwrap_or(false);
1514        !(mounted_no_exec || no_exec_seal)
1515    }
1516
1517    // Notifies watchers on the current node and its parent about an event.
1518    pub fn notify(&self, event_mask: InotifyMask) {
1519        self.name.notify(event_mask)
1520    }
1521}
1522
1523impl FileObject {
1524    /// Create a FileObject that is not mounted in a namespace.
1525    ///
1526    /// In particular, this will create a new unrooted entries. This should not be used on
1527    /// file system with persistent entries, as the created entry will be out of sync with the one
1528    /// from the file system.
1529    ///
1530    /// The returned FileObject does not have a name.
1531    pub fn new_anonymous<L>(
1532        locked: &mut Locked<L>,
1533        current_task: &CurrentTask,
1534        ops: Box<dyn FileOps>,
1535        node: FsNodeHandle,
1536        flags: OpenFlags,
1537    ) -> FileHandle
1538    where
1539        L: LockEqualOrBefore<FileOpsCore>,
1540    {
1541        assert!(!node.fs().has_permanent_entries());
1542        Self::new(
1543            locked,
1544            current_task,
1545            ops,
1546            NamespaceNode::new_anonymous_unrooted(current_task, node),
1547            flags,
1548        )
1549        .expect("Failed to create anonymous FileObject")
1550    }
1551
    /// Create a FileObject with an associated NamespaceNode.
    ///
    /// This function is not typically called directly. Instead, consider
    /// calling NamespaceNode::open.
    ///
    /// Fails if the file is opened for writing and the node's write guard cannot be acquired,
    /// or if the `FileOps::open` hook returns an error.
    pub fn new<L>(
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        ops: Box<dyn FileOps>,
        name: NamespaceNode,
        flags: OpenFlags,
    ) -> Result<FileHandle, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        // A writable open must first acquire the node's write guard; only then is the extra
        // node reference kept.
        let _mysterious_node = if flags.can_write() {
            name.entry.node.write_guard_state.lock().acquire(FileWriteGuardMode::WriteFile)?;
            Some(name.entry.node.clone())
        } else {
            None
        };
        let fs = name.entry.node.fs();
        let id = FileObjectId(current_task.kernel.next_file_object_id.next());
        let security_state = security::file_alloc_security(current_task);
        // `new_cyclic` lets the object store a weak reference to its own handle.
        let file = FileHandle::new_cyclic(|weak_handle| {
            Self {
                ops,
                state: FileObjectState {
                    weak_handle: weak_handle.clone(),
                    id,
                    name: name.into_active(),
                    fs,
                    offset: Mutex::new(0),
                    // CREAT is stripped from the flags stored on the file object.
                    flags: Mutex::new(flags - OpenFlags::CREAT),
                    async_owner: Default::default(),
                    epoll_files: Default::default(),
                    lease: Default::default(),
                    _mysterious_node,
                    security_state,
                },
            }
            .into()
        });
        // Watchers are notified of the open before the `open` hook runs.
        file.notify(InotifyMask::OPEN);

        file.ops().open(locked.cast_locked::<FileOpsCore>(), &file, current_task)?;
        Ok(file)
    }
1599
1600    pub fn max_access_for_memory_mapping(&self) -> Access {
1601        let mut access = Access::EXIST;
1602        if self.can_exec() {
1603            access |= Access::EXEC;
1604        }
1605        let flags = self.flags.lock();
1606        if flags.can_read() {
1607            access |= Access::READ;
1608        }
1609        if flags.can_write() {
1610            access |= Access::WRITE;
1611        }
1612        access
1613    }
1614
1615    pub fn ops(&self) -> &dyn FileOps {
1616        self.ops.as_ref()
1617    }
1618
1619    pub fn ops_type_name(&self) -> &'static str {
1620        self.ops().type_name()
1621    }
1622
1623    pub fn is_non_blocking(&self) -> bool {
1624        self.flags().contains(OpenFlags::NONBLOCK)
1625    }
1626
    /// Common implementation for blocking operations.
    ///
    /// This function is used to implement the blocking operations for file objects. FileOps
    /// implementations should call this function to handle the blocking logic.
    ///
    /// The `op` parameter is a function that implements the non-blocking version of the operation.
    /// The function is called once without registering a waiter in case no wait is needed. If the
    /// operation returns EAGAIN and the file object is non-blocking (and was not opened with
    /// `DIRECTORY`), the function returns EAGAIN.
    ///
    /// If the operation returns EAGAIN and the file object is blocking, the function will block
    /// until the given events are triggered. At that time, the operation is retried. Notice that
    /// the `op` function can be called multiple times before the operation completes.
    ///
    /// The `deadline` parameter is the deadline for the operation; `None` waits indefinitely.
    /// If the wait times out before the operation completes, the function returns EAGAIN (the
    /// waiter's ETIMEDOUT is translated). Any other wait error is returned unchanged.
    pub fn blocking_op<L, T, Op>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        events: FdEvents,
        deadline: Option<zx::MonotonicInstant>,
        mut op: Op,
    ) -> Result<T, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
        Op: FnMut(&mut Locked<L>) -> Result<T, Errno>,
    {
        // Don't return EAGAIN for directories. This can happen because glibc always opens a
        // directory with O_NONBLOCK.
        let can_return_eagain = self.flags().contains(OpenFlags::NONBLOCK)
            && !self.flags().contains(OpenFlags::DIRECTORY);
        // Run the operation a first time without registering a waiter in case no wait is needed.
        match op(locked) {
            Err(errno) if errno == EAGAIN && !can_return_eagain => {}
            result => return result,
        }

        let waiter = Waiter::new();
        loop {
            // Register the waiter before running the operation to prevent a race.
            self.wait_async(locked, current_task, &waiter, events, WaitCallback::none());
            match op(locked) {
                Err(e) if e == EAGAIN => {}
                result => return result,
            }
            let locked = locked.cast_locked::<FileOpsCore>();
            // A timed-out wait is reported to the caller as EAGAIN rather than ETIMEDOUT.
            waiter
                .wait_until(
                    locked,
                    current_task,
                    deadline.unwrap_or(zx::MonotonicInstant::INFINITE),
                )
                .map_err(|e| if e == ETIMEDOUT { errno!(EAGAIN) } else { e })?;
        }
    }
1682
1683    pub fn is_seekable(&self) -> bool {
1684        self.ops().is_seekable()
1685    }
1686
1687    pub fn has_persistent_offsets(&self) -> bool {
1688        self.ops().has_persistent_offsets()
1689    }
1690
1691    /// Common implementation for `read` and `read_at`.
1692    fn read_internal<R>(&self, current_task: &CurrentTask, read: R) -> Result<usize, Errno>
1693    where
1694        R: FnOnce() -> Result<usize, Errno>,
1695    {
1696        security::file_permission(current_task, self, security::PermissionFlags::READ)?;
1697
1698        if !self.can_read() {
1699            return error!(EBADF);
1700        }
1701        let bytes_read = read()?;
1702
1703        // TODO(steveaustin) - omit updating time_access to allow info to be immutable
1704        // and thus allow simultaneous reads.
1705        self.update_atime();
1706        if bytes_read > 0 {
1707            self.notify(InotifyMask::ACCESS);
1708        }
1709
1710        Ok(bytes_read)
1711    }
1712
1713    pub fn read<L>(
1714        &self,
1715        locked: &mut Locked<L>,
1716        current_task: &CurrentTask,
1717        data: &mut dyn OutputBuffer,
1718    ) -> Result<usize, Errno>
1719    where
1720        L: LockEqualOrBefore<FileOpsCore>,
1721    {
1722        self.read_internal(current_task, || {
1723            let locked = locked.cast_locked::<FileOpsCore>();
1724            if !self.ops().has_persistent_offsets() {
1725                if data.available() > MAX_LFS_FILESIZE {
1726                    return error!(EINVAL);
1727                }
1728                return self.ops.read(locked, self, current_task, 0, data);
1729            }
1730
1731            let mut offset_guard = self.offset.lock();
1732            let offset = *offset_guard as usize;
1733            checked_add_offset_and_length(offset, data.available())?;
1734            let read = self.ops.read(locked, self, current_task, offset, data)?;
1735            *offset_guard += read as off_t;
1736            Ok(read)
1737        })
1738    }
1739
1740    pub fn read_at<L>(
1741        &self,
1742        locked: &mut Locked<L>,
1743        current_task: &CurrentTask,
1744        offset: usize,
1745        data: &mut dyn OutputBuffer,
1746    ) -> Result<usize, Errno>
1747    where
1748        L: LockEqualOrBefore<FileOpsCore>,
1749    {
1750        if !self.ops().is_seekable() {
1751            return error!(ESPIPE);
1752        }
1753        checked_add_offset_and_length(offset, data.available())?;
1754        let locked = locked.cast_locked::<FileOpsCore>();
1755        self.read_internal(current_task, || self.ops.read(locked, self, current_task, offset, data))
1756    }
1757
1758    /// Common checks before calling ops().write.
1759    fn write_common<L>(
1760        &self,
1761        locked: &mut Locked<L>,
1762        current_task: &CurrentTask,
1763        offset: usize,
1764        data: &mut dyn InputBuffer,
1765    ) -> Result<usize, Errno>
1766    where
1767        L: LockEqualOrBefore<FileOpsCore>,
1768    {
1769        security::file_permission(current_task, self, security::PermissionFlags::WRITE)?;
1770
1771        // We need to cap the size of `data` to prevent us from growing the file too large,
1772        // according to <https://man7.org/linux/man-pages/man2/write.2.html>:
1773        //
1774        //   The number of bytes written may be less than count if, for example, there is
1775        //   insufficient space on the underlying physical medium, or the RLIMIT_FSIZE resource
1776        //   limit is encountered (see setrlimit(2)),
1777        checked_add_offset_and_length(offset, data.available())?;
1778        let locked = locked.cast_locked::<FileOpsCore>();
1779        self.ops().write(locked, self, current_task, offset, data)
1780    }
1781
1782    /// Common wrapper work for `write` and `write_at`.
1783    fn write_fn<W, L>(
1784        &self,
1785        locked: &mut Locked<L>,
1786        current_task: &CurrentTask,
1787        write: W,
1788    ) -> Result<usize, Errno>
1789    where
1790        L: LockEqualOrBefore<FileOpsCore>,
1791        W: FnOnce(&mut Locked<L>) -> Result<usize, Errno>,
1792    {
1793        if !self.can_write() {
1794            return error!(EBADF);
1795        }
1796        self.node().clear_suid_and_sgid_bits(locked, current_task)?;
1797        let bytes_written = write(locked)?;
1798        self.node().update_ctime_mtime();
1799
1800        if bytes_written > 0 {
1801            self.notify(InotifyMask::MODIFY);
1802        }
1803
1804        Ok(bytes_written)
1805    }
1806
    /// Writes `data` at the file's current offset, going through `write_fn` for the common
    /// pre/post-write bookkeeping.
    ///
    /// * If the ops do not keep persistent offsets, the write is issued at offset 0 and the
    ///   stored offset is not consulted.
    /// * If the file was opened with `OpenFlags::APPEND`, the node's append lock is taken in
    ///   write mode, the offset is moved to the end via `ops().seek(.., SeekTarget::End(0))`, and
    ///   the write happens there.
    /// * Otherwise the append lock is taken in read mode and the write happens at the stored
    ///   offset.
    ///
    /// The stored offset is advanced by the number of bytes written only when
    /// `ops().writes_update_seek_offset()` returns true.
    pub fn write<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        self.write_fn(locked, current_task, |locked| {
            if !self.ops().has_persistent_offsets() {
                return self.write_common(locked, current_task, 0, data);
            }
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            // NOTE(review): a fresh `Unlocked` token is created here instead of reusing the
            // caller's `locked`; the soundness argument is not documented in this block.
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let locked = unsafe { Unlocked::new() };
            // The offset stays locked until the end of the closure, covering both the write and
            // the offset update below.
            let mut offset = self.offset.lock();
            let bytes_written = if self.flags().contains(OpenFlags::APPEND) {
                // `_guard` is scoped to this branch, so the append lock is held across the seek
                // to the end and the write, and dropped before the offset is advanced.
                let (_guard, locked) = self.node().append_lock.write_and(locked, current_task)?;
                *offset = self.ops().seek(
                    locked.cast_locked::<FileOpsCore>(),
                    self,
                    current_task,
                    *offset,
                    SeekTarget::End(0),
                )?;
                self.write_common(locked, current_task, *offset as usize, data)
            } else {
                // Non-append writes take the append lock in read mode only.
                let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;
                self.write_common(locked, current_task, *offset as usize, data)
            }?;
            if self.ops().writes_update_seek_offset() {
                *offset += bytes_written as off_t;
            }
            Ok(bytes_written)
        })
    }
1848
    /// Writes `data` at the explicit `offset`, going through `write_fn` for the common
    /// pre/post-write bookkeeping. The file's stored offset is not read or modified here.
    ///
    /// Fails with `ESPIPE` when the ops are not seekable. The node's append lock is held in read
    /// mode for the duration of the write. When the file was opened with `OpenFlags::APPEND`,
    /// the caller-supplied offset is validated and then replaced by the value returned from
    /// `default_eof_offset`, so the data is written there instead (see the LTP note below).
    pub fn write_at<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        mut offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        if !self.ops().is_seekable() {
            return error!(ESPIPE);
        }
        self.write_fn(locked, current_task, |_locked| {
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            // NOTE(review): the `Locked` passed by `write_fn` is ignored and a fresh `Unlocked`
            // token is created instead; the soundness argument is not documented in this block.
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let locked = unsafe { Unlocked::new() };
            // `_guard` lives until the end of the closure, so the append lock covers the write.
            let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;

            // According to LTP test pwrite04:
            //
            //   POSIX requires that opening a file with the O_APPEND flag should have no effect on the
            //   location at which pwrite() writes data. However, on Linux, if a file is opened with
            //   O_APPEND, pwrite() appends data to the end of the file, regardless of the value of offset.
            if self.flags().contains(OpenFlags::APPEND) && self.ops().is_seekable() {
                // The caller's offset is still validated before it is discarded.
                checked_add_offset_and_length(offset, data.available())?;
                offset = default_eof_offset(locked, self, current_task)? as usize;
            }

            self.write_common(locked, current_task, offset, data)
        })
    }
1885
1886    pub fn seek<L>(
1887        &self,
1888        locked: &mut Locked<L>,
1889        current_task: &CurrentTask,
1890        target: SeekTarget,
1891    ) -> Result<off_t, Errno>
1892    where
1893        L: LockEqualOrBefore<FileOpsCore>,
1894    {
1895        let locked = locked.cast_locked::<FileOpsCore>();
1896        let locked = locked;
1897
1898        if !self.ops().is_seekable() {
1899            return error!(ESPIPE);
1900        }
1901
1902        if !self.ops().has_persistent_offsets() {
1903            return self.ops().seek(locked, self, current_task, 0, target);
1904        }
1905
1906        let mut offset_guard = self.offset.lock();
1907        let new_offset = self.ops().seek(locked, self, current_task, *offset_guard, target)?;
1908        *offset_guard = new_offset;
1909        Ok(new_offset)
1910    }
1911
1912    pub fn sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1913        self.ops().sync(self, current_task)
1914    }
1915
1916    pub fn data_sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1917        self.ops().data_sync(self, current_task)
1918    }
1919
1920    pub fn get_memory<L>(
1921        &self,
1922        locked: &mut Locked<L>,
1923        current_task: &CurrentTask,
1924        length: Option<usize>,
1925        prot: ProtectionFlags,
1926    ) -> Result<Arc<MemoryObject>, Errno>
1927    where
1928        L: LockEqualOrBefore<FileOpsCore>,
1929    {
1930        if prot.contains(ProtectionFlags::READ) && !self.can_read() {
1931            return error!(EACCES);
1932        }
1933        if prot.contains(ProtectionFlags::WRITE) && !self.can_write() {
1934            return error!(EACCES);
1935        }
1936        if prot.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1937            return error!(EPERM);
1938        }
1939        self.ops().get_memory(locked.cast_locked::<FileOpsCore>(), self, current_task, length, prot)
1940    }
1941
1942    pub fn mmap<L>(
1943        &self,
1944        locked: &mut Locked<L>,
1945        current_task: &CurrentTask,
1946        addr: DesiredAddress,
1947        memory_offset: u64,
1948        length: usize,
1949        prot_flags: ProtectionFlags,
1950        options: MappingOptions,
1951        filename: NamespaceNode,
1952    ) -> Result<UserAddress, Errno>
1953    where
1954        L: LockEqualOrBefore<FileOpsCore>,
1955    {
1956        let locked = locked.cast_locked::<FileOpsCore>();
1957        if !self.can_read() {
1958            return error!(EACCES);
1959        }
1960        if prot_flags.contains(ProtectionFlags::WRITE)
1961            && !self.can_write()
1962            && options.contains(MappingOptions::SHARED)
1963        {
1964            return error!(EACCES);
1965        }
1966        if prot_flags.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1967            return error!(EPERM);
1968        }
1969        self.ops().mmap(
1970            locked,
1971            self,
1972            current_task,
1973            addr,
1974            memory_offset,
1975            length,
1976            prot_flags,
1977            options,
1978            filename,
1979        )
1980    }
1981
1982    pub fn readdir<L>(
1983        &self,
1984        locked: &mut Locked<L>,
1985        current_task: &CurrentTask,
1986        sink: &mut dyn DirentSink,
1987    ) -> Result<(), Errno>
1988    where
1989        L: LockEqualOrBefore<FileOpsCore>,
1990    {
1991        let locked = locked.cast_locked::<FileOpsCore>();
1992        if self.name.entry.is_dead() {
1993            return error!(ENOENT);
1994        }
1995
1996        self.ops().readdir(locked, self, current_task, sink)?;
1997        self.update_atime();
1998        self.notify(InotifyMask::ACCESS);
1999        Ok(())
2000    }
2001
2002    pub fn ioctl(
2003        &self,
2004        locked: &mut Locked<Unlocked>,
2005        current_task: &CurrentTask,
2006        request: u32,
2007        arg: SyscallArg,
2008    ) -> Result<SyscallResult, Errno> {
2009        security::check_file_ioctl_access(current_task, &self, request)?;
2010
2011        if request == FIBMAP {
2012            security::check_task_capable(current_task, CAP_SYS_RAWIO)?;
2013
2014            // TODO: https://fxbug.dev/404795644 - eliminate this phoney response when the SELinux
2015            // Test Suite no longer requires it.
2016            if current_task.kernel().features.selinux_test_suite {
2017                let phoney_block = 0xbadf000du32;
2018                current_task.write_object(arg.into(), &phoney_block)?;
2019                return Ok(SUCCESS);
2020            }
2021        }
2022
2023        self.ops().ioctl(locked, self, current_task, request, arg)
2024    }
2025
2026    pub fn fcntl(
2027        &self,
2028        current_task: &CurrentTask,
2029        cmd: u32,
2030        arg: u64,
2031    ) -> Result<SyscallResult, Errno> {
2032        self.ops().fcntl(self, current_task, cmd, arg)
2033    }
2034
2035    pub fn ftruncate<L>(
2036        &self,
2037        locked: &mut Locked<L>,
2038        current_task: &CurrentTask,
2039        length: u64,
2040    ) -> Result<(), Errno>
2041    where
2042        L: LockBefore<BeforeFsNodeAppend>,
2043    {
2044        // The file must be opened with write permissions. Otherwise
2045        // truncating it is forbidden.
2046        if !self.can_write() {
2047            return error!(EINVAL);
2048        }
2049        self.node().ftruncate(locked, current_task, length)?;
2050        self.name.entry.notify_ignoring_excl_unlink(InotifyMask::MODIFY);
2051        Ok(())
2052    }
2053
2054    pub fn fallocate<L>(
2055        &self,
2056        locked: &mut Locked<L>,
2057        current_task: &CurrentTask,
2058        mode: FallocMode,
2059        offset: u64,
2060        length: u64,
2061    ) -> Result<(), Errno>
2062    where
2063        L: LockBefore<BeforeFsNodeAppend>,
2064    {
2065        // If the file is a pipe or FIFO, ESPIPE is returned.
2066        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2067        if self.node().is_fifo() {
2068            return error!(ESPIPE);
2069        }
2070
2071        // Must be a regular file or directory.
2072        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2073        if !self.node().is_dir() && !self.node().is_reg() {
2074            return error!(ENODEV);
2075        }
2076
2077        // The file must be opened with write permissions. Otherwise operation is forbidden.
2078        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2079        if !self.can_write() {
2080            return error!(EBADF);
2081        }
2082
2083        self.node().fallocate(locked, current_task, mode, offset, length)?;
2084        self.notify(InotifyMask::MODIFY);
2085        Ok(())
2086    }
2087
2088    pub fn to_handle(
2089        &self,
2090        current_task: &CurrentTask,
2091    ) -> Result<Option<zx::NullableHandle>, Errno> {
2092        self.ops().to_handle(self, current_task)
2093    }
2094
2095    pub fn get_handles(
2096        &self,
2097        current_task: &CurrentTask,
2098    ) -> Result<Vec<zx::NullableHandle>, Errno> {
2099        self.ops().get_handles(self, current_task)
2100    }
2101
2102    pub fn as_thread_group_key(&self) -> Result<ThreadGroupKey, Errno> {
2103        self.ops().as_thread_group_key(self)
2104    }
2105
2106    pub fn update_file_flags(&self, value: OpenFlags, mask: OpenFlags) {
2107        let mask_bits = mask.bits();
2108        let mut flags = self.flags.lock();
2109        let bits = (flags.bits() & !mask_bits) | (value.bits() & mask_bits);
2110        *flags = OpenFlags::from_bits_truncate(bits);
2111    }
2112
2113    /// Get the async owner of this file.
2114    ///
2115    /// See fcntl(F_GETOWN)
2116    pub fn get_async_owner(&self) -> FileAsyncOwner {
2117        *self.async_owner.lock()
2118    }
2119
2120    /// Set the async owner of this file.
2121    ///
2122    /// See fcntl(F_SETOWN)
2123    pub fn set_async_owner(&self, owner: FileAsyncOwner) {
2124        *self.async_owner.lock() = owner;
2125    }
2126
2127    /// See fcntl(F_GETLEASE)
2128    pub fn get_lease(&self, _current_task: &CurrentTask) -> FileLeaseType {
2129        *self.lease.lock()
2130    }
2131
2132    /// See fcntl(F_SETLEASE)
2133    pub fn set_lease(
2134        &self,
2135        _current_task: &CurrentTask,
2136        lease: FileLeaseType,
2137    ) -> Result<(), Errno> {
2138        if !self.node().is_reg() {
2139            return error!(EINVAL);
2140        }
2141        if lease == FileLeaseType::Read && self.can_write() {
2142            return error!(EAGAIN);
2143        }
2144        *self.lease.lock() = lease;
2145        Ok(())
2146    }
2147
2148    /// Wait on the specified events and call the EventHandler when ready
2149    pub fn wait_async<L>(
2150        &self,
2151        locked: &mut Locked<L>,
2152        current_task: &CurrentTask,
2153        waiter: &Waiter,
2154        events: FdEvents,
2155        handler: EventHandler,
2156    ) -> Option<WaitCanceler>
2157    where
2158        L: LockEqualOrBefore<FileOpsCore>,
2159    {
2160        self.ops().wait_async(
2161            locked.cast_locked::<FileOpsCore>(),
2162            self,
2163            current_task,
2164            waiter,
2165            events,
2166            handler,
2167        )
2168    }
2169
2170    /// The events currently active on this file.
2171    pub fn query_events<L>(
2172        &self,
2173        locked: &mut Locked<L>,
2174        current_task: &CurrentTask,
2175    ) -> Result<FdEvents, Errno>
2176    where
2177        L: LockEqualOrBefore<FileOpsCore>,
2178    {
2179        self.ops()
2180            .query_events(locked.cast_locked::<FileOpsCore>(), self, current_task)
2181            .map(FdEvents::add_equivalent_fd_events)
2182    }
2183
2184    pub fn record_lock(
2185        &self,
2186        locked: &mut Locked<Unlocked>,
2187        current_task: &CurrentTask,
2188        cmd: RecordLockCommand,
2189        flock: uapi::flock,
2190    ) -> Result<Option<uapi::flock>, Errno> {
2191        self.node().record_lock(locked, current_task, self, cmd, flock)
2192    }
2193
2194    pub fn flush<L>(&self, locked: &mut Locked<L>, current_task: &CurrentTask, id: FdTableId)
2195    where
2196        L: LockEqualOrBefore<FileOpsCore>,
2197    {
2198        self.name.entry.node.record_lock_release(RecordLockOwner::FdTable(id));
2199        self.ops().flush(locked.cast_locked::<FileOpsCore>(), self, current_task)
2200    }
2201
2202    fn update_atime(&self) {
2203        if !self.flags().contains(OpenFlags::NOATIME) {
2204            self.name.update_atime();
2205        }
2206    }
2207
2208    pub fn readahead(
2209        &self,
2210        current_task: &CurrentTask,
2211        offset: usize,
2212        length: usize,
2213    ) -> Result<(), Errno> {
2214        // readfile() fails with EBADF if the file was not open for read.
2215        if !self.can_read() {
2216            return error!(EBADF);
2217        }
2218        checked_add_offset_and_length(offset, length)?;
2219        self.ops().readahead(self, current_task, offset, length)
2220    }
2221
2222    pub fn extra_fdinfo(
2223        &self,
2224        locked: &mut Locked<FileOpsCore>,
2225        current_task: &CurrentTask,
2226    ) -> Option<FsString> {
2227        let file = self.weak_handle.upgrade()?;
2228        self.ops().extra_fdinfo(locked, &file, current_task)
2229    }
2230
2231    /// Register the fd number of an `EpollFileObject` that listens to events from this
2232    /// `FileObject`.
2233    pub fn register_epfd(&self, file: &FileHandle) {
2234        self.epoll_files.lock().insert(WeakKey::from(file), file.weak_handle.clone());
2235    }
2236
2237    pub fn unregister_epfd(&self, file: &FileHandle) {
2238        self.epoll_files.lock().remove(&WeakKey::from(file));
2239    }
2240}
2241
// Final teardown of a `FileObject`: detaches it from registered epoll files, releases its write
// guard if it was writable, closes the ops, and emits the close notifications.
impl Releasable for FileObject {
    type Context<'a> = CurrentTaskAndLocked<'a>;

    /// Consumes the `FileObject` and runs its teardown steps in order. `context` supplies the
    /// lock token and the current task used by those steps.
    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
        let (locked, current_task) = context;
        // Release all wake leases associated with this file in the corresponding `WaitObject`
        // of each registered epfd.
        for (_, file) in self.epoll_files.lock().drain() {
            // Entries whose epoll file has already gone away are simply dropped.
            if let Some(file) = file.upgrade() {
                if let Some(epoll_object) = file.downcast_file::<EpollFileObject>() {
                    current_task.kernel().suspend_resume_manager.deactivate_wakeup_source(
                        &WakeupSourceOrigin::Epoll(wakeup_source_name_for_epoll(
                            current_task,
                            self.id.as_epoll_key(),
                        )),
                    );
                    // The result of removing this file from the epoll object is deliberately
                    // ignored.
                    let _ = epoll_object.delete(&self);
                }
            }
        }

        // A writable file holds a `WriteFile` guard on its node; give it back.
        if self.can_write() {
            self.name.entry.node.write_guard_state.lock().release(FileWriteGuardMode::WriteFile);
        }

        let locked = locked.cast_locked::<FileOpsCore>();
        // Move the ops and the state out of `self`; from here on only `state` is used.
        let ops = self.ops;
        let state = self.state;
        ops.close(locked, &state, current_task);
        state.name.entry.node.on_file_closed(&state);
        // Tell inotify watchers whether a writable or a non-writable descriptor was closed.
        let event =
            if state.can_write() { InotifyMask::CLOSE_WRITE } else { InotifyMask::CLOSE_NOWRITE };
        state.notify(event);
    }
}
2277
2278impl fmt::Debug for FileObject {
2279    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2280        f.debug_struct("FileObject")
2281            .field("name", &self.name)
2282            .field("fs", &self.fs.name())
2283            .field("offset", &self.offset)
2284            .field("flags", &self.flags)
2285            .field("ops_ty", &self.ops().type_name())
2286            .finish()
2287    }
2288}
2289
// `FileReleaser` takes no action when woken: the hook below has an intentionally empty body.
impl OnWakeOps for FileReleaser {
    /// No-op; both arguments are ignored.
    fn on_wake(&self, _current_task: &CurrentTask, _baton_lease: &zx::NullableHandle) {}
}
2293
/// A FileObject with the type of its FileOps known. Dereferencing it returns the FileOps.
///
/// Values are produced by `FileObject::downcast_file`.
pub struct DowncastedFile<'a, Ops> {
    /// The file object whose ops were downcast.
    file: &'a FileObject,
    /// The file's ops, viewed as the concrete `Ops` type.
    ops: &'a Ops,
}
// `DowncastedFile` only holds shared references, so it is `Copy` for any `Ops`. The impls are
// written by hand with no bounds on `Ops`.
impl<'a, Ops> Copy for DowncastedFile<'a, Ops> {}
impl<'a, Ops> Clone for DowncastedFile<'a, Ops> {
    /// Cloning is a plain copy of the two references.
    fn clone(&self) -> Self {
        *self
    }
}
2305
2306impl<'a, Ops> DowncastedFile<'a, Ops> {
2307    pub fn file(&self) -> &'a FileObject {
2308        self.file
2309    }
2310}
2311
2312impl<'a, Ops> Deref for DowncastedFile<'a, Ops> {
2313    type Target = &'a Ops;
2314    fn deref(&self) -> &Self::Target {
2315        &self.ops
2316    }
2317}
2318
2319impl FileObject {
2320    /// Returns the `FileObject`'s `FileOps` as a `DowncastedFile<T>`, or `None` if the downcast
2321    /// fails.
2322    ///
2323    /// This is useful for syscalls that only operate on a certain type of file.
2324    pub fn downcast_file<'a, T>(&'a self) -> Option<DowncastedFile<'a, T>>
2325    where
2326        T: 'static,
2327    {
2328        let ops = self.ops().as_any().downcast_ref::<T>()?;
2329        Some(DowncastedFile { file: self, ops })
2330    }
2331}
2332
#[cfg(test)]
mod tests {
    use crate::fs::tmpfs::TmpFs;
    use crate::task::CurrentTask;
    use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
    use crate::testing::*;
    use crate::vfs::MountInfo;
    use crate::vfs::buffers::{VecInputBuffer, VecOutputBuffer};
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::auth::FsCred;
    use starnix_uapi::device_type::DeviceType;
    use starnix_uapi::file_mode::FileMode;
    use starnix_uapi::open_flags::OpenFlags;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use zerocopy::{FromBytes, IntoBytes, LE, U64};

    /// Races appending writes against truncation on the same file while a third party reads it.
    ///
    /// One spawned thread appends the little-endian u64 values 0..2000, another repeatedly
    /// truncates the file to zero length, and the test body keeps reading from offset 0 and
    /// asserts that the values it sees are strictly increasing.
    #[::fuchsia::test]
    async fn test_append_truncate_race() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Set up a regular file named "test" on a fresh tmpfs.
            let kernel = current_task.kernel();
            let root_fs = TmpFs::new_fs(locked, &kernel);
            let mount = MountInfo::detached();
            let root_node = Arc::clone(root_fs.root());
            let file = root_node
                .create_entry(
                    locked,
                    &current_task,
                    &mount,
                    "test".into(),
                    |locked, dir, mount, name| {
                        dir.create_node(
                            locked,
                            &current_task,
                            mount,
                            name,
                            FileMode::IFREG | FileMode::ALLOW_ALL,
                            DeviceType::NONE,
                            FsCred::root(),
                        )
                    },
                )
                .expect("create_node failed");
            // Open it read/write in append mode; the handle is shared by all three parties.
            let file_handle = file
                .open_anonymous(locked, &current_task, OpenFlags::APPEND | OpenFlags::RDWR)
                .expect("open failed");
            // Set by the writer once all values have been appended; stops the other two loops.
            let done = Arc::new(AtomicBool::new(false));

            // Writer thread: appends 2000 increasing u64 values, then signals completion.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                for i in 0..2000 {
                    fh.write(
                        locked,
                        current_task,
                        &mut VecInputBuffer::new(U64::<LE>::new(i).as_bytes()),
                    )
                    .expect("write failed");
                }
                done_clone.store(true, Ordering::SeqCst);
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (write_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // Truncator thread: keeps truncating the file to zero until the writer is done.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                while !done_clone.load(Ordering::SeqCst) {
                    fh.ftruncate(locked, current_task, 0).expect("truncate failed");
                }
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (truncate_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // If we read from the file, we should always find an increasing sequence. If there are
            // races, then we might unexpectedly see zeroes.
            while !done.load(Ordering::SeqCst) {
                let mut buffer = VecOutputBuffer::new(4096);
                let amount = file_handle
                    .read_at(locked, &current_task, 0, &mut buffer)
                    .expect("read failed");
                let mut last = None;
                // Only the bytes actually read are inspected, decoded as 8-byte LE integers.
                let buffer = &Vec::from(buffer)[..amount];
                for i in
                    buffer.chunks_exact(8).map(|chunk| U64::<LE>::read_from_bytes(chunk).unwrap())
                {
                    if let Some(last) = last {
                        assert!(i.get() > last, "buffer: {:?}", buffer);
                    }
                    last = Some(i.get());
                }
            }

            // Collect the results of both spawned closures; each returns `Ok(())`.
            let _ = write_thread().unwrap();
            let _ = truncate_thread().unwrap();
        })
        .await;
    }
}