Skip to main content

starnix_core/vfs/
file_object.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::memory::MemoryObject;
6use crate::mm::{DesiredAddress, MappingName, MappingOptions, MemoryAccessorExt, ProtectionFlags};
7use crate::power::OnWakeOps;
8use crate::security;
9use crate::task::{
10    CurrentTask, CurrentTaskAndLocked, EventHandler, ThreadGroupKey, WaitCallback, WaitCanceler,
11    Waiter, register_delayed_release,
12};
13use crate::vfs::buffers::{InputBuffer, OutputBuffer};
14use crate::vfs::file_server::serve_file;
15use crate::vfs::fsverity::{
16    FsVerityState, {self},
17};
18use crate::vfs::{
19    ActiveNamespaceNode, DirentSink, EpollFileObject, EpollKey, FallocMode, FdTableId,
20    FileSystemHandle, FileWriteGuardMode, FsNodeHandle, FsString, NamespaceNode, RecordLockCommand,
21    RecordLockOwner,
22};
23use starnix_crypt::EncryptionKeyId;
24use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
25use starnix_rcu::RcuAtomic;
26use starnix_types::ownership::ReleaseGuard;
27use starnix_uapi::mount_flags::MountFlags;
28use starnix_uapi::user_address::ArchSpecific;
29
30use fidl::endpoints::ProtocolMarker as _;
31use linux_uapi::{FSCRYPT_MODE_AES_256_CTS, FSCRYPT_MODE_AES_256_XTS};
32use starnix_logging::{CATEGORY_STARNIX_MM, impossible_error, log_error, track_stub};
33use starnix_sync::{
34    BeforeFsNodeAppend, FileAsyncOwnerLock, FileEpollFilesLock, FileLeaseLock, FileObjectOffset,
35    FileOpsCore, LockBefore, LockDepMutex, LockEqualOrBefore, Locked, Unlocked,
36};
37use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
38use starnix_types::math::round_up_to_system_page_size;
39use starnix_types::ownership::Releasable;
40use starnix_uapi::arc_key::WeakKey;
41use starnix_uapi::as_any::AsAny;
42use starnix_uapi::auth::{CAP_FOWNER, CAP_SYS_RAWIO};
43use starnix_uapi::errors::{EAGAIN, ETIMEDOUT, Errno};
44use starnix_uapi::file_lease::FileLeaseType;
45use starnix_uapi::file_mode::Access;
46use starnix_uapi::inotify_mask::InotifyMask;
47use starnix_uapi::open_flags::{AtomicOpenFlags, OpenFlags};
48use starnix_uapi::seal_flags::SealFlags;
49use starnix_uapi::user_address::{UserAddress, UserRef};
50use starnix_uapi::vfs::FdEvents;
51use starnix_uapi::{
52    FIBMAP, FIGETBSZ, FIONBIO, FIONREAD, FIOQSIZE, FS_CASEFOLD_FL, FS_IOC_ADD_ENCRYPTION_KEY,
53    FS_IOC_ENABLE_VERITY, FS_IOC_FSGETXATTR, FS_IOC_FSSETXATTR, FS_IOC_MEASURE_VERITY,
54    FS_IOC_READ_VERITY_METADATA, FS_IOC_REMOVE_ENCRYPTION_KEY, FS_IOC_SET_ENCRYPTION_POLICY,
55    FS_VERITY_FL, FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, FSCRYPT_POLICY_V2, SEEK_CUR, SEEK_DATA,
56    SEEK_END, SEEK_HOLE, SEEK_SET, TCGETS, errno, error, fscrypt_add_key_arg, fscrypt_identifier,
57    fsxattr, off_t, pid_t, uapi,
58};
59use std::collections::HashMap;
60use std::fmt;
61use std::ops::Deref;
62use std::sync::atomic::Ordering;
63use std::sync::{Arc, Weak};
64
/// Largest end offset accepted by [`checked_add_offset_and_length`]; numerically equal to
/// `i64::MAX`.
pub const MAX_LFS_FILESIZE: usize = 0x7fff_ffff_ffff_ffff;
66
67pub fn checked_add_offset_and_length(offset: usize, length: usize) -> Result<usize, Errno> {
68    let end = offset.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
69    if end > MAX_LFS_FILESIZE {
70        return error!(EINVAL);
71    }
72    Ok(end)
73}
74
/// A decoded seek request: the `whence` mode together with its offset argument.
///
/// Built from raw values with [`SeekTarget::from_raw`].
#[derive(Debug)]
pub enum SeekTarget {
    /// Seek to the given offset relative to the start of the file.
    Set(off_t),
    /// Seek to the given offset relative to the current position.
    Cur(off_t),
    /// Seek to the given offset relative to the end of the file.
    End(off_t),
    /// Seek for the first data after the given offset.
    Data(off_t),
    /// Seek for the first hole after the given offset.
    Hole(off_t),
}
88
89impl SeekTarget {
90    pub fn from_raw(whence: u32, offset: off_t) -> Result<SeekTarget, Errno> {
91        match whence {
92            SEEK_SET => Ok(SeekTarget::Set(offset)),
93            SEEK_CUR => Ok(SeekTarget::Cur(offset)),
94            SEEK_END => Ok(SeekTarget::End(offset)),
95            SEEK_DATA => Ok(SeekTarget::Data(offset)),
96            SEEK_HOLE => Ok(SeekTarget::Hole(offset)),
97            _ => error!(EINVAL),
98        }
99    }
100
101    pub fn whence(&self) -> u32 {
102        match self {
103            Self::Set(_) => SEEK_SET,
104            Self::Cur(_) => SEEK_CUR,
105            Self::End(_) => SEEK_END,
106            Self::Data(_) => SEEK_DATA,
107            Self::Hole(_) => SEEK_HOLE,
108        }
109    }
110
111    pub fn offset(&self) -> off_t {
112        match self {
113            Self::Set(off)
114            | Self::Cur(off)
115            | Self::End(off)
116            | Self::Data(off)
117            | Self::Hole(off) => *off,
118        }
119    }
120}
121
/// Corresponds to struct file_operations in Linux, plus any filesystem-specific data.
pub trait FileOps: Send + Sync + AsAny + 'static {
    /// Called when the FileObject is opened/created.
    ///
    /// The default implementation does nothing and reports success.
    fn open(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<(), Errno> {
        Ok(())
    }

    /// Called when the FileObject is destroyed.
    ///
    /// Consumes the boxed ops object. The default implementation does nothing.
    fn close(
        self: Box<Self>,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObjectState,
        _current_task: &CurrentTask,
    ) {
    }

    /// Called every time close() is called on this file, even if the file is not ready to be
    /// released.
    ///
    /// The default implementation does nothing.
    fn flush(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) {
    }

    /// Returns whether the file has meaningful seek offsets. Returning `false` is only an
    /// optimization and will make `FileObject` never hold the offset lock when calling `read`
    /// and `write`.
    ///
    /// Defaults to [`Self::is_seekable`].
    fn has_persistent_offsets(&self) -> bool {
        self.is_seekable()
    }

    /// Returns whether the file is seekable.
    fn is_seekable(&self) -> bool;

    /// Returns true if `write()` operations on the file will update the seek offset.
    ///
    /// Defaults to [`Self::has_persistent_offsets`].
    fn writes_update_seek_offset(&self) -> bool {
        self.has_persistent_offsets()
    }

    /// Read from the file at an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes read.
    fn read(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno>;

    /// Write to the file with an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes written.
    fn write(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>;

    /// Adjust the `current_offset` if the file is seekable.
    ///
    /// Returns the new offset.
    fn seek(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        current_offset: off_t,
        target: SeekTarget,
    ) -> Result<off_t, Errno>;

    /// Syncs cached state associated with the file descriptor to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    ///
    /// The default implementation delegates to the `sync` operation of the file's node.
    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
        file.node().ops().sync(file.node(), current_task)
    }

    /// Syncs cached data, and only enough metadata to retrieve said data, to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    ///
    /// The default implementation performs a full [`Self::sync`].
    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
        // TODO(https://fxbug.dev/297305634) make a default macro once data can be done separately
        self.sync(file, current_task)
    }

    /// Returns a VMO representing this file. At least the requested protection flags must
    /// be set on the VMO. Reading or writing the VMO must read or write the file. If this is not
    /// possible given the requested protection, an error must be returned.
    /// The `length` is a hint for the desired size of the VMO. The returned VMO may be larger or
    /// smaller than the requested length.
    /// This method is typically called by [`Self::mmap`].
    ///
    /// The default implementation fails with `ENODEV`.
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(ENODEV)
    }

    /// Responds to an mmap call. The default implementation calls [`Self::get_memory`] to get a VMO
    /// and then maps it with [`crate::mm::MemoryManager::map`].
    /// Only implement this trait method if your file needs to control mapping, or record where
    /// a VMO gets mapped.
    fn mmap(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        addr: DesiredAddress,
        memory_offset: u64,
        length: usize,
        prot_flags: ProtectionFlags,
        options: MappingOptions,
        filename: NamespaceNode,
    ) -> Result<UserAddress, Errno> {
        default_mmap(
            locked,
            file,
            current_task,
            addr,
            memory_offset,
            length,
            prot_flags,
            options,
            filename,
        )
    }

    /// Respond to a `getdents` or `getdents64` call.
    ///
    /// The `file.offset` lock will be held while entering this method. The implementation must look
    /// at `sink.offset()` to read the current offset into the file.
    ///
    /// The default implementation fails with `ENOTDIR`.
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(ENOTDIR)
    }

    /// Establish a one-shot, edge-triggered, asynchronous wait for the given FdEvents for the
    /// given file and task. Returns `None` if this file does not support blocking waits.
    ///
    /// Active events are not considered. This is similar to the semantics of the
    /// ZX_WAIT_ASYNC_EDGE flag on zx_wait_async. To avoid missing events, the caller must call
    /// query_events after calling this.
    ///
    /// If your file does not support blocking waits, leave this as the default implementation.
    fn wait_async(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _waiter: &Waiter,
        _events: FdEvents,
        _handler: EventHandler,
    ) -> Option<WaitCanceler> {
        None
    }

    /// The events currently active on this file.
    ///
    /// If this function returns `POLLIN` or `POLLOUT`, then FileObject will
    /// add `POLLRDNORM` and `POLLWRNORM`, respectively, which are equivalent in
    /// the Linux UAPI.
    ///
    /// The default implementation reports both `POLLIN` and `POLLOUT`.
    ///
    /// See https://linux.die.net/man/2/poll
    fn query_events(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<FdEvents, Errno> {
        Ok(FdEvents::POLLIN | FdEvents::POLLOUT)
    }

    /// Handles an `ioctl` request on this file.
    ///
    /// The default implementation defers to [`default_ioctl`].
    fn ioctl(
        &self,
        locked: &mut Locked<Unlocked>,
        file: &FileObject,
        current_task: &CurrentTask,
        request: u32,
        arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        default_ioctl(file, locked, current_task, request, arg)
    }

    /// Handles an `fcntl` command on this file.
    ///
    /// The default implementation defers to `default_fcntl`, passing only `cmd`.
    fn fcntl(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        cmd: u32,
        _arg: u64,
    ) -> Result<SyscallResult, Errno> {
        default_fcntl(cmd)
    }

    /// Return a handle that allows access to this file descriptor through the zxio protocols.
    ///
    /// If None is returned, the file will act as if it was a fd to `/dev/null`.
    ///
    /// The default implementation serves the file with `serve_file`, using the current task's
    /// credentials, and returns the resulting channel.
    fn to_handle(
        &self,
        file: &FileObject,
        current_task: &CurrentTask,
    ) -> Result<Option<zx::NullableHandle>, Errno> {
        serve_file(current_task, file, current_task.current_creds().clone())
            .map(|c| Some(c.0.into_channel().into()))
    }

    /// Return a vector of handles. This is used in situations where there is more than one handle
    /// associated with this file descriptor.
    ///
    /// In Fuchsia, there is an expectation that there is a 1:1 mapping between a file descriptor
    /// and a handle. In general, we do not want to violate that rule. This function is intended to
    /// be used in very limited circumstances (compatibility with Linux and Binder), where we need
    /// to violate that rule.
    ///
    /// Specifically, we are using this to implement SyncFiles correctly, where a single SyncFile
    /// can represent multiple SyncPoints. Each SyncPoint contains a zx::Counter.
    ///
    /// If you choose to implement this function, to_handle() should return an error. You must also
    /// be aware that if these handles are passed to Fuchsia over Binder, they will be represented
    /// as a single file descriptor, and you should use the composite_fd library to manage that
    /// file descriptor.
    ///
    /// The default implementation fails with `ENOTSUP`.
    fn get_handles(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<Vec<zx::NullableHandle>, Errno> {
        error!(ENOTSUP)
    }

    /// Returns the thread group key associated with this file.
    ///
    /// Used by pidfd and `/proc/<pid>`. Unlikely to be used by other files.
    ///
    /// The default implementation fails with `EBADF`.
    fn as_thread_group_key(&self, _file: &FileObject) -> Result<ThreadGroupKey, Errno> {
        error!(EBADF)
    }

    /// Handles a readahead request for `_length` bytes starting at `_offset`.
    ///
    /// The default implementation fails with `EINVAL`.
    fn readahead(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _length: usize,
    ) -> Result<(), Errno> {
        error!(EINVAL)
    }

    /// Extra information that is included in the /proc/<pid>/fdinfo/<fd> entry.
    ///
    /// The default implementation provides none.
    fn extra_fdinfo(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileHandle,
        _current_task: &CurrentTask,
    ) -> Option<FsString> {
        None
    }
}
396
/// Marker trait for implementations of `FileOps` that do not need to implement `close` and can
/// therefore be passed as the `FileOps` implementation through a wrapper object (any
/// `Deref<Target = T>` that is `Send + Sync + 'static`).
pub trait CloseFreeSafe {}
400impl<T: FileOps + CloseFreeSafe, P: Deref<Target = T> + Send + Sync + 'static> FileOps for P {
401    fn close(
402        self: Box<Self>,
403        _locked: &mut Locked<FileOpsCore>,
404        _file: &FileObjectState,
405        _current_task: &CurrentTask,
406    ) {
407        // This method cannot be delegated. T being `CloseFreeSafe` this is fine.
408    }
409
410    fn flush(
411        &self,
412        locked: &mut Locked<FileOpsCore>,
413        file: &FileObject,
414        current_task: &CurrentTask,
415    ) {
416        self.deref().flush(locked, file, current_task)
417    }
418
419    fn has_persistent_offsets(&self) -> bool {
420        self.deref().has_persistent_offsets()
421    }
422
423    fn writes_update_seek_offset(&self) -> bool {
424        self.deref().writes_update_seek_offset()
425    }
426
427    fn is_seekable(&self) -> bool {
428        self.deref().is_seekable()
429    }
430
431    fn read(
432        &self,
433        locked: &mut Locked<FileOpsCore>,
434        file: &FileObject,
435        current_task: &CurrentTask,
436        offset: usize,
437        data: &mut dyn OutputBuffer,
438    ) -> Result<usize, Errno> {
439        self.deref().read(locked, file, current_task, offset, data)
440    }
441
442    fn write(
443        &self,
444        locked: &mut Locked<FileOpsCore>,
445        file: &FileObject,
446        current_task: &CurrentTask,
447        offset: usize,
448        data: &mut dyn InputBuffer,
449    ) -> Result<usize, Errno> {
450        self.deref().write(locked, file, current_task, offset, data)
451    }
452
453    fn seek(
454        &self,
455        locked: &mut Locked<FileOpsCore>,
456        file: &FileObject,
457        current_task: &CurrentTask,
458        current_offset: off_t,
459        target: SeekTarget,
460    ) -> Result<off_t, Errno> {
461        self.deref().seek(locked, file, current_task, current_offset, target)
462    }
463
464    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
465        self.deref().sync(file, current_task)
466    }
467
468    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
469        self.deref().data_sync(file, current_task)
470    }
471
472    fn get_memory(
473        &self,
474        locked: &mut Locked<FileOpsCore>,
475        file: &FileObject,
476        current_task: &CurrentTask,
477        length: Option<usize>,
478        prot: ProtectionFlags,
479    ) -> Result<Arc<MemoryObject>, Errno> {
480        self.deref().get_memory(locked, file, current_task, length, prot)
481    }
482
483    fn mmap(
484        &self,
485        locked: &mut Locked<FileOpsCore>,
486        file: &FileObject,
487        current_task: &CurrentTask,
488        addr: DesiredAddress,
489        memory_offset: u64,
490        length: usize,
491        prot_flags: ProtectionFlags,
492        options: MappingOptions,
493        filename: NamespaceNode,
494    ) -> Result<UserAddress, Errno> {
495        self.deref().mmap(
496            locked,
497            file,
498            current_task,
499            addr,
500            memory_offset,
501            length,
502            prot_flags,
503            options,
504            filename,
505        )
506    }
507
508    fn readdir(
509        &self,
510        locked: &mut Locked<FileOpsCore>,
511        file: &FileObject,
512        current_task: &CurrentTask,
513        sink: &mut dyn DirentSink,
514    ) -> Result<(), Errno> {
515        self.deref().readdir(locked, file, current_task, sink)
516    }
517
518    fn wait_async(
519        &self,
520        locked: &mut Locked<FileOpsCore>,
521        file: &FileObject,
522        current_task: &CurrentTask,
523        waiter: &Waiter,
524        events: FdEvents,
525        handler: EventHandler,
526    ) -> Option<WaitCanceler> {
527        self.deref().wait_async(locked, file, current_task, waiter, events, handler)
528    }
529
530    fn query_events(
531        &self,
532        locked: &mut Locked<FileOpsCore>,
533        file: &FileObject,
534        current_task: &CurrentTask,
535    ) -> Result<FdEvents, Errno> {
536        self.deref().query_events(locked, file, current_task)
537    }
538
539    fn ioctl(
540        &self,
541        locked: &mut Locked<Unlocked>,
542        file: &FileObject,
543        current_task: &CurrentTask,
544        request: u32,
545        arg: SyscallArg,
546    ) -> Result<SyscallResult, Errno> {
547        self.deref().ioctl(locked, file, current_task, request, arg)
548    }
549
550    fn fcntl(
551        &self,
552        file: &FileObject,
553        current_task: &CurrentTask,
554        cmd: u32,
555        arg: u64,
556    ) -> Result<SyscallResult, Errno> {
557        self.deref().fcntl(file, current_task, cmd, arg)
558    }
559
560    fn to_handle(
561        &self,
562        file: &FileObject,
563        current_task: &CurrentTask,
564    ) -> Result<Option<zx::NullableHandle>, Errno> {
565        self.deref().to_handle(file, current_task)
566    }
567
568    fn get_handles(
569        &self,
570        file: &FileObject,
571        current_task: &CurrentTask,
572    ) -> Result<Vec<zx::NullableHandle>, Errno> {
573        self.deref().get_handles(file, current_task)
574    }
575
576    fn as_thread_group_key(&self, file: &FileObject) -> Result<ThreadGroupKey, Errno> {
577        self.deref().as_thread_group_key(file)
578    }
579
580    fn readahead(
581        &self,
582        file: &FileObject,
583        current_task: &CurrentTask,
584        offset: usize,
585        length: usize,
586    ) -> Result<(), Errno> {
587        self.deref().readahead(file, current_task, offset, length)
588    }
589
590    fn extra_fdinfo(
591        &self,
592        locked: &mut Locked<FileOpsCore>,
593        file: &FileHandle,
594        current_task: &CurrentTask,
595    ) -> Option<FsString> {
596        self.deref().extra_fdinfo(locked, file, current_task)
597    }
598}
599
600pub fn default_eof_offset<L>(
601    locked: &mut Locked<L>,
602    file: &FileObject,
603    current_task: &CurrentTask,
604) -> Result<off_t, Errno>
605where
606    L: LockEqualOrBefore<FileOpsCore>,
607{
608    Ok(file.node().get_size(locked, current_task)? as off_t)
609}
610
611/// Implement the seek method for a file. The computation from the end of the file must be provided
612/// through a callback.
613///
614/// Errors if the calculated offset is invalid.
615///
616/// - `current_offset`: The current position
617/// - `target`: The location to seek to.
618/// - `compute_end`: Compute the new offset from the end. Return an error if the operation is not
619///    supported.
620pub fn default_seek<F>(
621    current_offset: off_t,
622    target: SeekTarget,
623    compute_end: F,
624) -> Result<off_t, Errno>
625where
626    F: FnOnce() -> Result<off_t, Errno>,
627{
628    let new_offset = match target {
629        SeekTarget::Set(offset) => Some(offset),
630        SeekTarget::Cur(offset) => current_offset.checked_add(offset),
631        SeekTarget::End(offset) => compute_end()?.checked_add(offset),
632        SeekTarget::Data(offset) => {
633            let eof = compute_end().unwrap_or(off_t::MAX);
634            if offset >= eof {
635                return error!(ENXIO);
636            }
637            Some(offset)
638        }
639        SeekTarget::Hole(offset) => {
640            let eof = compute_end()?;
641            if offset >= eof {
642                return error!(ENXIO);
643            }
644            Some(eof)
645        }
646    }
647    .ok_or_else(|| errno!(EINVAL))?;
648
649    if new_offset < 0 {
650        return error!(EINVAL);
651    }
652
653    Ok(new_offset)
654}
655
656/// Implement the seek method for a file without an upper bound on the resulting offset.
657///
658/// This is useful for files without a defined size.
659///
660/// Errors if the calculated offset is invalid.
661///
662/// - `current_offset`: The current position
663/// - `target`: The location to seek to.
664pub fn unbounded_seek(current_offset: off_t, target: SeekTarget) -> Result<off_t, Errno> {
665    default_seek(current_offset, target, || Ok(MAX_LFS_FILESIZE as off_t))
666}
667
/// Implements [`FileOps::read`], [`FileOps::write`] and [`FileOps::seek`] by forwarding to
/// `$delegate`, and reports the file as seekable.
///
/// - `$self`: the receiver identifier (callers pass `self`). It is taken as a macro argument so
///   that the `$delegate` expression can refer to the receiver.
/// - `$delegate`: an expression, evaluated inside each generated method, that yields the object
///   whose `read`, `write` and `seek` are called.
///
/// All types are named through `$crate` / absolute crate paths so the macro does not depend on
/// what the invoking module has imported.
#[macro_export]
macro_rules! fileops_impl_delegate_read_write_and_seek {
    ($self:ident, $delegate:expr) => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.read(locked, file, current_task, offset, data)
        }

        fn write(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.write(locked, file, current_task, offset, data)
        }

        fn seek(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $delegate.seek(locked, file, current_task, current_offset, target)
        }
    };
}
709
/// Implements [`FileOps::is_seekable`] and [`FileOps::seek`] for seekable files.
///
/// Seeking is handled by `default_seek`, with the end of the file taken from
/// `default_eof_offset`.
#[macro_export]
macro_rules! fileops_impl_seekable {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // The end of the file is only looked up if the seek target needs it.
            let eof_offset = || $crate::vfs::default_eof_offset(locked, file, current_task);
            $crate::vfs::default_seek(current_offset, target, eof_offset)
        }
    };
}
732
/// Implements [`FileOps`] methods in a way that makes sense for non-seekable files.
///
/// `is_seekable` reports `false` and every `seek` request fails with `ESPIPE`.
#[macro_export]
macro_rules! fileops_impl_nonseekable {
    () => {
        fn is_seekable(&self) -> bool {
            false
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ESPIPE)
        }
    };
}
753
/// Implements [`FileOps::seek`] methods in a way that makes sense for files that ignore
/// seeking operations and always read/write at offset 0.
///
/// The file reports itself as seekable but without persistent offsets, and every `seek` request
/// succeeds with offset 0.
#[macro_export]
macro_rules! fileops_impl_seekless {
    () => {
        fn has_persistent_offsets(&self) -> bool {
            false
        }

        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // The requested target is ignored: the reported offset is always 0.
            Ok(0)
        }
    };
}
779
/// Implements [`FileOps::read`] and [`FileOps::write`] for files that carry no data: both
/// operations fail with `EINVAL`.
#[macro_export]
macro_rules! fileops_impl_dataless {
    () => {
        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }
    };
}
806
/// Implements [`FileOps`] methods in a way that makes sense for directories. You must implement
/// [`FileOps::seek`] and [`FileOps::readdir`].
///
/// The directory reports itself as seekable, and both `read` and `write` fail with `EISDIR`.
#[macro_export]
macro_rules! fileops_impl_directory {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }

        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }
    };
}
839
/// Implements [`FileOps::seek`] by forwarding the current offset and the target to
/// `unbounded_seek`.
///
/// Only `seek` is generated; `is_seekable` must be provided separately.
#[macro_export]
macro_rules! fileops_impl_unbounded_seek {
    () => {
        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $crate::vfs::unbounded_seek(current_offset, target)
        }
    };
}
855
/// Implements [`FileOps::sync`] as a no-op.
///
/// The call succeeds without doing anything when the file's node is a regular file or a
/// directory, and fails with `EINVAL` for any other kind of node.
#[macro_export]
macro_rules! fileops_impl_noop_sync {
    () => {
        fn sync(
            &self,
            file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            let node = file.node();
            if node.is_reg() || node.is_dir() {
                // Nothing to flush.
                Ok(())
            } else {
                starnix_uapi::error!(EINVAL)
            }
        }
    };
}
871
872// Public re-export of macros allows them to be used like regular rust items.
873
874pub use fileops_impl_dataless;
875pub use fileops_impl_delegate_read_write_and_seek;
876pub use fileops_impl_directory;
877pub use fileops_impl_nonseekable;
878pub use fileops_impl_noop_sync;
879pub use fileops_impl_seekable;
880pub use fileops_impl_seekless;
881pub use fileops_impl_unbounded_seek;
/// Length of an AES-256 key in bytes (256 bits).
pub const AES256_KEY_SIZE: usize = 32;
883
884pub fn canonicalize_ioctl_request(current_task: &CurrentTask, request: u32) -> u32 {
885    if current_task.is_arch32() {
886        match request {
887            uapi::arch32::FS_IOC_GETFLAGS => uapi::FS_IOC_GETFLAGS,
888            uapi::arch32::FS_IOC_SETFLAGS => uapi::FS_IOC_SETFLAGS,
889            _ => request,
890        }
891    } else {
892        request
893    }
894}
895
/// Fallback ioctl handler for file objects.
///
/// The request number is first normalized with `canonicalize_ioctl_request`, then dispatched:
///
/// - `TCGETS` always fails with `ENOTTY`.
/// - `FIGETBSZ` / `FIOQSIZE` write the node's `st_blksize` / `st_size` to the user address in
///   `arg`; they are only accepted for regular files and directories (`ENOTTY` otherwise).
/// - `FIONBIO` sets or clears `OpenFlags::NONBLOCK` based on the `i32` read from `arg`.
/// - `FIONREAD` writes the number of bytes between the current offset and the file size
///   (regular files only).
/// - `FS_IOC_FSGETXATTR` / `FS_IOC_FSSETXATTR` are stubs.
/// - `FS_IOC_GETFLAGS` / `FS_IOC_SETFLAGS` expose the fsverity and casefold flags.
/// - The `FS_IOC_*_VERITY*` requests are forwarded to `fsverity::ioctl`.
/// - The `FS_IOC_*_ENCRYPTION_*` requests talk to the file system's crypt service and fail
///   with `ENOTSUP` when the file system has none.
/// - `FICLONE`, `FICLONERANGE` and `FIDEDUPERANGE` fail with `EOPNOTSUPP`.
///
/// Any other request is recorded with `track_stub!` and fails with `ENOTTY`.
pub fn default_ioctl(
    file: &FileObject,
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    request: u32,
    arg: SyscallArg,
) -> Result<SyscallResult, Errno> {
    match canonicalize_ioctl_request(current_task, request) {
        TCGETS => error!(ENOTTY),
        FIGETBSZ => {
            // Only regular files and directories report a block size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let blocksize = file.node().stat(locked, current_task)?.st_blksize;
            current_task.write_object(arg.into(), &blocksize)?;
            Ok(SUCCESS)
        }
        FIONBIO => {
            let arg_ref = UserAddress::from(arg).into();
            let arg: i32 = current_task.read_object(arg_ref)?;
            let val = if arg == 0 {
                // Clear the NONBLOCK flag
                OpenFlags::empty()
            } else {
                // Set the NONBLOCK flag
                OpenFlags::NONBLOCK
            };
            file.update_file_flags(val, OpenFlags::NONBLOCK);
            Ok(SUCCESS)
        }
        FIOQSIZE => {
            // Only regular files and directories report a size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let size = file.node().stat(locked, current_task)?.st_size;
            current_task.write_object(arg.into(), &size)?;
            Ok(SUCCESS)
        }
        FIONREAD => {
            track_stub!(TODO("https://fxbug.dev/322874897"), "FIONREAD");
            if !file.name.entry.node.is_reg() {
                return error!(ENOTTY);
            }

            // Any failure to refresh the node info is reported as EINVAL.
            let size = file
                .name
                .entry
                .node
                .fetch_and_refresh_info(locked, current_task)
                .map_err(|_| errno!(EINVAL))?
                .size;
            let offset = usize::try_from(file.offset.read()).map_err(|_| errno!(EINVAL))?;
            // Bytes left before the end of the file: zero when the offset is past the end, and
            // clamped to `i32::MAX` when the difference does not fit in an `i32`.
            let remaining =
                if size < offset { 0 } else { i32::try_from(size - offset).unwrap_or(i32::MAX) };
            current_task.write_object(arg.into(), &remaining)?;
            Ok(SUCCESS)
        }
        FS_IOC_FSGETXATTR => {
            // Stub: always reports a default-initialized `fsxattr`.
            track_stub!(TODO("https://fxbug.dev/322875209"), "FS_IOC_FSGETXATTR");
            let arg = UserAddress::from(arg).into();
            current_task.write_object(arg, &fsxattr::default())?;
            Ok(SUCCESS)
        }
        FS_IOC_FSSETXATTR => {
            // Stub: the argument is read (so a bad address still fails) and then discarded.
            track_stub!(TODO("https://fxbug.dev/322875271"), "FS_IOC_FSSETXATTR");
            let arg = UserAddress::from(arg).into();
            let _: fsxattr = current_task.read_object(arg)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_GETFLAGS => {
            track_stub!(TODO("https://fxbug.dev/322874935"), "FS_IOC_GETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let mut flags: u32 = 0;
            if matches!(*file.node().fsverity.lock(), FsVerityState::FsVerity) {
                flags |= FS_VERITY_FL;
            }
            if file.node().info().casefold {
                flags |= FS_CASEFOLD_FL;
            }
            current_task.write_object(arg, &flags)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_SETFLAGS => {
            // Only the casefold bit is applied; every other bit in `flags` is ignored.
            track_stub!(TODO("https://fxbug.dev/322875367"), "FS_IOC_SETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let flags: u32 = current_task.read_object(arg)?;
            file.node().update_attributes(locked, current_task, |info| {
                info.casefold = flags & FS_CASEFOLD_FL != 0;
                Ok(())
            })?;
            Ok(SUCCESS)
        }
        FS_IOC_ENABLE_VERITY => {
            Ok(fsverity::ioctl::enable(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_MEASURE_VERITY => {
            Ok(fsverity::ioctl::measure(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_READ_VERITY_METADATA => {
            Ok(fsverity::ioctl::read_metadata(current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_ADD_ENCRYPTION_KEY => {
            let fscrypt_add_key_ref = UserRef::<fscrypt_add_key_arg>::from(arg);
            // The raw key is read from the address given by `next()` — presumably the bytes
            // that directly follow the fixed-size argument struct in user memory.
            let key_ref_addr = fscrypt_add_key_ref.next()?.addr();
            let mut fscrypt_add_key_arg = current_task.read_object(fscrypt_add_key_ref.clone())?;
            if fscrypt_add_key_arg.key_id != 0 {
                track_stub!(TODO("https://fxbug.dev/375649227"), "non-zero key ids");
                return error!(ENOTSUP);
            }
            if fscrypt_add_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            // NOTE(review): `raw_size` comes straight from user memory and is not range-checked
            // here — confirm that an upper bound is enforced elsewhere.
            let key = current_task
                .read_memory_to_vec(key_ref_addr, fscrypt_add_key_arg.raw_size as usize)?;
            let user_id = current_task.current_creds().uid;

            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let key_identifier = crypt_service.add_wrapping_key(&key, user_id)?;
            // Report the identifier assigned by the crypt service back to the caller.
            fscrypt_add_key_arg.key_spec.u.identifier =
                fscrypt_identifier { value: key_identifier, ..Default::default() };
            current_task.write_object(fscrypt_add_key_ref, &fscrypt_add_key_arg)?;
            Ok(SUCCESS)
        }
        FS_IOC_SET_ENCRYPTION_POLICY => {
            let fscrypt_policy_ref = UserRef::<uapi::fscrypt_policy_v2>::from(arg);
            let policy = current_task.read_object(fscrypt_policy_ref)?;
            if policy.version as u32 != FSCRYPT_POLICY_V2 {
                track_stub!(TODO("https://fxbug.dev/375649656"), "fscrypt policy v1");
                return error!(ENOTSUP);
            }
            // Unsupported flags and encryption modes are only recorded via `track_stub!`; the
            // request is not rejected because of them.
            if policy.flags != 0 {
                track_stub!(
                    TODO("https://fxbug.dev/375700939"),
                    "fscrypt policy flags",
                    policy.flags
                );
            }
            if policy.contents_encryption_mode as u32 != FSCRYPT_MODE_AES_256_XTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.contents_encryption_mode
                );
            }
            if policy.filenames_encryption_mode as u32 != FSCRYPT_MODE_AES_256_CTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.filenames_encryption_mode
                );
            }
            // Callers that do not own the node need CAP_FOWNER; a failed capability check is
            // reported as EACCES.
            let user_id = current_task.current_creds().uid;
            if user_id != file.node().info().uid {
                security::check_task_capable(current_task, CAP_FOWNER)
                    .map_err(|_| errno!(EACCES))?;
            }

            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            // The calling user must be registered as a user of the policy's master key.
            if let Some(users) =
                crypt_service.get_users_for_key(EncryptionKeyId::from(policy.master_key_identifier))
            {
                if !users.contains(&user_id) {
                    return error!(ENOKEY);
                }
            } else {
                track_stub!(
                    TODO("https://fxbug.dev/375067633"),
                    "users with CAP_FOWNER can set encryption policies with unadded keys"
                );
                return error!(ENOKEY);
            }

            let attributes = file.node().fetch_and_refresh_info(locked, current_task)?;
            if let Some(wrapping_key_id) = &attributes.wrapping_key_id {
                // A key is already set: setting the same key again succeeds, a different key
                // fails with EEXIST.
                if wrapping_key_id != &policy.master_key_identifier {
                    return error!(EEXIST);
                }
            } else {
                // Don't deadlock! update_attributes will also lock the attributes.
                std::mem::drop(attributes);
                file.node().update_attributes(locked, current_task, |info| {
                    info.wrapping_key_id = Some(policy.master_key_identifier);
                    Ok(())
                })?;
            }
            Ok(SUCCESS)
        }
        FS_IOC_REMOVE_ENCRYPTION_KEY => {
            let fscrypt_remove_key_arg_ref = UserRef::<uapi::fscrypt_remove_key_arg>::from(arg);
            let fscrypt_remove_key_arg = current_task.read_object(fscrypt_remove_key_arg_ref)?;
            if fscrypt_remove_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let user_id = current_task.current_creds().uid;
            // NOTE(review): the union read below has no SAFETY comment. `key_spec.type_` was
            // checked above to be FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, so `identifier` is
            // presumably the active member — confirm and document as a SAFETY comment.
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let identifier = unsafe { fscrypt_remove_key_arg.key_spec.u.identifier.value };
            crypt_service.forget_wrapping_key(identifier, user_id)?;
            Ok(SUCCESS)
        }
        linux_uapi::FICLONE | linux_uapi::FICLONERANGE | linux_uapi::FIDEDUPERANGE => {
            error!(EOPNOTSUPP)
        }
        _ => {
            track_stub!(TODO("https://fxbug.dev/322874917"), "ioctl fallthrough", request);
            error!(ENOTTY)
        }
    }
}
1116
/// Fallback fcntl handler: records the unhandled `cmd` with `track_stub!` and always fails with
/// `EINVAL`.
pub fn default_fcntl(cmd: u32) -> Result<SyscallResult, Errno> {
    track_stub!(TODO("https://fxbug.dev/322875704"), "default fcntl", cmd);
    error!(EINVAL)
}
1121
/// Default implementation of memory-mapping a file.
///
/// The memory object is obtained from `file.ops.get_memory`, asking for at least
/// `memory_offset + round_up_to_system_page_size(length)` bytes (`EINVAL` if that sum
/// overflows):
///
/// - For `MappingOptions::SHARED` mappings the memory object is used directly, requested with
///   `prot_flags`.
/// - Otherwise the memory is requested readable and non-writable, and a
///   `SNAPSHOT_AT_LEAST_ON_WRITE` child of it is mapped instead (additionally `NO_WRITE` when
///   the mapping is not writable).
///
/// Shared mappings of a writable file also carry a `FileWriteGuardMode::WriteMapping` guard,
/// except in the `F_SEAL_FUTURE_WRITE` case described in the body.
///
/// Returns the address chosen by `map_memory`.
pub fn default_mmap(
    locked: &mut Locked<FileOpsCore>,
    file: &FileObject,
    current_task: &CurrentTask,
    addr: DesiredAddress,
    memory_offset: u64,
    length: usize,
    prot_flags: ProtectionFlags,
    options: MappingOptions,
    filename: NamespaceNode,
) -> Result<UserAddress, Errno> {
    fuchsia_trace::duration!(CATEGORY_STARNIX_MM, "FileOpsDefaultMmap");
    // Smallest size the backing memory must have to cover the requested range.
    let min_memory_size = (memory_offset as usize)
        .checked_add(round_up_to_system_page_size(length)?)
        .ok_or_else(|| errno!(EINVAL))?;
    let mut memory = if options.contains(MappingOptions::SHARED) {
        fuchsia_trace::duration!(CATEGORY_STARNIX_MM, "GetSharedVmo");
        file.ops.get_memory(locked, file, current_task, Some(min_memory_size), prot_flags)?
    } else {
        fuchsia_trace::duration!(CATEGORY_STARNIX_MM, "GetPrivateVmo");
        // TODO(tbodt): Use PRIVATE_CLONE to have the filesystem server do the clone for us.
        // The source memory is requested readable and never writable; writes to a private
        // mapping go to the child created below.
        let base_prot_flags = (prot_flags | ProtectionFlags::READ) - ProtectionFlags::WRITE;
        let memory = file.ops.get_memory(
            locked,
            file,
            current_task,
            Some(min_memory_size),
            base_prot_flags,
        )?;
        let mut clone_flags = zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE;
        if !prot_flags.contains(ProtectionFlags::WRITE) {
            clone_flags |= zx::VmoChildOptions::NO_WRITE;
        }
        fuchsia_trace::duration!(CATEGORY_STARNIX_MM, "CreatePrivateChildVmo");
        Arc::new(memory.create_child(clone_flags, 0, memory.get_size()).map_err(impossible_error)?)
    };

    // Write guard is necessary only for shared mappings. Note that this doesn't depend on
    // `prot_flags` since these can be changed later with `mprotect()`.
    let file_write_guard = if options.contains(MappingOptions::SHARED) && file.can_write() {
        let node = &file.name.entry.node;
        let state = node.write_guard_state.lock();

        // `F_SEAL_FUTURE_WRITE` should allow `mmap(PROT_READ)`, but block
        // `mprotect(PROT_WRITE)`. This is different from `F_SEAL_WRITE`, which blocks
        // `mmap(PROT_READ)`. To handle this case correctly remove `WRITE` right from the
        // VMO handle to ensure `mprotect(PROT_WRITE)` fails.
        let seals = state.get_seals().unwrap_or(SealFlags::empty());
        if seals.contains(SealFlags::FUTURE_WRITE)
            && !seals.contains(SealFlags::WRITE)
            && !prot_flags.contains(ProtectionFlags::WRITE)
        {
            let mut new_rights = zx::Rights::VMO_DEFAULT - zx::Rights::WRITE;
            if prot_flags.contains(ProtectionFlags::EXEC) {
                new_rights |= zx::Rights::EXECUTE;
            }
            memory = Arc::new(memory.duplicate_handle(new_rights).map_err(impossible_error)?);

            // The handle can no longer be written through, so no write guard is taken.
            None
        } else {
            Some(FileWriteGuardMode::WriteMapping)
        }
    } else {
        None
    };

    current_task.mm()?.map_memory(
        addr,
        memory,
        memory_offset,
        length,
        prot_flags,
        file.max_access_for_memory_mapping(),
        options,
        MappingName::File(filename.into_mapping(file_write_guard)?),
    )
}
1199
/// Stateless `FileOps` implementation whose I/O operations all fail with `EBADF`.
pub struct OPathOps {}

impl OPathOps {
    /// Creates a new `OPathOps`.
    pub fn new() -> Self {
        Self {}
    }
}
1207
/// `FileOps` for `OPathOps`: `read`, `write`, `seek`, `get_memory`, `readdir` and `ioctl` all
/// fail with `EBADF`, while `sync` uses the no-op implementation from
/// `fileops_impl_noop_sync!`.
impl FileOps for OPathOps {
    fileops_impl_noop_sync!();

    fn has_persistent_offsets(&self) -> bool {
        false
    }
    // Reported as seekable even though `seek` itself always fails with EBADF.
    fn is_seekable(&self) -> bool {
        true
    }
    fn read(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno> {
        error!(EBADF)
    }
    fn write(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno> {
        error!(EBADF)
    }
    fn seek(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _current_offset: off_t,
        _target: SeekTarget,
    ) -> Result<off_t, Errno> {
        error!(EBADF)
    }
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(EBADF)
    }
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(EBADF)
    }

    fn ioctl(
        &self,
        _locked: &mut Locked<Unlocked>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _request: u32,
        _arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        error!(EBADF)
    }
}
1278
1279pub struct ProxyFileOps(pub FileHandle);
1280
1281impl FileOps for ProxyFileOps {
1282    // `close` is not delegated because the last reference to a `ProxyFileOps` is not
1283    // necessarily the last reference of the proxied file. If this is the case, the
1284    // releaser will handle it.
1285    // These don't take &FileObject making it too hard to handle them properly in the macro
1286    fn has_persistent_offsets(&self) -> bool {
1287        self.0.ops().has_persistent_offsets()
1288    }
1289    fn writes_update_seek_offset(&self) -> bool {
1290        self.0.ops().writes_update_seek_offset()
1291    }
1292    fn is_seekable(&self) -> bool {
1293        self.0.ops().is_seekable()
1294    }
1295    // These take &mut Locked<L> as a second argument
1296    fn flush(
1297        &self,
1298        locked: &mut Locked<FileOpsCore>,
1299        _file: &FileObject,
1300        current_task: &CurrentTask,
1301    ) {
1302        self.0.ops().flush(locked, &self.0, current_task);
1303    }
1304    fn wait_async(
1305        &self,
1306        locked: &mut Locked<FileOpsCore>,
1307        _file: &FileObject,
1308        current_task: &CurrentTask,
1309        waiter: &Waiter,
1310        events: FdEvents,
1311        handler: EventHandler,
1312    ) -> Option<WaitCanceler> {
1313        self.0.ops().wait_async(locked, &self.0, current_task, waiter, events, handler)
1314    }
1315    fn query_events(
1316        &self,
1317        locked: &mut Locked<FileOpsCore>,
1318        _file: &FileObject,
1319        current_task: &CurrentTask,
1320    ) -> Result<FdEvents, Errno> {
1321        self.0.ops().query_events(locked, &self.0, current_task)
1322    }
1323    fn read(
1324        &self,
1325        locked: &mut Locked<FileOpsCore>,
1326        _file: &FileObject,
1327        current_task: &CurrentTask,
1328        offset: usize,
1329        data: &mut dyn OutputBuffer,
1330    ) -> Result<usize, Errno> {
1331        self.0.ops().read(locked, &self.0, current_task, offset, data)
1332    }
1333    fn write(
1334        &self,
1335        locked: &mut Locked<FileOpsCore>,
1336        _file: &FileObject,
1337        current_task: &CurrentTask,
1338        offset: usize,
1339        data: &mut dyn InputBuffer,
1340    ) -> Result<usize, Errno> {
1341        self.0.ops().write(locked, &self.0, current_task, offset, data)
1342    }
1343    fn ioctl(
1344        &self,
1345        locked: &mut Locked<Unlocked>,
1346        _file: &FileObject,
1347        current_task: &CurrentTask,
1348        request: u32,
1349        arg: SyscallArg,
1350    ) -> Result<SyscallResult, Errno> {
1351        self.0.ops().ioctl(locked, &self.0, current_task, request, arg)
1352    }
1353    fn fcntl(
1354        &self,
1355        _file: &FileObject,
1356        current_task: &CurrentTask,
1357        cmd: u32,
1358        arg: u64,
1359    ) -> Result<SyscallResult, Errno> {
1360        self.0.ops().fcntl(&self.0, current_task, cmd, arg)
1361    }
1362    fn readdir(
1363        &self,
1364        locked: &mut Locked<FileOpsCore>,
1365        _file: &FileObject,
1366        current_task: &CurrentTask,
1367        sink: &mut dyn DirentSink,
1368    ) -> Result<(), Errno> {
1369        self.0.ops().readdir(locked, &self.0, current_task, sink)
1370    }
1371    fn sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1372        self.0.ops().sync(&self.0, current_task)
1373    }
1374    fn data_sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1375        self.0.ops().sync(&self.0, current_task)
1376    }
1377    fn get_memory(
1378        &self,
1379        locked: &mut Locked<FileOpsCore>,
1380        _file: &FileObject,
1381        current_task: &CurrentTask,
1382        length: Option<usize>,
1383        prot: ProtectionFlags,
1384    ) -> Result<Arc<MemoryObject>, Errno> {
1385        self.0.ops.get_memory(locked, &self.0, current_task, length, prot)
1386    }
1387    fn mmap(
1388        &self,
1389        locked: &mut Locked<FileOpsCore>,
1390        _file: &FileObject,
1391        current_task: &CurrentTask,
1392        addr: DesiredAddress,
1393        memory_offset: u64,
1394        length: usize,
1395        prot_flags: ProtectionFlags,
1396        options: MappingOptions,
1397        filename: NamespaceNode,
1398    ) -> Result<UserAddress, Errno> {
1399        self.0.ops.mmap(
1400            locked,
1401            &self.0,
1402            current_task,
1403            addr,
1404            memory_offset,
1405            length,
1406            prot_flags,
1407            options,
1408            filename,
1409        )
1410    }
1411    fn seek(
1412        &self,
1413        locked: &mut Locked<FileOpsCore>,
1414        _file: &FileObject,
1415        current_task: &CurrentTask,
1416        offset: off_t,
1417        target: SeekTarget,
1418    ) -> Result<off_t, Errno> {
1419        self.0.ops.seek(locked, &self.0, current_task, offset, target)
1420    }
1421}
1422
/// The owner recorded in a file object's `async_owner` field.
///
/// NOTE(review): presumably this is the target of asynchronous I/O signals, as set with
/// `fcntl(F_SETOWN)` / `fcntl(F_SETOWN_EX)` — confirm against the fcntl implementation.
#[derive(Debug, Default, Copy, Clone)]
pub enum FileAsyncOwner {
    /// No owner has been set. This is the default.
    #[default]
    Unowned,
    /// Owned by the thread with the given id.
    Thread(pid_t),
    /// Owned by the process with the given id.
    Process(pid_t),
    /// Owned by the process group with the given id.
    ProcessGroup(pid_t),
}
1431
1432impl FileAsyncOwner {
1433    pub fn validate(self, current_task: &CurrentTask) -> Result<(), Errno> {
1434        match self {
1435            FileAsyncOwner::Unowned => (),
1436            FileAsyncOwner::Thread(id) | FileAsyncOwner::Process(id) => {
1437                if id != 0 {
1438                    current_task.get_task(id)?;
1439                }
1440            }
1441            FileAsyncOwner::ProcessGroup(pgid) => {
1442                if pgid != 0 {
1443                    current_task
1444                        .kernel()
1445                        .pids
1446                        .read()
1447                        .get_process_group(pgid)
1448                        .ok_or_else(|| errno!(ESRCH))?;
1449                }
1450            }
1451        }
1452        Ok(())
1453    }
1454}
1455
1456#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
1457pub struct FileObjectId(u64);
1458
1459impl FileObjectId {
1460    pub fn as_epoll_key(&self) -> EpollKey {
1461        self.0 as EpollKey
1462    }
1463}
1464
/// A session with a file object.
///
/// Each time a client calls open(), we create a new FileObject from the
/// underlying FsNode that receives the open(). This object contains the state
/// that is specific to this session whereas the underlying FsNode contains
/// the state that is shared between all the sessions.
pub struct FileObject {
    /// The operations implementing this file's behavior.
    ops: Box<dyn FileOps>,
    /// Per-session state; `FileObject` dereferences to it.
    state: FileObjectState,
}
1475
1476impl std::ops::Deref for FileObject {
1477    type Target = FileObjectState;
1478    fn deref(&self) -> &Self::Target {
1479        &self.state
1480    }
1481}
1482
/// The state of a `FileObject`, kept separately from its `FileOps`.
pub struct FileObjectState {
    /// Weak reference to the `FileHandle` of this `FileObject`. This allows to retrieve the
    /// `FileHandle` from a `FileObject`.
    pub weak_handle: WeakFileHandle,

    /// A unique identifier for this file object.
    pub id: FileObjectId,

    /// The NamespaceNode associated with this FileObject.
    ///
    /// Represents the name the process used to open this file.
    pub name: ActiveNamespaceNode,

    /// The file system of the node this file object was created from.
    pub fs: FileSystemHandle,

    /// The file offset of this session; starts at 0.
    pub offset: RcuAtomic<off_t, FileObjectOffset>,

    /// The open flags of this session, stored without `OpenFlags::CREAT`.
    flags: AtomicOpenFlags,

    /// The asynchronous owner of this file; defaults to `FileAsyncOwner::Unowned`.
    async_owner: LockDepMutex<FileAsyncOwner, FileAsyncOwnerLock>,

    /// Weak handles to the epoll files that track this `FileObject` as a control file, keyed by
    /// `FileHandleKey`.
    epoll_files: LockDepMutex<HashMap<FileHandleKey, WeakFileHandle>, FileEpollFilesLock>,

    /// See fcntl F_SETLEASE and F_GETLEASE.
    lease: LockDepMutex<FileLeaseType, FileLeaseLock>,

    // This extra reference to the FsNode should not be needed, but it is needed to make
    // Inotify.ExcludeUnlinkInodeEvents pass.
    _mysterious_node: Option<FsNodeHandle>,

    /// Opaque security state associated with this file object.
    pub security_state: security::FileObjectState,
}
1518
/// Release policy for `FileObject`: instead of being released inline, the object is handed to
/// `register_delayed_release`.
///
/// This is an uninhabited enum used only as a type-level marker.
pub enum FileObjectReleaserAction {}
impl ReleaserAction<FileObject> for FileObjectReleaserAction {
    fn release(file_object: ReleaseGuard<FileObject>) {
        register_delayed_release(file_object);
    }
}
/// A `FileObject` wrapped so that it is released through `FileObjectReleaserAction`.
pub type FileReleaser = ObjectReleaser<FileObject, FileObjectReleaserAction>;
/// Shared, reference-counted handle to an open file.
pub type FileHandle = Arc<FileReleaser>;
/// Weak counterpart of `FileHandle`.
pub type WeakFileHandle = Weak<FileReleaser>;
/// Key type for maps indexed by file handle, built on `WeakKey`.
pub type FileHandleKey = WeakKey<FileReleaser>;
1529
1530impl FileObjectState {
1531    /// The FsNode from which this FileObject was created.
1532    pub fn node(&self) -> &FsNodeHandle {
1533        &self.name.entry.node
1534    }
1535
1536    pub fn flags(&self) -> OpenFlags {
1537        self.flags.load(Ordering::Relaxed)
1538    }
1539
1540    pub fn can_read(&self) -> bool {
1541        self.flags.load(Ordering::Relaxed).can_read()
1542    }
1543
1544    pub fn can_write(&self) -> bool {
1545        self.flags.load(Ordering::Relaxed).can_write()
1546    }
1547
1548    /// Returns false if the file is not allowed to be executed.
1549    pub fn can_exec(&self) -> bool {
1550        let mounted_no_exec = self.name.to_passive().mount.flags().contains(MountFlags::NOEXEC);
1551        let no_exec_seal = self
1552            .node()
1553            .write_guard_state
1554            .lock()
1555            .get_seals()
1556            .map(|seals| seals.contains(SealFlags::NO_EXEC))
1557            .unwrap_or(false);
1558        !(mounted_no_exec || no_exec_seal)
1559    }
1560
1561    // Notifies watchers on the current node and its parent about an event.
1562    pub fn notify(&self, event_mask: InotifyMask) {
1563        self.name.notify(event_mask)
1564    }
1565}
1566
1567impl FileObject {
1568    /// Create a FileObject that is not mounted in a namespace.
1569    ///
1570    /// In particular, this will create a new unrooted entries. This should not be used on
1571    /// file system with persistent entries, as the created entry will be out of sync with the one
1572    /// from the file system.
1573    ///
1574    /// The returned FileObject does not have a name.
1575    pub fn new_anonymous<L>(
1576        locked: &mut Locked<L>,
1577        current_task: &CurrentTask,
1578        ops: Box<dyn FileOps>,
1579        node: FsNodeHandle,
1580        flags: OpenFlags,
1581    ) -> FileHandle
1582    where
1583        L: LockEqualOrBefore<FileOpsCore>,
1584    {
1585        assert!(!node.fs().has_permanent_entries());
1586        Self::new(
1587            locked,
1588            current_task,
1589            ops,
1590            NamespaceNode::new_anonymous_unrooted(current_task, node),
1591            flags,
1592        )
1593        .expect("Failed to create anonymous FileObject")
1594    }
1595
    /// Create a FileObject with an associated NamespaceNode.
    ///
    /// This function is not typically called directly. Instead, consider
    /// calling NamespaceNode::open.
    ///
    /// # Errors
    ///
    /// Returns an error if `flags` allow writing and the node's write guard cannot be acquired,
    /// or if the `open` hook of `ops` fails.
    pub fn new<L>(
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        ops: Box<dyn FileOps>,
        name: NamespaceNode,
        flags: OpenFlags,
    ) -> Result<FileHandle, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        // A writable open must first acquire a `WriteFile` guard on the node; only then is the
        // extra node reference kept.
        let _mysterious_node = if flags.can_write() {
            name.entry.node.write_guard_state.lock().acquire(FileWriteGuardMode::WriteFile)?;
            Some(name.entry.node.clone())
        } else {
            None
        };
        let fs = name.entry.node.fs();
        let id = FileObjectId(current_task.kernel.next_file_object_id.next());
        let security_state = security::file_alloc_security(current_task);
        // `new_cyclic` lets the object store a weak reference to its own handle.
        let file = FileHandle::new_cyclic(|weak_handle| {
            Self {
                ops,
                state: FileObjectState {
                    weak_handle: weak_handle.clone(),
                    id,
                    name: name.into_active(),
                    fs,
                    offset: RcuAtomic::new(0),
                    // `CREAT` is removed from the flags that are stored.
                    flags: AtomicOpenFlags::new(flags - OpenFlags::CREAT),
                    async_owner: Default::default(),
                    epoll_files: Default::default(),
                    lease: Default::default(),
                    _mysterious_node,
                    security_state,
                },
            }
            .into()
        });
        // The OPEN event is sent before the `open` hook runs, so it is emitted even when the
        // hook subsequently fails.
        file.notify(InotifyMask::OPEN);

        file.ops().open(locked.cast_locked::<FileOpsCore>(), &file, current_task)?;
        Ok(file)
    }
1643
1644    pub fn max_access_for_memory_mapping(&self) -> Access {
1645        let mut access = Access::EXIST;
1646        if self.can_exec() {
1647            access |= Access::EXEC;
1648        }
1649        let flags = self.flags.load(Ordering::Relaxed);
1650        if flags.can_read() {
1651            access |= Access::READ;
1652        }
1653        if flags.can_write() {
1654            access |= Access::WRITE;
1655        }
1656        access
1657    }
1658
1659    pub fn ops(&self) -> &dyn FileOps {
1660        self.ops.as_ref()
1661    }
1662
    /// Returns the type name reported by this file's `FileOps` implementation.
    pub fn ops_type_name(&self) -> &'static str {
        self.ops().type_name()
    }
1666
    /// Returns true if `OpenFlags::NONBLOCK` is currently set on this file.
    pub fn is_non_blocking(&self) -> bool {
        self.flags().contains(OpenFlags::NONBLOCK)
    }
1670
    /// Common implementation for blocking operations.
    ///
    /// This function is used to implement the blocking operations for file objects. FileOps
    /// implementations should call this function to handle the blocking logic.
    ///
    /// The `op` parameter is a function that implements the non-blocking version of the operation.
    /// The function is called once without registering a waiter in case no wait is needed. If the
    /// operation returns EAGAIN and the file object is non-blocking, the function returns EAGAIN.
    ///
    /// If the operation returns EAGAIN and the file object is blocking, the function will block
    /// until the given events are triggered. At that time, the operation is retried. Notice that
    /// the `op` function can be called multiple times before the operation completes.
    ///
    /// The `deadline` parameter is the deadline for the operation; `None` means waiting without
    /// a time limit. If the wait times out before the operation completes, the function returns
    /// EAGAIN (the waiter's ETIMEDOUT is converted to EAGAIN). Any other error from the wait is
    /// returned unchanged.
    pub fn blocking_op<L, T, Op>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        events: FdEvents,
        deadline: Option<zx::MonotonicInstant>,
        mut op: Op,
    ) -> Result<T, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
        Op: FnMut(&mut Locked<L>) -> Result<T, Errno>,
    {
        // Don't return EAGAIN for directories. This can happen because glibc always opens a
        // directory with O_NONBLOCK.
        let can_return_eagain = self.flags().contains(OpenFlags::NONBLOCK)
            && !self.flags().contains(OpenFlags::DIRECTORY);
        // Run the operation a first time without registering a waiter in case no wait is needed.
        match op(locked) {
            // Only fall through to the blocking loop when the caller must not see EAGAIN.
            Err(errno) if errno == EAGAIN && !can_return_eagain => {}
            result => return result,
        }

        let waiter = Waiter::new();
        loop {
            // Register the waiter before running the operation to prevent a race.
            self.wait_async(locked, current_task, &waiter, events, WaitCallback::none());
            match op(locked) {
                Err(e) if e == EAGAIN => {}
                result => return result,
            }
            let locked = locked.cast_locked::<FileOpsCore>();
            waiter
                .wait_until(
                    locked,
                    current_task,
                    deadline.unwrap_or(zx::MonotonicInstant::INFINITE),
                )
                // A timed-out wait is reported to the caller as EAGAIN, not ETIMEDOUT.
                .map_err(|e| if e == ETIMEDOUT { errno!(EAGAIN) } else { e })?;
        }
    }
1726
1727    pub fn is_seekable(&self) -> bool {
1728        self.ops().is_seekable()
1729    }
1730
1731    pub fn has_persistent_offsets(&self) -> bool {
1732        self.ops().has_persistent_offsets()
1733    }
1734
1735    /// Common implementation for `read` and `read_at`.
1736    fn read_internal<R>(&self, current_task: &CurrentTask, read: R) -> Result<usize, Errno>
1737    where
1738        R: FnOnce() -> Result<usize, Errno>,
1739    {
1740        security::file_permission(current_task, self, security::PermissionFlags::READ)?;
1741
1742        if !self.can_read() {
1743            return error!(EBADF);
1744        }
1745        let bytes_read = read()?;
1746
1747        // TODO(steveaustin) - omit updating time_access to allow info to be immutable
1748        // and thus allow simultaneous reads.
1749        self.update_atime();
1750        if bytes_read > 0 {
1751            self.notify(InotifyMask::ACCESS);
1752        }
1753
1754        Ok(bytes_read)
1755    }
1756
1757    pub fn read<L>(
1758        &self,
1759        locked: &mut Locked<L>,
1760        current_task: &CurrentTask,
1761        data: &mut dyn OutputBuffer,
1762    ) -> Result<usize, Errno>
1763    where
1764        L: LockEqualOrBefore<FileOpsCore>,
1765    {
1766        self.read_internal(current_task, || {
1767            let locked = locked.cast_locked::<FileOpsCore>();
1768            if !self.ops().has_persistent_offsets() {
1769                if data.available() > MAX_LFS_FILESIZE {
1770                    return error!(EINVAL);
1771                }
1772                return self.ops.read(locked, self, current_task, 0, data);
1773            }
1774
1775            let mut offset_guard = self.offset.copy();
1776            let offset = *offset_guard as usize;
1777            checked_add_offset_and_length(offset, data.available())?;
1778            let read = self.ops.read(locked, self, current_task, offset, data)?;
1779            *offset_guard += read as off_t;
1780            offset_guard.update();
1781            Ok(read)
1782        })
1783    }
1784
1785    pub fn read_at<L>(
1786        &self,
1787        locked: &mut Locked<L>,
1788        current_task: &CurrentTask,
1789        offset: usize,
1790        data: &mut dyn OutputBuffer,
1791    ) -> Result<usize, Errno>
1792    where
1793        L: LockEqualOrBefore<FileOpsCore>,
1794    {
1795        if !self.ops().is_seekable() {
1796            return error!(ESPIPE);
1797        }
1798        checked_add_offset_and_length(offset, data.available())?;
1799        let locked = locked.cast_locked::<FileOpsCore>();
1800        self.read_internal(current_task, || self.ops.read(locked, self, current_task, offset, data))
1801    }
1802
1803    /// Common checks before calling ops().write.
1804    fn write_common<L>(
1805        &self,
1806        locked: &mut Locked<L>,
1807        current_task: &CurrentTask,
1808        offset: usize,
1809        data: &mut dyn InputBuffer,
1810    ) -> Result<usize, Errno>
1811    where
1812        L: LockEqualOrBefore<FileOpsCore>,
1813    {
1814        security::file_permission(current_task, self, security::PermissionFlags::WRITE)?;
1815
1816        // We need to cap the size of `data` to prevent us from growing the file too large,
1817        // according to <https://man7.org/linux/man-pages/man2/write.2.html>:
1818        //
1819        //   The number of bytes written may be less than count if, for example, there is
1820        //   insufficient space on the underlying physical medium, or the RLIMIT_FSIZE resource
1821        //   limit is encountered (see setrlimit(2)),
1822        checked_add_offset_and_length(offset, data.available())?;
1823        let locked = locked.cast_locked::<FileOpsCore>();
1824        self.ops().write(locked, self, current_task, offset, data)
1825    }
1826
1827    /// Common wrapper work for `write` and `write_at`.
1828    fn write_fn<W, L>(
1829        &self,
1830        locked: &mut Locked<L>,
1831        current_task: &CurrentTask,
1832        write: W,
1833    ) -> Result<usize, Errno>
1834    where
1835        L: LockEqualOrBefore<FileOpsCore>,
1836        W: FnOnce(&mut Locked<L>) -> Result<usize, Errno>,
1837    {
1838        if !self.can_write() {
1839            return error!(EBADF);
1840        }
1841        self.node().clear_suid_and_sgid_bits(locked, current_task)?;
1842        let bytes_written = write(locked)?;
1843        self.node().update_ctime_mtime();
1844
1845        if bytes_written > 0 {
1846            self.notify(InotifyMask::MODIFY);
1847        }
1848
1849        Ok(bytes_written)
1850    }
1851
1852    pub fn write<L>(
1853        &self,
1854        locked: &mut Locked<L>,
1855        current_task: &CurrentTask,
1856        data: &mut dyn InputBuffer,
1857    ) -> Result<usize, Errno>
1858    where
1859        L: LockEqualOrBefore<FileOpsCore>,
1860    {
1861        self.write_fn(locked, current_task, |locked| {
1862            if !self.ops().has_persistent_offsets() {
1863                return self.write_common(locked, current_task, 0, data);
1864            }
1865            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
1866            // but FileOpsCore must be after FsNodeAppend
1867            #[allow(
1868                clippy::undocumented_unsafe_blocks,
1869                reason = "Force documented unsafe blocks in Starnix"
1870            )]
1871            let locked = unsafe { Unlocked::new() };
1872            let mut offset = self.offset.copy();
1873            let bytes_written = if self.flags().contains(OpenFlags::APPEND) {
1874                let (_guard, locked) = self.node().ops().append_lock_write(
1875                    locked.cast_locked::<BeforeFsNodeAppend>(),
1876                    self.node(),
1877                    current_task,
1878                )?;
1879                *offset = self.ops().seek(
1880                    locked.cast_locked::<FileOpsCore>(),
1881                    self,
1882                    current_task,
1883                    *offset,
1884                    SeekTarget::End(0),
1885                )?;
1886                self.write_common(locked, current_task, *offset as usize, data)
1887            } else {
1888                let (_guard, locked) = self.node().ops().append_lock_read(
1889                    locked.cast_locked::<BeforeFsNodeAppend>(),
1890                    self.node(),
1891                    current_task,
1892                )?;
1893                self.write_common(locked, current_task, *offset as usize, data)
1894            }?;
1895            if self.ops().writes_update_seek_offset() {
1896                *offset += bytes_written as off_t;
1897            }
1898            offset.update();
1899            Ok(bytes_written)
1900        })
1901    }
1902
1903    pub fn write_at<L>(
1904        &self,
1905        locked: &mut Locked<L>,
1906        current_task: &CurrentTask,
1907        mut offset: usize,
1908        data: &mut dyn InputBuffer,
1909    ) -> Result<usize, Errno>
1910    where
1911        L: LockEqualOrBefore<FileOpsCore>,
1912    {
1913        if !self.ops().is_seekable() {
1914            return error!(ESPIPE);
1915        }
1916        self.write_fn(locked, current_task, |_locked| {
1917            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
1918            // but FileOpsCore must be after FsNodeAppend
1919            #[allow(
1920                clippy::undocumented_unsafe_blocks,
1921                reason = "Force documented unsafe blocks in Starnix"
1922            )]
1923            let locked = unsafe { Unlocked::new() };
1924            if self.flags().contains(OpenFlags::APPEND) {
1925                let (_guard, locked) = self.node().append_lock.write_and(locked, current_task)?;
1926                // According to LTP test pwrite04:
1927                //
1928                //   POSIX requires that opening a file with the O_APPEND flag should have no effect on the
1929                //   location at which pwrite() writes data. However, on Linux, if a file is opened with
1930                //   O_APPEND, pwrite() appends data to the end of the file, regardless of the value of offset.
1931                if self.ops().is_seekable() {
1932                    checked_add_offset_and_length(offset, data.available())?;
1933                    offset = default_eof_offset(locked, self, current_task)? as usize;
1934                }
1935                self.write_common(locked, current_task, offset, data)
1936            } else {
1937                let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;
1938                self.write_common(locked, current_task, offset, data)
1939            }
1940        })
1941    }
1942
1943    pub fn seek<L>(
1944        &self,
1945        locked: &mut Locked<L>,
1946        current_task: &CurrentTask,
1947        target: SeekTarget,
1948    ) -> Result<off_t, Errno>
1949    where
1950        L: LockEqualOrBefore<FileOpsCore>,
1951    {
1952        let locked = locked.cast_locked::<FileOpsCore>();
1953        let locked = locked;
1954
1955        if !self.ops().is_seekable() {
1956            return error!(ESPIPE);
1957        }
1958
1959        if !self.ops().has_persistent_offsets() {
1960            return self.ops().seek(locked, self, current_task, 0, target);
1961        }
1962
1963        let mut offset_guard = self.offset.copy();
1964        let new_offset = self.ops().seek(locked, self, current_task, *offset_guard, target)?;
1965        *offset_guard = new_offset;
1966        offset_guard.update();
1967        Ok(new_offset)
1968    }
1969
1970    pub fn sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1971        self.ops().sync(self, current_task)
1972    }
1973
1974    pub fn data_sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1975        self.ops().data_sync(self, current_task)
1976    }
1977
1978    pub fn get_memory<L>(
1979        &self,
1980        locked: &mut Locked<L>,
1981        current_task: &CurrentTask,
1982        length: Option<usize>,
1983        prot: ProtectionFlags,
1984    ) -> Result<Arc<MemoryObject>, Errno>
1985    where
1986        L: LockEqualOrBefore<FileOpsCore>,
1987    {
1988        if prot.contains(ProtectionFlags::READ) && !self.can_read() {
1989            return error!(EACCES);
1990        }
1991        if prot.contains(ProtectionFlags::WRITE) && !self.can_write() {
1992            return error!(EACCES);
1993        }
1994        if prot.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1995            return error!(EPERM);
1996        }
1997        self.ops().get_memory(locked.cast_locked::<FileOpsCore>(), self, current_task, length, prot)
1998    }
1999
2000    pub fn mmap<L>(
2001        &self,
2002        locked: &mut Locked<L>,
2003        current_task: &CurrentTask,
2004        addr: DesiredAddress,
2005        memory_offset: u64,
2006        length: usize,
2007        prot_flags: ProtectionFlags,
2008        options: MappingOptions,
2009        filename: NamespaceNode,
2010    ) -> Result<UserAddress, Errno>
2011    where
2012        L: LockEqualOrBefore<FileOpsCore>,
2013    {
2014        let locked = locked.cast_locked::<FileOpsCore>();
2015        if !self.can_read() {
2016            return error!(EACCES);
2017        }
2018        if prot_flags.contains(ProtectionFlags::WRITE)
2019            && !self.can_write()
2020            && options.contains(MappingOptions::SHARED)
2021        {
2022            return error!(EACCES);
2023        }
2024        if prot_flags.contains(ProtectionFlags::EXEC) && !self.can_exec() {
2025            return error!(EPERM);
2026        }
2027        self.ops().mmap(
2028            locked,
2029            self,
2030            current_task,
2031            addr,
2032            memory_offset,
2033            length,
2034            prot_flags,
2035            options,
2036            filename,
2037        )
2038    }
2039
2040    pub fn readdir<L>(
2041        &self,
2042        locked: &mut Locked<L>,
2043        current_task: &CurrentTask,
2044        sink: &mut dyn DirentSink,
2045    ) -> Result<(), Errno>
2046    where
2047        L: LockEqualOrBefore<FileOpsCore>,
2048    {
2049        let locked = locked.cast_locked::<FileOpsCore>();
2050        if self.name.entry.is_dead() {
2051            return error!(ENOENT);
2052        }
2053
2054        security::file_permission(current_task, self, security::PermissionFlags::READ)?;
2055
2056        self.ops().readdir(locked, self, current_task, sink)?;
2057        self.update_atime();
2058        self.notify(InotifyMask::ACCESS);
2059        Ok(())
2060    }
2061
2062    pub fn ioctl(
2063        &self,
2064        locked: &mut Locked<Unlocked>,
2065        current_task: &CurrentTask,
2066        request: u32,
2067        arg: SyscallArg,
2068    ) -> Result<SyscallResult, Errno> {
2069        security::check_file_ioctl_access(current_task, &self, request)?;
2070
2071        if request == FIBMAP {
2072            security::check_task_capable(current_task, CAP_SYS_RAWIO)?;
2073
2074            // TODO: https://fxbug.dev/404795644 - eliminate this phoney response when the SELinux
2075            // Test Suite no longer requires it.
2076            if current_task.kernel().features.selinux_test_suite {
2077                let phoney_block = 0xbadf000du32;
2078                current_task.write_object(arg.into(), &phoney_block)?;
2079                return Ok(SUCCESS);
2080            }
2081        }
2082
2083        self.ops().ioctl(locked, self, current_task, request, arg)
2084    }
2085
2086    pub fn fcntl(
2087        &self,
2088        current_task: &CurrentTask,
2089        cmd: u32,
2090        arg: u64,
2091    ) -> Result<SyscallResult, Errno> {
2092        self.ops().fcntl(self, current_task, cmd, arg)
2093    }
2094
2095    pub fn ftruncate<L>(
2096        &self,
2097        locked: &mut Locked<L>,
2098        current_task: &CurrentTask,
2099        length: u64,
2100    ) -> Result<(), Errno>
2101    where
2102        L: LockBefore<BeforeFsNodeAppend>,
2103    {
2104        // The file must be opened with write permissions. Otherwise
2105        // truncating it is forbidden.
2106        if !self.can_write() {
2107            return error!(EINVAL);
2108        }
2109        self.node().ftruncate(locked, current_task, length)?;
2110        self.name.entry.notify_ignoring_excl_unlink(InotifyMask::MODIFY);
2111        Ok(())
2112    }
2113
2114    pub fn fallocate<L>(
2115        &self,
2116        locked: &mut Locked<L>,
2117        current_task: &CurrentTask,
2118        mode: FallocMode,
2119        offset: u64,
2120        length: u64,
2121    ) -> Result<(), Errno>
2122    where
2123        L: LockBefore<BeforeFsNodeAppend>,
2124    {
2125        // If the file is a pipe or FIFO, ESPIPE is returned.
2126        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2127        if self.node().is_fifo() {
2128            return error!(ESPIPE);
2129        }
2130
2131        // Must be a regular file or directory.
2132        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2133        if !self.node().is_dir() && !self.node().is_reg() {
2134            return error!(ENODEV);
2135        }
2136
2137        // The file must be opened with write permissions. Otherwise operation is forbidden.
2138        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2139        if !self.can_write() {
2140            return error!(EBADF);
2141        }
2142
2143        security::file_permission(current_task, self, security::PermissionFlags::WRITE)?;
2144
2145        self.node().fallocate(locked, current_task, mode, offset, length)?;
2146        self.notify(InotifyMask::MODIFY);
2147        Ok(())
2148    }
2149
2150    pub fn to_handle(
2151        &self,
2152        current_task: &CurrentTask,
2153    ) -> Result<Option<zx::NullableHandle>, Errno> {
2154        self.ops().to_handle(self, current_task)
2155    }
2156
2157    pub fn get_handles(
2158        &self,
2159        current_task: &CurrentTask,
2160    ) -> Result<Vec<zx::NullableHandle>, Errno> {
2161        self.ops().get_handles(self, current_task)
2162    }
2163
2164    pub fn as_thread_group_key(&self) -> Result<ThreadGroupKey, Errno> {
2165        self.ops().as_thread_group_key(self)
2166    }
2167
2168    /// Update the file flags.
2169    ///
2170    /// Writes the bits in `value` that are set in `mask` into the file flags.
2171    ///
2172    /// Does not provide any synchronization.
2173    pub fn update_file_flags(&self, value: OpenFlags, mask: OpenFlags) {
2174        self.flags.update(value, mask, Ordering::Relaxed, Ordering::Relaxed);
2175    }
2176
2177    /// Get the async owner of this file.
2178    ///
2179    /// See fcntl(F_GETOWN)
2180    pub fn get_async_owner(&self) -> FileAsyncOwner {
2181        *self.async_owner.lock()
2182    }
2183
2184    /// Set the async owner of this file.
2185    ///
2186    /// See fcntl(F_SETOWN)
2187    pub fn set_async_owner(&self, owner: FileAsyncOwner) {
2188        *self.async_owner.lock() = owner;
2189    }
2190
2191    /// See fcntl(F_GETLEASE)
2192    pub fn get_lease(&self, _current_task: &CurrentTask) -> FileLeaseType {
2193        *self.lease.lock()
2194    }
2195
2196    /// See fcntl(F_SETLEASE)
2197    pub fn set_lease(&self, current_task: &CurrentTask, lease: FileLeaseType) -> Result<(), Errno> {
2198        if !self.node().is_reg() {
2199            return error!(EINVAL);
2200        }
2201        security::check_file_lock_access(current_task, self)?;
2202        if lease == FileLeaseType::Read && self.can_write() {
2203            return error!(EAGAIN);
2204        }
2205        *self.lease.lock() = lease;
2206        Ok(())
2207    }
2208
2209    /// Wait on the specified events and call the EventHandler when ready
2210    pub fn wait_async<L>(
2211        &self,
2212        locked: &mut Locked<L>,
2213        current_task: &CurrentTask,
2214        waiter: &Waiter,
2215        events: FdEvents,
2216        handler: EventHandler,
2217    ) -> Option<WaitCanceler>
2218    where
2219        L: LockEqualOrBefore<FileOpsCore>,
2220    {
2221        self.ops().wait_async(
2222            locked.cast_locked::<FileOpsCore>(),
2223            self,
2224            current_task,
2225            waiter,
2226            events,
2227            handler,
2228        )
2229    }
2230
2231    /// The events currently active on this file.
2232    pub fn query_events<L>(
2233        &self,
2234        locked: &mut Locked<L>,
2235        current_task: &CurrentTask,
2236    ) -> Result<FdEvents, Errno>
2237    where
2238        L: LockEqualOrBefore<FileOpsCore>,
2239    {
2240        self.ops()
2241            .query_events(locked.cast_locked::<FileOpsCore>(), self, current_task)
2242            .map(FdEvents::add_equivalent_fd_events)
2243    }
2244
2245    pub fn record_lock(
2246        &self,
2247        locked: &mut Locked<Unlocked>,
2248        current_task: &CurrentTask,
2249        cmd: RecordLockCommand,
2250        flock: uapi::flock,
2251    ) -> Result<Option<uapi::flock>, Errno> {
2252        security::check_file_lock_access(current_task, self)?;
2253        self.node().record_lock(locked, current_task, self, cmd, flock)
2254    }
2255
2256    pub fn flush<L>(&self, locked: &mut Locked<L>, current_task: &CurrentTask, id: FdTableId)
2257    where
2258        L: LockEqualOrBefore<FileOpsCore>,
2259    {
2260        self.name.entry.node.record_lock_release(RecordLockOwner::FdTable(id));
2261        self.ops().flush(locked.cast_locked::<FileOpsCore>(), self, current_task)
2262    }
2263
2264    fn update_atime(&self) {
2265        if !self.flags().contains(OpenFlags::NOATIME) {
2266            self.name.update_atime();
2267        }
2268    }
2269
2270    pub fn readahead(
2271        &self,
2272        current_task: &CurrentTask,
2273        offset: usize,
2274        length: usize,
2275    ) -> Result<(), Errno> {
2276        // readfile() fails with EBADF if the file was not open for read.
2277        if !self.can_read() {
2278            return error!(EBADF);
2279        }
2280        checked_add_offset_and_length(offset, length)?;
2281        self.ops().readahead(self, current_task, offset, length)
2282    }
2283
2284    pub fn extra_fdinfo(
2285        &self,
2286        locked: &mut Locked<FileOpsCore>,
2287        current_task: &CurrentTask,
2288    ) -> Option<FsString> {
2289        let file = self.weak_handle.upgrade()?;
2290        self.ops().extra_fdinfo(locked, &file, current_task)
2291    }
2292
2293    /// Register the fd number of an `EpollFileObject` that listens to events from this
2294    /// `FileObject`.
2295    pub fn register_epfd(&self, file: &FileHandle) {
2296        self.epoll_files.lock().insert(WeakKey::from(file), file.weak_handle.clone());
2297    }
2298
2299    pub fn unregister_epfd(&self, file: &FileHandle) {
2300        self.epoll_files.lock().remove(&WeakKey::from(file));
2301    }
2302}
2303
2304impl Releasable for FileObject {
2305    type Context<'a> = CurrentTaskAndLocked<'a>;
2306
2307    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
2308        let (locked, current_task) = context;
2309        // Release all wake leases associated with this file in the corresponding `WaitObject`
2310        // of each registered epfd.
2311        for (_, file) in self.epoll_files.lock().drain() {
2312            if let Some(file) = file.upgrade() {
2313                if let Some(epoll_object) = file.downcast_file::<EpollFileObject>() {
2314                    let _ = epoll_object.delete(current_task, &self);
2315                }
2316            }
2317        }
2318
2319        if self.can_write() {
2320            self.name.entry.node.write_guard_state.lock().release(FileWriteGuardMode::WriteFile);
2321        }
2322
2323        let locked = locked.cast_locked::<FileOpsCore>();
2324        let ops = self.ops;
2325        let state = self.state;
2326        ops.close(locked, &state, current_task);
2327        state.name.entry.node.on_file_closed(&state);
2328        let event =
2329            if state.can_write() { InotifyMask::CLOSE_WRITE } else { InotifyMask::CLOSE_NOWRITE };
2330        state.notify(event);
2331    }
2332}
2333
2334impl fmt::Debug for FileObject {
2335    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2336        f.debug_struct("FileObject")
2337            .field("name", &self.name)
2338            .field("fs", &self.fs.name())
2339            .field("offset", &self.offset)
2340            .field("flags", &self.flags)
2341            .field("ops_ty", &self.ops().type_name())
2342            .finish()
2343    }
2344}
2345
2346impl OnWakeOps for FileReleaser {
2347    fn on_wake(&self, _current_task: &CurrentTask, _baton_lease: &zx::NullableHandle) {}
2348}
2349
2350/// A FileObject with the type of its FileOps known. Dereferencing it returns the FileOps.
2351pub struct DowncastedFile<'a, Ops> {
2352    file: &'a FileObject,
2353    ops: &'a Ops,
2354}
2355impl<'a, Ops> Copy for DowncastedFile<'a, Ops> {}
2356impl<'a, Ops> Clone for DowncastedFile<'a, Ops> {
2357    fn clone(&self) -> Self {
2358        *self
2359    }
2360}
2361
2362impl<'a, Ops> DowncastedFile<'a, Ops> {
2363    pub fn file(&self) -> &'a FileObject {
2364        self.file
2365    }
2366}
2367
2368impl<'a, Ops> Deref for DowncastedFile<'a, Ops> {
2369    type Target = &'a Ops;
2370    fn deref(&self) -> &Self::Target {
2371        &self.ops
2372    }
2373}
2374
2375impl FileObject {
2376    /// Returns the `FileObject`'s `FileOps` as a `DowncastedFile<T>`, or `None` if the downcast
2377    /// fails.
2378    ///
2379    /// This is useful for syscalls that only operate on a certain type of file.
2380    pub fn downcast_file<'a, T>(&'a self) -> Option<DowncastedFile<'a, T>>
2381    where
2382        T: 'static,
2383    {
2384        let ops = self.ops().as_any().downcast_ref::<T>()?;
2385        Some(DowncastedFile { file: self, ops })
2386    }
2387}
2388
2389/// Invokes the specified one-way `method` on the `proxy` and waits until the `proxy`'s underlying
2390/// channel has been closed by the peer.
2391///
2392/// This is used in `close()` implementations when the `FileOps` wraps a FIDL resource that provides
2393/// a one-way API to request teardown, and acknowledges completion of teardown by closing the FIDL
2394/// channel, to ensure that the `close()` call does not return until the FIDL server has actually
2395/// processed the teardown request.
2396pub fn call_fidl_and_await_close<P, M>(method: M, proxy: &P)
2397where
2398    P: fidl::endpoints::SynchronousProxy,
2399    M: FnOnce(&P) -> Result<(), fidl::Error>,
2400{
2401    if let Err(e) = method(proxy) {
2402        log_error!("call_fidl_and_await_close: call {} failed: {e:?}", P::Protocol::DEBUG_NAME);
2403        return;
2404    }
2405    let channel = proxy.as_channel();
2406    let result = channel.wait_one(zx::Signals::CHANNEL_PEER_CLOSED, zx::MonotonicInstant::INFINITE);
2407    if let Err(status) = result.to_result() {
2408        log_error!(
2409            "call_fidl_and_await_close: wait_one {} failed: {status:?}",
2410            P::Protocol::DEBUG_NAME
2411        );
2412    }
2413}
2414
#[cfg(test)]
mod tests {
    use crate::fs::tmpfs::TmpFs;
    use crate::task::CurrentTask;
    use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
    use crate::testing::*;
    use crate::vfs::MountInfo;
    use crate::vfs::buffers::{VecInputBuffer, VecOutputBuffer};
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::auth::FsCred;
    use starnix_uapi::device_id::DeviceId;
    use starnix_uapi::file_mode::FileMode;
    use starnix_uapi::open_flags::OpenFlags;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use zerocopy::{FromBytes, IntoBytes, LE, U64};

    /// Runs an appending writer and a truncating thread against the same file while the test
    /// body keeps reading it, and checks that every snapshot holds a strictly increasing
    /// sequence of little-endian u64 values.
    #[::fuchsia::test]
    async fn test_append_truncate_race() {
        spawn_kernel_and_run(async |locked, current_task| {
            let kernel = current_task.kernel();
            let root_fs = TmpFs::new_fs(locked, &kernel);
            let mount = MountInfo::detached();
            let root_node = Arc::clone(root_fs.root());

            // Create a regular file named "test" and open it for appending.
            let file = root_node
                .create_entry(
                    locked,
                    &current_task,
                    &mount,
                    "test".into(),
                    |locked, dir, mount, name| {
                        dir.create_node(
                            locked,
                            &current_task,
                            mount,
                            name,
                            FileMode::IFREG | FileMode::ALLOW_ALL,
                            DeviceId::NONE,
                            FsCred::root(),
                        )
                    },
                )
                .expect("create_node failed");
            let file_handle = file
                .open_anonymous(locked, &current_task, OpenFlags::APPEND | OpenFlags::RDWR)
                .expect("open failed");

            // Set by the writer once all of its writes are done; stops the other two loops.
            let done = Arc::new(AtomicBool::new(false));

            // Writer: appends the values 0..2000, each as 8 little-endian bytes.
            let writer_handle = file_handle.clone();
            let writer_done = done.clone();
            let write_loop = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                for i in 0..2000 {
                    writer_handle
                        .write(
                            locked,
                            current_task,
                            &mut VecInputBuffer::new(U64::<LE>::new(i).as_bytes()),
                        )
                        .expect("write failed");
                }
                writer_done.store(true, Ordering::SeqCst);
                Ok::<(), starnix_uapi::errors::Errno>(())
            };
            let (write_thread, write_request) =
                SpawnRequestBuilder::new().with_sync_closure(write_loop).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(write_request);

            // Truncator: keeps truncating the file to zero length until the writer is done.
            let truncator_handle = file_handle.clone();
            let truncator_done = done.clone();
            let truncate_loop = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                while !truncator_done.load(Ordering::SeqCst) {
                    truncator_handle.ftruncate(locked, current_task, 0).expect("truncate failed");
                }
                Ok::<(), starnix_uapi::errors::Errno>(())
            };
            let (truncate_thread, truncate_request) = SpawnRequestBuilder::new()
                .with_sync_closure(truncate_loop)
                .build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(truncate_request);

            // If we read from the file, we should always find an increasing sequence. If there are
            // races, then we might unexpectedly see zeroes.
            while !done.load(Ordering::SeqCst) {
                let mut output = VecOutputBuffer::new(4096);
                let amount = file_handle
                    .read_at(locked, &current_task, 0, &mut output)
                    .expect("read failed");
                let contents = Vec::from(output);
                let snapshot = &contents[..amount];

                let values = snapshot
                    .chunks_exact(8)
                    .map(|chunk| U64::<LE>::read_from_bytes(chunk).unwrap().get());
                let mut previous = None;
                for value in values {
                    if let Some(previous) = previous {
                        assert!(value > previous, "buffer: {:?}", snapshot);
                    }
                    previous = Some(value);
                }
            }

            let _ = write_thread().unwrap();
            let _ = truncate_thread().unwrap();
        })
        .await;
    }
}