Skip to main content

starnix_core/vfs/
file_object.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::memory::MemoryObject;
6use crate::mm::{DesiredAddress, MappingName, MappingOptions, MemoryAccessorExt, ProtectionFlags};
7use crate::power::OnWakeOps;
8use crate::security;
9use crate::task::{
10    CurrentTask, CurrentTaskAndLocked, EventHandler, Task, ThreadGroupKey, WaitCallback,
11    WaitCanceler, Waiter, register_delayed_release,
12};
13use crate::vfs::buffers::{InputBuffer, OutputBuffer};
14use crate::vfs::file_server::serve_file;
15use crate::vfs::fsverity::{
16    FsVerityState, {self},
17};
18use crate::vfs::{
19    ActiveNamespaceNode, DirentSink, EpollFileObject, EpollKey, FallocMode, FdTableId,
20    FileSystemHandle, FileWriteGuardMode, FsNodeHandle, FsString, NamespaceNode, RecordLockCommand,
21    RecordLockOwner,
22};
23use starnix_crypt::EncryptionKeyId;
24use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
25use starnix_types::ownership::ReleaseGuard;
26use starnix_uapi::mount_flags::MountFlags;
27use starnix_uapi::user_address::ArchSpecific;
28
29use fidl::HandleBased;
30use fidl::endpoints::ProtocolMarker as _;
31use linux_uapi::{FSCRYPT_MODE_AES_256_CTS, FSCRYPT_MODE_AES_256_XTS};
32use starnix_logging::{
33    CATEGORY_STARNIX_MM, impossible_error, log_error, trace_duration, track_stub,
34};
35use starnix_sync::{
36    BeforeFsNodeAppend, FileOpsCore, LockBefore, LockEqualOrBefore, Locked, Mutex, Unlocked,
37};
38use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
39use starnix_types::math::round_up_to_system_page_size;
40use starnix_types::ownership::Releasable;
41use starnix_uapi::arc_key::WeakKey;
42use starnix_uapi::as_any::AsAny;
43use starnix_uapi::auth::{CAP_FOWNER, CAP_SYS_RAWIO};
44use starnix_uapi::errors::{EAGAIN, ETIMEDOUT, Errno};
45use starnix_uapi::file_lease::FileLeaseType;
46use starnix_uapi::file_mode::Access;
47use starnix_uapi::inotify_mask::InotifyMask;
48use starnix_uapi::open_flags::{AtomicOpenFlags, OpenFlags};
49use starnix_uapi::seal_flags::SealFlags;
50use starnix_uapi::user_address::{UserAddress, UserRef};
51use starnix_uapi::vfs::FdEvents;
52use starnix_uapi::{
53    FIBMAP, FIGETBSZ, FIONBIO, FIONREAD, FIOQSIZE, FS_CASEFOLD_FL, FS_IOC_ADD_ENCRYPTION_KEY,
54    FS_IOC_ENABLE_VERITY, FS_IOC_FSGETXATTR, FS_IOC_FSSETXATTR, FS_IOC_MEASURE_VERITY,
55    FS_IOC_READ_VERITY_METADATA, FS_IOC_REMOVE_ENCRYPTION_KEY, FS_IOC_SET_ENCRYPTION_POLICY,
56    FS_VERITY_FL, FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, FSCRYPT_POLICY_V2, SEEK_CUR, SEEK_DATA,
57    SEEK_END, SEEK_HOLE, SEEK_SET, TCGETS, errno, error, fscrypt_add_key_arg, fscrypt_identifier,
58    fsxattr, off_t, pid_t, uapi,
59};
60use std::collections::HashMap;
61use std::fmt;
62use std::ops::Deref;
63use std::sync::atomic::Ordering;
64use std::sync::{Arc, Weak};
65
/// Largest end offset accepted for file operations: 2^63 - 1, i.e. the maximum value of a signed
/// 64-bit integer.
pub const MAX_LFS_FILESIZE: usize = (1 << 63) - 1;
67
68pub fn checked_add_offset_and_length(offset: usize, length: usize) -> Result<usize, Errno> {
69    let end = offset.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
70    if end > MAX_LFS_FILESIZE {
71        return error!(EINVAL);
72    }
73    Ok(end)
74}
75
/// A seek request: the kind of seek (the `whence` value) together with its offset argument.
#[derive(Debug)]
pub enum SeekTarget {
    /// Seek to the given offset relative to the start of the file.
    Set(off_t),
    /// Seek to the given offset relative to the current position.
    Cur(off_t),
    /// Seek to the given offset relative to the end of the file.
    End(off_t),
    /// Seek for the first data after the given offset.
    Data(off_t),
    /// Seek for the first hole after the given offset.
    Hole(off_t),
}
89
90impl SeekTarget {
91    pub fn from_raw(whence: u32, offset: off_t) -> Result<SeekTarget, Errno> {
92        match whence {
93            SEEK_SET => Ok(SeekTarget::Set(offset)),
94            SEEK_CUR => Ok(SeekTarget::Cur(offset)),
95            SEEK_END => Ok(SeekTarget::End(offset)),
96            SEEK_DATA => Ok(SeekTarget::Data(offset)),
97            SEEK_HOLE => Ok(SeekTarget::Hole(offset)),
98            _ => error!(EINVAL),
99        }
100    }
101
102    pub fn whence(&self) -> u32 {
103        match self {
104            Self::Set(_) => SEEK_SET,
105            Self::Cur(_) => SEEK_CUR,
106            Self::End(_) => SEEK_END,
107            Self::Data(_) => SEEK_DATA,
108            Self::Hole(_) => SEEK_HOLE,
109        }
110    }
111
112    pub fn offset(&self) -> off_t {
113        match self {
114            Self::Set(off)
115            | Self::Cur(off)
116            | Self::End(off)
117            | Self::Data(off)
118            | Self::Hole(off) => *off,
119        }
120    }
121}
122
/// Corresponds to struct file_operations in Linux, plus any filesystem-specific data.
///
/// Implementors must provide `is_seekable`, `read`, `write` and `seek`; every other method has a
/// default implementation.
pub trait FileOps: Send + Sync + AsAny + 'static {
    /// Called when the FileObject is opened/created.
    ///
    /// The default implementation does nothing and succeeds.
    fn open(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<(), Errno> {
        Ok(())
    }

    /// Called when the FileObject is destroyed.
    ///
    /// Consumes the boxed ops. The default implementation does nothing.
    fn close(
        self: Box<Self>,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObjectState,
        _current_task: &CurrentTask,
    ) {
    }

    /// Called every time close() is called on this file, even if the file is not ready to be
    /// released.
    ///
    /// The default implementation does nothing.
    fn flush(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) {
    }

    /// Returns whether the file has meaningful seek offsets. Returning `false` is only an
    /// optimization and will make `FileObject` never hold the offset lock when calling `read` and
    /// `write`.
    ///
    /// Defaults to the value of [`Self::is_seekable`].
    fn has_persistent_offsets(&self) -> bool {
        self.is_seekable()
    }

    /// Returns whether the file is seekable.
    fn is_seekable(&self) -> bool;

    /// Returns true if `write()` operations on the file will update the seek offset.
    ///
    /// Defaults to the value of [`Self::has_persistent_offsets`].
    fn writes_update_seek_offset(&self) -> bool {
        self.has_persistent_offsets()
    }

    /// Read from the file at an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes read.
    fn read(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno>;

    /// Write to the file with an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes written.
    fn write(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>;

    /// Adjust the `current_offset` if the file is seekable.
    ///
    /// Returns the new offset.
    fn seek(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        current_offset: off_t,
        target: SeekTarget,
    ) -> Result<off_t, Errno>;

    /// Syncs cached state associated with the file descriptor to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    ///
    /// The default implementation forwards to the `sync` operation of the file's node.
    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
        file.node().ops().sync(file.node(), current_task)
    }

    /// Syncs cached data, and only enough metadata to retrieve said data, to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    ///
    /// The default implementation performs a full [`Self::sync`].
    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
        // TODO(https://fxbug.dev/297305634) make a default macro once data can be done separately
        self.sync(file, current_task)
    }

    /// Returns a VMO representing this file. At least the requested protection flags must
    /// be set on the VMO. Reading or writing the VMO must read or write the file. If this is not
    /// possible given the requested protection, an error must be returned.
    /// The `length` is a hint for the desired size of the VMO. The returned VMO may be larger or
    /// smaller than the requested length.
    /// This method is typically called by [`Self::mmap`].
    ///
    /// The default implementation returns `ENODEV`.
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(ENODEV)
    }

    /// Responds to an mmap call. The default implementation calls [`Self::get_memory`] to get a VMO
    /// and then maps it into the memory manager of `current_task` with `map_memory`.
    /// Only implement this trait method if your file needs to control mapping, or record where
    /// a VMO gets mapped.
    ///
    /// For shared mappings the memory object is requested with `prot_flags` as given. For private
    /// mappings it is requested readable and without write access, and a
    /// `SNAPSHOT_AT_LEAST_ON_WRITE` child of it is mapped instead.
    fn mmap(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        addr: DesiredAddress,
        memory_offset: u64,
        length: usize,
        prot_flags: ProtectionFlags,
        options: MappingOptions,
        filename: NamespaceNode,
    ) -> Result<UserAddress, Errno> {
        trace_duration!(CATEGORY_STARNIX_MM, "FileOpsDefaultMmap");
        // Size hint passed to `get_memory`: the mapping offset plus the page-rounded length.
        // Overflow is reported as EINVAL.
        let min_memory_size = (memory_offset as usize)
            .checked_add(round_up_to_system_page_size(length)?)
            .ok_or_else(|| errno!(EINVAL))?;
        let mut memory = if options.contains(MappingOptions::SHARED) {
            trace_duration!(CATEGORY_STARNIX_MM, "GetSharedVmo");
            self.get_memory(locked, file, current_task, Some(min_memory_size), prot_flags)?
        } else {
            trace_duration!(CATEGORY_STARNIX_MM, "GetPrivateVmo");
            // TODO(tbodt): Use PRIVATE_CLONE to have the filesystem server do the clone for us.
            // The base object only needs to be readable; writes go to the child created below.
            let base_prot_flags = (prot_flags | ProtectionFlags::READ) - ProtectionFlags::WRITE;
            let memory = self.get_memory(
                locked,
                file,
                current_task,
                Some(min_memory_size),
                base_prot_flags,
            )?;
            let mut clone_flags = zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE;
            if !prot_flags.contains(ProtectionFlags::WRITE) {
                clone_flags |= zx::VmoChildOptions::NO_WRITE;
            }
            trace_duration!(CATEGORY_STARNIX_MM, "CreatePrivateChildVmo");
            Arc::new(
                memory.create_child(clone_flags, 0, memory.get_size()).map_err(impossible_error)?,
            )
        };

        // Write guard is necessary only for shared mappings. Note that this doesn't depend on
        // `prot_flags` since these can be changed later with `mprotect()`.
        let file_write_guard = if options.contains(MappingOptions::SHARED) && file.can_write() {
            let node = &file.name.entry.node;
            let state = node.write_guard_state.lock();

            // `F_SEAL_FUTURE_WRITE` should allow `mmap(PROT_READ)`, but block
            // `mprotect(PROT_WRITE)`. This is different from `F_SEAL_WRITE`, which blocks
            // `mmap(PROT_READ)`. To handle this case correctly remove `WRITE` right from the
            // VMO handle to ensure `mprotect(PROT_WRITE)` fails.
            let seals = state.get_seals().unwrap_or(SealFlags::empty());
            if seals.contains(SealFlags::FUTURE_WRITE)
                && !seals.contains(SealFlags::WRITE)
                && !prot_flags.contains(ProtectionFlags::WRITE)
            {
                let mut new_rights = zx::Rights::VMO_DEFAULT - zx::Rights::WRITE;
                if prot_flags.contains(ProtectionFlags::EXEC) {
                    new_rights |= zx::Rights::EXECUTE;
                }
                memory = Arc::new(memory.duplicate_handle(new_rights).map_err(impossible_error)?);

                // The handle can no longer be written through, so no write guard is taken.
                None
            } else {
                Some(FileWriteGuardMode::WriteMapping)
            }
        } else {
            None
        };

        current_task.mm()?.map_memory(
            addr,
            memory,
            memory_offset,
            length,
            prot_flags,
            file.max_access_for_memory_mapping(),
            options,
            MappingName::File(filename.into_mapping(file_write_guard)?),
        )
    }

    /// Responds to a `getdents` or `getdents64` call.
    ///
    /// The `file.offset` lock will be held while entering this method. The implementation must look
    /// at `sink.offset()` to read the current offset into the file.
    ///
    /// The default implementation returns `ENOTDIR`.
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(ENOTDIR)
    }

    /// Establish a one-shot, edge-triggered, asynchronous wait for the given FdEvents for the
    /// given file and task. Returns `None` if this file does not support blocking waits.
    ///
    /// Active events are not considered. This is similar to the semantics of the
    /// ZX_WAIT_ASYNC_EDGE flag on zx_wait_async. To avoid missing events, the caller must call
    /// query_events after calling this.
    ///
    /// If your file does not support blocking waits, leave this as the default implementation.
    fn wait_async(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _waiter: &Waiter,
        _events: FdEvents,
        _handler: EventHandler,
    ) -> Option<WaitCanceler> {
        None
    }

    /// The events currently active on this file.
    ///
    /// If this function returns `POLLIN` or `POLLOUT`, then FileObject will
    /// add `POLLRDNORM` and `POLLWRNORM`, respectively, which are equivalent in
    /// the Linux UAPI.
    ///
    /// The default implementation reports both `POLLIN` and `POLLOUT`.
    ///
    /// See https://linux.die.net/man/2/poll
    fn query_events(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<FdEvents, Errno> {
        Ok(FdEvents::POLLIN | FdEvents::POLLOUT)
    }

    /// Handles an `ioctl` request on this file.
    ///
    /// The default implementation forwards to `default_ioctl`.
    fn ioctl(
        &self,
        locked: &mut Locked<Unlocked>,
        file: &FileObject,
        current_task: &CurrentTask,
        request: u32,
        arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        default_ioctl(file, locked, current_task, request, arg)
    }

    /// Handles an `fcntl` command on this file.
    ///
    /// The default implementation forwards `cmd` to `default_fcntl`.
    fn fcntl(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        cmd: u32,
        _arg: u64,
    ) -> Result<SyscallResult, Errno> {
        default_fcntl(cmd)
    }

    /// Return a handle that allows access to this file descriptor through the zxio protocols.
    ///
    /// If None is returned, the file will act as if it was a fd to `/dev/null`.
    ///
    /// The default implementation serves the file with `serve_file`, using a clone of the current
    /// credentials of `current_task`, and returns the resulting handle.
    fn to_handle(
        &self,
        file: &FileObject,
        current_task: &CurrentTask,
    ) -> Result<Option<zx::NullableHandle>, Errno> {
        serve_file(current_task, file, current_task.current_creds().clone())
            .map(|c| Some(c.0.into_handle().into()))
    }

    // Return a vector of handles. This is used in situations where there is more than one handle
    // associated with this file descriptor.
    //
    // In Fuchsia, there is an expectation that there is a 1:1 mapping between a file descriptor and
    // a handle. In general, we do not want to violate that rule. This function is intended to be
    // used in very limited circumstances (compatibility with Linux and Binder), where we need to
    // violate that rule.
    //
    // Specifically, we are using this to implement SyncFiles correctly, where a single SyncFile can
    // represent multiple SyncPoints. Each SyncPoint contains a zx::Counter.
    //
    // If you choose to implement this function, to_handle() should return an error. You must also
    // be aware that if these handles are passed to Fuchsia over Binder, they will be represented as
    // a single file descriptor, and you should use the composite_fd library to manage that file
    // descriptor.
    //
    // The default implementation returns ENOTSUP.
    fn get_handles(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<Vec<zx::NullableHandle>, Errno> {
        error!(ENOTSUP)
    }

    /// Returns the `ThreadGroupKey` associated with this file.
    ///
    /// Used by pidfd and `/proc/<pid>`. Unlikely to be used by other files.
    ///
    /// The default implementation returns `EBADF`.
    fn as_thread_group_key(&self, _file: &FileObject) -> Result<ThreadGroupKey, Errno> {
        error!(EBADF)
    }

    /// Handles a readahead request for `_length` bytes starting at `_offset`.
    ///
    /// The default implementation returns `EINVAL`.
    fn readahead(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _length: usize,
    ) -> Result<(), Errno> {
        error!(EINVAL)
    }

    /// Extra information that is included in the /proc/<pid>/fdinfo/<fd> entry.
    ///
    /// The default implementation returns `None`.
    fn extra_fdinfo(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileHandle,
        _current_task: &CurrentTask,
    ) -> Option<FsString> {
        None
    }
}
453
/// Marker trait for implementations of `FileOps` that do not need to implement `close` and can
/// then pass a wrapper object as the `FileOps` implementation.
///
/// The blanket `FileOps` implementation for `Deref` wrappers never forwards `close` to the
/// wrapped value, so this marker must only be implemented by types for which skipping `close` is
/// fine.
pub trait CloseFreeSafe {}
457impl<T: FileOps + CloseFreeSafe, P: Deref<Target = T> + Send + Sync + 'static> FileOps for P {
458    fn close(
459        self: Box<Self>,
460        _locked: &mut Locked<FileOpsCore>,
461        _file: &FileObjectState,
462        _current_task: &CurrentTask,
463    ) {
464        // This method cannot be delegated. T being `CloseFreeSafe` this is fine.
465    }
466
467    fn flush(
468        &self,
469        locked: &mut Locked<FileOpsCore>,
470        file: &FileObject,
471        current_task: &CurrentTask,
472    ) {
473        self.deref().flush(locked, file, current_task)
474    }
475
476    fn has_persistent_offsets(&self) -> bool {
477        self.deref().has_persistent_offsets()
478    }
479
480    fn writes_update_seek_offset(&self) -> bool {
481        self.deref().writes_update_seek_offset()
482    }
483
484    fn is_seekable(&self) -> bool {
485        self.deref().is_seekable()
486    }
487
488    fn read(
489        &self,
490        locked: &mut Locked<FileOpsCore>,
491        file: &FileObject,
492        current_task: &CurrentTask,
493        offset: usize,
494        data: &mut dyn OutputBuffer,
495    ) -> Result<usize, Errno> {
496        self.deref().read(locked, file, current_task, offset, data)
497    }
498
499    fn write(
500        &self,
501        locked: &mut Locked<FileOpsCore>,
502        file: &FileObject,
503        current_task: &CurrentTask,
504        offset: usize,
505        data: &mut dyn InputBuffer,
506    ) -> Result<usize, Errno> {
507        self.deref().write(locked, file, current_task, offset, data)
508    }
509
510    fn seek(
511        &self,
512        locked: &mut Locked<FileOpsCore>,
513        file: &FileObject,
514        current_task: &CurrentTask,
515        current_offset: off_t,
516        target: SeekTarget,
517    ) -> Result<off_t, Errno> {
518        self.deref().seek(locked, file, current_task, current_offset, target)
519    }
520
521    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
522        self.deref().sync(file, current_task)
523    }
524
525    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
526        self.deref().data_sync(file, current_task)
527    }
528
529    fn get_memory(
530        &self,
531        locked: &mut Locked<FileOpsCore>,
532        file: &FileObject,
533        current_task: &CurrentTask,
534        length: Option<usize>,
535        prot: ProtectionFlags,
536    ) -> Result<Arc<MemoryObject>, Errno> {
537        self.deref().get_memory(locked, file, current_task, length, prot)
538    }
539
540    fn mmap(
541        &self,
542        locked: &mut Locked<FileOpsCore>,
543        file: &FileObject,
544        current_task: &CurrentTask,
545        addr: DesiredAddress,
546        memory_offset: u64,
547        length: usize,
548        prot_flags: ProtectionFlags,
549        options: MappingOptions,
550        filename: NamespaceNode,
551    ) -> Result<UserAddress, Errno> {
552        self.deref().mmap(
553            locked,
554            file,
555            current_task,
556            addr,
557            memory_offset,
558            length,
559            prot_flags,
560            options,
561            filename,
562        )
563    }
564
565    fn readdir(
566        &self,
567        locked: &mut Locked<FileOpsCore>,
568        file: &FileObject,
569        current_task: &CurrentTask,
570        sink: &mut dyn DirentSink,
571    ) -> Result<(), Errno> {
572        self.deref().readdir(locked, file, current_task, sink)
573    }
574
575    fn wait_async(
576        &self,
577        locked: &mut Locked<FileOpsCore>,
578        file: &FileObject,
579        current_task: &CurrentTask,
580        waiter: &Waiter,
581        events: FdEvents,
582        handler: EventHandler,
583    ) -> Option<WaitCanceler> {
584        self.deref().wait_async(locked, file, current_task, waiter, events, handler)
585    }
586
587    fn query_events(
588        &self,
589        locked: &mut Locked<FileOpsCore>,
590        file: &FileObject,
591        current_task: &CurrentTask,
592    ) -> Result<FdEvents, Errno> {
593        self.deref().query_events(locked, file, current_task)
594    }
595
596    fn ioctl(
597        &self,
598        locked: &mut Locked<Unlocked>,
599        file: &FileObject,
600        current_task: &CurrentTask,
601        request: u32,
602        arg: SyscallArg,
603    ) -> Result<SyscallResult, Errno> {
604        self.deref().ioctl(locked, file, current_task, request, arg)
605    }
606
607    fn fcntl(
608        &self,
609        file: &FileObject,
610        current_task: &CurrentTask,
611        cmd: u32,
612        arg: u64,
613    ) -> Result<SyscallResult, Errno> {
614        self.deref().fcntl(file, current_task, cmd, arg)
615    }
616
617    fn to_handle(
618        &self,
619        file: &FileObject,
620        current_task: &CurrentTask,
621    ) -> Result<Option<zx::NullableHandle>, Errno> {
622        self.deref().to_handle(file, current_task)
623    }
624
625    fn get_handles(
626        &self,
627        file: &FileObject,
628        current_task: &CurrentTask,
629    ) -> Result<Vec<zx::NullableHandle>, Errno> {
630        self.deref().get_handles(file, current_task)
631    }
632
633    fn as_thread_group_key(&self, file: &FileObject) -> Result<ThreadGroupKey, Errno> {
634        self.deref().as_thread_group_key(file)
635    }
636
637    fn readahead(
638        &self,
639        file: &FileObject,
640        current_task: &CurrentTask,
641        offset: usize,
642        length: usize,
643    ) -> Result<(), Errno> {
644        self.deref().readahead(file, current_task, offset, length)
645    }
646
647    fn extra_fdinfo(
648        &self,
649        locked: &mut Locked<FileOpsCore>,
650        file: &FileHandle,
651        current_task: &CurrentTask,
652    ) -> Option<FsString> {
653        self.deref().extra_fdinfo(locked, file, current_task)
654    }
655}
656
657pub fn default_eof_offset<L>(
658    locked: &mut Locked<L>,
659    file: &FileObject,
660    current_task: &CurrentTask,
661) -> Result<off_t, Errno>
662where
663    L: LockEqualOrBefore<FileOpsCore>,
664{
665    Ok(file.node().get_size(locked, current_task)? as off_t)
666}
667
668/// Implement the seek method for a file. The computation from the end of the file must be provided
669/// through a callback.
670///
671/// Errors if the calculated offset is invalid.
672///
673/// - `current_offset`: The current position
674/// - `target`: The location to seek to.
675/// - `compute_end`: Compute the new offset from the end. Return an error if the operation is not
676///    supported.
677pub fn default_seek<F>(
678    current_offset: off_t,
679    target: SeekTarget,
680    compute_end: F,
681) -> Result<off_t, Errno>
682where
683    F: FnOnce() -> Result<off_t, Errno>,
684{
685    let new_offset = match target {
686        SeekTarget::Set(offset) => Some(offset),
687        SeekTarget::Cur(offset) => current_offset.checked_add(offset),
688        SeekTarget::End(offset) => compute_end()?.checked_add(offset),
689        SeekTarget::Data(offset) => {
690            let eof = compute_end().unwrap_or(off_t::MAX);
691            if offset >= eof {
692                return error!(ENXIO);
693            }
694            Some(offset)
695        }
696        SeekTarget::Hole(offset) => {
697            let eof = compute_end()?;
698            if offset >= eof {
699                return error!(ENXIO);
700            }
701            Some(eof)
702        }
703    }
704    .ok_or_else(|| errno!(EINVAL))?;
705
706    if new_offset < 0 {
707        return error!(EINVAL);
708    }
709
710    Ok(new_offset)
711}
712
713/// Implement the seek method for a file without an upper bound on the resulting offset.
714///
715/// This is useful for files without a defined size.
716///
717/// Errors if the calculated offset is invalid.
718///
719/// - `current_offset`: The current position
720/// - `target`: The location to seek to.
721pub fn unbounded_seek(current_offset: off_t, target: SeekTarget) -> Result<off_t, Errno> {
722    default_seek(current_offset, target, || Ok(MAX_LFS_FILESIZE as off_t))
723}
724
/// Implements [`FileOps::is_seekable`], [`FileOps::read`], [`FileOps::write`] and
/// [`FileOps::seek`] by forwarding `read`, `write` and `seek` to another object.
///
/// - `$self`: the `self` token, passed by the caller so that `$delegate` can refer to it.
/// - `$delegate`: expression evaluating to the object the calls are forwarded to.
///
/// `is_seekable` always returns `true`.
#[macro_export]
macro_rules! fileops_impl_delegate_read_write_and_seek {
    ($self:ident, $delegate:expr) => {
        fn is_seekable(&self) -> bool {
            true
        }

        // `FileObject` is named through `$crate` so that the expansion does not depend on what
        // the calling module has imported.
        fn read(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.read(locked, file, current_task, offset, data)
        }

        fn write(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.write(locked, file, current_task, offset, data)
        }

        fn seek(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $delegate.seek(locked, file, current_task, current_offset, target)
        }
    };
}
766
/// Implements [`FileOps::is_seekable`] and [`FileOps::seek`] in a way that makes sense for
/// seekable files: seeking is handled by `default_seek`, with the end of the file obtained from
/// `default_eof_offset`.
#[macro_export]
macro_rules! fileops_impl_seekable {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // Only evaluated if `default_seek` needs to know where the file ends.
            let end_of_file = || $crate::vfs::default_eof_offset(locked, file, current_task);
            $crate::vfs::default_seek(current_offset, target, end_of_file)
        }
    };
}
789
/// Implements [`FileOps`] methods in a way that makes sense for non-seekable files.
///
/// `is_seekable` returns `false` and every `seek` call fails with `ESPIPE`.
#[macro_export]
macro_rules! fileops_impl_nonseekable {
    () => {
        fn is_seekable(&self) -> bool {
            false
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ESPIPE)
        }
    };
}
810
/// Implements [`FileOps::seek`] methods in a way that makes sense for files that ignore
/// seeking operations and always read/write at offset 0.
///
/// The file reports itself as seekable but without persistent offsets, and every `seek` call
/// succeeds with offset 0.
#[macro_export]
macro_rules! fileops_impl_seekless {
    () => {
        fn has_persistent_offsets(&self) -> bool {
            false
        }

        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            Ok(0)
        }
    };
}
836
/// Implements [`FileOps::read`] and [`FileOps::write`] for files that carry no data: both
/// operations fail with `EINVAL`.
#[macro_export]
macro_rules! fileops_impl_dataless {
    () => {
        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }
    };
}
863
/// Implements [`FileOps`] methods in a way that makes sense for directories. You must implement
/// [`FileOps::seek`] and [`FileOps::readdir`].
///
/// The directory reports itself as seekable, and both `read` and `write` fail with `EISDIR`.
#[macro_export]
macro_rules! fileops_impl_directory {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }

        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }
    };
}
896
/// Implements [`FileOps::seek`] by forwarding the current offset and the seek target to
/// `$crate::vfs::unbounded_seek`. The lock token, file and task arguments are not used.
#[macro_export]
macro_rules! fileops_impl_unbounded_seek {
    () => {
        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $crate::vfs::unbounded_seek(current_offset, target)
        }
    };
}
912
/// Implements [`FileOps::sync`] without performing any work.
///
/// The generated method succeeds when the file's node is a regular file or a directory and
/// fails with `EINVAL` for every other node type.
#[macro_export]
macro_rules! fileops_impl_noop_sync {
    () => {
        fn sync(
            &self,
            file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            let node = file.node();
            if node.is_reg() || node.is_dir() {
                Ok(())
            } else {
                starnix_uapi::error!(EINVAL)
            }
        }
    };
}
928
929// Public re-export of macros allows them to be used like regular rust items.
930
931pub use fileops_impl_dataless;
932pub use fileops_impl_delegate_read_write_and_seek;
933pub use fileops_impl_directory;
934pub use fileops_impl_nonseekable;
935pub use fileops_impl_noop_sync;
936pub use fileops_impl_seekable;
937pub use fileops_impl_seekless;
938pub use fileops_impl_unbounded_seek;
/// Size, in bytes, of an AES-256 key (256 bits).
pub const AES256_KEY_SIZE: usize = 32;
940
941pub fn canonicalize_ioctl_request(current_task: &CurrentTask, request: u32) -> u32 {
942    if current_task.is_arch32() {
943        match request {
944            uapi::arch32::FS_IOC_GETFLAGS => uapi::FS_IOC_GETFLAGS,
945            uapi::arch32::FS_IOC_SETFLAGS => uapi::FS_IOC_SETFLAGS,
946            _ => request,
947        }
948    } else {
949        request
950    }
951}
952
/// Default handling for ioctl requests.
///
/// The request is first normalized with [`canonicalize_ioctl_request`] and then matched against
/// the requests handled here: `FIGETBSZ`, `FIONBIO`, `FIOQSIZE`, `FIONREAD`, the stubbed
/// `FS_IOC_FSGETXATTR` / `FS_IOC_FSSETXATTR` and `FS_IOC_GETFLAGS` / `FS_IOC_SETFLAGS` requests,
/// the fs-verity requests, and the fscrypt key and policy requests.
///
/// # Errors
///
/// Returns `ENOTTY` for `TCGETS`, for the size queries when the node type is not supported, and
/// for any request that is not recognized (which is also recorded with `track_stub!`). Returns
/// `EOPNOTSUPP` for `FICLONE`, `FICLONERANGE` and `FIDEDUPERANGE`. Errors from reading or writing
/// user memory and from the individual handlers are propagated.
pub fn default_ioctl(
    file: &FileObject,
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    request: u32,
    arg: SyscallArg,
) -> Result<SyscallResult, Errno> {
    match canonicalize_ioctl_request(current_task, request) {
        TCGETS => error!(ENOTTY),
        FIGETBSZ => {
            // Only regular files and directories report a block size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let blocksize = file.node().stat(locked, current_task)?.st_blksize;
            current_task.write_object(arg.into(), &blocksize)?;
            Ok(SUCCESS)
        }
        FIONBIO => {
            // The argument points at an i32: zero clears NONBLOCK, anything else sets it.
            let arg_ref = UserAddress::from(arg).into();
            let arg: i32 = current_task.read_object(arg_ref)?;
            let val = if arg == 0 {
                // Clear the NONBLOCK flag
                OpenFlags::empty()
            } else {
                // Set the NONBLOCK flag
                OpenFlags::NONBLOCK
            };
            file.update_file_flags(val, OpenFlags::NONBLOCK);
            Ok(SUCCESS)
        }
        FIOQSIZE => {
            // Only regular files and directories report a size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let size = file.node().stat(locked, current_task)?.st_size;
            current_task.write_object(arg.into(), &size)?;
            Ok(SUCCESS)
        }
        FIONREAD => {
            track_stub!(TODO("https://fxbug.dev/322874897"), "FIONREAD");
            if !file.name.entry.node.is_reg() {
                return error!(ENOTTY);
            }

            let size = file
                .name
                .entry
                .node
                .fetch_and_refresh_info(locked, current_task)
                .map_err(|_| errno!(EINVAL))?
                .size;
            let offset = usize::try_from(*file.offset.lock()).map_err(|_| errno!(EINVAL))?;
            // Bytes between the current offset and the end of the file: 0 when the offset is
            // past the end, saturated to i32::MAX when the difference does not fit in an i32.
            let remaining =
                if size < offset { 0 } else { i32::try_from(size - offset).unwrap_or(i32::MAX) };
            current_task.write_object(arg.into(), &remaining)?;
            Ok(SUCCESS)
        }
        FS_IOC_FSGETXATTR => {
            // Stub: always reports a default-initialized `fsxattr`.
            track_stub!(TODO("https://fxbug.dev/322875209"), "FS_IOC_FSGETXATTR");
            let arg = UserAddress::from(arg).into();
            current_task.write_object(arg, &fsxattr::default())?;
            Ok(SUCCESS)
        }
        FS_IOC_FSSETXATTR => {
            // Stub: the argument is read (so a bad pointer still fails) and then discarded.
            track_stub!(TODO("https://fxbug.dev/322875271"), "FS_IOC_FSSETXATTR");
            let arg = UserAddress::from(arg).into();
            let _: fsxattr = current_task.read_object(arg)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_GETFLAGS => {
            // Only the fs-verity and casefold flags are reported.
            track_stub!(TODO("https://fxbug.dev/322874935"), "FS_IOC_GETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let mut flags: u32 = 0;
            if matches!(*file.node().fsverity.lock(), FsVerityState::FsVerity) {
                flags |= FS_VERITY_FL;
            }
            if file.node().info().casefold {
                flags |= FS_CASEFOLD_FL;
            }
            current_task.write_object(arg, &flags)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_SETFLAGS => {
            // Only the casefold flag is applied; all other bits are ignored.
            track_stub!(TODO("https://fxbug.dev/322875367"), "FS_IOC_SETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let flags: u32 = current_task.read_object(arg)?;
            file.node().update_attributes(locked, current_task, |info| {
                info.casefold = flags & FS_CASEFOLD_FL != 0;
                Ok(())
            })?;
            Ok(SUCCESS)
        }
        FS_IOC_ENABLE_VERITY => {
            Ok(fsverity::ioctl::enable(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_MEASURE_VERITY => {
            Ok(fsverity::ioctl::measure(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_READ_VERITY_METADATA => {
            Ok(fsverity::ioctl::read_metadata(current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_ADD_ENCRYPTION_KEY => {
            let fscrypt_add_key_ref = UserRef::<fscrypt_add_key_arg>::from(arg);
            // The key bytes are read from the address produced by `next()` on the argument
            // reference, for `raw_size` bytes.
            let key_ref_addr = fscrypt_add_key_ref.next()?.addr();
            let mut fscrypt_add_key_arg = current_task.read_object(fscrypt_add_key_ref.clone())?;
            if fscrypt_add_key_arg.key_id != 0 {
                track_stub!(TODO("https://fxbug.dev/375649227"), "non-zero key ids");
                return error!(ENOTSUP);
            }
            if fscrypt_add_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            let key = current_task
                .read_memory_to_vec(key_ref_addr, fscrypt_add_key_arg.raw_size as usize)?;
            let user_id = current_task.current_creds().uid;

            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let key_identifier = crypt_service.add_wrapping_key(&key, user_id)?;
            // Report the identifier assigned by the crypt service back to the caller.
            fscrypt_add_key_arg.key_spec.u.identifier =
                fscrypt_identifier { value: key_identifier, ..Default::default() };
            current_task.write_object(fscrypt_add_key_ref, &fscrypt_add_key_arg)?;
            Ok(SUCCESS)
        }
        FS_IOC_SET_ENCRYPTION_POLICY => {
            let fscrypt_policy_ref = UserRef::<uapi::fscrypt_policy_v2>::from(arg);
            let policy = current_task.read_object(fscrypt_policy_ref)?;
            if policy.version as u32 != FSCRYPT_POLICY_V2 {
                track_stub!(TODO("https://fxbug.dev/375649656"), "fscrypt policy v1");
                return error!(ENOTSUP);
            }
            // Unsupported flags and encryption modes are only recorded; they do not fail the
            // request.
            if policy.flags != 0 {
                track_stub!(
                    TODO("https://fxbug.dev/375700939"),
                    "fscrypt policy flags",
                    policy.flags
                );
            }
            if policy.contents_encryption_mode as u32 != FSCRYPT_MODE_AES_256_XTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.contents_encryption_mode
                );
            }
            if policy.filenames_encryption_mode as u32 != FSCRYPT_MODE_AES_256_CTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.filenames_encryption_mode
                );
            }
            // A caller that does not own the node needs CAP_FOWNER; a failed capability check
            // is reported as EACCES.
            let user_id = current_task.current_creds().uid;
            if user_id != file.node().info().uid {
                security::check_task_capable(current_task, CAP_FOWNER)
                    .map_err(|_| errno!(EACCES))?;
            }

            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            // The key must be known to the crypt service and registered for this user.
            if let Some(users) =
                crypt_service.get_users_for_key(EncryptionKeyId::from(policy.master_key_identifier))
            {
                if !users.contains(&user_id) {
                    return error!(ENOKEY);
                }
            } else {
                track_stub!(
                    TODO("https://fxbug.dev/375067633"),
                    "users with CAP_FOWNER can set encryption policies with unadded keys"
                );
                return error!(ENOKEY);
            }

            let attributes = file.node().fetch_and_refresh_info(locked, current_task)?;
            if let Some(wrapping_key_id) = &attributes.wrapping_key_id {
                // A policy is already set: re-applying the same key succeeds, a different key
                // is rejected.
                if wrapping_key_id != &policy.master_key_identifier {
                    return error!(EEXIST);
                }
            } else {
                // Don't deadlock! update_attributes will also lock the attributes.
                std::mem::drop(attributes);
                file.node().update_attributes(locked, current_task, |info| {
                    info.wrapping_key_id = Some(policy.master_key_identifier);
                    Ok(())
                })?;
            }
            Ok(SUCCESS)
        }
        FS_IOC_REMOVE_ENCRYPTION_KEY => {
            let fscrypt_remove_key_arg_ref = UserRef::<uapi::fscrypt_remove_key_arg>::from(arg);
            let fscrypt_remove_key_arg = current_task.read_object(fscrypt_remove_key_arg_ref)?;
            if fscrypt_remove_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let user_id = current_task.current_creds().uid;
            // NOTE(review): this union read is guarded by the `type_` check above, which
            // selected the identifier variant — confirm and record as a SAFETY comment.
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let identifier = unsafe { fscrypt_remove_key_arg.key_spec.u.identifier.value };
            crypt_service.forget_wrapping_key(identifier, user_id)?;
            Ok(SUCCESS)
        }
        linux_uapi::FICLONE | linux_uapi::FICLONERANGE | linux_uapi::FIDEDUPERANGE => {
            error!(EOPNOTSUPP)
        }
        _ => {
            track_stub!(TODO("https://fxbug.dev/322874917"), "ioctl fallthrough", request);
            error!(ENOTTY)
        }
    }
}
1173
/// Default handling for fcntl commands: records the command with `track_stub!` and fails with
/// `EINVAL`.
pub fn default_fcntl(cmd: u32) -> Result<SyscallResult, Errno> {
    track_stub!(TODO("https://fxbug.dev/322875704"), "default fcntl", cmd);
    error!(EINVAL)
}
1178
/// Stateless file operations whose `FileOps` methods for reading, writing, seeking, memory
/// mapping, directory listing and ioctl all fail with `EBADF`.
pub struct OPathOps {}

impl OPathOps {
    /// Creates a new `OPathOps`.
    pub fn new() -> OPathOps {
        Self {}
    }
}
1186
/// Every data-related operation below is rejected with `EBADF`; only `sync` (via
/// `fileops_impl_noop_sync!`) and the two capability queries return something else.
impl FileOps for OPathOps {
    fileops_impl_noop_sync!();

    /// No offset is tracked for these files.
    fn has_persistent_offsets(&self) -> bool {
        false
    }
    /// Reported as seekable even though `seek` itself fails with `EBADF`.
    fn is_seekable(&self) -> bool {
        true
    }
    /// Always fails with `EBADF`.
    fn read(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn write(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn seek(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _current_offset: off_t,
        _target: SeekTarget,
    ) -> Result<off_t, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(EBADF)
    }

    /// Always fails with `EBADF`.
    fn ioctl(
        &self,
        _locked: &mut Locked<Unlocked>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _request: u32,
        _arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        error!(EBADF)
    }
}
1257
1258pub struct ProxyFileOps(pub FileHandle);
1259
1260impl FileOps for ProxyFileOps {
1261    // `close` is not delegated because the last reference to a `ProxyFileOps` is not
1262    // necessarily the last reference of the proxied file. If this is the case, the
1263    // releaser will handle it.
1264    // These don't take &FileObject making it too hard to handle them properly in the macro
1265    fn has_persistent_offsets(&self) -> bool {
1266        self.0.ops().has_persistent_offsets()
1267    }
1268    fn writes_update_seek_offset(&self) -> bool {
1269        self.0.ops().writes_update_seek_offset()
1270    }
1271    fn is_seekable(&self) -> bool {
1272        self.0.ops().is_seekable()
1273    }
1274    // These take &mut Locked<L> as a second argument
1275    fn flush(
1276        &self,
1277        locked: &mut Locked<FileOpsCore>,
1278        _file: &FileObject,
1279        current_task: &CurrentTask,
1280    ) {
1281        self.0.ops().flush(locked, &self.0, current_task);
1282    }
1283    fn wait_async(
1284        &self,
1285        locked: &mut Locked<FileOpsCore>,
1286        _file: &FileObject,
1287        current_task: &CurrentTask,
1288        waiter: &Waiter,
1289        events: FdEvents,
1290        handler: EventHandler,
1291    ) -> Option<WaitCanceler> {
1292        self.0.ops().wait_async(locked, &self.0, current_task, waiter, events, handler)
1293    }
1294    fn query_events(
1295        &self,
1296        locked: &mut Locked<FileOpsCore>,
1297        _file: &FileObject,
1298        current_task: &CurrentTask,
1299    ) -> Result<FdEvents, Errno> {
1300        self.0.ops().query_events(locked, &self.0, current_task)
1301    }
1302    fn read(
1303        &self,
1304        locked: &mut Locked<FileOpsCore>,
1305        _file: &FileObject,
1306        current_task: &CurrentTask,
1307        offset: usize,
1308        data: &mut dyn OutputBuffer,
1309    ) -> Result<usize, Errno> {
1310        self.0.ops().read(locked, &self.0, current_task, offset, data)
1311    }
1312    fn write(
1313        &self,
1314        locked: &mut Locked<FileOpsCore>,
1315        _file: &FileObject,
1316        current_task: &CurrentTask,
1317        offset: usize,
1318        data: &mut dyn InputBuffer,
1319    ) -> Result<usize, Errno> {
1320        self.0.ops().write(locked, &self.0, current_task, offset, data)
1321    }
1322    fn ioctl(
1323        &self,
1324        locked: &mut Locked<Unlocked>,
1325        _file: &FileObject,
1326        current_task: &CurrentTask,
1327        request: u32,
1328        arg: SyscallArg,
1329    ) -> Result<SyscallResult, Errno> {
1330        self.0.ops().ioctl(locked, &self.0, current_task, request, arg)
1331    }
1332    fn fcntl(
1333        &self,
1334        _file: &FileObject,
1335        current_task: &CurrentTask,
1336        cmd: u32,
1337        arg: u64,
1338    ) -> Result<SyscallResult, Errno> {
1339        self.0.ops().fcntl(&self.0, current_task, cmd, arg)
1340    }
1341    fn readdir(
1342        &self,
1343        locked: &mut Locked<FileOpsCore>,
1344        _file: &FileObject,
1345        current_task: &CurrentTask,
1346        sink: &mut dyn DirentSink,
1347    ) -> Result<(), Errno> {
1348        self.0.ops().readdir(locked, &self.0, current_task, sink)
1349    }
1350    fn sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1351        self.0.ops().sync(&self.0, current_task)
1352    }
1353    fn data_sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1354        self.0.ops().sync(&self.0, current_task)
1355    }
1356    fn get_memory(
1357        &self,
1358        locked: &mut Locked<FileOpsCore>,
1359        _file: &FileObject,
1360        current_task: &CurrentTask,
1361        length: Option<usize>,
1362        prot: ProtectionFlags,
1363    ) -> Result<Arc<MemoryObject>, Errno> {
1364        self.0.ops.get_memory(locked, &self.0, current_task, length, prot)
1365    }
1366    fn mmap(
1367        &self,
1368        locked: &mut Locked<FileOpsCore>,
1369        _file: &FileObject,
1370        current_task: &CurrentTask,
1371        addr: DesiredAddress,
1372        memory_offset: u64,
1373        length: usize,
1374        prot_flags: ProtectionFlags,
1375        options: MappingOptions,
1376        filename: NamespaceNode,
1377    ) -> Result<UserAddress, Errno> {
1378        self.0.ops.mmap(
1379            locked,
1380            &self.0,
1381            current_task,
1382            addr,
1383            memory_offset,
1384            length,
1385            prot_flags,
1386            options,
1387            filename,
1388        )
1389    }
1390    fn seek(
1391        &self,
1392        locked: &mut Locked<FileOpsCore>,
1393        _file: &FileObject,
1394        current_task: &CurrentTask,
1395        offset: off_t,
1396        target: SeekTarget,
1397    ) -> Result<off_t, Errno> {
1398        self.0.ops.seek(locked, &self.0, current_task, offset, target)
1399    }
1400}
1401
/// Identifies the asynchronous owner of a file, if any.
///
/// The default is [`FileAsyncOwner::Unowned`].
#[derive(Debug, Default, Copy, Clone)]
pub enum FileAsyncOwner {
    /// No owner is set.
    #[default]
    Unowned,
    /// A task, identified by id.
    Thread(pid_t),
    /// A task, identified by id. NOTE(review): presumably the process leader — confirm.
    Process(pid_t),
    /// A process group, identified by its group id.
    ProcessGroup(pid_t),
}
1410
1411impl FileAsyncOwner {
1412    pub fn validate(self, current_task: &CurrentTask) -> Result<(), Errno> {
1413        match self {
1414            FileAsyncOwner::Unowned => (),
1415            FileAsyncOwner::Thread(id) | FileAsyncOwner::Process(id) => {
1416                Task::from_weak(&current_task.get_task(id))?;
1417            }
1418            FileAsyncOwner::ProcessGroup(pgid) => {
1419                current_task
1420                    .kernel()
1421                    .pids
1422                    .read()
1423                    .get_process_group(pgid)
1424                    .ok_or_else(|| errno!(ESRCH))?;
1425            }
1426        }
1427        Ok(())
1428    }
1429}
1430
/// Identifier of a [`FileObject`], wrapping a `u64`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FileObjectId(u64);

impl FileObjectId {
    /// Returns the identifier converted to an [`EpollKey`] with an `as` cast.
    pub fn as_epoll_key(&self) -> EpollKey {
        self.0 as EpollKey
    }
}
1439
/// A session with a file object.
///
/// Each time a client calls open(), we create a new FileObject from the
/// underlying FsNode that receives the open(). This object contains the state
/// that is specific to this session whereas the underlying FsNode contains
/// the state that is shared between all the sessions.
pub struct FileObject {
    /// The operations backing this file.
    ops: Box<dyn FileOps>,
    /// Per-session state; also reachable directly through `Deref`.
    state: FileObjectState,
}
1450
/// Lets the fields and methods of [`FileObjectState`] be used directly on a [`FileObject`].
impl std::ops::Deref for FileObject {
    type Target = FileObjectState;
    fn deref(&self) -> &Self::Target {
        &self.state
    }
}
1457
/// The state of a [`FileObject`] other than its `FileOps`.
pub struct FileObjectState {
    /// Weak reference to the `FileHandle` of this `FileObject`. This allows to retrieve the
    /// `FileHandle` from a `FileObject`.
    pub weak_handle: WeakFileHandle,

    /// A unique identifier for this file object.
    pub id: FileObjectId,

    /// The NamespaceNode associated with this FileObject.
    ///
    /// Represents the name the process used to open this file.
    pub name: ActiveNamespaceNode,

    /// The file system of the node this file was created from.
    pub fs: FileSystemHandle,

    /// The current offset of this file; it starts at 0.
    pub offset: Mutex<off_t>,

    /// The open flags of this file. `OpenFlags::CREAT` is stripped when the file is created.
    flags: AtomicOpenFlags,

    /// The asynchronous owner of this file; `Unowned` until changed.
    async_owner: Mutex<FileAsyncOwner>,

    /// The `EpollFileObject`s that added this `FileObject` as a control file, keyed by file
    /// handle.
    epoll_files: Mutex<HashMap<FileHandleKey, WeakFileHandle>>,

    /// See fcntl F_SETLEASE and F_GETLEASE.
    lease: Mutex<FileLeaseType>,

    // This extra reference to the FsNode should not be needed, but it is needed to make
    // Inotify.ExcludeUnlinkInodeEvents pass.
    _mysterious_node: Option<FsNodeHandle>,

    /// Opaque security state associated with this file object.
    pub security_state: security::FileObjectState,
}
1493
/// Release action for [`FileObject`]s: passes the object to `register_delayed_release`.
pub enum FileObjectReleaserAction {}
impl ReleaserAction<FileObject> for FileObjectReleaserAction {
    fn release(file_object: ReleaseGuard<FileObject>) {
        register_delayed_release(file_object);
    }
}
/// A [`FileObject`] wrapped so that it is released through [`FileObjectReleaserAction`].
pub type FileReleaser = ObjectReleaser<FileObject, FileObjectReleaserAction>;
/// Shared, reference-counted handle to a file.
pub type FileHandle = Arc<FileReleaser>;
/// Weak counterpart of [`FileHandle`].
pub type WeakFileHandle = Weak<FileReleaser>;
/// Key type built from a weak file handle, used for map lookups.
pub type FileHandleKey = WeakKey<FileReleaser>;
1504
1505impl FileObjectState {
1506    /// The FsNode from which this FileObject was created.
1507    pub fn node(&self) -> &FsNodeHandle {
1508        &self.name.entry.node
1509    }
1510
1511    pub fn flags(&self) -> OpenFlags {
1512        self.flags.load(Ordering::Relaxed)
1513    }
1514
1515    pub fn can_read(&self) -> bool {
1516        self.flags.load(Ordering::Relaxed).can_read()
1517    }
1518
1519    pub fn can_write(&self) -> bool {
1520        self.flags.load(Ordering::Relaxed).can_write()
1521    }
1522
1523    /// Returns false if the file is not allowed to be executed.
1524    pub fn can_exec(&self) -> bool {
1525        let mounted_no_exec = self.name.to_passive().mount.flags().contains(MountFlags::NOEXEC);
1526        let no_exec_seal = self
1527            .node()
1528            .write_guard_state
1529            .lock()
1530            .get_seals()
1531            .map(|seals| seals.contains(SealFlags::NO_EXEC))
1532            .unwrap_or(false);
1533        !(mounted_no_exec || no_exec_seal)
1534    }
1535
1536    // Notifies watchers on the current node and its parent about an event.
1537    pub fn notify(&self, event_mask: InotifyMask) {
1538        self.name.notify(event_mask)
1539    }
1540}
1541
1542impl FileObject {
1543    /// Create a FileObject that is not mounted in a namespace.
1544    ///
1545    /// In particular, this will create a new unrooted entries. This should not be used on
1546    /// file system with persistent entries, as the created entry will be out of sync with the one
1547    /// from the file system.
1548    ///
1549    /// The returned FileObject does not have a name.
1550    pub fn new_anonymous<L>(
1551        locked: &mut Locked<L>,
1552        current_task: &CurrentTask,
1553        ops: Box<dyn FileOps>,
1554        node: FsNodeHandle,
1555        flags: OpenFlags,
1556    ) -> FileHandle
1557    where
1558        L: LockEqualOrBefore<FileOpsCore>,
1559    {
1560        assert!(!node.fs().has_permanent_entries());
1561        Self::new(
1562            locked,
1563            current_task,
1564            ops,
1565            NamespaceNode::new_anonymous_unrooted(current_task, node),
1566            flags,
1567        )
1568        .expect("Failed to create anonymous FileObject")
1569    }
1570
    /// Create a FileObject with an associated NamespaceNode.
    ///
    /// This function is not typically called directly. Instead, consider
    /// calling NamespaceNode::open.
    ///
    /// When `flags` allow writing, a `WriteFile` write guard is first acquired on the node and
    /// the call fails if that is refused. `OpenFlags::CREAT` is removed from the flags that are
    /// stored. An `InotifyMask::OPEN` notification is sent before `FileOps::open` is invoked;
    /// an error from `open` is returned to the caller.
    pub fn new<L>(
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        ops: Box<dyn FileOps>,
        name: NamespaceNode,
        flags: OpenFlags,
    ) -> Result<FileHandle, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        // Writable files take a write guard on the node and keep an extra reference to it.
        let _mysterious_node = if flags.can_write() {
            name.entry.node.write_guard_state.lock().acquire(FileWriteGuardMode::WriteFile)?;
            Some(name.entry.node.clone())
        } else {
            None
        };
        let fs = name.entry.node.fs();
        let id = FileObjectId(current_task.kernel.next_file_object_id.next());
        let security_state = security::file_alloc_security(current_task);
        // `new_cyclic` lets the object store a weak reference to its own handle.
        let file = FileHandle::new_cyclic(|weak_handle| {
            Self {
                ops,
                state: FileObjectState {
                    weak_handle: weak_handle.clone(),
                    id,
                    name: name.into_active(),
                    fs,
                    offset: Mutex::new(0),
                    flags: AtomicOpenFlags::new(flags - OpenFlags::CREAT),
                    async_owner: Default::default(),
                    epoll_files: Default::default(),
                    lease: Default::default(),
                    _mysterious_node,
                    security_state,
                },
            }
            .into()
        });
        file.notify(InotifyMask::OPEN);

        file.ops().open(locked.cast_locked::<FileOpsCore>(), &file, current_task)?;
        Ok(file)
    }
1618
1619    pub fn max_access_for_memory_mapping(&self) -> Access {
1620        let mut access = Access::EXIST;
1621        if self.can_exec() {
1622            access |= Access::EXEC;
1623        }
1624        let flags = self.flags.load(Ordering::Relaxed);
1625        if flags.can_read() {
1626            access |= Access::READ;
1627        }
1628        if flags.can_write() {
1629            access |= Access::WRITE;
1630        }
1631        access
1632    }
1633
    /// Returns the [`FileOps`] backing this file.
    pub fn ops(&self) -> &dyn FileOps {
        self.ops.as_ref()
    }
1637
    /// Returns the type name reported by this file's [`FileOps`].
    pub fn ops_type_name(&self) -> &'static str {
        self.ops().type_name()
    }
1641
    /// Whether `OpenFlags::NONBLOCK` is currently set on this file.
    pub fn is_non_blocking(&self) -> bool {
        self.flags().contains(OpenFlags::NONBLOCK)
    }
1645
    /// Common implementation for blocking operations.
    ///
    /// This function is used to implement the blocking operations for file objects. FileOps
    /// implementations should call this function to handle the blocking logic.
    ///
    /// The `op` parameter is a function that implements the non-blocking version of the operation.
    /// The function is called once without registering a waiter in case no wait is needed. If the
    /// operation returns EAGAIN and the file object is non-blocking, the function returns EAGAIN
    /// (unless the file was opened with `OpenFlags::DIRECTORY`, in which case it blocks anyway).
    ///
    /// If the operation returns EAGAIN and the file object is blocking, the function will block
    /// until the given events are triggered. At that time, the operation is retried. Notice that
    /// the `op` function can be called multiple times before the operation completes.
    ///
    /// The `deadline` parameter is the deadline for the operation; `None` waits indefinitely.
    /// If the deadline passes while waiting, the `ETIMEDOUT` reported by the waiter is converted
    /// to EAGAIN, which is what the caller receives. Any other error from the wait is returned
    /// unchanged.
    pub fn blocking_op<L, T, Op>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        events: FdEvents,
        deadline: Option<zx::MonotonicInstant>,
        mut op: Op,
    ) -> Result<T, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
        Op: FnMut(&mut Locked<L>) -> Result<T, Errno>,
    {
        // Don't return EAGAIN for directories. This can happen because glibc always opens a
        // directory with O_NONBLOCK.
        let can_return_eagain = self.flags().contains(OpenFlags::NONBLOCK)
            && !self.flags().contains(OpenFlags::DIRECTORY);
        // Run the operation a first time without registering a waiter in case no wait is needed.
        match op(locked) {
            Err(errno) if errno == EAGAIN && !can_return_eagain => {}
            result => return result,
        }

        // Reaching this point means the first attempt returned EAGAIN and blocking is allowed.
        let waiter = Waiter::new();
        loop {
            // Register the waiter before running the operation to prevent a race.
            self.wait_async(locked, current_task, &waiter, events, WaitCallback::none());
            match op(locked) {
                Err(e) if e == EAGAIN => {}
                result => return result,
            }
            let locked = locked.cast_locked::<FileOpsCore>();
            waiter
                .wait_until(
                    locked,
                    current_task,
                    deadline.unwrap_or(zx::MonotonicInstant::INFINITE),
                )
                // A timed-out wait is reported to the caller as EAGAIN.
                .map_err(|e| if e == ETIMEDOUT { errno!(EAGAIN) } else { e })?;
        }
    }
1701
    /// Whether this file's [`FileOps`] report it as seekable.
    pub fn is_seekable(&self) -> bool {
        self.ops().is_seekable()
    }
1705
    /// Whether this file's [`FileOps`] report persistent offsets.
    pub fn has_persistent_offsets(&self) -> bool {
        self.ops().has_persistent_offsets()
    }
1709
    /// Common implementation for `read` and `read_at`.
    ///
    /// Runs the security hook for `READ`, rejects files that are not open for reading with
    /// `EBADF`, then invokes `read`. After a successful read the access time is updated and,
    /// when at least one byte was read, an `InotifyMask::ACCESS` notification is sent.
    fn read_internal<R>(&self, current_task: &CurrentTask, read: R) -> Result<usize, Errno>
    where
        R: FnOnce() -> Result<usize, Errno>,
    {
        security::file_permission(current_task, self, security::PermissionFlags::READ)?;

        if !self.can_read() {
            return error!(EBADF);
        }
        let bytes_read = read()?;

        // TODO(steveaustin) - omit updating time_access to allow info to be immutable
        // and thus allow simultaneous reads.
        self.update_atime();
        if bytes_read > 0 {
            self.notify(InotifyMask::ACCESS);
        }

        Ok(bytes_read)
    }
1731
1732    pub fn read<L>(
1733        &self,
1734        locked: &mut Locked<L>,
1735        current_task: &CurrentTask,
1736        data: &mut dyn OutputBuffer,
1737    ) -> Result<usize, Errno>
1738    where
1739        L: LockEqualOrBefore<FileOpsCore>,
1740    {
1741        self.read_internal(current_task, || {
1742            let locked = locked.cast_locked::<FileOpsCore>();
1743            if !self.ops().has_persistent_offsets() {
1744                if data.available() > MAX_LFS_FILESIZE {
1745                    return error!(EINVAL);
1746                }
1747                return self.ops.read(locked, self, current_task, 0, data);
1748            }
1749
1750            let mut offset_guard = self.offset.lock();
1751            let offset = *offset_guard as usize;
1752            checked_add_offset_and_length(offset, data.available())?;
1753            let read = self.ops.read(locked, self, current_task, offset, data)?;
1754            *offset_guard += read as off_t;
1755            Ok(read)
1756        })
1757    }
1758
1759    pub fn read_at<L>(
1760        &self,
1761        locked: &mut Locked<L>,
1762        current_task: &CurrentTask,
1763        offset: usize,
1764        data: &mut dyn OutputBuffer,
1765    ) -> Result<usize, Errno>
1766    where
1767        L: LockEqualOrBefore<FileOpsCore>,
1768    {
1769        if !self.ops().is_seekable() {
1770            return error!(ESPIPE);
1771        }
1772        checked_add_offset_and_length(offset, data.available())?;
1773        let locked = locked.cast_locked::<FileOpsCore>();
1774        self.read_internal(current_task, || self.ops.read(locked, self, current_task, offset, data))
1775    }
1776
1777    /// Common checks before calling ops().write.
1778    fn write_common<L>(
1779        &self,
1780        locked: &mut Locked<L>,
1781        current_task: &CurrentTask,
1782        offset: usize,
1783        data: &mut dyn InputBuffer,
1784    ) -> Result<usize, Errno>
1785    where
1786        L: LockEqualOrBefore<FileOpsCore>,
1787    {
1788        security::file_permission(current_task, self, security::PermissionFlags::WRITE)?;
1789
1790        // We need to cap the size of `data` to prevent us from growing the file too large,
1791        // according to <https://man7.org/linux/man-pages/man2/write.2.html>:
1792        //
1793        //   The number of bytes written may be less than count if, for example, there is
1794        //   insufficient space on the underlying physical medium, or the RLIMIT_FSIZE resource
1795        //   limit is encountered (see setrlimit(2)),
1796        checked_add_offset_and_length(offset, data.available())?;
1797        let locked = locked.cast_locked::<FileOpsCore>();
1798        self.ops().write(locked, self, current_task, offset, data)
1799    }
1800
1801    /// Common wrapper work for `write` and `write_at`.
1802    fn write_fn<W, L>(
1803        &self,
1804        locked: &mut Locked<L>,
1805        current_task: &CurrentTask,
1806        write: W,
1807    ) -> Result<usize, Errno>
1808    where
1809        L: LockEqualOrBefore<FileOpsCore>,
1810        W: FnOnce(&mut Locked<L>) -> Result<usize, Errno>,
1811    {
1812        if !self.can_write() {
1813            return error!(EBADF);
1814        }
1815        self.node().clear_suid_and_sgid_bits(locked, current_task)?;
1816        let bytes_written = write(locked)?;
1817        self.node().update_ctime_mtime();
1818
1819        if bytes_written > 0 {
1820            self.notify(InotifyMask::MODIFY);
1821        }
1822
1823        Ok(bytes_written)
1824    }
1825
    /// Writes `data` to the file and returns the number of bytes written.
    ///
    /// The work runs inside `write_fn`, which performs the writability check and the
    /// post-write bookkeeping. Ops without persistent offsets are written at offset 0.
    /// Otherwise the write happens at the stored file offset:
    ///
    /// * with `OpenFlags::APPEND`, the node's append lock is taken for writing, the offset is
    ///   moved to the end of the file via `ops().seek(.., SeekTarget::End(0))`, and the data is
    ///   written there;
    /// * without it, the append lock is taken for reading and the data is written at the
    ///   current offset.
    ///
    /// The stored offset is advanced by the number of bytes written only when the ops report
    /// `writes_update_seek_offset()`.
    pub fn write<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        self.write_fn(locked, current_task, |locked| {
            if !self.ops().has_persistent_offsets() {
                return self.write_common(locked, current_task, 0, data);
            }
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            // This shadows the closure's `locked` argument with a freshly created `Unlocked`
            // token, which is what the append lock below is acquired with.
            let locked = unsafe { Unlocked::new() };
            // The offset guard lives until the closure returns, so it covers the seek, the
            // write and the offset update below.
            let mut offset = self.offset.lock();
            let bytes_written = if self.flags().contains(OpenFlags::APPEND) {
                // `_guard` keeps the append lock held (for writing) until this branch ends.
                let (_guard, locked) = self.node().append_lock.write_and(locked, current_task)?;
                *offset = self.ops().seek(
                    locked.cast_locked::<FileOpsCore>(),
                    self,
                    current_task,
                    *offset,
                    SeekTarget::End(0),
                )?;
                self.write_common(locked, current_task, *offset as usize, data)
            } else {
                // `_guard` keeps the append lock held (for reading) until this branch ends.
                let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;
                self.write_common(locked, current_task, *offset as usize, data)
            }?;
            if self.ops().writes_update_seek_offset() {
                *offset += bytes_written as off_t;
            }
            Ok(bytes_written)
        })
    }
1867
    /// Writes `data` at an explicit `offset` and returns the number of bytes written.
    ///
    /// Fails with `ESPIPE` when the ops are not seekable. The write runs inside `write_fn`
    /// with the node's append lock held for reading. The stored file offset is not read or
    /// modified here. When the file was opened with `OpenFlags::APPEND`, the caller's `offset`
    /// is validated and then replaced by the value returned from `default_eof_offset`.
    pub fn write_at<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        mut offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        if !self.ops().is_seekable() {
            return error!(ESPIPE);
        }
        self.write_fn(locked, current_task, |_locked| {
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            // The closure's own `_locked` argument is unused; a freshly created `Unlocked`
            // token is used to acquire the append lock instead.
            let locked = unsafe { Unlocked::new() };
            // `_guard` keeps the append lock held (for reading) until the closure returns.
            let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;

            // According to LTP test pwrite04:
            //
            //   POSIX requires that opening a file with the O_APPEND flag should have no effect on the
            //   location at which pwrite() writes data. However, on Linux, if a file is opened with
            //   O_APPEND, pwrite() appends data to the end of the file, regardless of the value of offset.
            //
            // NOTE(review): `is_seekable()` was already checked at the top of this function, so
            // the second operand below looks redundant -- confirm before removing.
            if self.flags().contains(OpenFlags::APPEND) && self.ops().is_seekable() {
                // The caller-supplied offset is still validated before it is overridden.
                checked_add_offset_and_length(offset, data.available())?;
                offset = default_eof_offset(locked, self, current_task)? as usize;
            }

            self.write_common(locked, current_task, offset, data)
        })
    }
1904
1905    pub fn seek<L>(
1906        &self,
1907        locked: &mut Locked<L>,
1908        current_task: &CurrentTask,
1909        target: SeekTarget,
1910    ) -> Result<off_t, Errno>
1911    where
1912        L: LockEqualOrBefore<FileOpsCore>,
1913    {
1914        let locked = locked.cast_locked::<FileOpsCore>();
1915        let locked = locked;
1916
1917        if !self.ops().is_seekable() {
1918            return error!(ESPIPE);
1919        }
1920
1921        if !self.ops().has_persistent_offsets() {
1922            return self.ops().seek(locked, self, current_task, 0, target);
1923        }
1924
1925        let mut offset_guard = self.offset.lock();
1926        let new_offset = self.ops().seek(locked, self, current_task, *offset_guard, target)?;
1927        *offset_guard = new_offset;
1928        Ok(new_offset)
1929    }
1930
1931    pub fn sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1932        self.ops().sync(self, current_task)
1933    }
1934
1935    pub fn data_sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1936        self.ops().data_sync(self, current_task)
1937    }
1938
1939    pub fn get_memory<L>(
1940        &self,
1941        locked: &mut Locked<L>,
1942        current_task: &CurrentTask,
1943        length: Option<usize>,
1944        prot: ProtectionFlags,
1945    ) -> Result<Arc<MemoryObject>, Errno>
1946    where
1947        L: LockEqualOrBefore<FileOpsCore>,
1948    {
1949        if prot.contains(ProtectionFlags::READ) && !self.can_read() {
1950            return error!(EACCES);
1951        }
1952        if prot.contains(ProtectionFlags::WRITE) && !self.can_write() {
1953            return error!(EACCES);
1954        }
1955        if prot.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1956            return error!(EPERM);
1957        }
1958        self.ops().get_memory(locked.cast_locked::<FileOpsCore>(), self, current_task, length, prot)
1959    }
1960
1961    pub fn mmap<L>(
1962        &self,
1963        locked: &mut Locked<L>,
1964        current_task: &CurrentTask,
1965        addr: DesiredAddress,
1966        memory_offset: u64,
1967        length: usize,
1968        prot_flags: ProtectionFlags,
1969        options: MappingOptions,
1970        filename: NamespaceNode,
1971    ) -> Result<UserAddress, Errno>
1972    where
1973        L: LockEqualOrBefore<FileOpsCore>,
1974    {
1975        let locked = locked.cast_locked::<FileOpsCore>();
1976        if !self.can_read() {
1977            return error!(EACCES);
1978        }
1979        if prot_flags.contains(ProtectionFlags::WRITE)
1980            && !self.can_write()
1981            && options.contains(MappingOptions::SHARED)
1982        {
1983            return error!(EACCES);
1984        }
1985        if prot_flags.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1986            return error!(EPERM);
1987        }
1988        self.ops().mmap(
1989            locked,
1990            self,
1991            current_task,
1992            addr,
1993            memory_offset,
1994            length,
1995            prot_flags,
1996            options,
1997            filename,
1998        )
1999    }
2000
2001    pub fn readdir<L>(
2002        &self,
2003        locked: &mut Locked<L>,
2004        current_task: &CurrentTask,
2005        sink: &mut dyn DirentSink,
2006    ) -> Result<(), Errno>
2007    where
2008        L: LockEqualOrBefore<FileOpsCore>,
2009    {
2010        let locked = locked.cast_locked::<FileOpsCore>();
2011        if self.name.entry.is_dead() {
2012            return error!(ENOENT);
2013        }
2014
2015        self.ops().readdir(locked, self, current_task, sink)?;
2016        self.update_atime();
2017        self.notify(InotifyMask::ACCESS);
2018        Ok(())
2019    }
2020
2021    pub fn ioctl(
2022        &self,
2023        locked: &mut Locked<Unlocked>,
2024        current_task: &CurrentTask,
2025        request: u32,
2026        arg: SyscallArg,
2027    ) -> Result<SyscallResult, Errno> {
2028        security::check_file_ioctl_access(current_task, &self, request)?;
2029
2030        if request == FIBMAP {
2031            security::check_task_capable(current_task, CAP_SYS_RAWIO)?;
2032
2033            // TODO: https://fxbug.dev/404795644 - eliminate this phoney response when the SELinux
2034            // Test Suite no longer requires it.
2035            if current_task.kernel().features.selinux_test_suite {
2036                let phoney_block = 0xbadf000du32;
2037                current_task.write_object(arg.into(), &phoney_block)?;
2038                return Ok(SUCCESS);
2039            }
2040        }
2041
2042        self.ops().ioctl(locked, self, current_task, request, arg)
2043    }
2044
2045    pub fn fcntl(
2046        &self,
2047        current_task: &CurrentTask,
2048        cmd: u32,
2049        arg: u64,
2050    ) -> Result<SyscallResult, Errno> {
2051        self.ops().fcntl(self, current_task, cmd, arg)
2052    }
2053
2054    pub fn ftruncate<L>(
2055        &self,
2056        locked: &mut Locked<L>,
2057        current_task: &CurrentTask,
2058        length: u64,
2059    ) -> Result<(), Errno>
2060    where
2061        L: LockBefore<BeforeFsNodeAppend>,
2062    {
2063        // The file must be opened with write permissions. Otherwise
2064        // truncating it is forbidden.
2065        if !self.can_write() {
2066            return error!(EINVAL);
2067        }
2068        self.node().ftruncate(locked, current_task, length)?;
2069        self.name.entry.notify_ignoring_excl_unlink(InotifyMask::MODIFY);
2070        Ok(())
2071    }
2072
2073    pub fn fallocate<L>(
2074        &self,
2075        locked: &mut Locked<L>,
2076        current_task: &CurrentTask,
2077        mode: FallocMode,
2078        offset: u64,
2079        length: u64,
2080    ) -> Result<(), Errno>
2081    where
2082        L: LockBefore<BeforeFsNodeAppend>,
2083    {
2084        // If the file is a pipe or FIFO, ESPIPE is returned.
2085        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2086        if self.node().is_fifo() {
2087            return error!(ESPIPE);
2088        }
2089
2090        // Must be a regular file or directory.
2091        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2092        if !self.node().is_dir() && !self.node().is_reg() {
2093            return error!(ENODEV);
2094        }
2095
2096        // The file must be opened with write permissions. Otherwise operation is forbidden.
2097        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2098        if !self.can_write() {
2099            return error!(EBADF);
2100        }
2101
2102        self.node().fallocate(locked, current_task, mode, offset, length)?;
2103        self.notify(InotifyMask::MODIFY);
2104        Ok(())
2105    }
2106
2107    pub fn to_handle(
2108        &self,
2109        current_task: &CurrentTask,
2110    ) -> Result<Option<zx::NullableHandle>, Errno> {
2111        self.ops().to_handle(self, current_task)
2112    }
2113
2114    pub fn get_handles(
2115        &self,
2116        current_task: &CurrentTask,
2117    ) -> Result<Vec<zx::NullableHandle>, Errno> {
2118        self.ops().get_handles(self, current_task)
2119    }
2120
2121    pub fn as_thread_group_key(&self) -> Result<ThreadGroupKey, Errno> {
2122        self.ops().as_thread_group_key(self)
2123    }
2124
2125    /// Update the file flags.
2126    ///
2127    /// Writes the bits in `value` that are set in `mask` into the file flags.
2128    ///
2129    /// Does not provide any synchronization.
2130    pub fn update_file_flags(&self, value: OpenFlags, mask: OpenFlags) {
2131        self.flags.update(value, mask, Ordering::Relaxed, Ordering::Relaxed);
2132    }
2133
2134    /// Get the async owner of this file.
2135    ///
2136    /// See fcntl(F_GETOWN)
2137    pub fn get_async_owner(&self) -> FileAsyncOwner {
2138        *self.async_owner.lock()
2139    }
2140
2141    /// Set the async owner of this file.
2142    ///
2143    /// See fcntl(F_SETOWN)
2144    pub fn set_async_owner(&self, owner: FileAsyncOwner) {
2145        *self.async_owner.lock() = owner;
2146    }
2147
2148    /// See fcntl(F_GETLEASE)
2149    pub fn get_lease(&self, _current_task: &CurrentTask) -> FileLeaseType {
2150        *self.lease.lock()
2151    }
2152
2153    /// See fcntl(F_SETLEASE)
2154    pub fn set_lease(
2155        &self,
2156        _current_task: &CurrentTask,
2157        lease: FileLeaseType,
2158    ) -> Result<(), Errno> {
2159        if !self.node().is_reg() {
2160            return error!(EINVAL);
2161        }
2162        if lease == FileLeaseType::Read && self.can_write() {
2163            return error!(EAGAIN);
2164        }
2165        *self.lease.lock() = lease;
2166        Ok(())
2167    }
2168
2169    /// Wait on the specified events and call the EventHandler when ready
2170    pub fn wait_async<L>(
2171        &self,
2172        locked: &mut Locked<L>,
2173        current_task: &CurrentTask,
2174        waiter: &Waiter,
2175        events: FdEvents,
2176        handler: EventHandler,
2177    ) -> Option<WaitCanceler>
2178    where
2179        L: LockEqualOrBefore<FileOpsCore>,
2180    {
2181        self.ops().wait_async(
2182            locked.cast_locked::<FileOpsCore>(),
2183            self,
2184            current_task,
2185            waiter,
2186            events,
2187            handler,
2188        )
2189    }
2190
2191    /// The events currently active on this file.
2192    pub fn query_events<L>(
2193        &self,
2194        locked: &mut Locked<L>,
2195        current_task: &CurrentTask,
2196    ) -> Result<FdEvents, Errno>
2197    where
2198        L: LockEqualOrBefore<FileOpsCore>,
2199    {
2200        self.ops()
2201            .query_events(locked.cast_locked::<FileOpsCore>(), self, current_task)
2202            .map(FdEvents::add_equivalent_fd_events)
2203    }
2204
2205    pub fn record_lock(
2206        &self,
2207        locked: &mut Locked<Unlocked>,
2208        current_task: &CurrentTask,
2209        cmd: RecordLockCommand,
2210        flock: uapi::flock,
2211    ) -> Result<Option<uapi::flock>, Errno> {
2212        self.node().record_lock(locked, current_task, self, cmd, flock)
2213    }
2214
2215    pub fn flush<L>(&self, locked: &mut Locked<L>, current_task: &CurrentTask, id: FdTableId)
2216    where
2217        L: LockEqualOrBefore<FileOpsCore>,
2218    {
2219        self.name.entry.node.record_lock_release(RecordLockOwner::FdTable(id));
2220        self.ops().flush(locked.cast_locked::<FileOpsCore>(), self, current_task)
2221    }
2222
2223    fn update_atime(&self) {
2224        if !self.flags().contains(OpenFlags::NOATIME) {
2225            self.name.update_atime();
2226        }
2227    }
2228
2229    pub fn readahead(
2230        &self,
2231        current_task: &CurrentTask,
2232        offset: usize,
2233        length: usize,
2234    ) -> Result<(), Errno> {
2235        // readfile() fails with EBADF if the file was not open for read.
2236        if !self.can_read() {
2237            return error!(EBADF);
2238        }
2239        checked_add_offset_and_length(offset, length)?;
2240        self.ops().readahead(self, current_task, offset, length)
2241    }
2242
2243    pub fn extra_fdinfo(
2244        &self,
2245        locked: &mut Locked<FileOpsCore>,
2246        current_task: &CurrentTask,
2247    ) -> Option<FsString> {
2248        let file = self.weak_handle.upgrade()?;
2249        self.ops().extra_fdinfo(locked, &file, current_task)
2250    }
2251
2252    /// Register the fd number of an `EpollFileObject` that listens to events from this
2253    /// `FileObject`.
2254    pub fn register_epfd(&self, file: &FileHandle) {
2255        self.epoll_files.lock().insert(WeakKey::from(file), file.weak_handle.clone());
2256    }
2257
2258    pub fn unregister_epfd(&self, file: &FileHandle) {
2259        self.epoll_files.lock().remove(&WeakKey::from(file));
2260    }
2261}
2262
2263impl Releasable for FileObject {
2264    type Context<'a> = CurrentTaskAndLocked<'a>;
2265
2266    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
2267        let (locked, current_task) = context;
2268        // Release all wake leases associated with this file in the corresponding `WaitObject`
2269        // of each registered epfd.
2270        for (_, file) in self.epoll_files.lock().drain() {
2271            if let Some(file) = file.upgrade() {
2272                if let Some(epoll_object) = file.downcast_file::<EpollFileObject>() {
2273                    let _ = epoll_object.delete(current_task, &self);
2274                }
2275            }
2276        }
2277
2278        if self.can_write() {
2279            self.name.entry.node.write_guard_state.lock().release(FileWriteGuardMode::WriteFile);
2280        }
2281
2282        let locked = locked.cast_locked::<FileOpsCore>();
2283        let ops = self.ops;
2284        let state = self.state;
2285        ops.close(locked, &state, current_task);
2286        state.name.entry.node.on_file_closed(&state);
2287        let event =
2288            if state.can_write() { InotifyMask::CLOSE_WRITE } else { InotifyMask::CLOSE_NOWRITE };
2289        state.notify(event);
2290    }
2291}
2292
2293impl fmt::Debug for FileObject {
2294    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2295        f.debug_struct("FileObject")
2296            .field("name", &self.name)
2297            .field("fs", &self.fs.name())
2298            .field("offset", &self.offset)
2299            .field("flags", &self.flags)
2300            .field("ops_ty", &self.ops().type_name())
2301            .finish()
2302    }
2303}
2304
2305impl OnWakeOps for FileReleaser {
2306    fn on_wake(&self, _current_task: &CurrentTask, _baton_lease: &zx::NullableHandle) {}
2307}
2308
2309/// A FileObject with the type of its FileOps known. Dereferencing it returns the FileOps.
2310pub struct DowncastedFile<'a, Ops> {
2311    file: &'a FileObject,
2312    ops: &'a Ops,
2313}
2314impl<'a, Ops> Copy for DowncastedFile<'a, Ops> {}
2315impl<'a, Ops> Clone for DowncastedFile<'a, Ops> {
2316    fn clone(&self) -> Self {
2317        *self
2318    }
2319}
2320
2321impl<'a, Ops> DowncastedFile<'a, Ops> {
2322    pub fn file(&self) -> &'a FileObject {
2323        self.file
2324    }
2325}
2326
2327impl<'a, Ops> Deref for DowncastedFile<'a, Ops> {
2328    type Target = &'a Ops;
2329    fn deref(&self) -> &Self::Target {
2330        &self.ops
2331    }
2332}
2333
2334impl FileObject {
2335    /// Returns the `FileObject`'s `FileOps` as a `DowncastedFile<T>`, or `None` if the downcast
2336    /// fails.
2337    ///
2338    /// This is useful for syscalls that only operate on a certain type of file.
2339    pub fn downcast_file<'a, T>(&'a self) -> Option<DowncastedFile<'a, T>>
2340    where
2341        T: 'static,
2342    {
2343        let ops = self.ops().as_any().downcast_ref::<T>()?;
2344        Some(DowncastedFile { file: self, ops })
2345    }
2346}
2347
2348/// Invokes the specified one-way `method` on the `proxy` and waits until the `proxy`'s underlying
2349/// channel has been closed by the peer.
2350///
2351/// This is used in `close()` implementations when the `FileOps` wraps a FIDL resource that provides
2352/// a one-way API to request teardown, and acknowledges completion of teardown by closing the FIDL
2353/// channel, to ensure that the `close()` call does not return until the FIDL server has actually
2354/// processed the teardown request.
2355pub fn call_fidl_and_await_close<P, M>(method: M, proxy: &P)
2356where
2357    P: fidl::endpoints::SynchronousProxy,
2358    M: FnOnce(&P) -> Result<(), fidl::Error>,
2359{
2360    if let Err(e) = method(proxy) {
2361        log_error!("call_fidl_and_await_close: call {} failed: {e:?}", P::Protocol::DEBUG_NAME);
2362        return;
2363    }
2364    let channel = proxy.as_channel();
2365    let result = channel.wait_one(zx::Signals::CHANNEL_PEER_CLOSED, zx::MonotonicInstant::INFINITE);
2366    if let Err(status) = result.to_result() {
2367        log_error!(
2368            "call_fidl_and_await_close: wait_one {} failed: {status:?}",
2369            P::Protocol::DEBUG_NAME
2370        );
2371    }
2372}
2373
#[cfg(test)]
mod tests {
    use crate::fs::tmpfs::TmpFs;
    use crate::task::CurrentTask;
    use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
    use crate::testing::*;
    use crate::vfs::MountInfo;
    use crate::vfs::buffers::{VecInputBuffer, VecOutputBuffer};
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::auth::FsCred;
    use starnix_uapi::device_id::DeviceId;
    use starnix_uapi::file_mode::FileMode;
    use starnix_uapi::open_flags::OpenFlags;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use zerocopy::{FromBytes, IntoBytes, LE, U64};

    /// Exercises appending writes, truncation and reads running concurrently on one file.
    ///
    /// One spawned thread appends the little-endian `u64` values 0..2000, a second spawned
    /// thread repeatedly truncates the file to length 0 until the writer is done, and the test
    /// body keeps reading from offset 0, asserting that the values it sees are strictly
    /// increasing.
    #[::fuchsia::test]
    async fn test_append_truncate_race() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Create a regular file named "test" at the root of a fresh tmpfs.
            let kernel = current_task.kernel();
            let root_fs = TmpFs::new_fs(locked, &kernel);
            let mount = MountInfo::detached();
            let root_node = Arc::clone(root_fs.root());
            let file = root_node
                .create_entry(
                    locked,
                    &current_task,
                    &mount,
                    "test".into(),
                    |locked, dir, mount, name| {
                        dir.create_node(
                            locked,
                            &current_task,
                            mount,
                            name,
                            FileMode::IFREG | FileMode::ALLOW_ALL,
                            DeviceId::NONE,
                            FsCred::root(),
                        )
                    },
                )
                .expect("create_node failed");
            // Open it read/write in append mode; the same handle is shared by all three actors.
            let file_handle = file
                .open_anonymous(locked, &current_task, OpenFlags::APPEND | OpenFlags::RDWR)
                .expect("open failed");
            // Set by the writer once all values have been appended.
            let done = Arc::new(AtomicBool::new(false));

            // Writer: appends 2000 increasing values, then signals completion.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                for i in 0..2000 {
                    fh.write(
                        locked,
                        current_task,
                        &mut VecInputBuffer::new(U64::<LE>::new(i).as_bytes()),
                    )
                    .expect("write failed");
                }
                done_clone.store(true, Ordering::SeqCst);
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (write_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // Truncator: keeps truncating the file to zero length until the writer is done.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                while !done_clone.load(Ordering::SeqCst) {
                    fh.ftruncate(locked, current_task, 0).expect("truncate failed");
                }
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (truncate_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // If we read from the file, we should always find an increasing sequence. If there are
            // races, then we might unexpectedly see zeroes.
            while !done.load(Ordering::SeqCst) {
                let mut buffer = VecOutputBuffer::new(4096);
                let amount = file_handle
                    .read_at(locked, &current_task, 0, &mut buffer)
                    .expect("read failed");
                let mut last = None;
                // Only the bytes actually read are inspected, in whole 8-byte chunks.
                let buffer = &Vec::from(buffer)[..amount];
                for i in
                    buffer.chunks_exact(8).map(|chunk| U64::<LE>::read_from_bytes(chunk).unwrap())
                {
                    if let Some(last) = last {
                        assert!(i.get() > last, "buffer: {:?}", buffer);
                    }
                    last = Some(i.get());
                }
            }

            // Collect the results of both spawned closures; an `Err` from either fails the test.
            let _ = write_thread().unwrap();
            let _ = truncate_thread().unwrap();
        })
        .await;
    }
}