starnix_core/vfs/
file_object.rs

// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::memory::MemoryObject;
6use crate::mm::{DesiredAddress, MappingName, MappingOptions, MemoryAccessorExt, ProtectionFlags};
7use crate::power::OnWakeOps;
8use crate::security;
9use crate::task::{
10    CurrentTask, CurrentTaskAndLocked, EventHandler, Task, ThreadGroupKey, WaitCallback,
11    WaitCanceler, Waiter, register_delayed_release,
12};
13use crate::vfs::buffers::{InputBuffer, OutputBuffer};
14use crate::vfs::file_server::serve_file;
15use crate::vfs::fsverity::{
16    FsVerityState, {self},
17};
18use crate::vfs::{
19    ActiveNamespaceNode, DirentSink, EpollFileObject, EpollKey, FallocMode, FdTableId,
20    FileSystemHandle, FileWriteGuardMode, FsNodeHandle, FsString, NamespaceNode, RecordLockCommand,
21    RecordLockOwner,
22};
23use starnix_crypt::EncryptionKeyId;
24use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
25use starnix_types::ownership::ReleaseGuard;
26use starnix_uapi::mount_flags::MountFlags;
27use starnix_uapi::user_address::ArchSpecific;
28
29use fidl::HandleBased;
30use linux_uapi::{FSCRYPT_MODE_AES_256_CTS, FSCRYPT_MODE_AES_256_XTS};
31use starnix_logging::{
32    CATEGORY_STARNIX_MM, impossible_error, log_error, trace_duration, track_stub,
33};
34use starnix_sync::{
35    BeforeFsNodeAppend, FileOpsCore, LockBefore, LockEqualOrBefore, Locked, Mutex, Unlocked,
36};
37use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
38use starnix_types::math::round_up_to_system_page_size;
39use starnix_types::ownership::Releasable;
40use starnix_uapi::arc_key::WeakKey;
41use starnix_uapi::as_any::AsAny;
42use starnix_uapi::auth::{CAP_FOWNER, CAP_SYS_RAWIO};
43use starnix_uapi::errors::{EAGAIN, ETIMEDOUT, Errno};
44use starnix_uapi::file_lease::FileLeaseType;
45use starnix_uapi::file_mode::Access;
46use starnix_uapi::inotify_mask::InotifyMask;
47use starnix_uapi::open_flags::OpenFlags;
48use starnix_uapi::seal_flags::SealFlags;
49use starnix_uapi::user_address::{UserAddress, UserRef};
50use starnix_uapi::vfs::FdEvents;
51use starnix_uapi::{
52    FIBMAP, FIGETBSZ, FIONBIO, FIONREAD, FIOQSIZE, FS_CASEFOLD_FL, FS_IOC_ADD_ENCRYPTION_KEY,
53    FS_IOC_ENABLE_VERITY, FS_IOC_FSGETXATTR, FS_IOC_FSSETXATTR, FS_IOC_MEASURE_VERITY,
54    FS_IOC_READ_VERITY_METADATA, FS_IOC_REMOVE_ENCRYPTION_KEY, FS_IOC_SET_ENCRYPTION_POLICY,
55    FS_VERITY_FL, FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, FSCRYPT_POLICY_V2, SEEK_CUR, SEEK_DATA,
56    SEEK_END, SEEK_HOLE, SEEK_SET, TCGETS, errno, error, fscrypt_add_key_arg, fscrypt_identifier,
57    fsxattr, off_t, pid_t, uapi,
58};
59use std::collections::HashMap;
60use std::fmt;
61use std::ops::Deref;
62use std::sync::{Arc, Weak};
63
/// Upper bound accepted for file offsets and sizes in this module.
///
/// The value is `2^63 - 1`, i.e. the largest value representable in a signed 64-bit integer.
/// It is used as the limit in [`checked_add_offset_and_length`] and as the end-of-file position
/// in [`unbounded_seek`].
pub const MAX_LFS_FILESIZE: usize = 0x7fff_ffff_ffff_ffff;
65
66pub fn checked_add_offset_and_length(offset: usize, length: usize) -> Result<usize, Errno> {
67    let end = offset.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
68    if end > MAX_LFS_FILESIZE {
69        return error!(EINVAL);
70    }
71    Ok(end)
72}
73
/// The destination of a seek operation: a `whence` mode together with its offset argument.
///
/// Values are built from raw syscall arguments with [`SeekTarget::from_raw`].
#[derive(Debug)]
pub enum SeekTarget {
    /// Seek to the given offset relative to the start of the file.
    Set(off_t),
    /// Seek to the given offset relative to the current position.
    Cur(off_t),
    /// Seek to the given offset relative to the end of the file.
    End(off_t),
    /// Seek for the first data after the given offset.
    Data(off_t),
    /// Seek for the first hole after the given offset.
    Hole(off_t),
}
87
88impl SeekTarget {
89    pub fn from_raw(whence: u32, offset: off_t) -> Result<SeekTarget, Errno> {
90        match whence {
91            SEEK_SET => Ok(SeekTarget::Set(offset)),
92            SEEK_CUR => Ok(SeekTarget::Cur(offset)),
93            SEEK_END => Ok(SeekTarget::End(offset)),
94            SEEK_DATA => Ok(SeekTarget::Data(offset)),
95            SEEK_HOLE => Ok(SeekTarget::Hole(offset)),
96            _ => error!(EINVAL),
97        }
98    }
99
100    pub fn whence(&self) -> u32 {
101        match self {
102            Self::Set(_) => SEEK_SET,
103            Self::Cur(_) => SEEK_CUR,
104            Self::End(_) => SEEK_END,
105            Self::Data(_) => SEEK_DATA,
106            Self::Hole(_) => SEEK_HOLE,
107        }
108    }
109
110    pub fn offset(&self) -> off_t {
111        match self {
112            Self::Set(off)
113            | Self::Cur(off)
114            | Self::End(off)
115            | Self::Data(off)
116            | Self::Hole(off) => *off,
117        }
118    }
119}
120
/// Corresponds to struct file_operations in Linux, plus any filesystem-specific data.
///
/// Implementations must provide `is_seekable`, `read`, `write`, `seek` and `sync`; every other
/// method has a default implementation that is documented on the method itself.
pub trait FileOps: Send + Sync + AsAny + 'static {
    /// Called when the FileObject is opened/created.
    ///
    /// The default implementation does nothing and returns `Ok(())`.
    fn open(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<(), Errno> {
        Ok(())
    }

    /// Called when the FileObject is destroyed.
    ///
    /// Consumes the boxed implementation. The default implementation does nothing.
    fn close(
        self: Box<Self>,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObjectState,
        _current_task: &CurrentTask,
    ) {
    }

    /// Called every time close() is called on this file, even if the file is not ready to be
    /// released.
    ///
    /// The default implementation does nothing.
    fn flush(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) {
    }

    /// Returns whether the file has meaningful seek offsets. Returning `false` is only an
    /// optimization and will make `FileObject` never hold the offset lock when calling `read` and
    /// `write`.
    ///
    /// Defaults to the value of [`Self::is_seekable`].
    fn has_persistent_offsets(&self) -> bool {
        self.is_seekable()
    }

    /// Returns whether the file is seekable.
    fn is_seekable(&self) -> bool;

    /// Returns true if `write()` operations on the file will update the seek offset.
    ///
    /// Defaults to the value of [`Self::has_persistent_offsets`].
    fn writes_update_seek_offset(&self) -> bool {
        self.has_persistent_offsets()
    }

    /// Read from the file at an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes read.
    fn read(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno>;

    /// Write to the file with an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes written.
    fn write(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>;

    /// Adjust the `current_offset` if the file is seekable.
    ///
    /// Returns the new offset on success.
    fn seek(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        current_offset: off_t,
        target: SeekTarget,
    ) -> Result<off_t, Errno>;

    /// Syncs cached state associated with the file descriptor to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    fn sync(&self, file: &FileObject, _current_task: &CurrentTask) -> Result<(), Errno>;

    /// Syncs cached data, and only enough metadata to retrieve said data, to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    ///
    /// The default implementation performs a full [`Self::sync`].
    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
        // TODO(https://fxbug.dev/297305634) make a default macro once data can be done separately
        self.sync(file, current_task)
    }

    /// Returns a VMO representing this file. At least the requested protection flags must
    /// be set on the VMO. Reading or writing the VMO must read or write the file. If this is not
    /// possible given the requested protection, an error must be returned.
    /// The `length` is a hint for the desired size of the VMO. The returned VMO may be larger or
    /// smaller than the requested length.
    /// This method is typically called by [`Self::mmap`].
    ///
    /// The default implementation returns `ENODEV`.
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(ENODEV)
    }

    /// Responds to an mmap call. The default implementation calls [`Self::get_memory`] to get a VMO
    /// and then maps it with [`crate::mm::MemoryManager::map`].
    /// Only implement this trait method if your file needs to control mapping, or record where
    /// a VMO gets mapped.
    ///
    /// Returns the address at which the memory was mapped. Fails with `EINVAL` if
    /// `memory_offset` plus the page-rounded `length` overflows `usize`.
    fn mmap(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        addr: DesiredAddress,
        memory_offset: u64,
        length: usize,
        prot_flags: ProtectionFlags,
        options: MappingOptions,
        filename: NamespaceNode,
    ) -> Result<UserAddress, Errno> {
        trace_duration!(CATEGORY_STARNIX_MM, "FileOpsDefaultMmap");
        // Size hint passed to `get_memory`: the memory must cover the mapped range, which ends
        // at `memory_offset` plus the page-rounded length.
        let min_memory_size = (memory_offset as usize)
            .checked_add(round_up_to_system_page_size(length)?)
            .ok_or_else(|| errno!(EINVAL))?;
        let mut memory = if options.contains(MappingOptions::SHARED) {
            trace_duration!(CATEGORY_STARNIX_MM, "GetSharedVmo");
            self.get_memory(locked, file, current_task, Some(min_memory_size), prot_flags)?
        } else {
            trace_duration!(CATEGORY_STARNIX_MM, "GetPrivateVmo");
            // TODO(tbodt): Use PRIVATE_CLONE to have the filesystem server do the clone for us.
            // The backing memory is requested readable and never writable; writes to a private
            // mapping go to the child created below.
            let base_prot_flags = (prot_flags | ProtectionFlags::READ) - ProtectionFlags::WRITE;
            let memory = self.get_memory(
                locked,
                file,
                current_task,
                Some(min_memory_size),
                base_prot_flags,
            )?;
            let mut clone_flags = zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE;
            if !prot_flags.contains(ProtectionFlags::WRITE) {
                clone_flags |= zx::VmoChildOptions::NO_WRITE;
            }
            trace_duration!(CATEGORY_STARNIX_MM, "CreatePrivateChildVmo");
            Arc::new(
                memory.create_child(clone_flags, 0, memory.get_size()).map_err(impossible_error)?,
            )
        };

        // Write guard is necessary only for shared mappings. Note that this doesn't depend on
        // `prot_flags` since these can be changed later with `mprotect()`.
        let file_write_guard = if options.contains(MappingOptions::SHARED) && file.can_write() {
            let node = &file.name.entry.node;
            let state = node.write_guard_state.lock();

            // `F_SEAL_FUTURE_WRITE` should allow `mmap(PROT_READ)`, but block
            // `mprotect(PROT_WRITE)`. This is different from `F_SEAL_WRITE`, which blocks
            // `mmap(PROT_READ)`. To handle this case correctly remove `WRITE` right from the
            // VMO handle to ensure `mprotect(PROT_WRITE)` fails.
            let seals = state.get_seals().unwrap_or(SealFlags::empty());
            if seals.contains(SealFlags::FUTURE_WRITE)
                && !seals.contains(SealFlags::WRITE)
                && !prot_flags.contains(ProtectionFlags::WRITE)
            {
                let mut new_rights = zx::Rights::VMO_DEFAULT - zx::Rights::WRITE;
                if prot_flags.contains(ProtectionFlags::EXEC) {
                    new_rights |= zx::Rights::EXECUTE;
                }
                memory = Arc::new(memory.duplicate_handle(new_rights).map_err(impossible_error)?);

                // The handle can no longer be written through, so no write guard is taken.
                None
            } else {
                Some(FileWriteGuardMode::WriteMapping)
            }
        } else {
            None
        };

        current_task.mm()?.map_memory(
            addr,
            memory,
            memory_offset,
            length,
            prot_flags,
            file.max_access_for_memory_mapping(),
            options,
            MappingName::File(filename.into_mapping(file_write_guard)?),
        )
    }

    /// Respond to a `getdents` or `getdents64` call.
    ///
    /// The `file.offset` lock will be held while entering this method. The implementation must look
    /// at `sink.offset()` to read the current offset into the file.
    ///
    /// The default implementation returns `ENOTDIR`.
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(ENOTDIR)
    }

    /// Establish a one-shot, edge-triggered, asynchronous wait for the given FdEvents for the
    /// given file and task. Returns `None` if this file does not support blocking waits.
    ///
    /// Active events are not considered. This is similar to the semantics of the
    /// ZX_WAIT_ASYNC_EDGE flag on zx_wait_async. To avoid missing events, the caller must call
    /// query_events after calling this.
    ///
    /// If your file does not support blocking waits, leave this as the default implementation.
    fn wait_async(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _waiter: &Waiter,
        _events: FdEvents,
        _handler: EventHandler,
    ) -> Option<WaitCanceler> {
        None
    }

    /// The events currently active on this file.
    ///
    /// If this function returns `POLLIN` or `POLLOUT`, then FileObject will
    /// add `POLLRDNORM` and `POLLWRNORM`, respectively, which are equivalent in
    /// the Linux UAPI.
    ///
    /// The default implementation reports both `POLLIN` and `POLLOUT`.
    ///
    /// See https://linux.die.net/man/2/poll
    fn query_events(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<FdEvents, Errno> {
        Ok(FdEvents::POLLIN | FdEvents::POLLOUT)
    }

    /// Handles an `ioctl` request issued on this file.
    ///
    /// The default implementation forwards the request to [`default_ioctl`].
    fn ioctl(
        &self,
        locked: &mut Locked<Unlocked>,
        file: &FileObject,
        current_task: &CurrentTask,
        request: u32,
        arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        default_ioctl(file, locked, current_task, request, arg)
    }

    /// Handles an `fcntl` command issued on this file.
    ///
    /// The default implementation forwards `cmd` to `default_fcntl` and ignores the other
    /// arguments.
    fn fcntl(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        cmd: u32,
        _arg: u64,
    ) -> Result<SyscallResult, Errno> {
        default_fcntl(cmd)
    }

    /// Return a handle that allows access to this file descriptor through the zxio protocols.
    ///
    /// If None is returned, the file will act as if it was a fd to `/dev/null`.
    ///
    /// The default implementation serves the file with `serve_file`, using the current task's
    /// credentials, and returns the resulting handle.
    fn to_handle(
        &self,
        file: &FileObject,
        current_task: &CurrentTask,
    ) -> Result<Option<zx::NullableHandle>, Errno> {
        serve_file(current_task, file, current_task.full_current_creds())
            .map(|c| Some(c.0.into_handle().into()))
    }

    /// Returns the associated thread group key.
    ///
    /// Used by pidfd and `/proc/<pid>`. Unlikely to be used by other files.
    ///
    /// The default implementation returns `EBADF`.
    fn as_thread_group_key(&self, _file: &FileObject) -> Result<ThreadGroupKey, Errno> {
        error!(EBADF)
    }

    /// Handles a readahead request for `_length` bytes starting at `_offset`.
    ///
    /// The default implementation returns `EINVAL`.
    fn readahead(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _length: usize,
    ) -> Result<(), Errno> {
        error!(EINVAL)
    }

    /// Extra information that is included in the /proc/<pid>/fdinfo/<fd> entry.
    ///
    /// The default implementation returns `None`.
    fn extra_fdinfo(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileHandle,
        _current_task: &CurrentTask,
    ) -> Option<FsString> {
        None
    }
}
426
/// Marker trait for implementations of `FileOps` that do not need to implement `close` and can
/// therefore be passed through a wrapper object as the `FileOps` implementation.
///
/// Any `Deref` wrapper around a `FileOps + CloseFreeSafe` type gets a blanket `FileOps`
/// implementation whose `close` does nothing, so the wrapped value's `close` is never invoked.
pub trait CloseFreeSafe {}
430impl<T: FileOps + CloseFreeSafe, P: Deref<Target = T> + Send + Sync + 'static> FileOps for P {
431    fn close(
432        self: Box<Self>,
433        _locked: &mut Locked<FileOpsCore>,
434        _file: &FileObjectState,
435        _current_task: &CurrentTask,
436    ) {
437        // This method cannot be delegated. T being `CloseFreeSafe` this is fine.
438    }
439
440    fn flush(
441        &self,
442        locked: &mut Locked<FileOpsCore>,
443        file: &FileObject,
444        current_task: &CurrentTask,
445    ) {
446        self.deref().flush(locked, file, current_task)
447    }
448
449    fn has_persistent_offsets(&self) -> bool {
450        self.deref().has_persistent_offsets()
451    }
452
453    fn writes_update_seek_offset(&self) -> bool {
454        self.deref().writes_update_seek_offset()
455    }
456
457    fn is_seekable(&self) -> bool {
458        self.deref().is_seekable()
459    }
460
461    fn read(
462        &self,
463        locked: &mut Locked<FileOpsCore>,
464        file: &FileObject,
465        current_task: &CurrentTask,
466        offset: usize,
467        data: &mut dyn OutputBuffer,
468    ) -> Result<usize, Errno> {
469        self.deref().read(locked, file, current_task, offset, data)
470    }
471
472    fn write(
473        &self,
474        locked: &mut Locked<FileOpsCore>,
475        file: &FileObject,
476        current_task: &CurrentTask,
477        offset: usize,
478        data: &mut dyn InputBuffer,
479    ) -> Result<usize, Errno> {
480        self.deref().write(locked, file, current_task, offset, data)
481    }
482
483    fn seek(
484        &self,
485        locked: &mut Locked<FileOpsCore>,
486        file: &FileObject,
487        current_task: &CurrentTask,
488        current_offset: off_t,
489        target: SeekTarget,
490    ) -> Result<off_t, Errno> {
491        self.deref().seek(locked, file, current_task, current_offset, target)
492    }
493
494    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
495        self.deref().sync(file, current_task)
496    }
497
498    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
499        self.deref().data_sync(file, current_task)
500    }
501
502    fn get_memory(
503        &self,
504        locked: &mut Locked<FileOpsCore>,
505        file: &FileObject,
506        current_task: &CurrentTask,
507        length: Option<usize>,
508        prot: ProtectionFlags,
509    ) -> Result<Arc<MemoryObject>, Errno> {
510        self.deref().get_memory(locked, file, current_task, length, prot)
511    }
512
513    fn mmap(
514        &self,
515        locked: &mut Locked<FileOpsCore>,
516        file: &FileObject,
517        current_task: &CurrentTask,
518        addr: DesiredAddress,
519        memory_offset: u64,
520        length: usize,
521        prot_flags: ProtectionFlags,
522        options: MappingOptions,
523        filename: NamespaceNode,
524    ) -> Result<UserAddress, Errno> {
525        self.deref().mmap(
526            locked,
527            file,
528            current_task,
529            addr,
530            memory_offset,
531            length,
532            prot_flags,
533            options,
534            filename,
535        )
536    }
537
538    fn readdir(
539        &self,
540        locked: &mut Locked<FileOpsCore>,
541        file: &FileObject,
542        current_task: &CurrentTask,
543        sink: &mut dyn DirentSink,
544    ) -> Result<(), Errno> {
545        self.deref().readdir(locked, file, current_task, sink)
546    }
547
548    fn wait_async(
549        &self,
550        locked: &mut Locked<FileOpsCore>,
551        file: &FileObject,
552        current_task: &CurrentTask,
553        waiter: &Waiter,
554        events: FdEvents,
555        handler: EventHandler,
556    ) -> Option<WaitCanceler> {
557        self.deref().wait_async(locked, file, current_task, waiter, events, handler)
558    }
559
560    fn query_events(
561        &self,
562        locked: &mut Locked<FileOpsCore>,
563        file: &FileObject,
564        current_task: &CurrentTask,
565    ) -> Result<FdEvents, Errno> {
566        self.deref().query_events(locked, file, current_task)
567    }
568
569    fn ioctl(
570        &self,
571        locked: &mut Locked<Unlocked>,
572        file: &FileObject,
573        current_task: &CurrentTask,
574        request: u32,
575        arg: SyscallArg,
576    ) -> Result<SyscallResult, Errno> {
577        self.deref().ioctl(locked, file, current_task, request, arg)
578    }
579
580    fn fcntl(
581        &self,
582        file: &FileObject,
583        current_task: &CurrentTask,
584        cmd: u32,
585        arg: u64,
586    ) -> Result<SyscallResult, Errno> {
587        self.deref().fcntl(file, current_task, cmd, arg)
588    }
589
590    fn to_handle(
591        &self,
592        file: &FileObject,
593        current_task: &CurrentTask,
594    ) -> Result<Option<zx::NullableHandle>, Errno> {
595        self.deref().to_handle(file, current_task)
596    }
597
598    fn as_thread_group_key(&self, file: &FileObject) -> Result<ThreadGroupKey, Errno> {
599        self.deref().as_thread_group_key(file)
600    }
601
602    fn readahead(
603        &self,
604        file: &FileObject,
605        current_task: &CurrentTask,
606        offset: usize,
607        length: usize,
608    ) -> Result<(), Errno> {
609        self.deref().readahead(file, current_task, offset, length)
610    }
611
612    fn extra_fdinfo(
613        &self,
614        locked: &mut Locked<FileOpsCore>,
615        file: &FileHandle,
616        current_task: &CurrentTask,
617    ) -> Option<FsString> {
618        self.deref().extra_fdinfo(locked, file, current_task)
619    }
620}
621
622pub fn default_eof_offset<L>(
623    locked: &mut Locked<L>,
624    file: &FileObject,
625    current_task: &CurrentTask,
626) -> Result<off_t, Errno>
627where
628    L: LockEqualOrBefore<FileOpsCore>,
629{
630    Ok(file.node().get_size(locked, current_task)? as off_t)
631}
632
633/// Implement the seek method for a file. The computation from the end of the file must be provided
634/// through a callback.
635///
636/// Errors if the calculated offset is invalid.
637///
638/// - `current_offset`: The current position
639/// - `target`: The location to seek to.
640/// - `compute_end`: Compute the new offset from the end. Return an error if the operation is not
641///    supported.
642pub fn default_seek<F>(
643    current_offset: off_t,
644    target: SeekTarget,
645    compute_end: F,
646) -> Result<off_t, Errno>
647where
648    F: FnOnce() -> Result<off_t, Errno>,
649{
650    let new_offset = match target {
651        SeekTarget::Set(offset) => Some(offset),
652        SeekTarget::Cur(offset) => current_offset.checked_add(offset),
653        SeekTarget::End(offset) => compute_end()?.checked_add(offset),
654        SeekTarget::Data(offset) => {
655            let eof = compute_end().unwrap_or(off_t::MAX);
656            if offset >= eof {
657                return error!(ENXIO);
658            }
659            Some(offset)
660        }
661        SeekTarget::Hole(offset) => {
662            let eof = compute_end()?;
663            if offset >= eof {
664                return error!(ENXIO);
665            }
666            Some(eof)
667        }
668    }
669    .ok_or_else(|| errno!(EINVAL))?;
670
671    if new_offset < 0 {
672        return error!(EINVAL);
673    }
674
675    Ok(new_offset)
676}
677
678/// Implement the seek method for a file without an upper bound on the resulting offset.
679///
680/// This is useful for files without a defined size.
681///
682/// Errors if the calculated offset is invalid.
683///
684/// - `current_offset`: The current position
685/// - `target`: The location to seek to.
686pub fn unbounded_seek(current_offset: off_t, target: SeekTarget) -> Result<off_t, Errno> {
687    default_seek(current_offset, target, || Ok(MAX_LFS_FILESIZE as off_t))
688}
689
/// Implements [`FileOps::is_seekable`], [`FileOps::read`] and [`FileOps::seek`] by forwarding
/// `read` and `seek` to another object. `is_seekable` always returns `true`.
///
/// - `$self`: the identifier to use as the method receiver (normally `self`), so that
///   `$delegate` can refer to it.
/// - `$delegate`: the expression, evaluated inside each generated method, on which `read` and
///   `seek` are called with the same arguments.
///
/// All types in the generated signatures are fully qualified, so the macro does not depend on
/// the imports of the module it is expanded in.
#[macro_export]
macro_rules! fileops_impl_delegate_read_and_seek {
    ($self:ident, $delegate:expr) => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.read(locked, file, current_task, offset, data)
        }

        fn seek(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $delegate.seek(locked, file, current_task, current_offset, target)
        }
    };
}
720
/// Implements [`FileOps::seek`] in a way that makes sense for seekable files.
///
/// The generated `is_seekable` returns `true`, and the generated `seek` calls `default_seek`
/// with `default_eof_offset` as the end-of-file callback.
#[macro_export]
macro_rules! fileops_impl_seekable {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // The end of the file is only looked up if `default_seek` needs it.
            $crate::vfs::default_seek(current_offset, target, || {
                $crate::vfs::default_eof_offset(locked, file, current_task)
            })
        }
    };
}
743
/// Implements [`FileOps`] methods in a way that makes sense for non-seekable files.
///
/// The generated `is_seekable` returns `false`, and the generated `seek` ignores its arguments
/// and always fails with `ESPIPE`.
#[macro_export]
macro_rules! fileops_impl_nonseekable {
    () => {
        fn is_seekable(&self) -> bool {
            false
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ESPIPE)
        }
    };
}
764
/// Implements [`FileOps::seek`] methods in a way that makes sense for files that ignore
/// seeking operations and always read/write at offset 0.
///
/// The generated methods report the file as seekable but without persistent offsets, and the
/// generated `seek` ignores its arguments and always succeeds with offset 0.
#[macro_export]
macro_rules! fileops_impl_seekless {
    () => {
        fn has_persistent_offsets(&self) -> bool {
            false
        }

        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            Ok(0)
        }
    };
}
790
/// Implements [`FileOps::read`] and [`FileOps::write`] for files that carry no data.
///
/// Both generated methods ignore their arguments and always fail with `EINVAL`.
#[macro_export]
macro_rules! fileops_impl_dataless {
    () => {
        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }
    };
}
817
/// Implements [`FileOps`] methods in a way that makes sense for directories. You must implement
/// [`FileOps::seek`] and [`FileOps::readdir`].
///
/// The generated `is_seekable` returns `true`, and the generated `read` and `write` ignore
/// their arguments and always fail with `EISDIR`.
#[macro_export]
macro_rules! fileops_impl_directory {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }

        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }
    };
}
850
/// Implements [`FileOps::seek`] by calling `unbounded_seek` with the current offset and the
/// seek target.
///
/// Only `seek` is generated; `is_seekable` must be provided separately.
#[macro_export]
macro_rules! fileops_impl_unbounded_seek {
    () => {
        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $crate::vfs::unbounded_seek(current_offset, target)
        }
    };
}
866
/// Implements [`FileOps::sync`] for files that have nothing to write back.
///
/// The generated `sync` succeeds without doing any work when the file's node is a regular file
/// or a directory, and fails with `EINVAL` for every other kind of node.
#[macro_export]
macro_rules! fileops_impl_noop_sync {
    () => {
        fn sync(
            &self,
            file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            let node = file.node();
            if node.is_reg() || node.is_dir() {
                Ok(())
            } else {
                starnix_uapi::error!(EINVAL)
            }
        }
    };
}
882
883// Public re-export of macros allows them to be used like regular rust items.
884
885pub use {
886    fileops_impl_dataless, fileops_impl_delegate_read_and_seek, fileops_impl_directory,
887    fileops_impl_nonseekable, fileops_impl_noop_sync, fileops_impl_seekable, fileops_impl_seekless,
888    fileops_impl_unbounded_seek,
889};
/// Size, in bytes, of an AES-256 key (32 bytes = 256 bits).
pub const AES256_KEY_SIZE: usize = 32;
891
892pub fn canonicalize_ioctl_request(current_task: &CurrentTask, request: u32) -> u32 {
893    if current_task.is_arch32() {
894        match request {
895            uapi::arch32::FS_IOC_GETFLAGS => uapi::FS_IOC_GETFLAGS,
896            uapi::arch32::FS_IOC_SETFLAGS => uapi::FS_IOC_SETFLAGS,
897            _ => request,
898        }
899    } else {
900        request
901    }
902}
903
/// Fallback `ioctl` handling for requests that are not specific to one `FileOps` implementation.
///
/// The request number is first passed through `canonicalize_ioctl_request`, so arch32 callers
/// are dispatched on the native request numbers. Handled requests read their argument from,
/// or write their result to, the user address carried by `arg`.
///
/// # Errors
///
/// - `ENOTTY` for `TCGETS`, for unrecognized requests, and for `FIGETBSZ`/`FIOQSIZE` on nodes
///   that are neither regular files nor directories (`FIONREAD` accepts regular files only).
/// - `ENOTSUP` from the fscrypt requests for unsupported argument variants, or when the
///   filesystem has no crypt service.
/// - `EACCES`, `ENOKEY` and `EEXIST` from `FS_IOC_SET_ENCRYPTION_POLICY` (see the arm below).
/// - Errors from user-memory accesses and from the delegated helpers are propagated.
pub fn default_ioctl(
    file: &FileObject,
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    request: u32,
    arg: SyscallArg,
) -> Result<SyscallResult, Errno> {
    match canonicalize_ioctl_request(current_task, request) {
        // Terminal attribute query: never supported by the default handler.
        TCGETS => error!(ENOTTY),
        // Writes the node's `st_blksize` to the user address in `arg`.
        FIGETBSZ => {
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let blocksize = file.node().stat(locked, current_task)?.st_blksize;
            current_task.write_object(arg.into(), &blocksize)?;
            Ok(SUCCESS)
        }
        // Reads an `i32` from user memory; zero clears NONBLOCK, any other value sets it.
        FIONBIO => {
            let arg_ref = UserAddress::from(arg).into();
            let arg: i32 = current_task.read_object(arg_ref)?;
            let val = if arg == 0 {
                // Clear the NONBLOCK flag
                OpenFlags::empty()
            } else {
                // Set the NONBLOCK flag
                OpenFlags::NONBLOCK
            };
            file.update_file_flags(val, OpenFlags::NONBLOCK);
            Ok(SUCCESS)
        }
        // Writes the node's `st_size` to the user address in `arg`.
        FIOQSIZE => {
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let size = file.node().stat(locked, current_task)?.st_size;
            current_task.write_object(arg.into(), &size)?;
            Ok(SUCCESS)
        }
        // Writes the number of bytes between the current offset and the end of a regular file.
        FIONREAD => {
            track_stub!(TODO("https://fxbug.dev/322874897"), "FIONREAD");
            if !file.name.entry.node.is_reg() {
                return error!(ENOTTY);
            }

            // Any failure to refresh the node info is reported as EINVAL.
            let size = file
                .name
                .entry
                .node
                .fetch_and_refresh_info(locked, current_task)
                .map_err(|_| errno!(EINVAL))?
                .size;
            // An offset that does not fit in `usize` (e.g. a negative one) is reported as EINVAL.
            let offset = usize::try_from(*file.offset.lock()).map_err(|_| errno!(EINVAL))?;
            // Zero when the offset is past the end; saturates at `i32::MAX` for large remainders.
            let remaining =
                if size < offset { 0 } else { i32::try_from(size - offset).unwrap_or(i32::MAX) };
            current_task.write_object(arg.into(), &remaining)?;
            Ok(SUCCESS)
        }
        // Stub: always reports a default-initialized `fsxattr`.
        FS_IOC_FSGETXATTR => {
            track_stub!(TODO("https://fxbug.dev/322875209"), "FS_IOC_FSGETXATTR");
            let arg = UserAddress::from(arg).into();
            current_task.write_object(arg, &fsxattr::default())?;
            Ok(SUCCESS)
        }
        // Stub: the `fsxattr` is read (so a bad address still fails) and then discarded.
        FS_IOC_FSSETXATTR => {
            track_stub!(TODO("https://fxbug.dev/322875271"), "FS_IOC_FSSETXATTR");
            let arg = UserAddress::from(arg).into();
            let _: fsxattr = current_task.read_object(arg)?;
            Ok(SUCCESS)
        }
        // Reports only the fs-verity and casefold flags; all other flag bits are zero.
        uapi::FS_IOC_GETFLAGS => {
            track_stub!(TODO("https://fxbug.dev/322874935"), "FS_IOC_GETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let mut flags: u32 = 0;
            if matches!(*file.node().fsverity.lock(), FsVerityState::FsVerity) {
                flags |= FS_VERITY_FL;
            }
            if file.node().info().casefold {
                flags |= FS_CASEFOLD_FL;
            }
            current_task.write_object(arg, &flags)?;
            Ok(SUCCESS)
        }
        // Only the casefold bit is honored; every other bit in the supplied flags is ignored.
        uapi::FS_IOC_SETFLAGS => {
            track_stub!(TODO("https://fxbug.dev/322875367"), "FS_IOC_SETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let flags: u32 = current_task.read_object(arg)?;
            file.node().update_attributes(locked, current_task, |info| {
                info.casefold = flags & FS_CASEFOLD_FL != 0;
                Ok(())
            })?;
            Ok(SUCCESS)
        }
        // The fs-verity requests are fully delegated to the `fsverity::ioctl` helpers.
        FS_IOC_ENABLE_VERITY => {
            Ok(fsverity::ioctl::enable(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_MEASURE_VERITY => {
            Ok(fsverity::ioctl::measure(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_READ_VERITY_METADATA => {
            Ok(fsverity::ioctl::read_metadata(current_task, UserAddress::from(arg).into(), file)?)
        }
        // Registers a wrapping key with the filesystem's crypt service on behalf of the
        // caller's uid, then writes the resulting key identifier back into the user's argument.
        FS_IOC_ADD_ENCRYPTION_KEY => {
            let fscrypt_add_key_ref = UserRef::<fscrypt_add_key_arg>::from(arg);
            // The raw key bytes are read from the address obtained via `next()`, i.e.
            // presumably the bytes immediately following the fixed-size argument struct.
            let key_ref_addr = fscrypt_add_key_ref.next()?.addr();
            let mut fscrypt_add_key_arg = current_task.read_object(fscrypt_add_key_ref.clone())?;
            if fscrypt_add_key_arg.key_id != 0 {
                track_stub!(TODO("https://fxbug.dev/375649227"), "non-zero key ids");
                return error!(ENOTSUP);
            }
            if fscrypt_add_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            // NOTE(review): `raw_size` comes from user memory and is used as the read length
            // without an upper bound here — confirm whether it is validated elsewhere.
            let key = current_task
                .read_memory_to_vec(key_ref_addr, fscrypt_add_key_arg.raw_size as usize)?;
            let user_id = current_task.with_current_creds(|creds| creds.uid);

            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let key_identifier = crypt_service.add_wrapping_key(&key, user_id)?;
            fscrypt_add_key_arg.key_spec.u.identifier =
                fscrypt_identifier { value: key_identifier, ..Default::default() };
            current_task.write_object(fscrypt_add_key_ref, &fscrypt_add_key_arg)?;
            Ok(SUCCESS)
        }
        // Associates a v2 encryption policy's master key with the node.
        FS_IOC_SET_ENCRYPTION_POLICY => {
            let fscrypt_policy_ref = UserRef::<uapi::fscrypt_policy_v2>::from(arg);
            let policy = current_task.read_object(fscrypt_policy_ref)?;
            if policy.version as u32 != FSCRYPT_POLICY_V2 {
                track_stub!(TODO("https://fxbug.dev/375649656"), "fscrypt policy v1");
                return error!(ENOTSUP);
            }
            // Unsupported flags and encryption modes are only recorded through `track_stub!`;
            // none of the three checks below rejects the request.
            if policy.flags != 0 {
                track_stub!(
                    TODO("https://fxbug.dev/375700939"),
                    "fscrypt policy flags",
                    policy.flags
                );
            }
            if policy.contents_encryption_mode as u32 != FSCRYPT_MODE_AES_256_XTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.contents_encryption_mode
                );
            }
            if policy.filenames_encryption_mode as u32 != FSCRYPT_MODE_AES_256_CTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.filenames_encryption_mode
                );
            }
            // A caller that does not own the node needs CAP_FOWNER; a failed capability check
            // is reported as EACCES regardless of the underlying error.
            let user_id = current_task.with_current_creds(|creds| creds.uid);
            if user_id != file.node().info().uid {
                security::check_task_capable(current_task, CAP_FOWNER)
                    .map_err(|_| errno!(EACCES))?;
            }

            // The key must already be known to the crypt service and the caller must be one of
            // its users; otherwise the request fails with ENOKEY.
            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            if let Some(users) =
                crypt_service.get_users_for_key(EncryptionKeyId::from(policy.master_key_identifier))
            {
                if !users.contains(&user_id) {
                    return error!(ENOKEY);
                }
            } else {
                track_stub!(
                    TODO("https://fxbug.dev/375067633"),
                    "users with CAP_FOWNER can set encryption policies with unadded keys"
                );
                return error!(ENOKEY);
            }

            // Setting the same key again succeeds without changes; a node that already has a
            // different wrapping key yields EEXIST.
            let attributes = file.node().fetch_and_refresh_info(locked, current_task)?;
            if let Some(wrapping_key_id) = &attributes.wrapping_key_id {
                if wrapping_key_id != &policy.master_key_identifier {
                    return error!(EEXIST);
                }
            } else {
                // Don't deadlock! update_attributes will also lock the attributes.
                std::mem::drop(attributes);
                file.node().update_attributes(locked, current_task, |info| {
                    info.wrapping_key_id = Some(policy.master_key_identifier);
                    Ok(())
                })?;
            }
            Ok(SUCCESS)
        }
        // Asks the crypt service to forget the identified wrapping key for the caller's uid.
        FS_IOC_REMOVE_ENCRYPTION_KEY => {
            let fscrypt_remove_key_arg_ref = UserRef::<uapi::fscrypt_remove_key_arg>::from(arg);
            let fscrypt_remove_key_arg = current_task.read_object(fscrypt_remove_key_arg_ref)?;
            if fscrypt_remove_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let user_id = current_task.with_current_creds(|creds| creds.uid);
            // NOTE(review): this unsafe read of `u.identifier` appears to rely on the `type_`
            // check above having established that the identifier member is the one in use —
            // confirm and turn this into a proper SAFETY comment.
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let identifier = unsafe { fscrypt_remove_key_arg.key_spec.u.identifier.value };
            crypt_service.forget_wrapping_key(identifier, user_id)?;
            Ok(SUCCESS)
        }
        // Anything else is recorded (with the original, non-canonicalized request number) and
        // rejected.
        _ => {
            track_stub!(TODO("https://fxbug.dev/322874917"), "ioctl fallthrough", request);
            error!(ENOTTY)
        }
    }
}
1121
/// Fallback `fcntl` handling: records the command through `track_stub!` and always fails with
/// `EINVAL`.
pub fn default_fcntl(cmd: u32) -> Result<SyscallResult, Errno> {
    track_stub!(TODO("https://fxbug.dev/322875704"), "default fcntl", cmd);
    error!(EINVAL)
}
1126
/// Stateless `FileOps` implementation whose data operations all fail with `EBADF`.
pub struct OPathOps {}

impl OPathOps {
    /// Creates a new, stateless `OPathOps`.
    pub fn new() -> Self {
        Self {}
    }
}
1134
/// `FileOps` for `OPathOps`: the file reports itself as seekable with non-persistent offsets,
/// but `read`, `write`, `seek`, `get_memory`, `readdir` and `ioctl` all fail with `EBADF`.
///
/// NOTE(review): presumably this backs files opened with `O_PATH` — confirm against callers.
impl FileOps for OPathOps {
    fileops_impl_noop_sync!();

    fn has_persistent_offsets(&self) -> bool {
        false
    }
    fn is_seekable(&self) -> bool {
        true
    }
    /// Always fails with `EBADF`.
    fn read(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn write(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`, even though `is_seekable` reports `true`.
    fn seek(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _current_offset: off_t,
        _target: SeekTarget,
    ) -> Result<off_t, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(EBADF)
    }

    /// Always fails with `EBADF`; no request is forwarded to `default_ioctl`.
    fn ioctl(
        &self,
        _locked: &mut Locked<Unlocked>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _request: u32,
        _arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        error!(EBADF)
    }
}
1205
1206pub struct ProxyFileOps(pub FileHandle);
1207
1208impl FileOps for ProxyFileOps {
1209    // `close` is not delegated because the last reference to a `ProxyFileOps` is not
1210    // necessarily the last reference of the proxied file. If this is the case, the
1211    // releaser will handle it.
1212    // These don't take &FileObject making it too hard to handle them properly in the macro
1213    fn has_persistent_offsets(&self) -> bool {
1214        self.0.ops().has_persistent_offsets()
1215    }
1216    fn writes_update_seek_offset(&self) -> bool {
1217        self.0.ops().writes_update_seek_offset()
1218    }
1219    fn is_seekable(&self) -> bool {
1220        self.0.ops().is_seekable()
1221    }
1222    // These take &mut Locked<L> as a second argument
1223    fn flush(
1224        &self,
1225        locked: &mut Locked<FileOpsCore>,
1226        _file: &FileObject,
1227        current_task: &CurrentTask,
1228    ) {
1229        self.0.ops().flush(locked, &self.0, current_task);
1230    }
1231    fn wait_async(
1232        &self,
1233        locked: &mut Locked<FileOpsCore>,
1234        _file: &FileObject,
1235        current_task: &CurrentTask,
1236        waiter: &Waiter,
1237        events: FdEvents,
1238        handler: EventHandler,
1239    ) -> Option<WaitCanceler> {
1240        self.0.ops().wait_async(locked, &self.0, current_task, waiter, events, handler)
1241    }
1242    fn query_events(
1243        &self,
1244        locked: &mut Locked<FileOpsCore>,
1245        _file: &FileObject,
1246        current_task: &CurrentTask,
1247    ) -> Result<FdEvents, Errno> {
1248        self.0.ops().query_events(locked, &self.0, current_task)
1249    }
1250    fn read(
1251        &self,
1252        locked: &mut Locked<FileOpsCore>,
1253        _file: &FileObject,
1254        current_task: &CurrentTask,
1255        offset: usize,
1256        data: &mut dyn OutputBuffer,
1257    ) -> Result<usize, Errno> {
1258        self.0.ops().read(locked, &self.0, current_task, offset, data)
1259    }
1260    fn write(
1261        &self,
1262        locked: &mut Locked<FileOpsCore>,
1263        _file: &FileObject,
1264        current_task: &CurrentTask,
1265        offset: usize,
1266        data: &mut dyn InputBuffer,
1267    ) -> Result<usize, Errno> {
1268        self.0.ops().write(locked, &self.0, current_task, offset, data)
1269    }
1270    fn ioctl(
1271        &self,
1272        locked: &mut Locked<Unlocked>,
1273        _file: &FileObject,
1274        current_task: &CurrentTask,
1275        request: u32,
1276        arg: SyscallArg,
1277    ) -> Result<SyscallResult, Errno> {
1278        self.0.ops().ioctl(locked, &self.0, current_task, request, arg)
1279    }
1280    fn fcntl(
1281        &self,
1282        _file: &FileObject,
1283        current_task: &CurrentTask,
1284        cmd: u32,
1285        arg: u64,
1286    ) -> Result<SyscallResult, Errno> {
1287        self.0.ops().fcntl(&self.0, current_task, cmd, arg)
1288    }
1289    fn readdir(
1290        &self,
1291        locked: &mut Locked<FileOpsCore>,
1292        _file: &FileObject,
1293        current_task: &CurrentTask,
1294        sink: &mut dyn DirentSink,
1295    ) -> Result<(), Errno> {
1296        self.0.ops().readdir(locked, &self.0, current_task, sink)
1297    }
1298    fn sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1299        self.0.ops().sync(&self.0, current_task)
1300    }
1301    fn data_sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1302        self.0.ops().sync(&self.0, current_task)
1303    }
1304    fn get_memory(
1305        &self,
1306        locked: &mut Locked<FileOpsCore>,
1307        _file: &FileObject,
1308        current_task: &CurrentTask,
1309        length: Option<usize>,
1310        prot: ProtectionFlags,
1311    ) -> Result<Arc<MemoryObject>, Errno> {
1312        self.0.ops.get_memory(locked, &self.0, current_task, length, prot)
1313    }
1314    fn mmap(
1315        &self,
1316        locked: &mut Locked<FileOpsCore>,
1317        _file: &FileObject,
1318        current_task: &CurrentTask,
1319        addr: DesiredAddress,
1320        memory_offset: u64,
1321        length: usize,
1322        prot_flags: ProtectionFlags,
1323        options: MappingOptions,
1324        filename: NamespaceNode,
1325    ) -> Result<UserAddress, Errno> {
1326        self.0.ops.mmap(
1327            locked,
1328            &self.0,
1329            current_task,
1330            addr,
1331            memory_offset,
1332            length,
1333            prot_flags,
1334            options,
1335            filename,
1336        )
1337    }
1338    fn seek(
1339        &self,
1340        locked: &mut Locked<FileOpsCore>,
1341        _file: &FileObject,
1342        current_task: &CurrentTask,
1343        offset: off_t,
1344        target: SeekTarget,
1345    ) -> Result<off_t, Errno> {
1346        self.0.ops.seek(locked, &self.0, current_task, offset, target)
1347    }
1348}
1349
/// Identifies the owner recorded for a file's asynchronous notifications.
///
/// NOTE(review): presumably set and queried through `fcntl` (`F_SETOWN`-style commands) —
/// confirm against the callers.
#[derive(Debug, Default, Copy, Clone)]
pub enum FileAsyncOwner {
    /// No owner has been set. This is the default.
    #[default]
    Unowned,
    /// A single thread, identified by its id.
    Thread(pid_t),
    /// A process, identified by its id.
    Process(pid_t),
    /// A process group, identified by its id.
    ProcessGroup(pid_t),
}
1358
1359impl FileAsyncOwner {
1360    pub fn validate(self, current_task: &CurrentTask) -> Result<(), Errno> {
1361        match self {
1362            FileAsyncOwner::Unowned => (),
1363            FileAsyncOwner::Thread(id) | FileAsyncOwner::Process(id) => {
1364                Task::from_weak(&current_task.get_task(id))?;
1365            }
1366            FileAsyncOwner::ProcessGroup(pgid) => {
1367                current_task
1368                    .kernel()
1369                    .pids
1370                    .read()
1371                    .get_process_group(pgid)
1372                    .ok_or_else(|| errno!(ESRCH))?;
1373            }
1374        }
1375        Ok(())
1376    }
1377}
1378
1379#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
1380pub struct FileObjectId(u64);
1381
1382impl FileObjectId {
1383    pub fn as_epoll_key(&self) -> EpollKey {
1384        self.0 as EpollKey
1385    }
1386}
1387
/// A session with a file object.
///
/// Each time a client calls open(), we create a new FileObject from the
/// underlying FsNode that receives the open(). This object contains the state
/// that is specific to this session whereas the underlying FsNode contains
/// the state that is shared between all the sessions.
pub struct FileObject {
    /// The operations implementation this file dispatches to (see `FileObject::ops`).
    ops: Box<dyn FileOps>,
    /// Per-session state, reachable directly on a `FileObject` through its `Deref` impl.
    state: FileObjectState,
}
1398
/// Lets the fields and methods of `FileObjectState` be used directly on a `FileObject`.
impl std::ops::Deref for FileObject {
    type Target = FileObjectState;
    fn deref(&self) -> &Self::Target {
        &self.state
    }
}
1405
/// The state of a `FileObject` other than its `FileOps` implementation.
pub struct FileObjectState {
    /// Weak reference to the `FileHandle` of this `FileObject`. This allows retrieving the
    /// `FileHandle` from a `FileObject`.
    pub weak_handle: WeakFileHandle,

    /// A unique identifier for this file object.
    pub id: FileObjectId,

    /// The NamespaceNode associated with this FileObject.
    ///
    /// Represents the name the process used to open this file.
    pub name: ActiveNamespaceNode,

    /// The file system of the node this file object was created from.
    pub fs: FileSystemHandle,

    /// The current file offset. `FileObject::read` starts at this offset and advances it for
    /// ops that report persistent offsets.
    pub offset: Mutex<off_t>,

    /// The open flags of this file object. `CREAT` is removed from the flags passed to
    /// `FileObject::new` before they are stored here.
    flags: Mutex<OpenFlags>,

    /// The owner for asynchronous notifications; `FileAsyncOwner::Unowned` until set.
    async_owner: Mutex<FileAsyncOwner>,

    /// Weak handles to epoll files, keyed by file handle.
    ///
    /// NOTE(review): the previous comment described these as the `EpollFileObject`s that add
    /// this `FileObject` as their control file — confirm against the code that updates the map.
    epoll_files: Mutex<HashMap<FileHandleKey, WeakFileHandle>>,

    /// See fcntl F_SETLEASE and F_GETLEASE.
    lease: Mutex<FileLeaseType>,

    // This extra reference to the FsNode should not be needed, but it is needed to make
    // Inotify.ExcludeUnlinkInodeEvents pass. It is only populated when the file is opened with
    // write access.
    _mysterious_node: Option<FsNodeHandle>,

    /// Opaque security state associated with this file object.
    pub security_state: security::FileObjectState,
}
1441
/// The `ReleaserAction` used for `FileObject`s: a released file object is handed to
/// `register_delayed_release`.
pub enum FileObjectReleaserAction {}
impl ReleaserAction<FileObject> for FileObjectReleaserAction {
    fn release(file_object: ReleaseGuard<FileObject>) {
        register_delayed_release(file_object);
    }
}
/// A `FileObject` wrapped in an `ObjectReleaser` that applies `FileObjectReleaserAction`.
pub type FileReleaser = ObjectReleaser<FileObject, FileObjectReleaserAction>;
/// Strong, shared reference to an open file.
pub type FileHandle = Arc<FileReleaser>;
/// Weak counterpart of `FileHandle`.
pub type WeakFileHandle = Weak<FileReleaser>;
/// `WeakKey` over a file, used as the key type of `FileObjectState::epoll_files`.
pub type FileHandleKey = WeakKey<FileReleaser>;
1452
1453impl FileObjectState {
1454    /// The FsNode from which this FileObject was created.
1455    pub fn node(&self) -> &FsNodeHandle {
1456        &self.name.entry.node
1457    }
1458
1459    pub fn flags(&self) -> OpenFlags {
1460        *self.flags.lock()
1461    }
1462
1463    pub fn can_read(&self) -> bool {
1464        // TODO: Consider caching the access mode outside of this lock
1465        // because it cannot change.
1466        self.flags.lock().can_read()
1467    }
1468
1469    pub fn can_write(&self) -> bool {
1470        // TODO: Consider caching the access mode outside of this lock
1471        // because it cannot change.
1472        self.flags.lock().can_write()
1473    }
1474
1475    /// Returns false if the file is not allowed to be executed.
1476    pub fn can_exec(&self) -> bool {
1477        let mounted_no_exec = self.name.to_passive().mount.flags().contains(MountFlags::NOEXEC);
1478        let no_exec_seal = self
1479            .node()
1480            .write_guard_state
1481            .lock()
1482            .get_seals()
1483            .map(|seals| seals.contains(SealFlags::NO_EXEC))
1484            .unwrap_or(false);
1485        !(mounted_no_exec || no_exec_seal)
1486    }
1487
1488    // Notifies watchers on the current node and its parent about an event.
1489    pub fn notify(&self, event_mask: InotifyMask) {
1490        self.name.notify(event_mask)
1491    }
1492}
1493
1494impl FileObject {
1495    /// Create a FileObject that is not mounted in a namespace.
1496    ///
1497    /// In particular, this will create a new unrooted entries. This should not be used on
1498    /// file system with persistent entries, as the created entry will be out of sync with the one
1499    /// from the file system.
1500    ///
1501    /// The returned FileObject does not have a name.
1502    pub fn new_anonymous<L>(
1503        locked: &mut Locked<L>,
1504        current_task: &CurrentTask,
1505        ops: Box<dyn FileOps>,
1506        node: FsNodeHandle,
1507        flags: OpenFlags,
1508    ) -> FileHandle
1509    where
1510        L: LockEqualOrBefore<FileOpsCore>,
1511    {
1512        assert!(!node.fs().has_permanent_entries());
1513        Self::new(
1514            locked,
1515            current_task,
1516            ops,
1517            NamespaceNode::new_anonymous_unrooted(current_task, node),
1518            flags,
1519        )
1520        .expect("Failed to create anonymous FileObject")
1521    }
1522
    /// Create a FileObject with an associated NamespaceNode.
    ///
    /// This function is not typically called directly. Instead, consider
    /// calling NamespaceNode::open.
    ///
    /// An `OPEN` inotify event is emitted for the new file before the ops' `open` hook runs.
    ///
    /// # Errors
    ///
    /// Fails if `flags` allow writing and the node's write guard cannot be acquired in
    /// `WriteFile` mode, or if the ops' `open` hook fails.
    pub fn new<L>(
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        ops: Box<dyn FileOps>,
        name: NamespaceNode,
        flags: OpenFlags,
    ) -> Result<FileHandle, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        // Writable opens first register with the node's write guard and then keep an extra
        // reference to the node (see the comment on the `_mysterious_node` field).
        let _mysterious_node = if flags.can_write() {
            name.entry.node.write_guard_state.lock().acquire(FileWriteGuardMode::WriteFile)?;
            Some(name.entry.node.clone())
        } else {
            None
        };
        let fs = name.entry.node.fs();
        let id = FileObjectId(current_task.kernel.next_file_object_id.next());
        let security_state = security::file_alloc_security(current_task);
        // `new_cyclic` lets the state store a weak reference to the handle being built.
        let file = FileHandle::new_cyclic(|weak_handle| {
            Self {
                ops,
                state: FileObjectState {
                    weak_handle: weak_handle.clone(),
                    id,
                    name: name.into_active(),
                    fs,
                    offset: Mutex::new(0),
                    // `CREAT` is removed from the stored flags.
                    flags: Mutex::new(flags - OpenFlags::CREAT),
                    async_owner: Default::default(),
                    epoll_files: Default::default(),
                    lease: Default::default(),
                    _mysterious_node,
                    security_state,
                },
            }
            .into()
        });
        file.notify(InotifyMask::OPEN);

        file.ops().open(locked.cast_locked::<FileOpsCore>(), &file, current_task)?;
        Ok(file)
    }
1570
1571    pub fn max_access_for_memory_mapping(&self) -> Access {
1572        let mut access = Access::EXIST;
1573        if self.can_exec() {
1574            access |= Access::EXEC;
1575        }
1576        let flags = self.flags.lock();
1577        if flags.can_read() {
1578            access |= Access::READ;
1579        }
1580        if flags.can_write() {
1581            access |= Access::WRITE;
1582        }
1583        access
1584    }
1585
1586    pub fn ops(&self) -> &dyn FileOps {
1587        self.ops.as_ref()
1588    }
1589
    /// Returns the type name reported by this file's `FileOps` implementation.
    pub fn ops_type_name(&self) -> &'static str {
        self.ops().type_name()
    }
1593
1594    pub fn is_non_blocking(&self) -> bool {
1595        self.flags().contains(OpenFlags::NONBLOCK)
1596    }
1597
    /// Common implementation for blocking operations.
    ///
    /// This function is used to implement the blocking operations for file objects. FileOps
    /// implementations should call this function to handle the blocking logic.
    ///
    /// The `op` parameter is a function that implements the non-blocking version of the operation.
    /// The function is called once without registering a waiter in case no wait is needed. If the
    /// operation returns EAGAIN and the file object is non-blocking (and was not opened with
    /// `DIRECTORY`), the function returns EAGAIN.
    ///
    /// If the operation returns EAGAIN and the file object is blocking, the function will block
    /// until the given events are triggered. At that time, the operation is retried. Notice that
    /// the `op` function can be called multiple times before the operation completes.
    ///
    /// The `deadline` parameter is the deadline for the operation; `None` waits indefinitely.
    /// If the operation does not complete before the deadline, the wait's ETIMEDOUT is
    /// translated and the function returns EAGAIN. Any other wait error is returned as is.
    pub fn blocking_op<L, T, Op>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        events: FdEvents,
        deadline: Option<zx::MonotonicInstant>,
        mut op: Op,
    ) -> Result<T, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
        Op: FnMut(&mut Locked<L>) -> Result<T, Errno>,
    {
        // Don't return EAGAIN for directories. This can happen because glibc always opens a
        // directory with O_NONBLOCK.
        let can_return_eagain = self.flags().contains(OpenFlags::NONBLOCK)
            && !self.flags().contains(OpenFlags::DIRECTORY);
        // Run the operation a first time without registering a waiter in case no wait is needed.
        match op(locked) {
            // Only an EAGAIN that must not be surfaced falls through to the wait loop.
            Err(errno) if errno == EAGAIN && !can_return_eagain => {}
            result => return result,
        }

        let waiter = Waiter::new();
        loop {
            // Register the waiter before running the operation to prevent a race.
            self.wait_async(locked, current_task, &waiter, events, WaitCallback::none());
            match op(locked) {
                Err(e) if e == EAGAIN => {}
                result => return result,
            }
            let locked = locked.cast_locked::<FileOpsCore>();
            // A missed deadline surfaces to the caller as EAGAIN rather than ETIMEDOUT.
            waiter
                .wait_until(
                    locked,
                    current_task,
                    deadline.unwrap_or(zx::MonotonicInstant::INFINITE),
                )
                .map_err(|e| if e == ETIMEDOUT { errno!(EAGAIN) } else { e })?;
        }
    }
1653
    /// Returns whether this file's `FileOps` implementation reports itself as seekable.
    pub fn is_seekable(&self) -> bool {
        self.ops().is_seekable()
    }
1657
    /// Returns whether this file's `FileOps` implementation reports persistent offsets.
    ///
    /// `read` only uses and advances the stored offset when this is true.
    pub fn has_persistent_offsets(&self) -> bool {
        self.ops().has_persistent_offsets()
    }
1661
    /// Common implementation for `read` and `read_at`.
    ///
    /// Runs the security permission check, then verifies that the file was opened for reading
    /// (`EBADF` otherwise), and only then invokes `read`. After a successful read the access
    /// time is updated, and an `ACCESS` inotify event is emitted if any bytes were read.
    fn read_internal<R>(&self, current_task: &CurrentTask, read: R) -> Result<usize, Errno>
    where
        R: FnOnce() -> Result<usize, Errno>,
    {
        security::file_permission(current_task, self, security::PermissionFlags::READ)?;

        if !self.can_read() {
            return error!(EBADF);
        }
        let bytes_read = read()?;

        // TODO(steveaustin) - omit updating time_access to allow info to be immutable
        // and thus allow simultaneous reads.
        self.update_atime();
        if bytes_read > 0 {
            self.notify(InotifyMask::ACCESS);
        }

        Ok(bytes_read)
    }
1683
    /// Reads into `data` starting at the file's current offset.
    ///
    /// For ops without persistent offsets the read is issued at offset 0 and the stored offset
    /// is left untouched; requests larger than `MAX_LFS_FILESIZE` fail with `EINVAL`. Otherwise
    /// the offset lock is held for the duration of the read and the offset is advanced by the
    /// number of bytes read.
    ///
    /// Permission and access-mode checks, as well as the access time update and inotify
    /// notification, are handled by `read_internal`.
    pub fn read<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        self.read_internal(current_task, || {
            let locked = locked.cast_locked::<FileOpsCore>();
            if !self.ops().has_persistent_offsets() {
                if data.available() > MAX_LFS_FILESIZE {
                    return error!(EINVAL);
                }
                return self.ops.read(locked, self, current_task, 0, data);
            }

            // The guard is kept until the offset has been advanced by this read.
            let mut offset_guard = self.offset.lock();
            let offset = *offset_guard as usize;
            // Validate the offset/length pair before handing the request to the ops.
            checked_add_offset_and_length(offset, data.available())?;
            let read = self.ops.read(locked, self, current_task, offset, data)?;
            *offset_guard += read as off_t;
            Ok(read)
        })
    }
1710
1711    pub fn read_at<L>(
1712        &self,
1713        locked: &mut Locked<L>,
1714        current_task: &CurrentTask,
1715        offset: usize,
1716        data: &mut dyn OutputBuffer,
1717    ) -> Result<usize, Errno>
1718    where
1719        L: LockEqualOrBefore<FileOpsCore>,
1720    {
1721        if !self.ops().is_seekable() {
1722            return error!(ESPIPE);
1723        }
1724        checked_add_offset_and_length(offset, data.available())?;
1725        let locked = locked.cast_locked::<FileOpsCore>();
1726        self.read_internal(current_task, || self.ops.read(locked, self, current_task, offset, data))
1727    }
1728
1729    /// Common checks before calling ops().write.
1730    fn write_common<L>(
1731        &self,
1732        locked: &mut Locked<L>,
1733        current_task: &CurrentTask,
1734        offset: usize,
1735        data: &mut dyn InputBuffer,
1736    ) -> Result<usize, Errno>
1737    where
1738        L: LockEqualOrBefore<FileOpsCore>,
1739    {
1740        security::file_permission(current_task, self, security::PermissionFlags::WRITE)?;
1741
1742        // We need to cap the size of `data` to prevent us from growing the file too large,
1743        // according to <https://man7.org/linux/man-pages/man2/write.2.html>:
1744        //
1745        //   The number of bytes written may be less than count if, for example, there is
1746        //   insufficient space on the underlying physical medium, or the RLIMIT_FSIZE resource
1747        //   limit is encountered (see setrlimit(2)),
1748        checked_add_offset_and_length(offset, data.available())?;
1749        let locked = locked.cast_locked::<FileOpsCore>();
1750        self.ops().write(locked, self, current_task, offset, data)
1751    }
1752
1753    /// Common wrapper work for `write` and `write_at`.
1754    fn write_fn<W, L>(
1755        &self,
1756        locked: &mut Locked<L>,
1757        current_task: &CurrentTask,
1758        write: W,
1759    ) -> Result<usize, Errno>
1760    where
1761        L: LockEqualOrBefore<FileOpsCore>,
1762        W: FnOnce(&mut Locked<L>) -> Result<usize, Errno>,
1763    {
1764        if !self.can_write() {
1765            return error!(EBADF);
1766        }
1767        self.node().clear_suid_and_sgid_bits(locked, current_task)?;
1768        let bytes_written = write(locked)?;
1769        self.node().update_ctime_mtime();
1770
1771        if bytes_written > 0 {
1772            self.notify(InotifyMask::MODIFY);
1773        }
1774
1775        Ok(bytes_written)
1776    }
1777
1778    pub fn write<L>(
1779        &self,
1780        locked: &mut Locked<L>,
1781        current_task: &CurrentTask,
1782        data: &mut dyn InputBuffer,
1783    ) -> Result<usize, Errno>
1784    where
1785        L: LockEqualOrBefore<FileOpsCore>,
1786    {
1787        self.write_fn(locked, current_task, |locked| {
1788            if !self.ops().has_persistent_offsets() {
1789                return self.write_common(locked, current_task, 0, data);
1790            }
1791            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
1792            // but FileOpsCore must be after FsNodeAppend
1793            #[allow(
1794                clippy::undocumented_unsafe_blocks,
1795                reason = "Force documented unsafe blocks in Starnix"
1796            )]
1797            let locked = unsafe { Unlocked::new() };
1798            let mut offset = self.offset.lock();
1799            let bytes_written = if self.flags().contains(OpenFlags::APPEND) {
1800                let (_guard, locked) = self.node().append_lock.write_and(locked, current_task)?;
1801                *offset = self.ops().seek(
1802                    locked.cast_locked::<FileOpsCore>(),
1803                    self,
1804                    current_task,
1805                    *offset,
1806                    SeekTarget::End(0),
1807                )?;
1808                self.write_common(locked, current_task, *offset as usize, data)
1809            } else {
1810                let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;
1811                self.write_common(locked, current_task, *offset as usize, data)
1812            }?;
1813            if self.ops().writes_update_seek_offset() {
1814                *offset += bytes_written as off_t;
1815            }
1816            Ok(bytes_written)
1817        })
1818    }
1819
1820    pub fn write_at<L>(
1821        &self,
1822        locked: &mut Locked<L>,
1823        current_task: &CurrentTask,
1824        mut offset: usize,
1825        data: &mut dyn InputBuffer,
1826    ) -> Result<usize, Errno>
1827    where
1828        L: LockEqualOrBefore<FileOpsCore>,
1829    {
1830        if !self.ops().is_seekable() {
1831            return error!(ESPIPE);
1832        }
1833        self.write_fn(locked, current_task, |_locked| {
1834            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
1835            // but FileOpsCore must be after FsNodeAppend
1836            #[allow(
1837                clippy::undocumented_unsafe_blocks,
1838                reason = "Force documented unsafe blocks in Starnix"
1839            )]
1840            let locked = unsafe { Unlocked::new() };
1841            let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;
1842
1843            // According to LTP test pwrite04:
1844            //
1845            //   POSIX requires that opening a file with the O_APPEND flag should have no effect on the
1846            //   location at which pwrite() writes data. However, on Linux, if a file is opened with
1847            //   O_APPEND, pwrite() appends data to the end of the file, regardless of the value of offset.
1848            if self.flags().contains(OpenFlags::APPEND) && self.ops().is_seekable() {
1849                checked_add_offset_and_length(offset, data.available())?;
1850                offset = default_eof_offset(locked, self, current_task)? as usize;
1851            }
1852
1853            self.write_common(locked, current_task, offset, data)
1854        })
1855    }
1856
1857    pub fn seek<L>(
1858        &self,
1859        locked: &mut Locked<L>,
1860        current_task: &CurrentTask,
1861        target: SeekTarget,
1862    ) -> Result<off_t, Errno>
1863    where
1864        L: LockEqualOrBefore<FileOpsCore>,
1865    {
1866        let locked = locked.cast_locked::<FileOpsCore>();
1867        let locked = locked;
1868
1869        if !self.ops().is_seekable() {
1870            return error!(ESPIPE);
1871        }
1872
1873        if !self.ops().has_persistent_offsets() {
1874            return self.ops().seek(locked, self, current_task, 0, target);
1875        }
1876
1877        let mut offset_guard = self.offset.lock();
1878        let new_offset = self.ops().seek(locked, self, current_task, *offset_guard, target)?;
1879        *offset_guard = new_offset;
1880        Ok(new_offset)
1881    }
1882
1883    pub fn sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1884        self.ops().sync(self, current_task)
1885    }
1886
1887    pub fn data_sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1888        self.ops().data_sync(self, current_task)
1889    }
1890
1891    pub fn get_memory<L>(
1892        &self,
1893        locked: &mut Locked<L>,
1894        current_task: &CurrentTask,
1895        length: Option<usize>,
1896        prot: ProtectionFlags,
1897    ) -> Result<Arc<MemoryObject>, Errno>
1898    where
1899        L: LockEqualOrBefore<FileOpsCore>,
1900    {
1901        if prot.contains(ProtectionFlags::READ) && !self.can_read() {
1902            return error!(EACCES);
1903        }
1904        if prot.contains(ProtectionFlags::WRITE) && !self.can_write() {
1905            return error!(EACCES);
1906        }
1907        if prot.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1908            return error!(EPERM);
1909        }
1910        self.ops().get_memory(locked.cast_locked::<FileOpsCore>(), self, current_task, length, prot)
1911    }
1912
1913    pub fn mmap<L>(
1914        &self,
1915        locked: &mut Locked<L>,
1916        current_task: &CurrentTask,
1917        addr: DesiredAddress,
1918        memory_offset: u64,
1919        length: usize,
1920        prot_flags: ProtectionFlags,
1921        options: MappingOptions,
1922        filename: NamespaceNode,
1923    ) -> Result<UserAddress, Errno>
1924    where
1925        L: LockEqualOrBefore<FileOpsCore>,
1926    {
1927        let locked = locked.cast_locked::<FileOpsCore>();
1928        if !self.can_read() {
1929            return error!(EACCES);
1930        }
1931        if prot_flags.contains(ProtectionFlags::WRITE)
1932            && !self.can_write()
1933            && options.contains(MappingOptions::SHARED)
1934        {
1935            return error!(EACCES);
1936        }
1937        if prot_flags.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1938            return error!(EPERM);
1939        }
1940        self.ops().mmap(
1941            locked,
1942            self,
1943            current_task,
1944            addr,
1945            memory_offset,
1946            length,
1947            prot_flags,
1948            options,
1949            filename,
1950        )
1951    }
1952
1953    pub fn readdir<L>(
1954        &self,
1955        locked: &mut Locked<L>,
1956        current_task: &CurrentTask,
1957        sink: &mut dyn DirentSink,
1958    ) -> Result<(), Errno>
1959    where
1960        L: LockEqualOrBefore<FileOpsCore>,
1961    {
1962        let locked = locked.cast_locked::<FileOpsCore>();
1963        if self.name.entry.read().is_dead() {
1964            return error!(ENOENT);
1965        }
1966
1967        self.ops().readdir(locked, self, current_task, sink)?;
1968        self.update_atime();
1969        self.notify(InotifyMask::ACCESS);
1970        Ok(())
1971    }
1972
1973    pub fn ioctl(
1974        &self,
1975        locked: &mut Locked<Unlocked>,
1976        current_task: &CurrentTask,
1977        request: u32,
1978        arg: SyscallArg,
1979    ) -> Result<SyscallResult, Errno> {
1980        security::check_file_ioctl_access(current_task, &self, request)?;
1981
1982        if request == FIBMAP {
1983            security::check_task_capable(current_task, CAP_SYS_RAWIO)?;
1984
1985            // TODO: https://fxbug.dev/404795644 - eliminate this phoney response when the SELinux
1986            // Test Suite no longer requires it.
1987            if current_task.kernel().features.selinux_test_suite {
1988                let phoney_block = 0xbadf000du32;
1989                current_task.write_object(arg.into(), &phoney_block)?;
1990                return Ok(SUCCESS);
1991            }
1992        }
1993
1994        self.ops().ioctl(locked, self, current_task, request, arg)
1995    }
1996
1997    pub fn fcntl(
1998        &self,
1999        current_task: &CurrentTask,
2000        cmd: u32,
2001        arg: u64,
2002    ) -> Result<SyscallResult, Errno> {
2003        self.ops().fcntl(self, current_task, cmd, arg)
2004    }
2005
2006    pub fn ftruncate<L>(
2007        &self,
2008        locked: &mut Locked<L>,
2009        current_task: &CurrentTask,
2010        length: u64,
2011    ) -> Result<(), Errno>
2012    where
2013        L: LockBefore<BeforeFsNodeAppend>,
2014    {
2015        // The file must be opened with write permissions. Otherwise
2016        // truncating it is forbidden.
2017        if !self.can_write() {
2018            return error!(EINVAL);
2019        }
2020        self.node().ftruncate(locked, current_task, length)?;
2021        self.name.entry.notify_ignoring_excl_unlink(InotifyMask::MODIFY);
2022        Ok(())
2023    }
2024
2025    pub fn fallocate<L>(
2026        &self,
2027        locked: &mut Locked<L>,
2028        current_task: &CurrentTask,
2029        mode: FallocMode,
2030        offset: u64,
2031        length: u64,
2032    ) -> Result<(), Errno>
2033    where
2034        L: LockBefore<BeforeFsNodeAppend>,
2035    {
2036        // If the file is a pipe or FIFO, ESPIPE is returned.
2037        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2038        if self.node().is_fifo() {
2039            return error!(ESPIPE);
2040        }
2041
2042        // Must be a regular file or directory.
2043        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2044        if !self.node().is_dir() && !self.node().is_reg() {
2045            return error!(ENODEV);
2046        }
2047
2048        // The file must be opened with write permissions. Otherwise operation is forbidden.
2049        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2050        if !self.can_write() {
2051            return error!(EBADF);
2052        }
2053
2054        self.node().fallocate(locked, current_task, mode, offset, length)?;
2055        self.notify(InotifyMask::MODIFY);
2056        Ok(())
2057    }
2058
2059    pub fn to_handle(
2060        &self,
2061        current_task: &CurrentTask,
2062    ) -> Result<Option<zx::NullableHandle>, Errno> {
2063        self.ops().to_handle(self, current_task)
2064    }
2065
2066    pub fn as_thread_group_key(&self) -> Result<ThreadGroupKey, Errno> {
2067        self.ops().as_thread_group_key(self)
2068    }
2069
2070    pub fn update_file_flags(&self, value: OpenFlags, mask: OpenFlags) {
2071        let mask_bits = mask.bits();
2072        let mut flags = self.flags.lock();
2073        let bits = (flags.bits() & !mask_bits) | (value.bits() & mask_bits);
2074        *flags = OpenFlags::from_bits_truncate(bits);
2075    }
2076
2077    /// Get the async owner of this file.
2078    ///
2079    /// See fcntl(F_GETOWN)
2080    pub fn get_async_owner(&self) -> FileAsyncOwner {
2081        *self.async_owner.lock()
2082    }
2083
2084    /// Set the async owner of this file.
2085    ///
2086    /// See fcntl(F_SETOWN)
2087    pub fn set_async_owner(&self, owner: FileAsyncOwner) {
2088        *self.async_owner.lock() = owner;
2089    }
2090
2091    /// See fcntl(F_GETLEASE)
2092    pub fn get_lease(&self, _current_task: &CurrentTask) -> FileLeaseType {
2093        *self.lease.lock()
2094    }
2095
2096    /// See fcntl(F_SETLEASE)
2097    pub fn set_lease(
2098        &self,
2099        _current_task: &CurrentTask,
2100        lease: FileLeaseType,
2101    ) -> Result<(), Errno> {
2102        if !self.node().is_reg() {
2103            return error!(EINVAL);
2104        }
2105        if lease == FileLeaseType::Read && self.can_write() {
2106            return error!(EAGAIN);
2107        }
2108        *self.lease.lock() = lease;
2109        Ok(())
2110    }
2111
2112    /// Wait on the specified events and call the EventHandler when ready
2113    pub fn wait_async<L>(
2114        &self,
2115        locked: &mut Locked<L>,
2116        current_task: &CurrentTask,
2117        waiter: &Waiter,
2118        events: FdEvents,
2119        handler: EventHandler,
2120    ) -> Option<WaitCanceler>
2121    where
2122        L: LockEqualOrBefore<FileOpsCore>,
2123    {
2124        self.ops().wait_async(
2125            locked.cast_locked::<FileOpsCore>(),
2126            self,
2127            current_task,
2128            waiter,
2129            events,
2130            handler,
2131        )
2132    }
2133
2134    /// The events currently active on this file.
2135    pub fn query_events<L>(
2136        &self,
2137        locked: &mut Locked<L>,
2138        current_task: &CurrentTask,
2139    ) -> Result<FdEvents, Errno>
2140    where
2141        L: LockEqualOrBefore<FileOpsCore>,
2142    {
2143        self.ops()
2144            .query_events(locked.cast_locked::<FileOpsCore>(), self, current_task)
2145            .map(FdEvents::add_equivalent_fd_events)
2146    }
2147
2148    pub fn record_lock(
2149        &self,
2150        locked: &mut Locked<Unlocked>,
2151        current_task: &CurrentTask,
2152        cmd: RecordLockCommand,
2153        flock: uapi::flock,
2154    ) -> Result<Option<uapi::flock>, Errno> {
2155        self.node().record_lock(locked, current_task, self, cmd, flock)
2156    }
2157
2158    pub fn flush<L>(&self, locked: &mut Locked<L>, current_task: &CurrentTask, id: FdTableId)
2159    where
2160        L: LockEqualOrBefore<FileOpsCore>,
2161    {
2162        self.name.entry.node.record_lock_release(RecordLockOwner::FdTable(id));
2163        self.ops().flush(locked.cast_locked::<FileOpsCore>(), self, current_task)
2164    }
2165
2166    fn update_atime(&self) {
2167        if !self.flags().contains(OpenFlags::NOATIME) {
2168            self.name.update_atime();
2169        }
2170    }
2171
2172    pub fn readahead(
2173        &self,
2174        current_task: &CurrentTask,
2175        offset: usize,
2176        length: usize,
2177    ) -> Result<(), Errno> {
2178        // readfile() fails with EBADF if the file was not open for read.
2179        if !self.can_read() {
2180            return error!(EBADF);
2181        }
2182        checked_add_offset_and_length(offset, length)?;
2183        self.ops().readahead(self, current_task, offset, length)
2184    }
2185
2186    pub fn extra_fdinfo(
2187        &self,
2188        locked: &mut Locked<FileOpsCore>,
2189        current_task: &CurrentTask,
2190    ) -> Option<FsString> {
2191        let file = self.weak_handle.upgrade()?;
2192        self.ops().extra_fdinfo(locked, &file, current_task)
2193    }
2194
2195    /// Register the fd number of an `EpollFileObject` that listens to events from this
2196    /// `FileObject`.
2197    pub fn register_epfd(&self, file: &FileHandle) {
2198        self.epoll_files.lock().insert(WeakKey::from(file), file.weak_handle.clone());
2199    }
2200
2201    pub fn unregister_epfd(&self, file: &FileHandle) {
2202        self.epoll_files.lock().remove(&WeakKey::from(file));
2203    }
2204}
2205
2206impl Releasable for FileObject {
2207    type Context<'a> = CurrentTaskAndLocked<'a>;
2208
2209    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
2210        let (locked, current_task) = context;
2211        // Release all wake leases associated with this file in the corresponding `WaitObject`
2212        // of each registered epfd.
2213        for (_, file) in self.epoll_files.lock().drain() {
2214            if let Some(file) = file.upgrade() {
2215                if let Some(epoll_object) = file.downcast_file::<EpollFileObject>() {
2216                    current_task
2217                        .kernel()
2218                        .suspend_resume_manager
2219                        .remove_epoll(self.id.as_epoll_key());
2220                    let _ = epoll_object.delete(&self);
2221                }
2222            }
2223        }
2224
2225        if self.can_write() {
2226            self.name.entry.node.write_guard_state.lock().release(FileWriteGuardMode::WriteFile);
2227        }
2228
2229        let locked = locked.cast_locked::<FileOpsCore>();
2230        let ops = self.ops;
2231        let state = self.state;
2232        ops.close(locked, &state, current_task);
2233        state.name.entry.node.on_file_closed(&state);
2234        let event =
2235            if state.can_write() { InotifyMask::CLOSE_WRITE } else { InotifyMask::CLOSE_NOWRITE };
2236        state.notify(event);
2237    }
2238}
2239
2240impl fmt::Debug for FileObject {
2241    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2242        f.debug_struct("FileObject")
2243            .field("name", &self.name)
2244            .field("fs", &self.fs.name())
2245            .field("offset", &self.offset)
2246            .field("flags", &self.flags)
2247            .field("ops_ty", &self.ops().type_name())
2248            .finish()
2249    }
2250}
2251
2252impl OnWakeOps for FileReleaser {
2253    /// Called when the underneath `FileOps` is waken up by the power framework.
2254    fn on_wake(&self, current_task: &CurrentTask, baton_lease: &zx::NullableHandle) {
2255        // Activate associated wake leases in registered epfd.
2256        for (_, file) in self.epoll_files.lock().iter() {
2257            if let Some(file) = file.upgrade() {
2258                if let Some(epoll_file) = file.downcast_file::<EpollFileObject>() {
2259                    if let Some(weak_handle) = self.weak_handle.upgrade() {
2260                        if let Err(e) =
2261                            epoll_file.activate_lease(current_task, &weak_handle, baton_lease)
2262                        {
2263                            log_error!("Failed to activate wake lease in epoll control file: {e}");
2264                        }
2265                    }
2266                }
2267            }
2268        }
2269    }
2270}
2271
2272/// A FileObject with the type of its FileOps known. Dereferencing it returns the FileOps.
2273pub struct DowncastedFile<'a, Ops> {
2274    file: &'a FileObject,
2275    ops: &'a Ops,
2276}
2277impl<'a, Ops> Copy for DowncastedFile<'a, Ops> {}
2278impl<'a, Ops> Clone for DowncastedFile<'a, Ops> {
2279    fn clone(&self) -> Self {
2280        *self
2281    }
2282}
2283
2284impl<'a, Ops> DowncastedFile<'a, Ops> {
2285    pub fn file(&self) -> &'a FileObject {
2286        self.file
2287    }
2288}
2289
2290impl<'a, Ops> Deref for DowncastedFile<'a, Ops> {
2291    type Target = &'a Ops;
2292    fn deref(&self) -> &Self::Target {
2293        &self.ops
2294    }
2295}
2296
2297impl FileObject {
2298    /// Returns the `FileObject`'s `FileOps` as a `DowncastedFile<T>`, or `None` if the downcast
2299    /// fails.
2300    ///
2301    /// This is useful for syscalls that only operate on a certain type of file.
2302    pub fn downcast_file<'a, T>(&'a self) -> Option<DowncastedFile<'a, T>>
2303    where
2304        T: 'static,
2305    {
2306        let ops = self.ops().as_any().downcast_ref::<T>()?;
2307        Some(DowncastedFile { file: self, ops })
2308    }
2309}
2310
#[cfg(test)]
mod tests {
    use crate::fs::tmpfs::TmpFs;
    use crate::task::CurrentTask;
    use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
    use crate::testing::*;
    use crate::vfs::MountInfo;
    use crate::vfs::buffers::{VecInputBuffer, VecOutputBuffer};
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::auth::FsCred;
    use starnix_uapi::device_type::DeviceType;
    use starnix_uapi::file_mode::FileMode;
    use starnix_uapi::open_flags::OpenFlags;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use zerocopy::{FromBytes, IntoBytes, LE, U64};

    /// Runs an appending writer, a truncating thread and a reader against one tmpfs file
    /// and checks that every snapshot the reader sees is a strictly increasing sequence.
    #[::fuchsia::test]
    async fn test_append_truncate_race() {
        spawn_kernel_and_run(async |locked, current_task| {
            let kernel = current_task.kernel();
            let root_fs = TmpFs::new_fs(locked, &kernel);
            let mount = MountInfo::detached();
            let root_node = Arc::clone(root_fs.root());

            // Create a regular file named "test" and open it for appending reads/writes.
            let file = root_node
                .create_entry(
                    locked,
                    &current_task,
                    &mount,
                    "test".into(),
                    |locked, dir, mount, name| {
                        dir.create_node(
                            locked,
                            &current_task,
                            mount,
                            name,
                            FileMode::IFREG | FileMode::ALLOW_ALL,
                            DeviceType::NONE,
                            FsCred::root(),
                        )
                    },
                )
                .expect("create_node failed");
            let file_handle = file
                .open_anonymous(locked, &current_task, OpenFlags::APPEND | OpenFlags::RDWR)
                .expect("open failed");

            // Set by the writer once all of its values have been appended.
            let done = Arc::new(AtomicBool::new(false));

            // Writer: appends 2000 little-endian u64 values in increasing order.
            let writer_file = file_handle.clone();
            let writer_done = Arc::clone(&done);
            let write_all = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                for value in 0..2000 {
                    let encoded = U64::<LE>::new(value);
                    writer_file
                        .write(locked, current_task, &mut VecInputBuffer::new(encoded.as_bytes()))
                        .expect("write failed");
                }
                writer_done.store(true, Ordering::SeqCst);
                Ok::<(), starnix_uapi::errors::Errno>(())
            };
            let (write_thread, write_request) =
                SpawnRequestBuilder::new().with_sync_closure(write_all).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(write_request);

            // Truncator: keeps truncating the file to zero length until the writer is done.
            let truncator_file = file_handle.clone();
            let truncator_done = Arc::clone(&done);
            let truncate_repeatedly =
                move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                    while !truncator_done.load(Ordering::SeqCst) {
                        truncator_file.ftruncate(locked, current_task, 0).expect("truncate failed");
                    }
                    Ok::<(), starnix_uapi::errors::Errno>(())
                };
            let (truncate_thread, truncate_request) = SpawnRequestBuilder::new()
                .with_sync_closure(truncate_repeatedly)
                .build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(truncate_request);

            // If we read from the file, we should always find an increasing sequence. If there are
            // races, then we might unexpectedly see zeroes.
            while !done.load(Ordering::SeqCst) {
                let mut output = VecOutputBuffer::new(4096);
                let amount = file_handle
                    .read_at(locked, &current_task, 0, &mut output)
                    .expect("read failed");
                let contents = Vec::from(output);
                let snapshot = &contents[..amount];

                let mut previous: Option<u64> = None;
                for chunk in snapshot.chunks_exact(8) {
                    let current = U64::<LE>::read_from_bytes(chunk).unwrap().get();
                    if let Some(previous) = previous {
                        assert!(current > previous, "buffer: {:?}", snapshot);
                    }
                    previous = Some(current);
                }
            }

            let _ = write_thread().unwrap();
            let _ = truncate_thread().unwrap();
        })
        .await;
    }
}