starnix_core/vfs/
file_object.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::memory::MemoryObject;
6use crate::mm::{DesiredAddress, MappingName, MappingOptions, MemoryAccessorExt, ProtectionFlags};
7use crate::power::{OnWakeOps, WakeupSourceOrigin};
8use crate::security;
9use crate::task::{
10    CurrentTask, CurrentTaskAndLocked, EventHandler, Task, ThreadGroupKey, WaitCallback,
11    WaitCanceler, Waiter, register_delayed_release,
12};
13use crate::vfs::buffers::{InputBuffer, OutputBuffer};
14use crate::vfs::file_server::serve_file;
15use crate::vfs::fsverity::{
16    FsVerityState, {self},
17};
18use crate::vfs::{
19    ActiveNamespaceNode, DirentSink, EpollFileObject, EpollKey, FallocMode, FdTableId,
20    FileSystemHandle, FileWriteGuardMode, FsNodeHandle, FsString, NamespaceNode, RecordLockCommand,
21    RecordLockOwner, wakeup_source_name_for_epoll,
22};
23use starnix_crypt::EncryptionKeyId;
24use starnix_lifecycle::{ObjectReleaser, ReleaserAction};
25use starnix_types::ownership::ReleaseGuard;
26use starnix_uapi::mount_flags::MountFlags;
27use starnix_uapi::user_address::ArchSpecific;
28
29use fidl::HandleBased;
30use linux_uapi::{FSCRYPT_MODE_AES_256_CTS, FSCRYPT_MODE_AES_256_XTS};
31use starnix_logging::{CATEGORY_STARNIX_MM, impossible_error, trace_duration, track_stub};
32use starnix_rcu::rcu_hash_map::RcuHashMap;
33use starnix_sync::{
34    BeforeFsNodeAppend, FileOpsCore, LockBefore, LockEqualOrBefore, Locked, Mutex, Unlocked,
35};
36use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
37use starnix_types::math::round_up_to_system_page_size;
38use starnix_types::ownership::Releasable;
39use starnix_uapi::arc_key::WeakKey;
40use starnix_uapi::as_any::AsAny;
41use starnix_uapi::auth::{CAP_FOWNER, CAP_SYS_RAWIO};
42use starnix_uapi::errors::{EAGAIN, ETIMEDOUT, Errno};
43use starnix_uapi::file_lease::FileLeaseType;
44use starnix_uapi::file_mode::Access;
45use starnix_uapi::inotify_mask::InotifyMask;
46use starnix_uapi::open_flags::OpenFlags;
47use starnix_uapi::seal_flags::SealFlags;
48use starnix_uapi::user_address::{UserAddress, UserRef};
49use starnix_uapi::vfs::FdEvents;
50use starnix_uapi::{
51    FIBMAP, FIGETBSZ, FIONBIO, FIONREAD, FIOQSIZE, FS_CASEFOLD_FL, FS_IOC_ADD_ENCRYPTION_KEY,
52    FS_IOC_ENABLE_VERITY, FS_IOC_FSGETXATTR, FS_IOC_FSSETXATTR, FS_IOC_MEASURE_VERITY,
53    FS_IOC_READ_VERITY_METADATA, FS_IOC_REMOVE_ENCRYPTION_KEY, FS_IOC_SET_ENCRYPTION_POLICY,
54    FS_VERITY_FL, FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, FSCRYPT_POLICY_V2, SEEK_CUR, SEEK_DATA,
55    SEEK_END, SEEK_HOLE, SEEK_SET, TCGETS, errno, error, fscrypt_add_key_arg, fscrypt_identifier,
56    fsxattr, off_t, pid_t, uapi,
57};
58use std::fmt;
59use std::ops::Deref;
60use std::sync::{Arc, Weak};
61
/// Upper bound applied to file sizes and offsets in this module: `0x7fff_ffff_ffff_ffff`, the
/// largest positive value of a signed 64-bit integer.
pub const MAX_LFS_FILESIZE: usize = 0x7fff_ffff_ffff_ffff;
63
64pub fn checked_add_offset_and_length(offset: usize, length: usize) -> Result<usize, Errno> {
65    let end = offset.checked_add(length).ok_or_else(|| errno!(EINVAL))?;
66    if end > MAX_LFS_FILESIZE {
67        return error!(EINVAL);
68    }
69    Ok(end)
70}
71
/// The target of a seek operation: a seek mode (`whence`) together with its offset argument.
#[derive(Debug)]
pub enum SeekTarget {
    /// Seek to the given offset relative to the start of the file.
    Set(off_t),
    /// Seek to the given offset relative to the current position.
    Cur(off_t),
    /// Seek to the given offset relative to the end of the file.
    End(off_t),
    /// Seek to the first data at or after the given offset.
    Data(off_t),
    /// Seek to the first hole at or after the given offset.
    Hole(off_t),
}
85
86impl SeekTarget {
87    pub fn from_raw(whence: u32, offset: off_t) -> Result<SeekTarget, Errno> {
88        match whence {
89            SEEK_SET => Ok(SeekTarget::Set(offset)),
90            SEEK_CUR => Ok(SeekTarget::Cur(offset)),
91            SEEK_END => Ok(SeekTarget::End(offset)),
92            SEEK_DATA => Ok(SeekTarget::Data(offset)),
93            SEEK_HOLE => Ok(SeekTarget::Hole(offset)),
94            _ => error!(EINVAL),
95        }
96    }
97
98    pub fn whence(&self) -> u32 {
99        match self {
100            Self::Set(_) => SEEK_SET,
101            Self::Cur(_) => SEEK_CUR,
102            Self::End(_) => SEEK_END,
103            Self::Data(_) => SEEK_DATA,
104            Self::Hole(_) => SEEK_HOLE,
105        }
106    }
107
108    pub fn offset(&self) -> off_t {
109        match self {
110            Self::Set(off)
111            | Self::Cur(off)
112            | Self::End(off)
113            | Self::Data(off)
114            | Self::Hole(off) => *off,
115        }
116    }
117}
118
/// Corresponds to struct file_operations in Linux, plus any filesystem-specific data.
///
/// Implementations must provide `is_seekable`, `read`, `write`, `seek` and `sync`; every other
/// method has a default implementation.
pub trait FileOps: Send + Sync + AsAny + 'static {
    /// Called when the FileObject is opened/created.
    ///
    /// The default implementation does nothing and succeeds.
    fn open(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<(), Errno> {
        Ok(())
    }

    /// Called when the FileObject is destroyed.
    ///
    /// Consumes the boxed `FileOps`. The default implementation does nothing.
    fn close(
        self: Box<Self>,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObjectState,
        _current_task: &CurrentTask,
    ) {
    }

    /// Called every time close() is called on this file, even if the file is not ready to be
    /// released.
    ///
    /// The default implementation does nothing.
    fn flush(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) {
    }

    /// Returns whether the file has meaningful seek offsets. Returning `false` is only an
    /// optimization: it makes `FileObject` never hold the offset lock when calling `read` and
    /// `write`.
    ///
    /// Defaults to the value of [`Self::is_seekable`].
    fn has_persistent_offsets(&self) -> bool {
        self.is_seekable()
    }

    /// Returns whether the file is seekable.
    fn is_seekable(&self) -> bool;

    /// Returns true if `write()` operations on the file will update the seek offset.
    ///
    /// Defaults to the value of [`Self::has_persistent_offsets`].
    fn writes_update_seek_offset(&self) -> bool {
        self.has_persistent_offsets()
    }

    /// Read from the file at an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes read.
    fn read(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno>;

    /// Write to the file with an offset. If the file does not have persistent offsets (either
    /// directly, or because it is not seekable), offset will be 0 and can be ignored.
    /// Returns the number of bytes written.
    fn write(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>;

    /// Adjust the `current_offset` if the file is seekable.
    ///
    /// Returns the new offset.
    fn seek(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        current_offset: off_t,
        target: SeekTarget,
    ) -> Result<off_t, Errno>;

    /// Syncs cached state associated with the file descriptor to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    fn sync(&self, file: &FileObject, _current_task: &CurrentTask) -> Result<(), Errno>;

    /// Syncs cached data, and only enough metadata to retrieve said data, to persistent storage.
    ///
    /// The method blocks until the synchronization is complete.
    ///
    /// The default implementation performs a full [`Self::sync`].
    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
        // TODO(https://fxbug.dev/297305634) make a default macro once data can be done separately
        self.sync(file, current_task)
    }

    /// Returns a VMO representing this file. At least the requested protection flags must
    /// be set on the VMO. Reading or writing the VMO must read or write the file. If this is not
    /// possible given the requested protection, an error must be returned.
    /// The `length` is a hint for the desired size of the VMO. The returned VMO may be larger or
    /// smaller than the requested length.
    /// This method is typically called by [`Self::mmap`].
    ///
    /// The default implementation returns `ENODEV`.
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(ENODEV)
    }

    /// Responds to an mmap call. The default implementation calls [`Self::get_memory`] to get a VMO
    /// and then maps it with [`crate::mm::MemoryManager::map`].
    /// Only implement this trait method if your file needs to control mapping, or record where
    /// a VMO gets mapped.
    fn mmap(
        &self,
        locked: &mut Locked<FileOpsCore>,
        file: &FileObject,
        current_task: &CurrentTask,
        addr: DesiredAddress,
        memory_offset: u64,
        length: usize,
        prot_flags: ProtectionFlags,
        options: MappingOptions,
        filename: NamespaceNode,
    ) -> Result<UserAddress, Errno> {
        trace_duration!(CATEGORY_STARNIX_MM, "FileOpsDefaultMmap");
        // Size hint passed to `get_memory`: the mapping's offset plus its page-rounded length.
        // Overflow is reported as `EINVAL`.
        let min_memory_size = (memory_offset as usize)
            .checked_add(round_up_to_system_page_size(length)?)
            .ok_or_else(|| errno!(EINVAL))?;
        let mut memory = if options.contains(MappingOptions::SHARED) {
            trace_duration!(CATEGORY_STARNIX_MM, "GetSharedVmo");
            self.get_memory(locked, file, current_task, Some(min_memory_size), prot_flags)?
        } else {
            trace_duration!(CATEGORY_STARNIX_MM, "GetPrivateVmo");
            // TODO(tbodt): Use PRIVATE_CLONE to have the filesystem server do the clone for us.
            // The base VMO is requested readable and non-writable; writes of a private mapping
            // go to the child created below.
            let base_prot_flags = (prot_flags | ProtectionFlags::READ) - ProtectionFlags::WRITE;
            let memory = self.get_memory(
                locked,
                file,
                current_task,
                Some(min_memory_size),
                base_prot_flags,
            )?;
            let mut clone_flags = zx::VmoChildOptions::SNAPSHOT_AT_LEAST_ON_WRITE;
            if !prot_flags.contains(ProtectionFlags::WRITE) {
                clone_flags |= zx::VmoChildOptions::NO_WRITE;
            }
            trace_duration!(CATEGORY_STARNIX_MM, "CreatePrivateChildVmo");
            Arc::new(
                memory.create_child(clone_flags, 0, memory.get_size()).map_err(impossible_error)?,
            )
        };

        // Write guard is necessary only for shared mappings. Note that this doesn't depend on
        // `prot_flags` since these can be changed later with `mprotect()`.
        let file_write_guard = if options.contains(MappingOptions::SHARED) && file.can_write() {
            let node = &file.name.entry.node;
            let state = node.write_guard_state.lock();

            // `F_SEAL_FUTURE_WRITE` should allow `mmap(PROT_READ)`, but block
            // `mprotect(PROT_WRITE)`. This is different from `F_SEAL_WRITE`, which blocks
            // `mmap(PROT_READ)`. To handle this case correctly remove `WRITE` right from the
            // VMO handle to ensure `mprotect(PROT_WRITE)` fails.
            let seals = state.get_seals().unwrap_or(SealFlags::empty());
            if seals.contains(SealFlags::FUTURE_WRITE)
                && !seals.contains(SealFlags::WRITE)
                && !prot_flags.contains(ProtectionFlags::WRITE)
            {
                let mut new_rights = zx::Rights::VMO_DEFAULT - zx::Rights::WRITE;
                if prot_flags.contains(ProtectionFlags::EXEC) {
                    new_rights |= zx::Rights::EXECUTE;
                }
                memory = Arc::new(memory.duplicate_handle(new_rights).map_err(impossible_error)?);

                None
            } else {
                Some(FileWriteGuardMode::WriteMapping)
            }
        } else {
            None
        };

        current_task.mm()?.map_memory(
            addr,
            memory,
            memory_offset,
            length,
            prot_flags,
            file.max_access_for_memory_mapping(),
            options,
            MappingName::File(filename.into_mapping(file_write_guard)?),
        )
    }

    /// Respond to a `getdents` or `getdents64` call.
    ///
    /// The `file.offset` lock will be held while entering this method. The implementation must look
    /// at `sink.offset()` to read the current offset into the file.
    ///
    /// The default implementation returns `ENOTDIR`.
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(ENOTDIR)
    }

    /// Establish a one-shot, edge-triggered, asynchronous wait for the given FdEvents for the
    /// given file and task. Returns `None` if this file does not support blocking waits.
    ///
    /// Active events are not considered. This is similar to the semantics of the
    /// ZX_WAIT_ASYNC_EDGE flag on zx_wait_async. To avoid missing events, the caller must call
    /// query_events after calling this.
    ///
    /// If your file does not support blocking waits, leave this as the default implementation.
    fn wait_async(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _waiter: &Waiter,
        _events: FdEvents,
        _handler: EventHandler,
    ) -> Option<WaitCanceler> {
        None
    }

    /// The events currently active on this file.
    ///
    /// If this function returns `POLLIN` or `POLLOUT`, then FileObject will
    /// add `POLLRDNORM` and `POLLWRNORM`, respectively, which are equivalent in
    /// the Linux UAPI.
    ///
    /// The default implementation reports `POLLIN | POLLOUT`.
    ///
    /// See https://linux.die.net/man/2/poll
    fn query_events(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
    ) -> Result<FdEvents, Errno> {
        Ok(FdEvents::POLLIN | FdEvents::POLLOUT)
    }

    /// Handles an `ioctl` request issued on this file.
    ///
    /// The default implementation forwards the request to [`default_ioctl`].
    fn ioctl(
        &self,
        locked: &mut Locked<Unlocked>,
        file: &FileObject,
        current_task: &CurrentTask,
        request: u32,
        arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        default_ioctl(file, locked, current_task, request, arg)
    }

    /// Handles an `fcntl` command issued on this file.
    ///
    /// The default implementation forwards `cmd` to `default_fcntl`.
    fn fcntl(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        cmd: u32,
        _arg: u64,
    ) -> Result<SyscallResult, Errno> {
        default_fcntl(cmd)
    }

    /// Return a handle that allows access to this file descriptor through the zxio protocols.
    ///
    /// If None is returned, the file will act as if it was a fd to `/dev/null`.
    ///
    /// The default implementation serves the file with `serve_file`, using the current task's
    /// full credentials, and returns the resulting handle.
    fn to_handle(
        &self,
        file: &FileObject,
        current_task: &CurrentTask,
    ) -> Result<Option<zx::NullableHandle>, Errno> {
        serve_file(current_task, file, current_task.full_current_creds())
            .map(|c| Some(c.0.into_handle().into()))
    }

    /// Returns the `ThreadGroupKey` associated with this file.
    ///
    /// Used by pidfd and `/proc/<pid>`. Unlikely to be used by other files.
    ///
    /// The default implementation returns `EBADF`.
    fn as_thread_group_key(&self, _file: &FileObject) -> Result<ThreadGroupKey, Errno> {
        error!(EBADF)
    }

    /// Responds to a readahead request for `_length` bytes starting at `_offset`.
    ///
    /// The default implementation returns `EINVAL`.
    fn readahead(
        &self,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _length: usize,
    ) -> Result<(), Errno> {
        error!(EINVAL)
    }

    /// Extra information that is included in the /proc/<pid>/fdinfo/<fd> entry.
    ///
    /// The default implementation returns `None`.
    fn extra_fdinfo(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileHandle,
        _current_task: &CurrentTask,
    ) -> Option<FsString> {
        None
    }
}
424
/// Marker trait for implementations of `FileOps` that do not need to implement `close`.
///
/// Such an implementation can be passed behind a wrapper object (any `Deref` to it that is
/// `Send + Sync + 'static`), and the wrapper is itself usable as the `FileOps` implementation.
/// `close` is never forwarded to the wrapped value.
pub trait CloseFreeSafe {}
428impl<T: FileOps + CloseFreeSafe, P: Deref<Target = T> + Send + Sync + 'static> FileOps for P {
429    fn close(
430        self: Box<Self>,
431        _locked: &mut Locked<FileOpsCore>,
432        _file: &FileObjectState,
433        _current_task: &CurrentTask,
434    ) {
435        // This method cannot be delegated. T being `CloseFreeSafe` this is fine.
436    }
437
438    fn flush(
439        &self,
440        locked: &mut Locked<FileOpsCore>,
441        file: &FileObject,
442        current_task: &CurrentTask,
443    ) {
444        self.deref().flush(locked, file, current_task)
445    }
446
447    fn has_persistent_offsets(&self) -> bool {
448        self.deref().has_persistent_offsets()
449    }
450
451    fn writes_update_seek_offset(&self) -> bool {
452        self.deref().writes_update_seek_offset()
453    }
454
455    fn is_seekable(&self) -> bool {
456        self.deref().is_seekable()
457    }
458
459    fn read(
460        &self,
461        locked: &mut Locked<FileOpsCore>,
462        file: &FileObject,
463        current_task: &CurrentTask,
464        offset: usize,
465        data: &mut dyn OutputBuffer,
466    ) -> Result<usize, Errno> {
467        self.deref().read(locked, file, current_task, offset, data)
468    }
469
470    fn write(
471        &self,
472        locked: &mut Locked<FileOpsCore>,
473        file: &FileObject,
474        current_task: &CurrentTask,
475        offset: usize,
476        data: &mut dyn InputBuffer,
477    ) -> Result<usize, Errno> {
478        self.deref().write(locked, file, current_task, offset, data)
479    }
480
481    fn seek(
482        &self,
483        locked: &mut Locked<FileOpsCore>,
484        file: &FileObject,
485        current_task: &CurrentTask,
486        current_offset: off_t,
487        target: SeekTarget,
488    ) -> Result<off_t, Errno> {
489        self.deref().seek(locked, file, current_task, current_offset, target)
490    }
491
492    fn sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
493        self.deref().sync(file, current_task)
494    }
495
496    fn data_sync(&self, file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
497        self.deref().data_sync(file, current_task)
498    }
499
500    fn get_memory(
501        &self,
502        locked: &mut Locked<FileOpsCore>,
503        file: &FileObject,
504        current_task: &CurrentTask,
505        length: Option<usize>,
506        prot: ProtectionFlags,
507    ) -> Result<Arc<MemoryObject>, Errno> {
508        self.deref().get_memory(locked, file, current_task, length, prot)
509    }
510
511    fn mmap(
512        &self,
513        locked: &mut Locked<FileOpsCore>,
514        file: &FileObject,
515        current_task: &CurrentTask,
516        addr: DesiredAddress,
517        memory_offset: u64,
518        length: usize,
519        prot_flags: ProtectionFlags,
520        options: MappingOptions,
521        filename: NamespaceNode,
522    ) -> Result<UserAddress, Errno> {
523        self.deref().mmap(
524            locked,
525            file,
526            current_task,
527            addr,
528            memory_offset,
529            length,
530            prot_flags,
531            options,
532            filename,
533        )
534    }
535
536    fn readdir(
537        &self,
538        locked: &mut Locked<FileOpsCore>,
539        file: &FileObject,
540        current_task: &CurrentTask,
541        sink: &mut dyn DirentSink,
542    ) -> Result<(), Errno> {
543        self.deref().readdir(locked, file, current_task, sink)
544    }
545
546    fn wait_async(
547        &self,
548        locked: &mut Locked<FileOpsCore>,
549        file: &FileObject,
550        current_task: &CurrentTask,
551        waiter: &Waiter,
552        events: FdEvents,
553        handler: EventHandler,
554    ) -> Option<WaitCanceler> {
555        self.deref().wait_async(locked, file, current_task, waiter, events, handler)
556    }
557
558    fn query_events(
559        &self,
560        locked: &mut Locked<FileOpsCore>,
561        file: &FileObject,
562        current_task: &CurrentTask,
563    ) -> Result<FdEvents, Errno> {
564        self.deref().query_events(locked, file, current_task)
565    }
566
567    fn ioctl(
568        &self,
569        locked: &mut Locked<Unlocked>,
570        file: &FileObject,
571        current_task: &CurrentTask,
572        request: u32,
573        arg: SyscallArg,
574    ) -> Result<SyscallResult, Errno> {
575        self.deref().ioctl(locked, file, current_task, request, arg)
576    }
577
578    fn fcntl(
579        &self,
580        file: &FileObject,
581        current_task: &CurrentTask,
582        cmd: u32,
583        arg: u64,
584    ) -> Result<SyscallResult, Errno> {
585        self.deref().fcntl(file, current_task, cmd, arg)
586    }
587
588    fn to_handle(
589        &self,
590        file: &FileObject,
591        current_task: &CurrentTask,
592    ) -> Result<Option<zx::NullableHandle>, Errno> {
593        self.deref().to_handle(file, current_task)
594    }
595
596    fn as_thread_group_key(&self, file: &FileObject) -> Result<ThreadGroupKey, Errno> {
597        self.deref().as_thread_group_key(file)
598    }
599
600    fn readahead(
601        &self,
602        file: &FileObject,
603        current_task: &CurrentTask,
604        offset: usize,
605        length: usize,
606    ) -> Result<(), Errno> {
607        self.deref().readahead(file, current_task, offset, length)
608    }
609
610    fn extra_fdinfo(
611        &self,
612        locked: &mut Locked<FileOpsCore>,
613        file: &FileHandle,
614        current_task: &CurrentTask,
615    ) -> Option<FsString> {
616        self.deref().extra_fdinfo(locked, file, current_task)
617    }
618}
619
620pub fn default_eof_offset<L>(
621    locked: &mut Locked<L>,
622    file: &FileObject,
623    current_task: &CurrentTask,
624) -> Result<off_t, Errno>
625where
626    L: LockEqualOrBefore<FileOpsCore>,
627{
628    Ok(file.node().get_size(locked, current_task)? as off_t)
629}
630
631/// Implement the seek method for a file. The computation from the end of the file must be provided
632/// through a callback.
633///
634/// Errors if the calculated offset is invalid.
635///
636/// - `current_offset`: The current position
637/// - `target`: The location to seek to.
638/// - `compute_end`: Compute the new offset from the end. Return an error if the operation is not
639///    supported.
640pub fn default_seek<F>(
641    current_offset: off_t,
642    target: SeekTarget,
643    compute_end: F,
644) -> Result<off_t, Errno>
645where
646    F: FnOnce() -> Result<off_t, Errno>,
647{
648    let new_offset = match target {
649        SeekTarget::Set(offset) => Some(offset),
650        SeekTarget::Cur(offset) => current_offset.checked_add(offset),
651        SeekTarget::End(offset) => compute_end()?.checked_add(offset),
652        SeekTarget::Data(offset) => {
653            let eof = compute_end().unwrap_or(off_t::MAX);
654            if offset >= eof {
655                return error!(ENXIO);
656            }
657            Some(offset)
658        }
659        SeekTarget::Hole(offset) => {
660            let eof = compute_end()?;
661            if offset >= eof {
662                return error!(ENXIO);
663            }
664            Some(eof)
665        }
666    }
667    .ok_or_else(|| errno!(EINVAL))?;
668
669    if new_offset < 0 {
670        return error!(EINVAL);
671    }
672
673    Ok(new_offset)
674}
675
676/// Implement the seek method for a file without an upper bound on the resulting offset.
677///
678/// This is useful for files without a defined size.
679///
680/// Errors if the calculated offset is invalid.
681///
682/// - `current_offset`: The current position
683/// - `target`: The location to seek to.
684pub fn unbounded_seek(current_offset: off_t, target: SeekTarget) -> Result<off_t, Errno> {
685    default_seek(current_offset, target, || Ok(MAX_LFS_FILESIZE as off_t))
686}
687
/// Implements [`FileOps::is_seekable`], [`FileOps::read`] and [`FileOps::seek`] by forwarding
/// `read` and `seek` to another object; `is_seekable` always returns `true`.
///
/// - `$self`: the receiver identifier (pass `self`), so that `$delegate` can refer to it.
/// - `$delegate`: an expression evaluating to the object whose `read` and `seek` are called.
#[macro_export]
macro_rules! fileops_impl_delegate_read_and_seek {
    ($self:ident, $delegate:expr) => {
        fn is_seekable(&self) -> bool {
            true
        }

        // `FileObject` is named through `$crate` so that the expansion does not depend on what
        // the call site has imported.
        fn read(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            offset: usize,
            data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            $delegate.read(locked, file, current_task, offset, data)
        }

        fn seek(
            &$self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $delegate.seek(locked, file, current_task, current_offset, target)
        }
    };
}
718
/// Provides [`FileOps::is_seekable`] and [`FileOps::seek`] for ordinary seekable files.
///
/// `is_seekable` returns `true`, and `seek` is `default_seek` with the end of the file taken
/// from `default_eof_offset`.
#[macro_export]
macro_rules! fileops_impl_seekable {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            file: &$crate::vfs::FileObject,
            current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            // Evaluated lazily: `default_seek` only asks for the end of the file when the
            // target needs it.
            let eof_offset = || $crate::vfs::default_eof_offset(locked, file, current_task);
            $crate::vfs::default_seek(current_offset, target, eof_offset)
        }
    };
}
741
/// Implements [`FileOps`] methods in a way that makes sense for non-seekable files.
///
/// `is_seekable` returns `false` and `seek` always fails with `ESPIPE`.
#[macro_export]
macro_rules! fileops_impl_nonseekable {
    () => {
        fn is_seekable(&self) -> bool {
            false
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(ESPIPE)
        }
    };
}
762
/// Implements [`FileOps::seek`] and related methods in a way that makes sense for files that
/// ignore seeking operations and always read/write at offset 0.
///
/// The file reports itself as seekable but without persistent offsets, and `seek` succeeds
/// with offset 0 whatever the target is.
#[macro_export]
macro_rules! fileops_impl_seekless {
    () => {
        fn has_persistent_offsets(&self) -> bool {
            false
        }

        fn is_seekable(&self) -> bool {
            true
        }

        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _current_offset: starnix_uapi::off_t,
            _target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            Ok(0)
        }
    };
}
788
/// Implements [`FileOps::read`] and [`FileOps::write`] for files that carry no data: both
/// operations always fail with `EINVAL`.
#[macro_export]
macro_rules! fileops_impl_dataless {
    () => {
        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EINVAL)
        }
    };
}
815
/// Implements [`FileOps`] methods in a way that makes sense for directories. You must implement
/// [`FileOps::seek`] and [`FileOps::readdir`].
///
/// The directory reports itself as seekable, and `read` and `write` always fail with `EISDIR`.
#[macro_export]
macro_rules! fileops_impl_directory {
    () => {
        fn is_seekable(&self) -> bool {
            true
        }

        fn read(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::OutputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }

        fn write(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            _offset: usize,
            _data: &mut dyn $crate::vfs::buffers::InputBuffer,
        ) -> Result<usize, starnix_uapi::errors::Errno> {
            starnix_uapi::error!(EISDIR)
        }
    };
}
848
/// Implements [`FileOps::seek`] with `unbounded_seek`, for files without a defined size.
///
/// Only `seek` is provided; `is_seekable` must be implemented separately.
#[macro_export]
macro_rules! fileops_impl_unbounded_seek {
    () => {
        fn seek(
            &self,
            _locked: &mut starnix_sync::Locked<starnix_sync::FileOpsCore>,
            _file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
            current_offset: starnix_uapi::off_t,
            target: $crate::vfs::SeekTarget,
        ) -> Result<starnix_uapi::off_t, starnix_uapi::errors::Errno> {
            $crate::vfs::unbounded_seek(current_offset, target)
        }
    };
}
864
/// Implements [`FileOps::sync`] for files that have nothing to write back.
///
/// Syncing succeeds without doing any work when the file's node is a regular file or a
/// directory, and fails with `EINVAL` for any other kind of node.
#[macro_export]
macro_rules! fileops_impl_noop_sync {
    () => {
        fn sync(
            &self,
            file: &$crate::vfs::FileObject,
            _current_task: &$crate::task::CurrentTask,
        ) -> Result<(), starnix_uapi::errors::Errno> {
            let node = file.node();
            if node.is_reg() || node.is_dir() {
                Ok(())
            } else {
                starnix_uapi::error!(EINVAL)
            }
        }
    };
}
880
881// Public re-export of macros allows them to be used like regular rust items.
882
883pub use {
884    fileops_impl_dataless, fileops_impl_delegate_read_and_seek, fileops_impl_directory,
885    fileops_impl_nonseekable, fileops_impl_noop_sync, fileops_impl_seekable, fileops_impl_seekless,
886    fileops_impl_unbounded_seek,
887};
/// Size, in bytes, of an AES-256 key (256 bits).
pub const AES256_KEY_SIZE: usize = 32;
889
890pub fn canonicalize_ioctl_request(current_task: &CurrentTask, request: u32) -> u32 {
891    if current_task.is_arch32() {
892        match request {
893            uapi::arch32::FS_IOC_GETFLAGS => uapi::FS_IOC_GETFLAGS,
894            uapi::arch32::FS_IOC_SETFLAGS => uapi::FS_IOC_SETFLAGS,
895            _ => request,
896        }
897    } else {
898        request
899    }
900}
901
/// Fallback ioctl handler implementing the requests that do not depend on the file type.
///
/// `request` is first normalized with `canonicalize_ioctl_request` and then dispatched.
/// Results are written back to the user address carried in `arg`. A request that matches no
/// arm is recorded with `track_stub!` and fails with `ENOTTY`.
///
/// # Errors
///
/// Besides the per-request errors visible in each arm, any failure to read or write user
/// memory through `current_task` is propagated.
pub fn default_ioctl(
    file: &FileObject,
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    request: u32,
    arg: SyscallArg,
) -> Result<SyscallResult, Errno> {
    match canonicalize_ioctl_request(current_task, request) {
        TCGETS => error!(ENOTTY),
        FIGETBSZ => {
            // Only regular files and directories report a block size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let blocksize = file.node().stat(locked, current_task)?.st_blksize;
            current_task.write_object(arg.into(), &blocksize)?;
            Ok(SUCCESS)
        }
        FIONBIO => {
            // The argument points to an int: zero clears O_NONBLOCK, non-zero sets it.
            let arg_ref = UserAddress::from(arg).into();
            let arg: i32 = current_task.read_object(arg_ref)?;
            let val = if arg == 0 {
                // Clear the NONBLOCK flag
                OpenFlags::empty()
            } else {
                // Set the NONBLOCK flag
                OpenFlags::NONBLOCK
            };
            file.update_file_flags(val, OpenFlags::NONBLOCK);
            Ok(SUCCESS)
        }
        FIOQSIZE => {
            // Only regular files and directories report a size.
            let node = file.node();
            let supported_file = node.is_reg() || node.is_dir();
            if !supported_file {
                return error!(ENOTTY);
            }

            let size = file.node().stat(locked, current_task)?.st_size;
            current_task.write_object(arg.into(), &size)?;
            Ok(SUCCESS)
        }
        FIONREAD => {
            track_stub!(TODO("https://fxbug.dev/322874897"), "FIONREAD");
            if !file.name.entry.node.is_reg() {
                return error!(ENOTTY);
            }

            // Any failure to refresh the node info is reported as EINVAL.
            let size = file
                .name
                .entry
                .node
                .fetch_and_refresh_info(locked, current_task)
                .map_err(|_| errno!(EINVAL))?
                .size;
            // A negative offset cannot be converted and is reported as EINVAL.
            let offset = usize::try_from(*file.offset.lock()).map_err(|_| errno!(EINVAL))?;
            // Bytes left between the current offset and the end of the file: 0 when the offset
            // is already past the end, saturated to i32::MAX when it does not fit in an i32.
            let remaining =
                if size < offset { 0 } else { i32::try_from(size - offset).unwrap_or(i32::MAX) };
            current_task.write_object(arg.into(), &remaining)?;
            Ok(SUCCESS)
        }
        FS_IOC_FSGETXATTR => {
            // Stub: always reports a default-initialized `fsxattr`.
            track_stub!(TODO("https://fxbug.dev/322875209"), "FS_IOC_FSGETXATTR");
            let arg = UserAddress::from(arg).into();
            current_task.write_object(arg, &fsxattr::default())?;
            Ok(SUCCESS)
        }
        FS_IOC_FSSETXATTR => {
            // Stub: the argument is read (so a bad address still fails) and then discarded.
            track_stub!(TODO("https://fxbug.dev/322875271"), "FS_IOC_FSSETXATTR");
            let arg = UserAddress::from(arg).into();
            let _: fsxattr = current_task.read_object(arg)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_GETFLAGS => {
            // Only the fs-verity and casefold flags are reported.
            track_stub!(TODO("https://fxbug.dev/322874935"), "FS_IOC_GETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let mut flags: u32 = 0;
            if matches!(*file.node().fsverity.lock(), FsVerityState::FsVerity) {
                flags |= FS_VERITY_FL;
            }
            if file.node().info().casefold {
                flags |= FS_CASEFOLD_FL;
            }
            current_task.write_object(arg, &flags)?;
            Ok(SUCCESS)
        }
        uapi::FS_IOC_SETFLAGS => {
            // Only the casefold flag is applied; every other bit in `flags` is ignored.
            track_stub!(TODO("https://fxbug.dev/322875367"), "FS_IOC_SETFLAGS");
            let arg = UserRef::<u32>::from(arg);
            let flags: u32 = current_task.read_object(arg)?;
            file.node().update_attributes(locked, current_task, |info| {
                info.casefold = flags & FS_CASEFOLD_FL != 0;
                Ok(())
            })?;
            Ok(SUCCESS)
        }
        FS_IOC_ENABLE_VERITY => {
            Ok(fsverity::ioctl::enable(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_MEASURE_VERITY => {
            Ok(fsverity::ioctl::measure(locked, current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_READ_VERITY_METADATA => {
            Ok(fsverity::ioctl::read_metadata(current_task, UserAddress::from(arg).into(), file)?)
        }
        FS_IOC_ADD_ENCRYPTION_KEY => {
            let fscrypt_add_key_ref = UserRef::<fscrypt_add_key_arg>::from(arg);
            // The raw key is read from the address given by `next()` — presumably the first byte
            // after the fixed-size `fscrypt_add_key_arg` header; confirm against `UserRef::next`.
            let key_ref_addr = fscrypt_add_key_ref.next()?.addr();
            let mut fscrypt_add_key_arg = current_task.read_object(fscrypt_add_key_ref.clone())?;
            if fscrypt_add_key_arg.key_id != 0 {
                track_stub!(TODO("https://fxbug.dev/375649227"), "non-zero key ids");
                return error!(ENOTSUP);
            }
            if fscrypt_add_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            // NOTE(review): `raw_size` comes straight from userspace and is not bounded in this
            // function before the read below — confirm that it is validated elsewhere.
            let key = current_task
                .read_memory_to_vec(key_ref_addr, fscrypt_add_key_arg.raw_size as usize)?;
            let user_id = current_task.current_creds().uid;

            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let key_identifier = crypt_service.add_wrapping_key(&key, user_id)?;
            // Report the identifier assigned by the crypt service back to the caller.
            fscrypt_add_key_arg.key_spec.u.identifier =
                fscrypt_identifier { value: key_identifier, ..Default::default() };
            current_task.write_object(fscrypt_add_key_ref, &fscrypt_add_key_arg)?;
            Ok(SUCCESS)
        }
        FS_IOC_SET_ENCRYPTION_POLICY => {
            let fscrypt_policy_ref = UserRef::<uapi::fscrypt_policy_v2>::from(arg);
            let policy = current_task.read_object(fscrypt_policy_ref)?;
            if policy.version as u32 != FSCRYPT_POLICY_V2 {
                track_stub!(TODO("https://fxbug.dev/375649656"), "fscrypt policy v1");
                return error!(ENOTSUP);
            }
            // The three checks below only record a stub; they do not reject the request.
            if policy.flags != 0 {
                track_stub!(
                    TODO("https://fxbug.dev/375700939"),
                    "fscrypt policy flags",
                    policy.flags
                );
            }
            if policy.contents_encryption_mode as u32 != FSCRYPT_MODE_AES_256_XTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.contents_encryption_mode
                );
            }
            if policy.filenames_encryption_mode as u32 != FSCRYPT_MODE_AES_256_CTS {
                track_stub!(
                    TODO("https://fxbug.dev/375684057"),
                    "fscrypt encryption modes",
                    policy.filenames_encryption_mode
                );
            }
            // A caller that does not own the node needs CAP_FOWNER; a failed capability check is
            // reported as EACCES.
            let user_id = current_task.current_creds().uid;
            if user_id != file.node().info().uid {
                security::check_task_capable(current_task, CAP_FOWNER)
                    .map_err(|_| errno!(EACCES))?;
            }

            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            // The calling user must be among the users recorded for the master key.
            if let Some(users) =
                crypt_service.get_users_for_key(EncryptionKeyId::from(policy.master_key_identifier))
            {
                if !users.contains(&user_id) {
                    return error!(ENOKEY);
                }
            } else {
                track_stub!(
                    TODO("https://fxbug.dev/375067633"),
                    "users with CAP_FOWNER can set encryption policies with unadded keys"
                );
                return error!(ENOKEY);
            }

            // Setting the same key again succeeds; a different key on an already-encrypted node
            // fails with EEXIST.
            let attributes = file.node().fetch_and_refresh_info(locked, current_task)?;
            if let Some(wrapping_key_id) = &attributes.wrapping_key_id {
                if wrapping_key_id != &policy.master_key_identifier {
                    return error!(EEXIST);
                }
            } else {
                // Don't deadlock! update_attributes will also lock the attributes.
                std::mem::drop(attributes);
                file.node().update_attributes(locked, current_task, |info| {
                    info.wrapping_key_id = Some(policy.master_key_identifier);
                    Ok(())
                })?;
            }
            Ok(SUCCESS)
        }
        FS_IOC_REMOVE_ENCRYPTION_KEY => {
            let fscrypt_remove_key_arg_ref = UserRef::<uapi::fscrypt_remove_key_arg>::from(arg);
            let fscrypt_remove_key_arg = current_task.read_object(fscrypt_remove_key_arg_ref)?;
            if fscrypt_remove_key_arg.key_spec.type_ != FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER {
                track_stub!(TODO("https://fxbug.dev/375648306"), "fscrypt descriptor type");
                return error!(ENOTSUP);
            }
            let crypt_service = file.node().fs().crypt_service().ok_or_else(|| errno!(ENOTSUP))?;
            let user_id = current_task.current_creds().uid;
            // SAFETY (review note): `key_spec.type_` was checked above to be
            // FSCRYPT_KEY_SPEC_TYPE_IDENTIFIER, so `identifier` is the union member the caller
            // declared active. Presumably every member of this union is plain bytes, which would
            // make the read sound for any bit pattern — confirm against the uapi definition.
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            let identifier = unsafe { fscrypt_remove_key_arg.key_spec.u.identifier.value };
            crypt_service.forget_wrapping_key(identifier, user_id)?;
            Ok(SUCCESS)
        }
        _ => {
            // Note that the original (non-canonicalized) request number is what gets recorded.
            track_stub!(TODO("https://fxbug.dev/322874917"), "ioctl fallthrough", request);
            error!(ENOTTY)
        }
    }
}
1119
/// Fallback fcntl handler: records the command with `track_stub!` and always fails with
/// `EINVAL`.
pub fn default_fcntl(cmd: u32) -> Result<SyscallResult, Errno> {
    track_stub!(TODO("https://fxbug.dev/322875704"), "default fcntl", cmd);
    error!(EINVAL)
}
1124
/// Stateless file operations object.
///
/// NOTE(review): presumably used for files opened with `O_PATH` — confirm at the call sites.
pub struct OPathOps {}

impl OPathOps {
    /// Creates a new, stateless `OPathOps`.
    pub fn new() -> Self {
        Self {}
    }
}
1132
/// `FileOps` for `OPathOps`: every data-carrying operation (`read`, `write`, `seek`,
/// `get_memory`, `readdir`, `ioctl`) fails with `EBADF`.
impl FileOps for OPathOps {
    fileops_impl_noop_sync!();

    /// The file keeps no offset between operations.
    fn has_persistent_offsets(&self) -> bool {
        false
    }
    /// Reported as seekable even though `seek` itself always fails with `EBADF`.
    fn is_seekable(&self) -> bool {
        true
    }
    /// Always fails with `EBADF`.
    fn read(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn write(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _offset: usize,
        _data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn seek(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _current_offset: off_t,
        _target: SeekTarget,
    ) -> Result<off_t, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn get_memory(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _length: Option<usize>,
        _prot: ProtectionFlags,
    ) -> Result<Arc<MemoryObject>, Errno> {
        error!(EBADF)
    }
    /// Always fails with `EBADF`.
    fn readdir(
        &self,
        _locked: &mut Locked<FileOpsCore>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _sink: &mut dyn DirentSink,
    ) -> Result<(), Errno> {
        error!(EBADF)
    }

    /// Always fails with `EBADF`.
    fn ioctl(
        &self,
        _locked: &mut Locked<Unlocked>,
        _file: &FileObject,
        _current_task: &CurrentTask,
        _request: u32,
        _arg: SyscallArg,
    ) -> Result<SyscallResult, Errno> {
        error!(EBADF)
    }
}
1203
1204pub struct ProxyFileOps(pub FileHandle);
1205
1206impl FileOps for ProxyFileOps {
1207    // `close` is not delegated because the last reference to a `ProxyFileOps` is not
1208    // necessarily the last reference of the proxied file. If this is the case, the
1209    // releaser will handle it.
1210    // These don't take &FileObject making it too hard to handle them properly in the macro
1211    fn has_persistent_offsets(&self) -> bool {
1212        self.0.ops().has_persistent_offsets()
1213    }
1214    fn writes_update_seek_offset(&self) -> bool {
1215        self.0.ops().writes_update_seek_offset()
1216    }
1217    fn is_seekable(&self) -> bool {
1218        self.0.ops().is_seekable()
1219    }
1220    // These take &mut Locked<L> as a second argument
1221    fn flush(
1222        &self,
1223        locked: &mut Locked<FileOpsCore>,
1224        _file: &FileObject,
1225        current_task: &CurrentTask,
1226    ) {
1227        self.0.ops().flush(locked, &self.0, current_task);
1228    }
1229    fn wait_async(
1230        &self,
1231        locked: &mut Locked<FileOpsCore>,
1232        _file: &FileObject,
1233        current_task: &CurrentTask,
1234        waiter: &Waiter,
1235        events: FdEvents,
1236        handler: EventHandler,
1237    ) -> Option<WaitCanceler> {
1238        self.0.ops().wait_async(locked, &self.0, current_task, waiter, events, handler)
1239    }
1240    fn query_events(
1241        &self,
1242        locked: &mut Locked<FileOpsCore>,
1243        _file: &FileObject,
1244        current_task: &CurrentTask,
1245    ) -> Result<FdEvents, Errno> {
1246        self.0.ops().query_events(locked, &self.0, current_task)
1247    }
1248    fn read(
1249        &self,
1250        locked: &mut Locked<FileOpsCore>,
1251        _file: &FileObject,
1252        current_task: &CurrentTask,
1253        offset: usize,
1254        data: &mut dyn OutputBuffer,
1255    ) -> Result<usize, Errno> {
1256        self.0.ops().read(locked, &self.0, current_task, offset, data)
1257    }
1258    fn write(
1259        &self,
1260        locked: &mut Locked<FileOpsCore>,
1261        _file: &FileObject,
1262        current_task: &CurrentTask,
1263        offset: usize,
1264        data: &mut dyn InputBuffer,
1265    ) -> Result<usize, Errno> {
1266        self.0.ops().write(locked, &self.0, current_task, offset, data)
1267    }
1268    fn ioctl(
1269        &self,
1270        locked: &mut Locked<Unlocked>,
1271        _file: &FileObject,
1272        current_task: &CurrentTask,
1273        request: u32,
1274        arg: SyscallArg,
1275    ) -> Result<SyscallResult, Errno> {
1276        self.0.ops().ioctl(locked, &self.0, current_task, request, arg)
1277    }
1278    fn fcntl(
1279        &self,
1280        _file: &FileObject,
1281        current_task: &CurrentTask,
1282        cmd: u32,
1283        arg: u64,
1284    ) -> Result<SyscallResult, Errno> {
1285        self.0.ops().fcntl(&self.0, current_task, cmd, arg)
1286    }
1287    fn readdir(
1288        &self,
1289        locked: &mut Locked<FileOpsCore>,
1290        _file: &FileObject,
1291        current_task: &CurrentTask,
1292        sink: &mut dyn DirentSink,
1293    ) -> Result<(), Errno> {
1294        self.0.ops().readdir(locked, &self.0, current_task, sink)
1295    }
1296    fn sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1297        self.0.ops().sync(&self.0, current_task)
1298    }
1299    fn data_sync(&self, _file: &FileObject, current_task: &CurrentTask) -> Result<(), Errno> {
1300        self.0.ops().sync(&self.0, current_task)
1301    }
1302    fn get_memory(
1303        &self,
1304        locked: &mut Locked<FileOpsCore>,
1305        _file: &FileObject,
1306        current_task: &CurrentTask,
1307        length: Option<usize>,
1308        prot: ProtectionFlags,
1309    ) -> Result<Arc<MemoryObject>, Errno> {
1310        self.0.ops.get_memory(locked, &self.0, current_task, length, prot)
1311    }
1312    fn mmap(
1313        &self,
1314        locked: &mut Locked<FileOpsCore>,
1315        _file: &FileObject,
1316        current_task: &CurrentTask,
1317        addr: DesiredAddress,
1318        memory_offset: u64,
1319        length: usize,
1320        prot_flags: ProtectionFlags,
1321        options: MappingOptions,
1322        filename: NamespaceNode,
1323    ) -> Result<UserAddress, Errno> {
1324        self.0.ops.mmap(
1325            locked,
1326            &self.0,
1327            current_task,
1328            addr,
1329            memory_offset,
1330            length,
1331            prot_flags,
1332            options,
1333            filename,
1334        )
1335    }
1336    fn seek(
1337        &self,
1338        locked: &mut Locked<FileOpsCore>,
1339        _file: &FileObject,
1340        current_task: &CurrentTask,
1341        offset: off_t,
1342        target: SeekTarget,
1343    ) -> Result<off_t, Errno> {
1344        self.0.ops.seek(locked, &self.0, current_task, offset, target)
1345    }
1346}
1347
1348#[derive(Debug, Default, Copy, Clone)]
1349pub enum FileAsyncOwner {
1350    #[default]
1351    Unowned,
1352    Thread(pid_t),
1353    Process(pid_t),
1354    ProcessGroup(pid_t),
1355}
1356
1357impl FileAsyncOwner {
1358    pub fn validate(self, current_task: &CurrentTask) -> Result<(), Errno> {
1359        match self {
1360            FileAsyncOwner::Unowned => (),
1361            FileAsyncOwner::Thread(id) | FileAsyncOwner::Process(id) => {
1362                Task::from_weak(&current_task.get_task(id))?;
1363            }
1364            FileAsyncOwner::ProcessGroup(pgid) => {
1365                current_task
1366                    .kernel()
1367                    .pids
1368                    .read()
1369                    .get_process_group(pgid)
1370                    .ok_or_else(|| errno!(ESRCH))?;
1371            }
1372        }
1373        Ok(())
1374    }
1375}
1376
/// Identifier of a `FileObject`.
///
/// `FileObject::new` allocates it from the kernel's `next_file_object_id` counter.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct FileObjectId(u64);

impl FileObjectId {
    /// Returns this id converted to an `EpollKey` with a plain `as` cast.
    pub fn as_epoll_key(&self) -> EpollKey {
        self.0 as EpollKey
    }
}
1385
/// A session with a file object.
///
/// Each time a client calls open(), we create a new FileObject from the
/// underlying FsNode that receives the open(). This object contains the state
/// that is specific to this session, whereas the underlying FsNode contains
/// the state that is shared between all the sessions.
pub struct FileObject {
    /// The operations implementing this file's behavior.
    ops: Box<dyn FileOps>,
    /// The per-session state; also reachable through `Deref`.
    state: FileObjectState,
}
1396
/// Lets the fields and methods of `FileObjectState` be used directly on a `FileObject`.
impl std::ops::Deref for FileObject {
    type Target = FileObjectState;
    fn deref(&self) -> &Self::Target {
        &self.state
    }
}
1403
/// The state of a `FileObject` that does not depend on its `FileOps` implementation.
pub struct FileObjectState {
    /// Weak reference to the `FileHandle` of this `FileObject`. This allows to retrieve the
    /// `FileHandle` from a `FileObject`.
    pub weak_handle: WeakFileHandle,

    /// A unique identifier for this file object.
    pub id: FileObjectId,

    /// The NamespaceNode associated with this FileObject.
    ///
    /// Represents the name the process used to open this file.
    pub name: ActiveNamespaceNode,

    /// The file system of the node this file object was opened on.
    pub fs: FileSystemHandle,

    /// The current offset of this file object; starts at 0.
    pub offset: Mutex<off_t>,

    /// The open flags of this file object. `CREAT` is stripped before the flags are stored.
    flags: Mutex<OpenFlags>,

    /// The asynchronous owner of this file object; `Unowned` until one is set.
    async_owner: Mutex<FileAsyncOwner>,

    /// Weak handles to the `EpollFileObject`s that have added this `FileObject` as a control
    /// file, keyed by `FileHandleKey`.
    epoll_files: RcuHashMap<FileHandleKey, WeakFileHandle>,

    /// See fcntl F_SETLEASE and F_GETLEASE.
    lease: Mutex<FileLeaseType>,

    // This extra reference to the FsNode should not be needed, but it is needed to make
    // Inotify.ExcludeUnlinkInodeEvents pass. It is only populated for files opened for writing.
    _mysterious_node: Option<FsNodeHandle>,

    /// Opaque security state associated with this file object.
    pub security_state: security::FileObjectState,
}
1439
/// Release action for `FileObject`s: the object is handed to `register_delayed_release`.
///
/// This is an uninhabited enum; it only exists to carry the `ReleaserAction` implementation.
pub enum FileObjectReleaserAction {}
impl ReleaserAction<FileObject> for FileObjectReleaserAction {
    fn release(file_object: ReleaseGuard<FileObject>) {
        register_delayed_release(file_object);
    }
}
/// A `FileObject` wrapped in an `ObjectReleaser` that uses `FileObjectReleaserAction`.
pub type FileReleaser = ObjectReleaser<FileObject, FileObjectReleaserAction>;
/// Shared, strong handle to a `FileObject`.
pub type FileHandle = Arc<FileReleaser>;
/// Weak counterpart of `FileHandle`.
pub type WeakFileHandle = Weak<FileReleaser>;
/// `WeakKey` over a `FileReleaser`; the key type of `FileObjectState::epoll_files`.
pub type FileHandleKey = WeakKey<FileReleaser>;
1450
1451impl FileObjectState {
1452    /// The FsNode from which this FileObject was created.
1453    pub fn node(&self) -> &FsNodeHandle {
1454        &self.name.entry.node
1455    }
1456
1457    pub fn flags(&self) -> OpenFlags {
1458        *self.flags.lock()
1459    }
1460
1461    pub fn can_read(&self) -> bool {
1462        // TODO: Consider caching the access mode outside of this lock
1463        // because it cannot change.
1464        self.flags.lock().can_read()
1465    }
1466
1467    pub fn can_write(&self) -> bool {
1468        // TODO: Consider caching the access mode outside of this lock
1469        // because it cannot change.
1470        self.flags.lock().can_write()
1471    }
1472
1473    /// Returns false if the file is not allowed to be executed.
1474    pub fn can_exec(&self) -> bool {
1475        let mounted_no_exec = self.name.to_passive().mount.flags().contains(MountFlags::NOEXEC);
1476        let no_exec_seal = self
1477            .node()
1478            .write_guard_state
1479            .lock()
1480            .get_seals()
1481            .map(|seals| seals.contains(SealFlags::NO_EXEC))
1482            .unwrap_or(false);
1483        !(mounted_no_exec || no_exec_seal)
1484    }
1485
1486    // Notifies watchers on the current node and its parent about an event.
1487    pub fn notify(&self, event_mask: InotifyMask) {
1488        self.name.notify(event_mask)
1489    }
1490}
1491
1492impl FileObject {
1493    /// Create a FileObject that is not mounted in a namespace.
1494    ///
1495    /// In particular, this will create a new unrooted entries. This should not be used on
1496    /// file system with persistent entries, as the created entry will be out of sync with the one
1497    /// from the file system.
1498    ///
1499    /// The returned FileObject does not have a name.
1500    pub fn new_anonymous<L>(
1501        locked: &mut Locked<L>,
1502        current_task: &CurrentTask,
1503        ops: Box<dyn FileOps>,
1504        node: FsNodeHandle,
1505        flags: OpenFlags,
1506    ) -> FileHandle
1507    where
1508        L: LockEqualOrBefore<FileOpsCore>,
1509    {
1510        assert!(!node.fs().has_permanent_entries());
1511        Self::new(
1512            locked,
1513            current_task,
1514            ops,
1515            NamespaceNode::new_anonymous_unrooted(current_task, node),
1516            flags,
1517        )
1518        .expect("Failed to create anonymous FileObject")
1519    }
1520
    /// Create a FileObject with an associated NamespaceNode.
    ///
    /// This function is not typically called directly. Instead, consider
    /// calling NamespaceNode::open.
    ///
    /// # Errors
    ///
    /// Fails if the file is opened for writing and the node's write guard cannot be acquired,
    /// or if the `open` hook of `ops` fails.
    pub fn new<L>(
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        ops: Box<dyn FileOps>,
        name: NamespaceNode,
        flags: OpenFlags,
    ) -> Result<FileHandle, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        // Writable files take the node's write guard in `WriteFile` mode and keep an extra
        // reference to the node for the lifetime of the file object.
        let _mysterious_node = if flags.can_write() {
            name.entry.node.write_guard_state.lock().acquire(FileWriteGuardMode::WriteFile)?;
            Some(name.entry.node.clone())
        } else {
            None
        };
        let fs = name.entry.node.fs();
        let id = FileObjectId(current_task.kernel.next_file_object_id.next());
        let security_state = security::file_alloc_security(current_task);
        // `new_cyclic` lets the object store a weak reference to its own handle.
        let file = FileHandle::new_cyclic(|weak_handle| {
            Self {
                ops,
                state: FileObjectState {
                    weak_handle: weak_handle.clone(),
                    id,
                    name: name.into_active(),
                    fs,
                    offset: Mutex::new(0),
                    // `CREAT` only matters while opening, so it is not kept in the stored flags.
                    flags: Mutex::new(flags - OpenFlags::CREAT),
                    async_owner: Default::default(),
                    epoll_files: Default::default(),
                    lease: Default::default(),
                    _mysterious_node,
                    security_state,
                },
            }
            .into()
        });
        // The OPEN event is sent before the `open` hook runs, so it is emitted even when the
        // hook then fails.
        file.notify(InotifyMask::OPEN);

        file.ops().open(locked.cast_locked::<FileOpsCore>(), &file, current_task)?;
        Ok(file)
    }
1568
1569    pub fn max_access_for_memory_mapping(&self) -> Access {
1570        let mut access = Access::EXIST;
1571        if self.can_exec() {
1572            access |= Access::EXEC;
1573        }
1574        let flags = self.flags.lock();
1575        if flags.can_read() {
1576            access |= Access::READ;
1577        }
1578        if flags.can_write() {
1579            access |= Access::WRITE;
1580        }
1581        access
1582    }
1583
1584    pub fn ops(&self) -> &dyn FileOps {
1585        self.ops.as_ref()
1586    }
1587
    /// Returns the type name reported by the underlying `FileOps` implementation.
    pub fn ops_type_name(&self) -> &'static str {
        self.ops().type_name()
    }
1591
    /// Returns whether the `NONBLOCK` open flag is currently set on this file object.
    pub fn is_non_blocking(&self) -> bool {
        self.flags().contains(OpenFlags::NONBLOCK)
    }
1595
1596    /// Common implementation for blocking operations.
1597    ///
1598    /// This function is used to implement the blocking operations for file objects. FileOps
1599    /// implementations should call this function to handle the blocking logic.
1600    ///
1601    /// The `op` parameter is a function that implements the non-blocking version of the operation.
1602    /// The function is called once without registering a waiter in case no wait is needed. If the
1603    /// operation returns EAGAIN and the file object is non-blocking, the function returns EAGAIN.
1604    ///
1605    /// If the operation returns EAGAIN and the file object is blocking, the function will block
1606    /// until the given events are triggered. At that time, the operation is retried. Notice that
1607    /// the `op` function can be called multiple times before the operation completes.
1608    ///
1609    /// The `deadline` parameter is the deadline for the operation. If the operation does not
1610    /// complete before the deadline, the function will return ETIMEDOUT.
1611    pub fn blocking_op<L, T, Op>(
1612        &self,
1613        locked: &mut Locked<L>,
1614        current_task: &CurrentTask,
1615        events: FdEvents,
1616        deadline: Option<zx::MonotonicInstant>,
1617        mut op: Op,
1618    ) -> Result<T, Errno>
1619    where
1620        L: LockEqualOrBefore<FileOpsCore>,
1621        Op: FnMut(&mut Locked<L>) -> Result<T, Errno>,
1622    {
1623        // Don't return EAGAIN for directories. This can happen because glibc always opens a
1624        // directory with O_NONBLOCK.
1625        let can_return_eagain = self.flags().contains(OpenFlags::NONBLOCK)
1626            && !self.flags().contains(OpenFlags::DIRECTORY);
1627        // Run the operation a first time without registering a waiter in case no wait is needed.
1628        match op(locked) {
1629            Err(errno) if errno == EAGAIN && !can_return_eagain => {}
1630            result => return result,
1631        }
1632
1633        let waiter = Waiter::new();
1634        loop {
1635            // Register the waiter before running the operation to prevent a race.
1636            self.wait_async(locked, current_task, &waiter, events, WaitCallback::none());
1637            match op(locked) {
1638                Err(e) if e == EAGAIN => {}
1639                result => return result,
1640            }
1641            let locked = locked.cast_locked::<FileOpsCore>();
1642            waiter
1643                .wait_until(
1644                    locked,
1645                    current_task,
1646                    deadline.unwrap_or(zx::MonotonicInstant::INFINITE),
1647                )
1648                .map_err(|e| if e == ETIMEDOUT { errno!(EAGAIN) } else { e })?;
1649        }
1650    }
1651
    /// Returns whether the underlying `FileOps` reports the file as seekable.
    pub fn is_seekable(&self) -> bool {
        self.ops().is_seekable()
    }
1655
    /// Returns whether the underlying `FileOps` reports that the file keeps a persistent
    /// offset. When it does not, `read` always passes offset 0 to the ops.
    pub fn has_persistent_offsets(&self) -> bool {
        self.ops().has_persistent_offsets()
    }
1659
1660    /// Common implementation for `read` and `read_at`.
1661    fn read_internal<R>(&self, current_task: &CurrentTask, read: R) -> Result<usize, Errno>
1662    where
1663        R: FnOnce() -> Result<usize, Errno>,
1664    {
1665        security::file_permission(current_task, self, security::PermissionFlags::READ)?;
1666
1667        if !self.can_read() {
1668            return error!(EBADF);
1669        }
1670        let bytes_read = read()?;
1671
1672        // TODO(steveaustin) - omit updating time_access to allow info to be immutable
1673        // and thus allow simultaneous reads.
1674        self.update_atime();
1675        if bytes_read > 0 {
1676            self.notify(InotifyMask::ACCESS);
1677        }
1678
1679        Ok(bytes_read)
1680    }
1681
1682    pub fn read<L>(
1683        &self,
1684        locked: &mut Locked<L>,
1685        current_task: &CurrentTask,
1686        data: &mut dyn OutputBuffer,
1687    ) -> Result<usize, Errno>
1688    where
1689        L: LockEqualOrBefore<FileOpsCore>,
1690    {
1691        self.read_internal(current_task, || {
1692            let locked = locked.cast_locked::<FileOpsCore>();
1693            if !self.ops().has_persistent_offsets() {
1694                if data.available() > MAX_LFS_FILESIZE {
1695                    return error!(EINVAL);
1696                }
1697                return self.ops.read(locked, self, current_task, 0, data);
1698            }
1699
1700            let mut offset_guard = self.offset.lock();
1701            let offset = *offset_guard as usize;
1702            checked_add_offset_and_length(offset, data.available())?;
1703            let read = self.ops.read(locked, self, current_task, offset, data)?;
1704            *offset_guard += read as off_t;
1705            Ok(read)
1706        })
1707    }
1708
    /// Reads from the file at the explicit `offset`, leaving the file's own offset untouched.
    ///
    /// # Errors
    ///
    /// Fails with `ESPIPE` if the ops are not seekable, and propagates the error of
    /// `checked_add_offset_and_length` for the given offset and buffer length. Both checks run
    /// before the permission and access-mode checks done by `read_internal`.
    pub fn read_at<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        offset: usize,
        data: &mut dyn OutputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        if !self.ops().is_seekable() {
            return error!(ESPIPE);
        }
        checked_add_offset_and_length(offset, data.available())?;
        let locked = locked.cast_locked::<FileOpsCore>();
        self.read_internal(current_task, || self.ops.read(locked, self, current_task, offset, data))
    }
1726
1727    /// Common checks before calling ops().write.
1728    fn write_common<L>(
1729        &self,
1730        locked: &mut Locked<L>,
1731        current_task: &CurrentTask,
1732        offset: usize,
1733        data: &mut dyn InputBuffer,
1734    ) -> Result<usize, Errno>
1735    where
1736        L: LockEqualOrBefore<FileOpsCore>,
1737    {
1738        security::file_permission(current_task, self, security::PermissionFlags::WRITE)?;
1739
1740        // We need to cap the size of `data` to prevent us from growing the file too large,
1741        // according to <https://man7.org/linux/man-pages/man2/write.2.html>:
1742        //
1743        //   The number of bytes written may be less than count if, for example, there is
1744        //   insufficient space on the underlying physical medium, or the RLIMIT_FSIZE resource
1745        //   limit is encountered (see setrlimit(2)),
1746        checked_add_offset_and_length(offset, data.available())?;
1747        let locked = locked.cast_locked::<FileOpsCore>();
1748        self.ops().write(locked, self, current_task, offset, data)
1749    }
1750
1751    /// Common wrapper work for `write` and `write_at`.
1752    fn write_fn<W, L>(
1753        &self,
1754        locked: &mut Locked<L>,
1755        current_task: &CurrentTask,
1756        write: W,
1757    ) -> Result<usize, Errno>
1758    where
1759        L: LockEqualOrBefore<FileOpsCore>,
1760        W: FnOnce(&mut Locked<L>) -> Result<usize, Errno>,
1761    {
1762        if !self.can_write() {
1763            return error!(EBADF);
1764        }
1765        self.node().clear_suid_and_sgid_bits(locked, current_task)?;
1766        let bytes_written = write(locked)?;
1767        self.node().update_ctime_mtime();
1768
1769        if bytes_written > 0 {
1770            self.notify(InotifyMask::MODIFY);
1771        }
1772
1773        Ok(bytes_written)
1774    }
1775
    /// Writes `data` at the file's current position.
    ///
    /// The work runs inside `write_fn`, which performs the writability check and the
    /// post-write bookkeeping. Inside that wrapper:
    ///
    /// * If the ops have no persistent offset, the data is written at offset 0.
    /// * If the file was opened with `OpenFlags::APPEND`, the node's `append_lock` is taken
    ///   via `write_and`, the stored offset is moved to the end of the file with
    ///   `SeekTarget::End(0)`, and the data is written there.
    /// * Otherwise the `append_lock` is taken via `read_and` and the data is written at the
    ///   stored offset.
    ///
    /// The stored offset is advanced by the number of bytes written only when the ops report
    /// `writes_update_seek_offset()`.
    ///
    /// Returns the number of bytes written.
    pub fn write<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        self.write_fn(locked, current_task, |locked| {
            // Files without a persistent offset always write at offset 0 and skip the
            // offset/append-lock handling below.
            if !self.ops().has_persistent_offsets() {
                return self.write_common(locked, current_task, 0, data);
            }
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            // Shadows the closure's `locked` with a fresh `Unlocked` token so that the
            // append lock can be acquired below (see the TODO above).
            let locked = unsafe { Unlocked::new() };
            // The offset mutex is held until the closure returns, covering both the write
            // and the offset update.
            let mut offset = self.offset.lock();
            let bytes_written = if self.flags().contains(OpenFlags::APPEND) {
                // `_guard` keeps the append lock held until the end of this branch, i.e.
                // across both the seek-to-end and the write.
                let (_guard, locked) = self.node().append_lock.write_and(locked, current_task)?;
                *offset = self.ops().seek(
                    locked.cast_locked::<FileOpsCore>(),
                    self,
                    current_task,
                    *offset,
                    SeekTarget::End(0),
                )?;
                self.write_common(locked, current_task, *offset as usize, data)
            } else {
                let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;
                self.write_common(locked, current_task, *offset as usize, data)
            }?;
            // Not every file type moves its position on write; ask the ops.
            if self.ops().writes_update_seek_offset() {
                *offset += bytes_written as off_t;
            }
            Ok(bytes_written)
        })
    }
1817
    /// Writes `data` at the caller-supplied `offset`; this method does not update the stored
    /// file offset itself.
    ///
    /// Fails with `ESPIPE` when the ops are not seekable. The write runs inside `write_fn`
    /// with the node's `append_lock` taken via `read_and`. When the file was opened with
    /// `OpenFlags::APPEND`, the supplied offset is validated and then replaced by the value
    /// returned from `default_eof_offset`, so the data is appended (see the LTP note below).
    ///
    /// Returns the number of bytes written.
    pub fn write_at<L>(
        &self,
        locked: &mut Locked<L>,
        current_task: &CurrentTask,
        mut offset: usize,
        data: &mut dyn InputBuffer,
    ) -> Result<usize, Errno>
    where
        L: LockEqualOrBefore<FileOpsCore>,
    {
        if !self.ops().is_seekable() {
            return error!(ESPIPE);
        }
        self.write_fn(locked, current_task, |_locked| {
            // TODO(https://fxbug.dev/333540469): write_fn should take L: LockBefore<FsNodeAppend>,
            // but FileOpsCore must be after FsNodeAppend
            #[allow(
                clippy::undocumented_unsafe_blocks,
                reason = "Force documented unsafe blocks in Starnix"
            )]
            // A fresh `Unlocked` token is used instead of the closure's `_locked` argument so
            // that the append lock can be acquired (see the TODO above).
            let locked = unsafe { Unlocked::new() };
            // `_guard` keeps the append lock held until the closure returns.
            let (_guard, locked) = self.node().append_lock.read_and(locked, current_task)?;

            // According to LTP test pwrite04:
            //
            //   POSIX requires that opening a file with the O_APPEND flag should have no effect on the
            //   location at which pwrite() writes data. However, on Linux, if a file is opened with
            //   O_APPEND, pwrite() appends data to the end of the file, regardless of the value of offset.
            //
            // NOTE(review): the `is_seekable()` test below repeats the guard at the top of this
            // function, so it looks redundant here.
            if self.flags().contains(OpenFlags::APPEND) && self.ops().is_seekable() {
                // The caller's offset is still validated before being discarded.
                checked_add_offset_and_length(offset, data.available())?;
                offset = default_eof_offset(locked, self, current_task)? as usize;
            }

            self.write_common(locked, current_task, offset, data)
        })
    }
1854
1855    pub fn seek<L>(
1856        &self,
1857        locked: &mut Locked<L>,
1858        current_task: &CurrentTask,
1859        target: SeekTarget,
1860    ) -> Result<off_t, Errno>
1861    where
1862        L: LockEqualOrBefore<FileOpsCore>,
1863    {
1864        let locked = locked.cast_locked::<FileOpsCore>();
1865        let locked = locked;
1866
1867        if !self.ops().is_seekable() {
1868            return error!(ESPIPE);
1869        }
1870
1871        if !self.ops().has_persistent_offsets() {
1872            return self.ops().seek(locked, self, current_task, 0, target);
1873        }
1874
1875        let mut offset_guard = self.offset.lock();
1876        let new_offset = self.ops().seek(locked, self, current_task, *offset_guard, target)?;
1877        *offset_guard = new_offset;
1878        Ok(new_offset)
1879    }
1880
1881    pub fn sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1882        self.ops().sync(self, current_task)
1883    }
1884
1885    pub fn data_sync(&self, current_task: &CurrentTask) -> Result<(), Errno> {
1886        self.ops().data_sync(self, current_task)
1887    }
1888
1889    pub fn get_memory<L>(
1890        &self,
1891        locked: &mut Locked<L>,
1892        current_task: &CurrentTask,
1893        length: Option<usize>,
1894        prot: ProtectionFlags,
1895    ) -> Result<Arc<MemoryObject>, Errno>
1896    where
1897        L: LockEqualOrBefore<FileOpsCore>,
1898    {
1899        if prot.contains(ProtectionFlags::READ) && !self.can_read() {
1900            return error!(EACCES);
1901        }
1902        if prot.contains(ProtectionFlags::WRITE) && !self.can_write() {
1903            return error!(EACCES);
1904        }
1905        if prot.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1906            return error!(EPERM);
1907        }
1908        self.ops().get_memory(locked.cast_locked::<FileOpsCore>(), self, current_task, length, prot)
1909    }
1910
1911    pub fn mmap<L>(
1912        &self,
1913        locked: &mut Locked<L>,
1914        current_task: &CurrentTask,
1915        addr: DesiredAddress,
1916        memory_offset: u64,
1917        length: usize,
1918        prot_flags: ProtectionFlags,
1919        options: MappingOptions,
1920        filename: NamespaceNode,
1921    ) -> Result<UserAddress, Errno>
1922    where
1923        L: LockEqualOrBefore<FileOpsCore>,
1924    {
1925        let locked = locked.cast_locked::<FileOpsCore>();
1926        if !self.can_read() {
1927            return error!(EACCES);
1928        }
1929        if prot_flags.contains(ProtectionFlags::WRITE)
1930            && !self.can_write()
1931            && options.contains(MappingOptions::SHARED)
1932        {
1933            return error!(EACCES);
1934        }
1935        if prot_flags.contains(ProtectionFlags::EXEC) && !self.can_exec() {
1936            return error!(EPERM);
1937        }
1938        self.ops().mmap(
1939            locked,
1940            self,
1941            current_task,
1942            addr,
1943            memory_offset,
1944            length,
1945            prot_flags,
1946            options,
1947            filename,
1948        )
1949    }
1950
1951    pub fn readdir<L>(
1952        &self,
1953        locked: &mut Locked<L>,
1954        current_task: &CurrentTask,
1955        sink: &mut dyn DirentSink,
1956    ) -> Result<(), Errno>
1957    where
1958        L: LockEqualOrBefore<FileOpsCore>,
1959    {
1960        let locked = locked.cast_locked::<FileOpsCore>();
1961        if self.name.entry.read().is_dead() {
1962            return error!(ENOENT);
1963        }
1964
1965        self.ops().readdir(locked, self, current_task, sink)?;
1966        self.update_atime();
1967        self.notify(InotifyMask::ACCESS);
1968        Ok(())
1969    }
1970
1971    pub fn ioctl(
1972        &self,
1973        locked: &mut Locked<Unlocked>,
1974        current_task: &CurrentTask,
1975        request: u32,
1976        arg: SyscallArg,
1977    ) -> Result<SyscallResult, Errno> {
1978        security::check_file_ioctl_access(current_task, &self, request)?;
1979
1980        if request == FIBMAP {
1981            security::check_task_capable(current_task, CAP_SYS_RAWIO)?;
1982
1983            // TODO: https://fxbug.dev/404795644 - eliminate this phoney response when the SELinux
1984            // Test Suite no longer requires it.
1985            if current_task.kernel().features.selinux_test_suite {
1986                let phoney_block = 0xbadf000du32;
1987                current_task.write_object(arg.into(), &phoney_block)?;
1988                return Ok(SUCCESS);
1989            }
1990        }
1991
1992        self.ops().ioctl(locked, self, current_task, request, arg)
1993    }
1994
1995    pub fn fcntl(
1996        &self,
1997        current_task: &CurrentTask,
1998        cmd: u32,
1999        arg: u64,
2000    ) -> Result<SyscallResult, Errno> {
2001        self.ops().fcntl(self, current_task, cmd, arg)
2002    }
2003
2004    pub fn ftruncate<L>(
2005        &self,
2006        locked: &mut Locked<L>,
2007        current_task: &CurrentTask,
2008        length: u64,
2009    ) -> Result<(), Errno>
2010    where
2011        L: LockBefore<BeforeFsNodeAppend>,
2012    {
2013        // The file must be opened with write permissions. Otherwise
2014        // truncating it is forbidden.
2015        if !self.can_write() {
2016            return error!(EINVAL);
2017        }
2018        self.node().ftruncate(locked, current_task, length)?;
2019        self.name.entry.notify_ignoring_excl_unlink(InotifyMask::MODIFY);
2020        Ok(())
2021    }
2022
2023    pub fn fallocate<L>(
2024        &self,
2025        locked: &mut Locked<L>,
2026        current_task: &CurrentTask,
2027        mode: FallocMode,
2028        offset: u64,
2029        length: u64,
2030    ) -> Result<(), Errno>
2031    where
2032        L: LockBefore<BeforeFsNodeAppend>,
2033    {
2034        // If the file is a pipe or FIFO, ESPIPE is returned.
2035        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2036        if self.node().is_fifo() {
2037            return error!(ESPIPE);
2038        }
2039
2040        // Must be a regular file or directory.
2041        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2042        if !self.node().is_dir() && !self.node().is_reg() {
2043            return error!(ENODEV);
2044        }
2045
2046        // The file must be opened with write permissions. Otherwise operation is forbidden.
2047        // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2048        if !self.can_write() {
2049            return error!(EBADF);
2050        }
2051
2052        self.node().fallocate(locked, current_task, mode, offset, length)?;
2053        self.notify(InotifyMask::MODIFY);
2054        Ok(())
2055    }
2056
2057    pub fn to_handle(
2058        &self,
2059        current_task: &CurrentTask,
2060    ) -> Result<Option<zx::NullableHandle>, Errno> {
2061        self.ops().to_handle(self, current_task)
2062    }
2063
2064    pub fn as_thread_group_key(&self) -> Result<ThreadGroupKey, Errno> {
2065        self.ops().as_thread_group_key(self)
2066    }
2067
2068    pub fn update_file_flags(&self, value: OpenFlags, mask: OpenFlags) {
2069        let mask_bits = mask.bits();
2070        let mut flags = self.flags.lock();
2071        let bits = (flags.bits() & !mask_bits) | (value.bits() & mask_bits);
2072        *flags = OpenFlags::from_bits_truncate(bits);
2073    }
2074
2075    /// Get the async owner of this file.
2076    ///
2077    /// See fcntl(F_GETOWN)
2078    pub fn get_async_owner(&self) -> FileAsyncOwner {
2079        *self.async_owner.lock()
2080    }
2081
2082    /// Set the async owner of this file.
2083    ///
2084    /// See fcntl(F_SETOWN)
2085    pub fn set_async_owner(&self, owner: FileAsyncOwner) {
2086        *self.async_owner.lock() = owner;
2087    }
2088
2089    /// See fcntl(F_GETLEASE)
2090    pub fn get_lease(&self, _current_task: &CurrentTask) -> FileLeaseType {
2091        *self.lease.lock()
2092    }
2093
2094    /// See fcntl(F_SETLEASE)
2095    pub fn set_lease(
2096        &self,
2097        _current_task: &CurrentTask,
2098        lease: FileLeaseType,
2099    ) -> Result<(), Errno> {
2100        if !self.node().is_reg() {
2101            return error!(EINVAL);
2102        }
2103        if lease == FileLeaseType::Read && self.can_write() {
2104            return error!(EAGAIN);
2105        }
2106        *self.lease.lock() = lease;
2107        Ok(())
2108    }
2109
2110    /// Wait on the specified events and call the EventHandler when ready
2111    pub fn wait_async<L>(
2112        &self,
2113        locked: &mut Locked<L>,
2114        current_task: &CurrentTask,
2115        waiter: &Waiter,
2116        events: FdEvents,
2117        handler: EventHandler,
2118    ) -> Option<WaitCanceler>
2119    where
2120        L: LockEqualOrBefore<FileOpsCore>,
2121    {
2122        self.ops().wait_async(
2123            locked.cast_locked::<FileOpsCore>(),
2124            self,
2125            current_task,
2126            waiter,
2127            events,
2128            handler,
2129        )
2130    }
2131
2132    /// The events currently active on this file.
2133    pub fn query_events<L>(
2134        &self,
2135        locked: &mut Locked<L>,
2136        current_task: &CurrentTask,
2137    ) -> Result<FdEvents, Errno>
2138    where
2139        L: LockEqualOrBefore<FileOpsCore>,
2140    {
2141        self.ops()
2142            .query_events(locked.cast_locked::<FileOpsCore>(), self, current_task)
2143            .map(FdEvents::add_equivalent_fd_events)
2144    }
2145
2146    pub fn record_lock(
2147        &self,
2148        locked: &mut Locked<Unlocked>,
2149        current_task: &CurrentTask,
2150        cmd: RecordLockCommand,
2151        flock: uapi::flock,
2152    ) -> Result<Option<uapi::flock>, Errno> {
2153        self.node().record_lock(locked, current_task, self, cmd, flock)
2154    }
2155
2156    pub fn flush<L>(&self, locked: &mut Locked<L>, current_task: &CurrentTask, id: FdTableId)
2157    where
2158        L: LockEqualOrBefore<FileOpsCore>,
2159    {
2160        self.name.entry.node.record_lock_release(RecordLockOwner::FdTable(id));
2161        self.ops().flush(locked.cast_locked::<FileOpsCore>(), self, current_task)
2162    }
2163
2164    fn update_atime(&self) {
2165        if !self.flags().contains(OpenFlags::NOATIME) {
2166            self.name.update_atime();
2167        }
2168    }
2169
2170    pub fn readahead(
2171        &self,
2172        current_task: &CurrentTask,
2173        offset: usize,
2174        length: usize,
2175    ) -> Result<(), Errno> {
2176        // readfile() fails with EBADF if the file was not open for read.
2177        if !self.can_read() {
2178            return error!(EBADF);
2179        }
2180        checked_add_offset_and_length(offset, length)?;
2181        self.ops().readahead(self, current_task, offset, length)
2182    }
2183
2184    pub fn extra_fdinfo(
2185        &self,
2186        locked: &mut Locked<FileOpsCore>,
2187        current_task: &CurrentTask,
2188    ) -> Option<FsString> {
2189        let file = self.weak_handle.upgrade()?;
2190        self.ops().extra_fdinfo(locked, &file, current_task)
2191    }
2192
2193    /// Register the fd number of an `EpollFileObject` that listens to events from this
2194    /// `FileObject`.
2195    pub fn register_epfd(&self, file: &FileHandle) {
2196        self.epoll_files.lock().insert(WeakKey::from(file), file.weak_handle.clone());
2197    }
2198
2199    pub fn unregister_epfd(&self, file: &FileHandle) {
2200        self.epoll_files.lock().remove(&WeakKey::from(file));
2201    }
2202}
2203
2204impl Releasable for FileObject {
2205    type Context<'a> = CurrentTaskAndLocked<'a>;
2206
2207    fn release<'a>(self, context: CurrentTaskAndLocked<'a>) {
2208        let (locked, current_task) = context;
2209        // Release all wake leases associated with this file in the corresponding `WaitObject`
2210        // of each registered epfd.
2211        for (_, file) in self.epoll_files.lock().drain() {
2212            if let Some(file) = file.upgrade() {
2213                if let Some(epoll_object) = file.downcast_file::<EpollFileObject>() {
2214                    current_task.kernel().suspend_resume_manager.deactivate_wakeup_source(
2215                        &WakeupSourceOrigin::Epoll(wakeup_source_name_for_epoll(
2216                            current_task,
2217                            self.id.as_epoll_key(),
2218                        )),
2219                    );
2220                    let _ = epoll_object.delete(&self);
2221                }
2222            }
2223        }
2224
2225        if self.can_write() {
2226            self.name.entry.node.write_guard_state.lock().release(FileWriteGuardMode::WriteFile);
2227        }
2228
2229        let locked = locked.cast_locked::<FileOpsCore>();
2230        let ops = self.ops;
2231        let state = self.state;
2232        ops.close(locked, &state, current_task);
2233        state.name.entry.node.on_file_closed(&state);
2234        let event =
2235            if state.can_write() { InotifyMask::CLOSE_WRITE } else { InotifyMask::CLOSE_NOWRITE };
2236        state.notify(event);
2237    }
2238}
2239
2240impl fmt::Debug for FileObject {
2241    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2242        f.debug_struct("FileObject")
2243            .field("name", &self.name)
2244            .field("fs", &self.fs.name())
2245            .field("offset", &self.offset)
2246            .field("flags", &self.flags)
2247            .field("ops_ty", &self.ops().type_name())
2248            .finish()
2249    }
2250}
2251
/// `FileReleaser` takes no action when woken: `on_wake` is intentionally empty.
impl OnWakeOps for FileReleaser {
    // Both arguments are unused; the empty body makes this a no-op.
    fn on_wake(&self, _current_task: &CurrentTask, _baton_lease: &zx::NullableHandle) {}
}
2255
/// A FileObject with the type of its FileOps known. Dereferencing it returns the FileOps.
///
/// Holds a borrow of the `FileObject` together with a borrow of its ops already downcast to
/// `Ops`. Values are created by `FileObject::downcast_file`.
pub struct DowncastedFile<'a, Ops> {
    /// The file the ops belong to.
    file: &'a FileObject,
    /// The file's ops, viewed as the concrete type `Ops`.
    ops: &'a Ops,
}
// `Copy` and `Clone` are implemented by hand rather than derived so that they carry no
// `Ops: Copy` / `Ops: Clone` bound: the struct only stores references.
impl<'a, Ops> Copy for DowncastedFile<'a, Ops> {}
impl<'a, Ops> Clone for DowncastedFile<'a, Ops> {
    fn clone(&self) -> Self {
        // The type is `Copy`, so cloning is a plain bitwise copy of the two references.
        *self
    }
}
2267
impl<'a, Ops> DowncastedFile<'a, Ops> {
    /// Returns the underlying `FileObject`.
    ///
    /// The returned reference carries the full `'a` lifetime rather than the lifetime of
    /// `&self`.
    pub fn file(&self) -> &'a FileObject {
        self.file
    }
}
2273
/// Dereferencing yields the stored ops reference.
///
/// Note that `Target` is `&'a Ops`, not `Ops`: `deref` hands out a reference to the stored
/// `&'a Ops`, so the inner reference (with its `'a` lifetime) can be copied out by callers.
impl<'a, Ops> Deref for DowncastedFile<'a, Ops> {
    type Target = &'a Ops;
    fn deref(&self) -> &Self::Target {
        &self.ops
    }
}
2280
2281impl FileObject {
2282    /// Returns the `FileObject`'s `FileOps` as a `DowncastedFile<T>`, or `None` if the downcast
2283    /// fails.
2284    ///
2285    /// This is useful for syscalls that only operate on a certain type of file.
2286    pub fn downcast_file<'a, T>(&'a self) -> Option<DowncastedFile<'a, T>>
2287    where
2288        T: 'static,
2289    {
2290        let ops = self.ops().as_any().downcast_ref::<T>()?;
2291        Some(DowncastedFile { file: self, ops })
2292    }
2293}
2294
#[cfg(test)]
mod tests {
    use crate::fs::tmpfs::TmpFs;
    use crate::task::CurrentTask;
    use crate::task::dynamic_thread_spawner::SpawnRequestBuilder;
    use crate::testing::*;
    use crate::vfs::MountInfo;
    use crate::vfs::buffers::{VecInputBuffer, VecOutputBuffer};
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::auth::FsCred;
    use starnix_uapi::device_type::DeviceType;
    use starnix_uapi::file_mode::FileMode;
    use starnix_uapi::open_flags::OpenFlags;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicBool, Ordering};
    use zerocopy::{FromBytes, IntoBytes, LE, U64};

    /// Runs appending writes and truncations to zero concurrently on the same file while
    /// the main task keeps reading it back, and checks that every snapshot read from
    /// offset 0 contains a strictly increasing sequence of values.
    #[::fuchsia::test]
    async fn test_append_truncate_race() {
        spawn_kernel_and_run(async |locked, current_task| {
            // Set up a regular file named "test" on a detached tmpfs, opened read-write
            // with APPEND.
            let kernel = current_task.kernel();
            let root_fs = TmpFs::new_fs(locked, &kernel);
            let mount = MountInfo::detached();
            let root_node = Arc::clone(root_fs.root());
            let file = root_node
                .create_entry(
                    locked,
                    &current_task,
                    &mount,
                    "test".into(),
                    |locked, dir, mount, name| {
                        dir.create_node(
                            locked,
                            &current_task,
                            mount,
                            name,
                            FileMode::IFREG | FileMode::ALLOW_ALL,
                            DeviceType::NONE,
                            FsCred::root(),
                        )
                    },
                )
                .expect("create_node failed");
            let file_handle = file
                .open_anonymous(locked, &current_task, OpenFlags::APPEND | OpenFlags::RDWR)
                .expect("open failed");
            // Set by the writer once it has finished; stops the truncater and the reader.
            let done = Arc::new(AtomicBool::new(false));

            // Writer: appends the values 0..2000 as little-endian u64s, one write per value.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                for i in 0..2000 {
                    fh.write(
                        locked,
                        current_task,
                        &mut VecInputBuffer::new(U64::<LE>::new(i).as_bytes()),
                    )
                    .expect("write failed");
                }
                done_clone.store(true, Ordering::SeqCst);
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (write_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // Truncater: repeatedly truncates the file to zero length until the writer is done.
            let fh = file_handle.clone();
            let done_clone = done.clone();
            let closure = move |locked: &mut Locked<Unlocked>, current_task: &CurrentTask| {
                while !done_clone.load(Ordering::SeqCst) {
                    fh.ftruncate(locked, current_task, 0).expect("truncate failed");
                }
                let result: Result<(), starnix_uapi::errors::Errno> = Ok(());
                result
            };
            let (truncate_thread, req) =
                SpawnRequestBuilder::new().with_sync_closure(closure).build_with_sync_result();
            kernel.kthreads.spawner().spawn_from_request(req);

            // If we read from the file, we should always find an increasing sequence. If there are
            // races, then we might unexpectedly see zeroes.
            while !done.load(Ordering::SeqCst) {
                let mut buffer = VecOutputBuffer::new(4096);
                let amount = file_handle
                    .read_at(locked, &current_task, 0, &mut buffer)
                    .expect("read failed");
                let mut last = None;
                // Only the bytes actually read are inspected, in whole 8-byte chunks.
                let buffer = &Vec::from(buffer)[..amount];
                for i in
                    buffer.chunks_exact(8).map(|chunk| U64::<LE>::read_from_bytes(chunk).unwrap())
                {
                    if let Some(last) = last {
                        assert!(i.get() > last, "buffer: {:?}", buffer);
                    }
                    last = Some(i.get());
                }
            }

            // Collect both worker results so that a failure in either one fails the test.
            let _ = write_thread().unwrap();
            let _ = truncate_thread().unwrap();
        })
        .await;
    }
}