Skip to main content

starnix_core/vfs/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::{IOVecPtr, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE};
6use crate::security;
7use crate::syscalls::time::{ITimerSpecPtr, TimeSpecPtr, TimeValPtr};
8use crate::task::{CurrentTask, EventHandler, ProcessEntryRef, ReadyItem, ReadyItemKey, Waiter};
9use crate::time::{Timeline, TimerWakeup};
10use crate::vfs::aio::AioContext;
11use crate::vfs::buffers::{UserBuffersInputBuffer, UserBuffersOutputBuffer};
12use crate::vfs::eventfd::{EventFdType, new_eventfd};
13use crate::vfs::fs_args::MountParams;
14use crate::vfs::inotify::InotifyFileObject;
15use crate::vfs::io_uring::{IORING_MAX_ENTRIES, IoUringFileObject};
16use crate::vfs::pidfd::new_pidfd;
17use crate::vfs::pipe::{PipeFileObject, new_pipe};
18use crate::vfs::timer::TimerFile;
19use crate::vfs::{
20    CheckAccessReason, DirentSink64, EpollFileObject, FallocMode, FdFlags, FdNumber,
21    FileAsyncOwner, FileHandle, FileSystemOptions, FlockOperation, FsStr, FsString, LookupContext,
22    Mount, NamespaceNode, PathWithReachability, RecordLockCommand, RenameFlags, SeekTarget,
23    StatxFlags, SymlinkMode, SymlinkTarget, TargetFdNumber, TimeUpdateType, UnlinkKind,
24    ValueOrSize, WdNumber, WhatToMount, XattrOp, checked_add_offset_and_length, new_memfd,
25    new_zombie_pidfd, splice,
26};
27use starnix_logging::{log_trace, track_stub};
28use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Mutex, Unlocked};
29use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
30use starnix_types::time::{
31    duration_from_poll_timeout, duration_from_timespec, time_from_timespec, timespec_from_duration,
32};
33use starnix_types::user_buffer::UserBuffer;
34use starnix_uapi::auth::{
35    CAP_BLOCK_SUSPEND, CAP_DAC_READ_SEARCH, CAP_LEASE, CAP_SYS_ADMIN, CAP_WAKE_ALARM, Capabilities,
36    Credentials, PTRACE_MODE_ATTACH_REALCREDS,
37};
38use starnix_uapi::device_id::DeviceId;
39use starnix_uapi::errors::{
40    EFAULT, EINTR, ENAMETOOLONG, ENOTSUP, ETIMEDOUT, Errno, ErrnoResultExt,
41};
42use starnix_uapi::file_lease::FileLeaseType;
43use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
44use starnix_uapi::inotify_mask::InotifyMask;
45use starnix_uapi::mount_flags::MountFlags;
46use starnix_uapi::open_flags::OpenFlags;
47use starnix_uapi::personality::PersonalityFlags;
48use starnix_uapi::resource_limits::Resource;
49use starnix_uapi::seal_flags::SealFlags;
50use starnix_uapi::signals::SigSet;
51use starnix_uapi::unmount_flags::UnmountFlags;
52use starnix_uapi::user_address::{MultiArchUserRef, UserAddress, UserCString, UserRef};
53use starnix_uapi::user_value::UserValue;
54use starnix_uapi::vfs::{EpollEvent, FdEvents, ResolveFlags};
55use starnix_uapi::{
56    __kernel_fd_set, AT_EACCESS, AT_EMPTY_PATH, AT_NO_AUTOMOUNT, AT_REMOVEDIR, AT_SYMLINK_FOLLOW,
57    AT_SYMLINK_NOFOLLOW, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM, CLOCK_MONOTONIC, CLOCK_REALTIME,
58    CLOCK_REALTIME_ALARM, CLOSE_RANGE_CLOEXEC, CLOSE_RANGE_UNSHARE, EFD_CLOEXEC, EFD_NONBLOCK,
59    EFD_SEMAPHORE, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD, F_ADD_SEALS,
60    F_DUPFD, F_DUPFD_CLOEXEC, F_GET_SEALS, F_GETFD, F_GETFL, F_GETLEASE, F_GETLK, F_GETLK64,
61    F_GETOWN, F_GETOWN_EX, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW, F_OWNER_PGRP, F_OWNER_PID,
62    F_OWNER_TID, F_SETFD, F_SETFL, F_SETLEASE, F_SETLK, F_SETLK64, F_SETLKW, F_SETLKW64, F_SETOWN,
63    F_SETOWN_EX, F_SETSIG, FIOCLEX, FIONCLEX, IN_CLOEXEC, IN_NONBLOCK, MFD_ALLOW_SEALING,
64    MFD_CLOEXEC, MFD_EXEC, MFD_HUGE_MASK, MFD_HUGE_SHIFT, MFD_HUGETLB, MFD_NOEXEC_SEAL, NAME_MAX,
65    O_CLOEXEC, O_CREAT, O_NOFOLLOW, O_PATH, O_TMPFILE, PIDFD_NONBLOCK, POLLERR, POLLHUP, POLLIN,
66    POLLOUT, POLLPRI, POLLRDBAND, POLLRDNORM, POLLWRBAND, POLLWRNORM, POSIX_FADV_DONTNEED,
67    POSIX_FADV_NOREUSE, POSIX_FADV_NORMAL, POSIX_FADV_RANDOM, POSIX_FADV_SEQUENTIAL,
68    POSIX_FADV_WILLNEED, RWF_SUPPORTED, TFD_CLOEXEC, TFD_NONBLOCK, TFD_TIMER_ABSTIME,
69    TFD_TIMER_CANCEL_ON_SET, XATTR_CREATE, XATTR_NAME_MAX, XATTR_REPLACE, aio_context_t, errno,
70    error, f_owner_ex, io_event, io_uring_params,
71    io_uring_register_op_IORING_REGISTER_BUFFERS as IORING_REGISTER_BUFFERS,
72    io_uring_register_op_IORING_REGISTER_IOWQ_MAX_WORKERS as IORING_REGISTER_IOWQ_MAX_WORKERS,
73    io_uring_register_op_IORING_REGISTER_PBUF_RING as IORING_REGISTER_PBUF_RING,
74    io_uring_register_op_IORING_REGISTER_PBUF_STATUS as IORING_REGISTER_PBUF_STATUS,
75    io_uring_register_op_IORING_REGISTER_RING_FDS as IORING_REGISTER_RING_FDS,
76    io_uring_register_op_IORING_UNREGISTER_BUFFERS as IORING_UNREGISTER_BUFFERS,
77    io_uring_register_op_IORING_UNREGISTER_PBUF_RING as IORING_UNREGISTER_PBUF_RING,
78    io_uring_register_op_IORING_UNREGISTER_RING_FDS as IORING_UNREGISTER_RING_FDS, iocb, off_t,
79    pid_t, pollfd, pselect6_sigmask, sigset_t, statx, timespec, uapi, uid_t,
80};
81use std::cmp::Ordering;
82use std::collections::VecDeque;
83use std::marker::PhantomData;
84use std::sync::{Arc, atomic};
85use std::usize;
86use zerocopy::{Immutable, IntoBytes};
87
// Invokes `uapi::check_arch_independent_layout!` once for each uapi struct listed below, naming
// the fields of that struct that take part in the check.
// NOTE(review): presumably this is a compile-time assertion that each listed struct has the same
// layout on every supported architecture — confirm against the macro definition.
uapi::check_arch_independent_layout! {
    pollfd {
        fd,
        events,
        revents,
    }

    io_event {
        data,
        obj,
        res,
        res2,
    }

    iocb {
        aio_data,
        aio_key,
        aio_rw_flags,
        aio_lio_opcode,
        aio_reqprio,
        aio_fildes,
        aio_buf,
        aio_nbytes,
        aio_offset,
        aio_reserved2,
        aio_flags,
        aio_resfd,
    }

    statx_timestamp {
        tv_sec,
        tv_nsec,
    }

    statx {
        stx_mask,
        stx_blksize,
        stx_attributes,
        stx_nlink,
        stx_uid,
        stx_gid,
        stx_mode,
        stx_ino,
        stx_size,
        stx_blocks,
        stx_attributes_mask,
        stx_atime,
        stx_btime,
        stx_ctime,
        stx_mtime,
        stx_rdev_major,
        stx_rdev_minor,
        stx_dev_major,
        stx_dev_minor,
        stx_mnt_id,
        stx_dio_mem_align,
        stx_dio_offset_align,
        stx_subvol,
        stx_atomic_write_unit_min,
        stx_atomic_write_unit_max,
        stx_atomic_write_segments_max,
    }

    io_sqring_offsets {
        head,
        tail,
        ring_mask,
        ring_entries,
        flags,
        dropped,
        array,
        resv1,
        user_addr,
    }

    io_cqring_offsets {
        head,
        tail,
        ring_mask,
        ring_entries,
        overflow,
        cqes,
        flags,
        resv1,
        user_addr,
    }

    io_uring_params {
        sq_entries,
        cq_entries,
        flags,
        sq_thread_cpu,
        sq_thread_idle,
        features,
        wq_fd,
        resv,
        sq_off,
        cq_off,
    }

    io_uring_rsrc_update {
        offset,
        resv,
        data,
    }

    io_uring_buf_reg {
        ring_addr,
        ring_entries,
        bgid,
        flags,
        resv,
    }
}
202
// Constants from bionic/libc/include/sys/stat.h, written relative to 2^30:
// UTIME_NOW is 0x3fffffff and UTIME_OMIT is 0x3ffffffe.
const UTIME_NOW: i64 = (1 << 30) - 1;
const UTIME_OMIT: i64 = (1 << 30) - 2;
206
/// Multi-arch user reference to an `off_t` (`uapi::off_t` or `uapi::arch32::off_t`).
pub type OffsetPtr = MultiArchUserRef<uapi::off_t, uapi::arch32::off_t>;
/// Multi-arch user reference to an `iocb`; the same `iocb` type is used for both parameters.
pub type IocbPtr = MultiArchUserRef<iocb, iocb>;
/// Multi-arch user reference to an [`IocbPtr`].
pub type IocbPtrPtr = MultiArchUserRef<IocbPtr, IocbPtr>;
210
211pub fn sys_read(
212    locked: &mut Locked<Unlocked>,
213    current_task: &CurrentTask,
214    fd: FdNumber,
215    address: UserAddress,
216    length: usize,
217) -> Result<usize, Errno> {
218    let file = current_task.get_file(fd)?;
219    file.read(
220        locked,
221        current_task,
222        &mut UserBuffersOutputBuffer::unified_new_at(current_task, address, length)?,
223    )
224    .map_eintr(|| errno!(ERESTARTSYS))
225}
226
227pub fn sys_write(
228    locked: &mut Locked<Unlocked>,
229    current_task: &CurrentTask,
230    fd: FdNumber,
231    address: UserAddress,
232    length: usize,
233) -> Result<usize, Errno> {
234    let file = current_task.get_file(fd)?;
235    file.write(
236        locked,
237        current_task,
238        &mut UserBuffersInputBuffer::unified_new_at(current_task, address, length)?,
239    )
240    .map_eintr(|| errno!(ERESTARTSYS))
241}
242
243pub fn sys_close(
244    _locked: &mut Locked<Unlocked>,
245    current_task: &CurrentTask,
246    fd: FdNumber,
247) -> Result<(), Errno> {
248    current_task.live().files.close(fd)?;
249    Ok(())
250}
251
252pub fn sys_close_range(
253    locked: &mut Locked<Unlocked>,
254    current_task: &CurrentTask,
255    first: u32,
256    last: u32,
257    flags: u32,
258) -> Result<(), Errno> {
259    if first > last || flags & !(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC) != 0 {
260        return error!(EINVAL);
261    }
262    let live_task = current_task.live();
263    if flags & CLOSE_RANGE_UNSHARE != 0 {
264        live_task.files.unshare();
265    }
266    let in_range = |fd: FdNumber| fd.raw() as u32 >= first && fd.raw() as u32 <= last;
267    if flags & CLOSE_RANGE_CLOEXEC != 0 {
268        live_task.files.retain(locked, current_task, |fd, flags| {
269            if in_range(fd) {
270                *flags |= FdFlags::CLOEXEC;
271            }
272            true
273        });
274    } else {
275        live_task.files.retain(locked, current_task, |fd, _| !in_range(fd));
276    }
277    Ok(())
278}
279
280pub fn sys_lseek(
281    locked: &mut Locked<Unlocked>,
282    current_task: &CurrentTask,
283    fd: FdNumber,
284    offset: off_t,
285    whence: u32,
286) -> Result<off_t, Errno> {
287    let file = current_task.get_file(fd)?;
288    file.seek(locked, current_task, SeekTarget::from_raw(whence, offset)?)
289}
290
291pub fn sys_fcntl(
292    locked: &mut Locked<Unlocked>,
293    current_task: &CurrentTask,
294    fd: FdNumber,
295    cmd: u32,
296    arg: u64,
297) -> Result<SyscallResult, Errno> {
298    let file = match cmd {
299        F_DUPFD | F_DUPFD_CLOEXEC | F_GETFD | F_SETFD | F_GETFL => {
300            current_task.get_file_allowing_opath(fd)?
301        }
302        _ => current_task.get_file(fd)?,
303    };
304
305    security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
306
307    match cmd {
308        F_DUPFD | F_DUPFD_CLOEXEC => {
309            let fd_number = arg as i32;
310            let flags = if cmd == F_DUPFD_CLOEXEC { FdFlags::CLOEXEC } else { FdFlags::empty() };
311            let newfd = current_task.live().files.duplicate(
312                locked,
313                current_task,
314                fd,
315                TargetFdNumber::Minimum(FdNumber::from_raw(fd_number)),
316                flags,
317            )?;
318            Ok(newfd.into())
319        }
320        F_GETOWN => match file.get_async_owner() {
321            FileAsyncOwner::Unowned => Ok(0.into()),
322            FileAsyncOwner::Thread(tid) => Ok(tid.into()),
323            FileAsyncOwner::Process(pid) => Ok(pid.into()),
324            FileAsyncOwner::ProcessGroup(pgid) => Ok((-pgid).into()),
325        },
326        F_GETOWN_EX => {
327            let maybe_owner = match file.get_async_owner() {
328                FileAsyncOwner::Unowned => None,
329                FileAsyncOwner::Thread(tid) => {
330                    Some(uapi::f_owner_ex { type_: F_OWNER_TID as i32, pid: tid })
331                }
332                FileAsyncOwner::Process(pid) => {
333                    Some(uapi::f_owner_ex { type_: F_OWNER_PID as i32, pid })
334                }
335                FileAsyncOwner::ProcessGroup(pgid) => {
336                    Some(uapi::f_owner_ex { type_: F_OWNER_PGRP as i32, pid: pgid })
337                }
338            };
339            if let Some(owner) = maybe_owner {
340                let user_owner: UserRef<f_owner_ex> =
341                    UserRef::<uapi::f_owner_ex>::new(UserAddress::from(arg));
342                current_task.write_object(user_owner, &owner)?;
343            }
344            Ok(SUCCESS)
345        }
346        F_SETOWN => {
347            let pid = (arg as u32) as i32;
348            let owner = match pid.cmp(&0) {
349                Ordering::Equal => FileAsyncOwner::Unowned,
350                Ordering::Greater => FileAsyncOwner::Process(pid),
351                Ordering::Less => {
352                    FileAsyncOwner::ProcessGroup(pid.checked_neg().ok_or_else(|| errno!(EINVAL))?)
353                }
354            };
355            owner.validate(current_task)?;
356            // TODO: https://fxbug.dev/364569860 - Integrate with LSM file_setfowner hook.
357            file.set_async_owner(owner);
358            Ok(SUCCESS)
359        }
360        F_SETOWN_EX => {
361            let user_owner = UserRef::<uapi::f_owner_ex>::new(UserAddress::from(arg));
362            let requested_owner = current_task.read_object(user_owner)?;
363            let mut owner = match requested_owner.type_ as u32 {
364                F_OWNER_TID => FileAsyncOwner::Thread(requested_owner.pid),
365                F_OWNER_PID => FileAsyncOwner::Process(requested_owner.pid),
366                F_OWNER_PGRP => FileAsyncOwner::ProcessGroup(requested_owner.pid),
367                _ => return error!(EINVAL),
368            };
369            if requested_owner.pid == 0 {
370                owner = FileAsyncOwner::Unowned;
371            }
372            owner.validate(current_task)?;
373            file.set_async_owner(owner);
374            Ok(SUCCESS)
375        }
376        F_GETFD => Ok(current_task.live().files.get_fd_flags_allowing_opath(fd)?.into()),
377        F_SETFD => {
378            current_task
379                .live()
380                .files
381                .set_fd_flags_allowing_opath(fd, FdFlags::from_bits_truncate(arg as u32))?;
382            Ok(SUCCESS)
383        }
384        F_GETFL => {
385            // O_PATH allowed for:
386            //
387            //   Retrieving open file status flags using the fcntl(2)
388            //   F_GETFL operation: the returned flags will include the
389            //   bit O_PATH.
390            //
391            // See https://man7.org/linux/man-pages/man2/open.2.html
392            Ok(file.flags().into())
393        }
394        F_SETFL => {
395            let settable_flags = OpenFlags::APPEND
396                | OpenFlags::DIRECT
397                | OpenFlags::NOATIME
398                | OpenFlags::NONBLOCK
399                | OpenFlags::ASYNC;
400            let requested_flags =
401                OpenFlags::from_bits_truncate((arg as u32) & settable_flags.bits());
402
403            // If `NOATIME` flag is being set then check that it's allowed.
404            if requested_flags.contains(OpenFlags::NOATIME)
405                && !file.flags().contains(OpenFlags::NOATIME)
406            {
407                file.name.check_o_noatime_allowed(current_task)?;
408            }
409
410            file.update_file_flags(requested_flags, settable_flags);
411            Ok(SUCCESS)
412        }
413        F_SETLK | F_SETLKW | F_GETLK => {
414            let flock_ref =
415                MultiArchUserRef::<uapi::flock, uapi::arch32::flock>::new(current_task, arg);
416            let flock = current_task.read_multi_arch_object(flock_ref)?;
417            let cmd = RecordLockCommand::from_raw(cmd).ok_or_else(|| errno!(EINVAL))?;
418            if let Some(flock) = file.record_lock(locked, current_task, cmd, flock)? {
419                current_task.write_multi_arch_object(flock_ref, flock)?;
420            }
421            Ok(SUCCESS)
422        }
423        F_SETLK64 | F_SETLKW64 | F_GETLK64 | F_OFD_GETLK | F_OFD_SETLK | F_OFD_SETLKW => {
424            let flock_ref =
425                MultiArchUserRef::<uapi::flock, uapi::arch32::flock64>::new(current_task, arg);
426            let flock = current_task.read_multi_arch_object(flock_ref)?;
427            let cmd = RecordLockCommand::from_raw(cmd).ok_or_else(|| errno!(EINVAL))?;
428            if let Some(flock) = file.record_lock(locked, current_task, cmd, flock)? {
429                current_task.write_multi_arch_object(flock_ref, flock)?;
430            }
431            Ok(SUCCESS)
432        }
433        F_ADD_SEALS => {
434            if !file.can_write() {
435                // Cannot add seals if the file is not writable
436                return error!(EPERM);
437            }
438            let mut state = file.name.entry.node.write_guard_state.lock();
439            let flags = SealFlags::from_bits_truncate(arg as u32);
440            state.try_add_seal(flags)?;
441            Ok(SUCCESS)
442        }
443        F_GET_SEALS => {
444            let state = file.name.entry.node.write_guard_state.lock();
445            Ok(state.get_seals()?.into())
446        }
447        F_SETLEASE => {
448            let fsuid = current_task.current_creds().fsuid;
449            if fsuid != file.node().info().uid {
450                security::check_task_capable(current_task, CAP_LEASE)?;
451            }
452            let lease = FileLeaseType::from_bits(arg as u32)?;
453            file.set_lease(current_task, lease)?;
454            Ok(SUCCESS)
455        }
456        F_GETLEASE => Ok(file.get_lease(current_task).into()),
457        F_SETSIG => {
458            track_stub!(TODO("https://fxbug.dev/437972675"), "F_SETSIG");
459            return error!(EINVAL);
460        }
461        _ => file.fcntl(current_task, cmd, arg),
462    }
463}
464
465pub fn sys_pread64(
466    locked: &mut Locked<Unlocked>,
467    current_task: &CurrentTask,
468    fd: FdNumber,
469    address: UserAddress,
470    length: usize,
471    offset: off_t,
472) -> Result<usize, Errno> {
473    let file = current_task.get_file(fd)?;
474    let offset = offset.try_into().map_err(|_| errno!(EINVAL))?;
475    file.read_at(
476        locked,
477        current_task,
478        offset,
479        &mut UserBuffersOutputBuffer::unified_new_at(current_task, address, length)?,
480    )
481}
482
483pub fn sys_pwrite64(
484    locked: &mut Locked<Unlocked>,
485    current_task: &CurrentTask,
486    fd: FdNumber,
487    address: UserAddress,
488    length: usize,
489    offset: off_t,
490) -> Result<usize, Errno> {
491    let file = current_task.get_file(fd)?;
492    let offset = offset.try_into().map_err(|_| errno!(EINVAL))?;
493    file.write_at(
494        locked,
495        current_task,
496        offset,
497        &mut UserBuffersInputBuffer::unified_new_at(current_task, address, length)?,
498    )
499}
500
501fn do_readv(
502    locked: &mut Locked<Unlocked>,
503    current_task: &CurrentTask,
504    fd: FdNumber,
505    iovec_addr: IOVecPtr,
506    iovec_count: UserValue<i32>,
507    offset: Option<off_t>,
508    flags: u32,
509) -> Result<usize, Errno> {
510    if flags & !RWF_SUPPORTED != 0 {
511        return error!(EOPNOTSUPP);
512    }
513    if flags != 0 {
514        track_stub!(TODO("https://fxbug.dev/322875072"), "preadv2 flags", flags);
515    }
516    let file = current_task.get_file(fd)?;
517    let iovec = current_task.read_iovec(iovec_addr, iovec_count)?;
518    let mut data = UserBuffersOutputBuffer::unified_new(current_task, iovec)?;
519    if let Some(offset) = offset {
520        file.read_at(
521            locked,
522            current_task,
523            offset.try_into().map_err(|_| errno!(EINVAL))?,
524            &mut data,
525        )
526    } else {
527        file.read(locked, current_task, &mut data)
528    }
529}
530
531pub fn sys_readv(
532    locked: &mut Locked<Unlocked>,
533    current_task: &CurrentTask,
534    fd: FdNumber,
535    iovec_addr: IOVecPtr,
536    iovec_count: UserValue<i32>,
537) -> Result<usize, Errno> {
538    do_readv(locked, current_task, fd, iovec_addr, iovec_count, None, 0)
539}
540
541pub fn sys_preadv(
542    locked: &mut Locked<Unlocked>,
543    current_task: &CurrentTask,
544    fd: FdNumber,
545    iovec_addr: IOVecPtr,
546    iovec_count: UserValue<i32>,
547    offset: off_t,
548) -> Result<usize, Errno> {
549    do_readv(locked, current_task, fd, iovec_addr, iovec_count, Some(offset), 0)
550}
551
552pub fn sys_preadv2(
553    locked: &mut Locked<Unlocked>,
554    current_task: &CurrentTask,
555    fd: FdNumber,
556    iovec_addr: IOVecPtr,
557    iovec_count: UserValue<i32>,
558    offset: off_t,
559    _unused: SyscallArg, // On 32-bit systems, holds the upper 32 bits of offset.
560    flags: u32,
561) -> Result<usize, Errno> {
562    let offset = if offset == -1 { None } else { Some(offset) };
563    do_readv(locked, current_task, fd, iovec_addr, iovec_count, offset, flags)
564}
565
566fn do_writev(
567    locked: &mut Locked<Unlocked>,
568    current_task: &CurrentTask,
569    fd: FdNumber,
570    iovec_addr: IOVecPtr,
571    iovec_count: UserValue<i32>,
572    offset: Option<off_t>,
573    flags: u32,
574) -> Result<usize, Errno> {
575    if flags & !RWF_SUPPORTED != 0 {
576        return error!(EOPNOTSUPP);
577    }
578    if flags != 0 {
579        track_stub!(TODO("https://fxbug.dev/322874523"), "pwritev2 flags", flags);
580    }
581
582    let file = current_task.get_file(fd)?;
583    let iovec = current_task.read_iovec(iovec_addr, iovec_count)?;
584    let mut data = UserBuffersInputBuffer::unified_new(current_task, iovec)?;
585    let res = if let Some(offset) = offset {
586        file.write_at(
587            locked,
588            current_task,
589            offset.try_into().map_err(|_| errno!(EINVAL))?,
590            &mut data,
591        )
592    } else {
593        file.write(locked, current_task, &mut data)
594    };
595
596    match &res {
597        Err(e) if e.code == EFAULT => {
598            track_stub!(TODO("https://fxbug.dev/297370529"), "allow partial writes")
599        }
600        _ => (),
601    }
602
603    res
604}
605
606pub fn sys_writev(
607    locked: &mut Locked<Unlocked>,
608    current_task: &CurrentTask,
609    fd: FdNumber,
610    iovec_addr: IOVecPtr,
611    iovec_count: UserValue<i32>,
612) -> Result<usize, Errno> {
613    do_writev(locked, current_task, fd, iovec_addr, iovec_count, None, 0)
614}
615
616pub fn sys_pwritev(
617    locked: &mut Locked<Unlocked>,
618    current_task: &CurrentTask,
619    fd: FdNumber,
620    iovec_addr: IOVecPtr,
621    iovec_count: UserValue<i32>,
622    offset: off_t,
623) -> Result<usize, Errno> {
624    do_writev(locked, current_task, fd, iovec_addr, iovec_count, Some(offset), 0)
625}
626
627pub fn sys_pwritev2(
628    locked: &mut Locked<Unlocked>,
629    current_task: &CurrentTask,
630    fd: FdNumber,
631    iovec_addr: IOVecPtr,
632    iovec_count: UserValue<i32>,
633    offset: off_t,
634    _unused: SyscallArg, // On 32-bit systems, holds the upper 32 bits of offset.
635    flags: u32,
636) -> Result<usize, Errno> {
637    let offset = if offset == -1 { None } else { Some(offset) };
638    do_writev(locked, current_task, fd, iovec_addr, iovec_count, offset, flags)
639}
640
/// Multi-arch user reference to a `statfs` (`uapi::statfs` or `uapi::arch32::statfs`).
type StatFsPtr = MultiArchUserRef<uapi::statfs, uapi::arch32::statfs>;
642
643pub fn fstatfs<T32: IntoBytes + Immutable + TryFrom<uapi::statfs>>(
644    locked: &mut Locked<Unlocked>,
645    current_task: &CurrentTask,
646    fd: FdNumber,
647    user_buf: MultiArchUserRef<uapi::statfs, T32>,
648) -> Result<(), Errno> {
649    // O_PATH allowed for:
650    //
651    //   fstatfs(2) (since Linux 3.12).
652    //
653    // See https://man7.org/linux/man-pages/man2/open.2.html
654    let file = current_task.get_file_allowing_opath(fd)?;
655    let mut stat = file.fs.statfs(locked, current_task)?;
656    stat.f_flags |= file.name.mount.flags().bits() as i64;
657    current_task.write_multi_arch_object(user_buf, stat)?;
658    Ok(())
659}
660
661pub fn sys_fstatfs(
662    locked: &mut Locked<Unlocked>,
663    current_task: &CurrentTask,
664    fd: FdNumber,
665    user_buf: StatFsPtr,
666) -> Result<(), Errno> {
667    fstatfs(locked, current_task, fd, user_buf)
668}
669
670fn statfs<T32: IntoBytes + Immutable + TryFrom<uapi::statfs>>(
671    locked: &mut Locked<Unlocked>,
672    current_task: &CurrentTask,
673    user_path: UserCString,
674    user_buf: MultiArchUserRef<uapi::statfs, T32>,
675) -> Result<(), Errno> {
676    let name =
677        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
678    let fs = name.entry.node.fs();
679    let mut stat = fs.statfs(locked, current_task)?;
680    stat.f_flags |= name.mount.flags().bits() as i64;
681    current_task.write_multi_arch_object(user_buf, stat)?;
682    Ok(())
683}
684
685pub fn sys_statfs(
686    locked: &mut Locked<Unlocked>,
687    current_task: &CurrentTask,
688    user_path: UserCString,
689    user_buf: StatFsPtr,
690) -> Result<(), Errno> {
691    statfs(locked, current_task, user_path, user_buf)
692}
693
694pub fn sys_sendfile(
695    locked: &mut Locked<Unlocked>,
696    current_task: &CurrentTask,
697    out_fd: FdNumber,
698    in_fd: FdNumber,
699    user_offset: OffsetPtr,
700    count: i32,
701) -> Result<usize, Errno> {
702    splice::sendfile(locked, current_task, out_fd, in_fd, user_offset, count)
703}
704
705/// A convenient wrapper for Task::open_file_at.
706///
707/// Reads user_path from user memory and then calls through to Task::open_file_at.
708fn open_file_at(
709    locked: &mut Locked<Unlocked>,
710    current_task: &CurrentTask,
711    dir_fd: FdNumber,
712    user_path: UserCString,
713    flags: u32,
714    mode: FileMode,
715    resolve_flags: ResolveFlags,
716) -> Result<FileHandle, Errno> {
717    let path = current_task.read_path(user_path)?;
718    log_trace!(dir_fd:%, path:%; "open_file_at");
719    current_task.open_file_at(
720        locked,
721        dir_fd,
722        path.as_ref(),
723        OpenFlags::from_bits_truncate(flags),
724        mode,
725        resolve_flags,
726        AccessCheck::default(),
727    )
728}
729
730fn lookup_parent_at<T, F>(
731    locked: &mut Locked<Unlocked>,
732    current_task: &CurrentTask,
733    dir_fd: FdNumber,
734    user_path: UserCString,
735    callback: F,
736) -> Result<T, Errno>
737where
738    F: Fn(&mut Locked<Unlocked>, LookupContext, NamespaceNode, &FsStr) -> Result<T, Errno>,
739{
740    let path = current_task.read_path(user_path)?;
741    log_trace!(dir_fd:%, path:%; "lookup_parent_at");
742    if path.is_empty() {
743        return error!(ENOENT);
744    }
745    let mut context = LookupContext::default();
746    let (parent, basename) =
747        current_task.lookup_parent_at(locked, &mut context, dir_fd, path.as_ref())?;
748    callback(locked, context, parent, basename)
749}
750
751/// Options for lookup_at.
752#[derive(Debug, Default, Copy, Clone)]
753pub struct LookupFlags {
754    /// Whether AT_EMPTY_PATH was supplied.
755    allow_empty_path: bool,
756
757    /// Used to implement AT_SYMLINK_NOFOLLOW.
758    symlink_mode: SymlinkMode,
759
760    /// Automount directories on the path.
761    // TODO(https://fxbug.dev/297370602): Support the `AT_NO_AUTOMOUNT` flag.
762    #[allow(dead_code)]
763    automount: bool,
764}
765
766impl LookupFlags {
767    fn no_follow() -> Self {
768        Self { symlink_mode: SymlinkMode::NoFollow, ..Default::default() }
769    }
770
771    fn from_bits(flags: u32, allowed_flags: u32) -> Result<Self, Errno> {
772        if flags & !allowed_flags != 0 {
773            return error!(EINVAL);
774        }
775        let follow_symlinks = if allowed_flags & AT_SYMLINK_FOLLOW != 0 {
776            flags & AT_SYMLINK_FOLLOW != 0
777        } else {
778            flags & AT_SYMLINK_NOFOLLOW == 0
779        };
780        let automount =
781            if allowed_flags & AT_NO_AUTOMOUNT != 0 { flags & AT_NO_AUTOMOUNT == 0 } else { false };
782        if automount {
783            track_stub!(TODO("https://fxbug.dev/297370602"), "LookupFlags::automount");
784        }
785        Ok(LookupFlags {
786            allow_empty_path: (flags & AT_EMPTY_PATH != 0)
787                || (flags & O_PATH != 0 && flags & O_NOFOLLOW != 0),
788            symlink_mode: if follow_symlinks { SymlinkMode::Follow } else { SymlinkMode::NoFollow },
789            automount,
790        })
791    }
792}
793
794impl From<StatxFlags> for LookupFlags {
795    fn from(flags: StatxFlags) -> Self {
796        let lookup_flags = StatxFlags::AT_SYMLINK_NOFOLLOW
797            | StatxFlags::AT_EMPTY_PATH
798            | StatxFlags::AT_NO_AUTOMOUNT;
799        Self::from_bits((flags & lookup_flags).bits(), lookup_flags.bits()).unwrap()
800    }
801}
802
803pub fn lookup_at<L>(
804    locked: &mut Locked<L>,
805    current_task: &CurrentTask,
806    dir_fd: FdNumber,
807    user_path: UserCString,
808    options: LookupFlags,
809) -> Result<NamespaceNode, Errno>
810where
811    L: LockEqualOrBefore<FileOpsCore>,
812{
813    let path = current_task.read_path(user_path)?;
814    log_trace!(dir_fd:%, path:%; "lookup_at");
815    if path.is_empty() {
816        if options.allow_empty_path {
817            let (node, _) = current_task.resolve_dir_fd(
818                locked,
819                dir_fd,
820                path.as_ref(),
821                ResolveFlags::empty(),
822            )?;
823            return Ok(node);
824        }
825        return error!(ENOENT);
826    }
827
828    let mut parent_context = LookupContext::default();
829    let (parent, basename) =
830        current_task.lookup_parent_at(locked, &mut parent_context, dir_fd, path.as_ref())?;
831
832    let mut child_context = if parent_context.must_be_directory {
833        // The child must resolve to a directory. This is because a trailing slash
834        // was found in the path. If the child is a symlink, we should follow it.
835        // See https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap03.html#tag_21_03_00_75
836        parent_context.with(SymlinkMode::Follow)
837    } else {
838        parent_context.with(options.symlink_mode)
839    };
840
841    parent.lookup_child(locked, current_task, &mut child_context, basename)
842}
843
844fn do_openat(
845    locked: &mut Locked<Unlocked>,
846    current_task: &CurrentTask,
847    dir_fd: FdNumber,
848    user_path: UserCString,
849    flags: u32,
850    mode: FileMode,
851    resolve_flags: ResolveFlags,
852) -> Result<FdNumber, Errno> {
853    let file = open_file_at(locked, current_task, dir_fd, user_path, flags, mode, resolve_flags)?;
854    let fd_flags = get_fd_flags(flags);
855    current_task.add_file(locked, file, fd_flags)
856}
857
858pub fn sys_openat(
859    locked: &mut Locked<Unlocked>,
860    current_task: &CurrentTask,
861    dir_fd: FdNumber,
862    user_path: UserCString,
863    flags: u32,
864    mode: FileMode,
865) -> Result<FdNumber, Errno> {
866    do_openat(locked, current_task, dir_fd, user_path, flags, mode, ResolveFlags::empty())
867}
868
869pub fn sys_openat2(
870    locked: &mut Locked<Unlocked>,
871    current_task: &CurrentTask,
872    dir_fd: FdNumber,
873    user_path: UserCString,
874    how_ref: UserRef<uapi::open_how>,
875    size: usize,
876) -> Result<FdNumber, Errno> {
877    const EXPECTED_SIZE: usize = std::mem::size_of::<uapi::open_how>();
878    if size < EXPECTED_SIZE {
879        return error!(EINVAL);
880    }
881
882    let how = current_task.read_object(how_ref)?;
883
884    // If the `size` is greater than expected, then we need to check that any extra bytes after
885    // `open_how` are set to 0. This is needed to properly handle the case when `open_how` is
886    // extended with new fields in the future. There is no upper limit on the buffer size, so we
887    // limit size of each read to one page.
888    let mut pos = EXPECTED_SIZE;
889    while pos < size {
890        let length = std::cmp::min(size - pos, *PAGE_SIZE as usize);
891        let extra_bytes =
892            current_task.read_buffer(&UserBuffer { address: (how_ref.addr() + pos)?, length })?;
893        for b in extra_bytes {
894            if b != 0 {
895                return error!(E2BIG);
896            }
897        }
898        pos += length;
899    }
900
901    let flags: u32 = how.flags.try_into().map_err(|_| errno!(EINVAL))?;
902
903    // `mode` can be specified only with `O_CREAT` or `O_TMPFILE`.
904    let allowed_mode_flags = if (flags & (O_CREAT | O_TMPFILE)) > 0 { 0o7777 } else { 0 };
905    if (how.mode & !allowed_mode_flags) != 0 {
906        return error!(EINVAL);
907    }
908
909    let mode = FileMode::from_bits(how.mode.try_into().map_err(|_| errno!(EINVAL))?);
910    let resolve_flags =
911        ResolveFlags::from_bits(how.resolve.try_into().map_err(|_| errno!(EINVAL))?)
912            .ok_or_else(|| errno!(EINVAL))?;
913
914    if resolve_flags.contains(ResolveFlags::CACHED) {
915        track_stub!(TODO("https://fxbug.dev/326474574"), "openat2: RESOLVE_CACHED");
916        return error!(EAGAIN);
917    }
918
919    do_openat(locked, current_task, dir_fd, user_path, flags, mode, resolve_flags)
920}
921
922pub fn sys_faccessat(
923    locked: &mut Locked<Unlocked>,
924    current_task: &CurrentTask,
925    dir_fd: FdNumber,
926    user_path: UserCString,
927    mode: u32,
928) -> Result<(), Errno> {
929    sys_faccessat2(locked, current_task, dir_fd, user_path, mode, 0)
930}
931
932pub fn sys_faccessat2(
933    locked: &mut Locked<Unlocked>,
934    current_task: &CurrentTask,
935    dir_fd: FdNumber,
936    user_path: UserCString,
937    mode: u32,
938    flags: u32,
939) -> Result<(), Errno> {
940    let mut access_check = || {
941        let mode = Access::try_from(mode)?;
942        let lookup_flags = LookupFlags::from_bits(flags, AT_SYMLINK_NOFOLLOW | AT_EACCESS)?;
943        let name = lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?;
944        name.check_access(locked, current_task, mode, CheckAccessReason::Access)
945    };
946    // Unless `AT_ACCESS` is set, perform lookup & access-checking using real UID & GID.
947    if flags & AT_EACCESS == 0 {
948        let mut temporary_creds = Credentials::clone(&current_task.current_creds());
949        temporary_creds.fsuid = temporary_creds.uid;
950        temporary_creds.fsgid = temporary_creds.gid;
951
952        // access() for root users should use permitted capabilities instead of effective capabilities.
953        // access() for non-root users should use an empty set of capabilities.
954        if temporary_creds.uid == 0 {
955            temporary_creds.cap_effective = temporary_creds.cap_permitted;
956        } else {
957            temporary_creds.cap_effective = Capabilities::empty();
958        }
959
960        current_task.override_creds(temporary_creds.into(), access_check)
961    } else {
962        access_check()
963    }
964}
965
966pub fn sys_getdents64(
967    locked: &mut Locked<Unlocked>,
968    current_task: &CurrentTask,
969    fd: FdNumber,
970    user_buffer: UserAddress,
971    user_capacity: usize,
972) -> Result<usize, Errno> {
973    let file = current_task.get_file(fd)?;
974    let mut offset = file.offset.copy();
975    let mut sink = DirentSink64::new(current_task, &mut *offset, user_buffer, user_capacity);
976    let result = file.readdir(locked, current_task, &mut sink);
977    let ret = sink.map_result_with_actual(result);
978    offset.update();
979    ret
980}
981
982pub fn sys_chroot(
983    locked: &mut Locked<Unlocked>,
984    current_task: &CurrentTask,
985    user_path: UserCString,
986) -> Result<(), Errno> {
987    let name =
988        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
989    if !name.entry.node.is_dir() {
990        return error!(ENOTDIR);
991    }
992
993    current_task.fs().chroot(locked, current_task, name)?;
994    Ok(())
995}
996
997pub fn sys_chdir(
998    locked: &mut Locked<Unlocked>,
999    current_task: &CurrentTask,
1000    user_path: UserCString,
1001) -> Result<(), Errno> {
1002    let name =
1003        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
1004    if !name.entry.node.is_dir() {
1005        return error!(ENOTDIR);
1006    }
1007    current_task.fs().chdir(locked, current_task, name)
1008}
1009
1010pub fn sys_fchdir(
1011    locked: &mut Locked<Unlocked>,
1012    current_task: &CurrentTask,
1013    fd: FdNumber,
1014) -> Result<(), Errno> {
1015    // O_PATH allowed for:
1016    //
1017    //   fchdir(2), if the file descriptor refers to a directory
1018    //   (since Linux 3.5).
1019    //
1020    // See https://man7.org/linux/man-pages/man2/open.2.html
1021    let file = current_task.get_file_allowing_opath(fd)?;
1022    if !file.name.entry.node.is_dir() {
1023        return error!(ENOTDIR);
1024    }
1025    current_task.fs().chdir(locked, current_task, file.name.to_passive())
1026}
1027
1028pub fn sys_fstat(
1029    locked: &mut Locked<Unlocked>,
1030    current_task: &CurrentTask,
1031    fd: FdNumber,
1032    buffer: UserRef<uapi::stat>,
1033) -> Result<(), Errno> {
1034    // O_PATH allowed for:
1035    //
1036    //   fstat(2) (since Linux 3.6).
1037    //
1038    // See https://man7.org/linux/man-pages/man2/open.2.html
1039    let file = current_task.get_file_allowing_opath(fd)?;
1040    let result = file.node().stat(locked, current_task)?;
1041    current_task.write_object(buffer, &result)?;
1042    Ok(())
1043}
1044
/// User-space pointer to a stat buffer: `uapi::stat` for the native layout and
/// `uapi::arch32::stat64` for the `arch32` layout.
type StatPtr = MultiArchUserRef<uapi::stat, uapi::arch32::stat64>;
1046
1047// TODO(https://fxbug.dev/485370648) remove when unnecessary
1048fn get_fake_ion_stat() -> uapi::stat {
1049    uapi::stat {
1050        st_mode: uapi::S_IFCHR | 0o666,
1051        st_rdev: DeviceId::new(10, 59).bits(),
1052        st_nlink: 1,
1053        st_blksize: 4096,
1054        ..Default::default()
1055    }
1056}
1057
1058// TODO(https://fxbug.dev/485370648) remove when unnecessary
1059fn get_fake_ion_statx() -> statx {
1060    statx {
1061        stx_mask: uapi::STATX_BASIC_STATS,
1062        stx_mode: (uapi::S_IFCHR | 0o666) as u16,
1063        stx_rdev_major: 10,
1064        stx_rdev_minor: 59,
1065        stx_nlink: 1,
1066        stx_blksize: 4096,
1067        ..Default::default()
1068    }
1069}
1070
1071pub fn sys_fstatat64(
1072    locked: &mut Locked<Unlocked>,
1073    current_task: &CurrentTask,
1074    dir_fd: FdNumber,
1075    user_path: UserCString,
1076    buffer: StatPtr,
1077    flags: u32,
1078) -> Result<(), Errno> {
1079    let lookup_flags =
1080        LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)?;
1081    let result = match lookup_at(locked, current_task, dir_fd, user_path, lookup_flags) {
1082        Ok(name) => name.entry.node.stat(locked, current_task)?,
1083        // TODO(https://fxbug.dev/485370648) remove when unnecessary
1084        Err(e) if e == errno!(ENOENT) && current_task.kernel().features.fake_ion => {
1085            let path = current_task.read_path(user_path)?;
1086            if path == b"/dev/ion" {
1087                get_fake_ion_stat()
1088            } else {
1089                return Err(e);
1090            }
1091        }
1092        Err(e) => return Err(e),
1093    };
1094    current_task.write_multi_arch_object(buffer, result)?;
1095    Ok(())
1096}
1097
/// `newfstatat` is served by the same implementation as `fstatat64`.
pub use sys_fstatat64 as sys_newfstatat;
1099
1100pub fn sys_statx(
1101    locked: &mut Locked<Unlocked>,
1102    current_task: &CurrentTask,
1103    dir_fd: FdNumber,
1104    user_path: UserCString,
1105    flags: u32,
1106    mask: u32,
1107    statxbuf: UserRef<statx>,
1108) -> Result<(), Errno> {
1109    let statx_flags = StatxFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
1110    if statx_flags & (StatxFlags::AT_STATX_FORCE_SYNC | StatxFlags::AT_STATX_DONT_SYNC)
1111        == (StatxFlags::AT_STATX_FORCE_SYNC | StatxFlags::AT_STATX_DONT_SYNC)
1112    {
1113        return error!(EINVAL);
1114    }
1115
1116    let result =
1117        match lookup_at(locked, current_task, dir_fd, user_path, LookupFlags::from(statx_flags)) {
1118            Ok(name) => name.entry.node.statx(locked, current_task, statx_flags, mask)?,
1119            // TODO(https://fxbug.dev/485370648) remove when unnecessary
1120            Err(e) if e == errno!(ENOENT) && current_task.kernel().features.fake_ion => {
1121                let path = current_task.read_path(user_path)?;
1122                if path == b"/dev/ion" {
1123                    get_fake_ion_statx()
1124                } else {
1125                    return Err(e);
1126                }
1127            }
1128            Err(e) => return Err(e),
1129        };
1130    current_task.write_object(statxbuf, &result)?;
1131    Ok(())
1132}
1133
1134pub fn sys_readlinkat(
1135    locked: &mut Locked<Unlocked>,
1136    current_task: &CurrentTask,
1137    dir_fd: FdNumber,
1138    user_path: UserCString,
1139    buffer: UserAddress,
1140    buffer_size: usize,
1141) -> Result<usize, Errno> {
1142    let path = current_task.read_path(user_path)?;
1143    let lookup_flags = if path.is_empty() {
1144        if dir_fd == FdNumber::AT_FDCWD {
1145            return error!(ENOENT);
1146        }
1147        LookupFlags {
1148            allow_empty_path: true,
1149            symlink_mode: SymlinkMode::NoFollow,
1150            ..Default::default()
1151        }
1152    } else {
1153        LookupFlags::no_follow()
1154    };
1155    let name = lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?;
1156
1157    let target = match name.readlink(locked, current_task)? {
1158        SymlinkTarget::Path(path) => path,
1159        SymlinkTarget::Node(node) => node.path(&current_task.fs()),
1160    };
1161
1162    if buffer_size == 0 {
1163        return error!(EINVAL);
1164    }
1165    // Cap the returned length at buffer_size.
1166    let length = std::cmp::min(buffer_size, target.len());
1167    current_task.write_memory(buffer, &target[..length])?;
1168    Ok(length)
1169}
1170
1171pub fn sys_truncate(
1172    locked: &mut Locked<Unlocked>,
1173    current_task: &CurrentTask,
1174    user_path: UserCString,
1175    length: off_t,
1176) -> Result<(), Errno> {
1177    let length = length.try_into().map_err(|_| errno!(EINVAL))?;
1178    let name =
1179        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
1180    name.truncate(locked, current_task, length)?;
1181    Ok(())
1182}
1183
1184pub fn sys_ftruncate(
1185    locked: &mut Locked<Unlocked>,
1186    current_task: &CurrentTask,
1187    fd: FdNumber,
1188    length: off_t,
1189) -> Result<(), Errno> {
1190    let length = length.try_into().map_err(|_| errno!(EINVAL))?;
1191    let file = current_task.get_file(fd)?;
1192    file.ftruncate(locked, current_task, length)?;
1193    Ok(())
1194}
1195
1196pub fn sys_mkdirat(
1197    locked: &mut Locked<Unlocked>,
1198    current_task: &CurrentTask,
1199    dir_fd: FdNumber,
1200    user_path: UserCString,
1201    mode: FileMode,
1202) -> Result<(), Errno> {
1203    let path = current_task.read_path(user_path)?;
1204
1205    if path.is_empty() {
1206        return error!(ENOENT);
1207    }
1208    let (parent, basename) = current_task.lookup_parent_at(
1209        locked,
1210        &mut LookupContext::default(),
1211        dir_fd,
1212        path.as_ref(),
1213    )?;
1214    parent.create_node(
1215        locked,
1216        current_task,
1217        basename,
1218        mode.with_type(FileMode::IFDIR),
1219        DeviceId::NONE,
1220    )?;
1221    Ok(())
1222}
1223
1224pub fn sys_mknodat(
1225    locked: &mut Locked<Unlocked>,
1226    current_task: &CurrentTask,
1227    dir_fd: FdNumber,
1228    user_path: UserCString,
1229    mode: FileMode,
1230    dev: DeviceId,
1231) -> Result<(), Errno> {
1232    let file_type = match mode.fmt() {
1233        FileMode::IFREG
1234        | FileMode::IFCHR
1235        | FileMode::IFBLK
1236        | FileMode::IFIFO
1237        | FileMode::IFSOCK => mode.fmt(),
1238        FileMode::EMPTY => FileMode::IFREG,
1239        _ => return error!(EINVAL),
1240    };
1241    lookup_parent_at(locked, current_task, dir_fd, user_path, |locked, _, parent, basename| {
1242        parent.create_node(locked, current_task, basename, mode.with_type(file_type), dev)
1243    })?;
1244    Ok(())
1245}
1246
1247pub fn sys_linkat(
1248    locked: &mut Locked<Unlocked>,
1249    current_task: &CurrentTask,
1250    old_dir_fd: FdNumber,
1251    old_user_path: UserCString,
1252    new_dir_fd: FdNumber,
1253    new_user_path: UserCString,
1254    flags: u32,
1255) -> Result<(), Errno> {
1256    if flags & !(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH) != 0 {
1257        track_stub!(TODO("https://fxbug.dev/322875706"), "linkat unknown flags", flags);
1258        return error!(EINVAL);
1259    }
1260
1261    if flags & AT_EMPTY_PATH != 0 {
1262        security::check_task_capable(current_task, CAP_DAC_READ_SEARCH)
1263            .map_err(|_| errno!(ENOENT))?;
1264    }
1265
1266    let flags = LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_FOLLOW)?;
1267    let target = lookup_at(locked, current_task, old_dir_fd, old_user_path, flags)?;
1268    lookup_parent_at(
1269        locked,
1270        current_task,
1271        new_dir_fd,
1272        new_user_path,
1273        |locked, context, parent, basename| {
1274            // The path to a new link cannot end in `/`. That would imply that we are dereferencing
1275            // the link to a directory.
1276            if context.must_be_directory {
1277                return error!(ENOENT);
1278            }
1279            if target.mount != parent.mount {
1280                return error!(EXDEV);
1281            }
1282            parent.link(locked, current_task, basename, &target.entry.node)
1283        },
1284    )?;
1285
1286    Ok(())
1287}
1288
1289pub fn sys_unlinkat(
1290    locked: &mut Locked<Unlocked>,
1291    current_task: &CurrentTask,
1292    dir_fd: FdNumber,
1293    user_path: UserCString,
1294    flags: u32,
1295) -> Result<(), Errno> {
1296    if flags & !AT_REMOVEDIR != 0 {
1297        return error!(EINVAL);
1298    }
1299    let kind =
1300        if flags & AT_REMOVEDIR != 0 { UnlinkKind::Directory } else { UnlinkKind::NonDirectory };
1301    lookup_parent_at(
1302        locked,
1303        current_task,
1304        dir_fd,
1305        user_path,
1306        |locked, context, parent, basename| {
1307            parent.unlink(locked, current_task, basename, kind, context.must_be_directory)
1308        },
1309    )?;
1310    Ok(())
1311}
1312
1313pub fn sys_renameat2(
1314    locked: &mut Locked<Unlocked>,
1315    current_task: &CurrentTask,
1316    old_dir_fd: FdNumber,
1317    old_user_path: UserCString,
1318    new_dir_fd: FdNumber,
1319    new_user_path: UserCString,
1320    flags: u32,
1321) -> Result<(), Errno> {
1322    let flags = RenameFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
1323    if flags.intersects(RenameFlags::INTERNAL) {
1324        return error!(EINVAL);
1325    };
1326
1327    // RENAME_EXCHANGE cannot be combined with the other flags.
1328    if flags.contains(RenameFlags::EXCHANGE)
1329        && flags.intersects(RenameFlags::NOREPLACE | RenameFlags::WHITEOUT)
1330    {
1331        return error!(EINVAL);
1332    }
1333
1334    // RENAME_WHITEOUT is not supported.
1335    if flags.contains(RenameFlags::WHITEOUT) {
1336        track_stub!(TODO("https://fxbug.dev/322875416"), "RENAME_WHITEOUT");
1337        return error!(ENOSYS);
1338    };
1339
1340    let mut lookup = |dir_fd, user_path| {
1341        lookup_parent_at(locked, current_task, dir_fd, user_path, |_, _, parent, basename| {
1342            Ok((parent, basename.to_owned()))
1343        })
1344    };
1345
1346    let (old_parent, old_basename) = lookup(old_dir_fd, old_user_path)?;
1347    let (new_parent, new_basename) = lookup(new_dir_fd, new_user_path)?;
1348
1349    if new_basename.len() > NAME_MAX as usize {
1350        return error!(ENAMETOOLONG);
1351    }
1352
1353    NamespaceNode::rename(
1354        locked,
1355        current_task,
1356        &old_parent,
1357        old_basename.as_ref(),
1358        &new_parent,
1359        new_basename.as_ref(),
1360        flags,
1361    )
1362}
1363
1364pub fn sys_fchmod(
1365    locked: &mut Locked<Unlocked>,
1366    current_task: &CurrentTask,
1367    fd: FdNumber,
1368    mode: FileMode,
1369) -> Result<(), Errno> {
1370    // Remove the filetype from the mode.
1371    let mode = mode & FileMode::PERMISSIONS;
1372    let file = current_task.get_file(fd)?;
1373    file.name.entry.node.chmod(locked, current_task, &file.name.mount, mode)?;
1374    file.name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1375    Ok(())
1376}
1377
1378pub fn sys_fchmodat(
1379    locked: &mut Locked<Unlocked>,
1380    current_task: &CurrentTask,
1381    dir_fd: FdNumber,
1382    user_path: UserCString,
1383    mode: FileMode,
1384) -> Result<(), Errno> {
1385    // Remove the filetype from the mode.
1386    let mode = mode & FileMode::PERMISSIONS;
1387    let name = lookup_at(locked, current_task, dir_fd, user_path, LookupFlags::default())?;
1388    name.entry.node.chmod(locked, current_task, &name.mount, mode)?;
1389    name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1390    Ok(())
1391}
1392
1393fn maybe_uid(id: u32) -> Option<uid_t> {
1394    if id == u32::MAX { None } else { Some(id) }
1395}
1396
1397pub fn sys_fchown(
1398    locked: &mut Locked<Unlocked>,
1399    current_task: &CurrentTask,
1400    fd: FdNumber,
1401    owner: u32,
1402    group: u32,
1403) -> Result<(), Errno> {
1404    let file = current_task.get_file(fd)?;
1405    file.name.entry.node.chown(
1406        locked,
1407        current_task,
1408        &file.name.mount,
1409        maybe_uid(owner),
1410        maybe_uid(group),
1411    )?;
1412    file.name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1413    Ok(())
1414}
1415
1416pub fn sys_fchownat(
1417    locked: &mut Locked<Unlocked>,
1418    current_task: &CurrentTask,
1419    dir_fd: FdNumber,
1420    user_path: UserCString,
1421    owner: u32,
1422    group: u32,
1423    flags: u32,
1424) -> Result<(), Errno> {
1425    let flags = LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW)?;
1426    let name = lookup_at(locked, current_task, dir_fd, user_path, flags)?;
1427    name.entry.node.chown(locked, current_task, &name.mount, maybe_uid(owner), maybe_uid(group))?;
1428    name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1429    Ok(())
1430}
1431
1432fn read_xattr_name(current_task: &CurrentTask, name_addr: UserCString) -> Result<FsString, Errno> {
1433    let name = current_task
1434        .read_c_string_to_vec(name_addr, XATTR_NAME_MAX as usize + 1)
1435        .map_err(|e| if e == ENAMETOOLONG { errno!(ERANGE) } else { e })?;
1436    if name.is_empty() {
1437        return error!(ERANGE);
1438    }
1439    let dot_index = memchr::memchr(b'.', &name).ok_or_else(|| errno!(ENOTSUP))?;
1440    if name[dot_index + 1..].is_empty() {
1441        return error!(EINVAL);
1442    }
1443    match &name[..dot_index] {
1444        b"user" | b"security" | b"trusted" | b"system" => {}
1445        _ => return error!(ENOTSUP),
1446    }
1447    Ok(name)
1448}
1449
1450fn do_getxattr(
1451    locked: &mut Locked<Unlocked>,
1452    current_task: &CurrentTask,
1453    node: &NamespaceNode,
1454    name_addr: UserCString,
1455    value_addr: UserAddress,
1456    size: usize,
1457) -> Result<usize, Errno> {
1458    let name = read_xattr_name(current_task, name_addr)?;
1459    let value =
1460        match node.entry.node.get_xattr(locked, current_task, &node.mount, name.as_ref(), size)? {
1461            ValueOrSize::Size(s) => return Ok(s),
1462            ValueOrSize::Value(v) => v,
1463        };
1464    if size == 0 {
1465        return Ok(value.len());
1466    }
1467    if size < value.len() {
1468        return error!(ERANGE);
1469    }
1470    current_task.write_memory(value_addr, &value)
1471}
1472
1473pub fn sys_getxattr(
1474    locked: &mut Locked<Unlocked>,
1475    current_task: &CurrentTask,
1476    path_addr: UserCString,
1477    name_addr: UserCString,
1478    value_addr: UserAddress,
1479    size: usize,
1480) -> Result<usize, Errno> {
1481    let node =
1482        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1483    do_getxattr(locked, current_task, &node, name_addr, value_addr, size)
1484}
1485
1486pub fn sys_fgetxattr(
1487    locked: &mut Locked<Unlocked>,
1488    current_task: &CurrentTask,
1489    fd: FdNumber,
1490    name_addr: UserCString,
1491    value_addr: UserAddress,
1492    size: usize,
1493) -> Result<usize, Errno> {
1494    let file = current_task.get_file(fd)?;
1495    do_getxattr(locked, current_task, &file.name, name_addr, value_addr, size)
1496}
1497
1498pub fn sys_lgetxattr(
1499    locked: &mut Locked<Unlocked>,
1500    current_task: &CurrentTask,
1501    path_addr: UserCString,
1502    name_addr: UserCString,
1503    value_addr: UserAddress,
1504    size: usize,
1505) -> Result<usize, Errno> {
1506    let node =
1507        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1508    do_getxattr(locked, current_task, &node, name_addr, value_addr, size)
1509}
1510
1511fn do_setxattr(
1512    locked: &mut Locked<Unlocked>,
1513    current_task: &CurrentTask,
1514    node: &NamespaceNode,
1515    name_addr: UserCString,
1516    value_addr: UserAddress,
1517    size: usize,
1518    flags: u32,
1519) -> Result<(), Errno> {
1520    if size > XATTR_NAME_MAX as usize {
1521        return error!(E2BIG);
1522    }
1523
1524    let op = match flags {
1525        0 => XattrOp::Set,
1526        XATTR_CREATE => XattrOp::Create,
1527        XATTR_REPLACE => XattrOp::Replace,
1528        _ => return error!(EINVAL),
1529    };
1530    let name = read_xattr_name(current_task, name_addr)?;
1531    let value = FsString::from(current_task.read_memory_to_vec(value_addr, size)?);
1532    node.entry.node.set_xattr(locked, current_task, &node.mount, name.as_ref(), value.as_ref(), op)
1533}
1534
1535pub fn sys_fsetxattr(
1536    locked: &mut Locked<Unlocked>,
1537    current_task: &CurrentTask,
1538    fd: FdNumber,
1539    name_addr: UserCString,
1540    value_addr: UserAddress,
1541    size: usize,
1542    flags: u32,
1543) -> Result<(), Errno> {
1544    let file = current_task.get_file(fd)?;
1545    do_setxattr(locked, current_task, &file.name, name_addr, value_addr, size, flags)
1546}
1547
1548pub fn sys_lsetxattr(
1549    locked: &mut Locked<Unlocked>,
1550    current_task: &CurrentTask,
1551    path_addr: UserCString,
1552    name_addr: UserCString,
1553    value_addr: UserAddress,
1554    size: usize,
1555    flags: u32,
1556) -> Result<(), Errno> {
1557    let node =
1558        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1559    do_setxattr(locked, current_task, &node, name_addr, value_addr, size, flags)
1560}
1561
1562pub fn sys_setxattr(
1563    locked: &mut Locked<Unlocked>,
1564    current_task: &CurrentTask,
1565    path_addr: UserCString,
1566    name_addr: UserCString,
1567    value_addr: UserAddress,
1568    size: usize,
1569    flags: u32,
1570) -> Result<(), Errno> {
1571    let node =
1572        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1573    do_setxattr(locked, current_task, &node, name_addr, value_addr, size, flags)
1574}
1575
1576fn do_removexattr(
1577    locked: &mut Locked<Unlocked>,
1578    current_task: &CurrentTask,
1579    node: &NamespaceNode,
1580    name_addr: UserCString,
1581) -> Result<(), Errno> {
1582    let mode = node.entry.node.info().mode;
1583    if mode.is_chr() || mode.is_fifo() {
1584        return error!(EPERM);
1585    }
1586    let name = read_xattr_name(current_task, name_addr)?;
1587    node.entry.node.remove_xattr(locked, current_task, &node.mount, name.as_ref())
1588}
1589
1590pub fn sys_removexattr(
1591    locked: &mut Locked<Unlocked>,
1592    current_task: &CurrentTask,
1593    path_addr: UserCString,
1594    name_addr: UserCString,
1595) -> Result<(), Errno> {
1596    let node =
1597        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1598    do_removexattr(locked, current_task, &node, name_addr)
1599}
1600
1601pub fn sys_lremovexattr(
1602    locked: &mut Locked<Unlocked>,
1603    current_task: &CurrentTask,
1604    path_addr: UserCString,
1605    name_addr: UserCString,
1606) -> Result<(), Errno> {
1607    let node =
1608        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1609    do_removexattr(locked, current_task, &node, name_addr)
1610}
1611
1612pub fn sys_fremovexattr(
1613    locked: &mut Locked<Unlocked>,
1614    current_task: &CurrentTask,
1615    fd: FdNumber,
1616    name_addr: UserCString,
1617) -> Result<(), Errno> {
1618    let file = current_task.get_file(fd)?;
1619    do_removexattr(locked, current_task, &file.name, name_addr)
1620}
1621
1622fn do_listxattr(
1623    locked: &mut Locked<Unlocked>,
1624    current_task: &CurrentTask,
1625    node: &NamespaceNode,
1626    list_addr: UserAddress,
1627    size: usize,
1628) -> Result<usize, Errno> {
1629    let security_xattr = security::fs_node_listsecurity(current_task, &node.entry.node);
1630    let xattrs = match node.entry.node.list_xattrs(locked, current_task, size) {
1631        Ok(ValueOrSize::Size(s)) => return Ok(s + security_xattr.map_or(0, |s| s.len() + 1)),
1632        Ok(ValueOrSize::Value(mut v)) => {
1633            if let Some(security_value) = security_xattr {
1634                if !v.contains(&security_value) {
1635                    v.push(security_value);
1636                }
1637            }
1638            v
1639        }
1640        Err(e) => {
1641            if e.code != ENOTSUP || security_xattr.is_none() {
1642                return Err(e);
1643            }
1644            vec![security_xattr.unwrap()]
1645        }
1646    };
1647
1648    let mut list = vec![];
1649    for name in xattrs.iter() {
1650        list.extend_from_slice(name);
1651        list.push(b'\0');
1652    }
1653    if size == 0 {
1654        return Ok(list.len());
1655    }
1656    if size < list.len() {
1657        return error!(ERANGE);
1658    }
1659    current_task.write_memory(list_addr, &list)
1660}
1661
1662pub fn sys_listxattr(
1663    locked: &mut Locked<Unlocked>,
1664    current_task: &CurrentTask,
1665    path_addr: UserCString,
1666    list_addr: UserAddress,
1667    size: usize,
1668) -> Result<usize, Errno> {
1669    let node =
1670        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1671    do_listxattr(locked, current_task, &node, list_addr, size)
1672}
1673
1674pub fn sys_llistxattr(
1675    locked: &mut Locked<Unlocked>,
1676    current_task: &CurrentTask,
1677    path_addr: UserCString,
1678    list_addr: UserAddress,
1679    size: usize,
1680) -> Result<usize, Errno> {
1681    let node =
1682        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1683    do_listxattr(locked, current_task, &node, list_addr, size)
1684}
1685
1686pub fn sys_flistxattr(
1687    locked: &mut Locked<Unlocked>,
1688    current_task: &CurrentTask,
1689    fd: FdNumber,
1690    list_addr: UserAddress,
1691    size: usize,
1692) -> Result<usize, Errno> {
1693    let file = current_task.get_file(fd)?;
1694    do_listxattr(locked, current_task, &file.name, list_addr, size)
1695}
1696
1697pub fn sys_getcwd(
1698    _locked: &mut Locked<Unlocked>,
1699    current_task: &CurrentTask,
1700    buf: UserAddress,
1701    size: usize,
1702) -> Result<usize, Errno> {
1703    let root = current_task.fs().root();
1704    let cwd = current_task.fs().cwd();
1705    let mut user_cwd = match cwd.path_from_root(Some(&root)) {
1706        PathWithReachability::Reachable(path) => path,
1707        PathWithReachability::Unreachable(mut path) => {
1708            let mut combined = vec![];
1709            combined.extend_from_slice(b"(unreachable)");
1710            combined.append(&mut path);
1711            combined.into()
1712        }
1713    };
1714    user_cwd.push(b'\0');
1715    if user_cwd.len() > size {
1716        return error!(ERANGE);
1717    }
1718    current_task.write_memory(buf, &user_cwd)?;
1719    Ok(user_cwd.len())
1720}
1721
1722pub fn sys_umask(
1723    _locked: &mut Locked<Unlocked>,
1724    current_task: &CurrentTask,
1725    umask: FileMode,
1726) -> Result<FileMode, Errno> {
1727    Ok(current_task.fs().set_umask(umask))
1728}
1729
1730fn get_fd_flags(flags: u32) -> FdFlags {
1731    if flags & O_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() }
1732}
1733
1734pub fn sys_pipe2(
1735    locked: &mut Locked<Unlocked>,
1736    current_task: &CurrentTask,
1737    user_pipe: UserRef<FdNumber>,
1738    flags: u32,
1739) -> Result<(), Errno> {
1740    let supported_file_flags = OpenFlags::NONBLOCK | OpenFlags::DIRECT;
1741    if flags & !(O_CLOEXEC | supported_file_flags.bits()) != 0 {
1742        return error!(EINVAL);
1743    }
1744    let (read, write) = new_pipe(locked, current_task)?;
1745
1746    let file_flags = OpenFlags::from_bits_truncate(flags & supported_file_flags.bits());
1747    read.update_file_flags(file_flags, supported_file_flags);
1748    write.update_file_flags(file_flags, supported_file_flags);
1749
1750    let fd_flags = get_fd_flags(flags);
1751    let fd_read = current_task.add_file(locked, read, fd_flags)?;
1752    let fd_write = current_task.add_file(locked, write, fd_flags)?;
1753    log_trace!("pipe2 -> [{:#x}, {:#x}]", fd_read.raw(), fd_write.raw());
1754
1755    current_task.write_object(user_pipe, &fd_read)?;
1756    let user_pipe = user_pipe.next()?;
1757    current_task.write_object(user_pipe, &fd_write)?;
1758
1759    Ok(())
1760}
1761
1762pub fn sys_ioctl(
1763    locked: &mut Locked<Unlocked>,
1764    current_task: &CurrentTask,
1765    fd: FdNumber,
1766    request: u32,
1767    arg: SyscallArg,
1768) -> Result<SyscallResult, Errno> {
1769    match request {
1770        FIOCLEX | FIONCLEX => {
1771            current_task.live().files.ioctl_fd_flags(current_task, fd, request)?;
1772            Ok(SUCCESS)
1773        }
1774        _ => {
1775            let file = current_task.get_file(fd)?;
1776            file.ioctl(locked, current_task, request, arg)
1777        }
1778    }
1779}
1780
1781pub fn sys_symlinkat(
1782    locked: &mut Locked<Unlocked>,
1783    current_task: &CurrentTask,
1784    user_target: UserCString,
1785    new_dir_fd: FdNumber,
1786    user_path: UserCString,
1787) -> Result<(), Errno> {
1788    let target = current_task.read_path(user_target)?;
1789    if target.is_empty() {
1790        return error!(ENOENT);
1791    }
1792
1793    let path = current_task.read_path(user_path)?;
1794    // TODO: This check could probably be moved into parent.symlink(..).
1795    if path.is_empty() {
1796        return error!(ENOENT);
1797    }
1798
1799    let res = lookup_parent_at(
1800        locked,
1801        current_task,
1802        new_dir_fd,
1803        user_path,
1804        |locked, context, parent, basename| {
1805            // The path to a new symlink cannot end in `/`. That would imply that we are dereferencing
1806            // the symlink to a directory.
1807            //
1808            // See https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap03.html#tag_21_03_00_75
1809            if context.must_be_directory {
1810                return error!(ENOENT);
1811            }
1812            parent.create_symlink(locked, current_task, basename, target.as_ref())
1813        },
1814    );
1815    res?;
1816    Ok(())
1817}
1818
1819pub fn sys_dup(
1820    locked: &mut Locked<Unlocked>,
1821    current_task: &CurrentTask,
1822    oldfd: FdNumber,
1823) -> Result<FdNumber, Errno> {
1824    current_task.live().files.duplicate(
1825        locked,
1826        current_task,
1827        oldfd,
1828        TargetFdNumber::Default,
1829        FdFlags::empty(),
1830    )
1831}
1832
1833pub fn sys_dup3(
1834    locked: &mut Locked<Unlocked>,
1835    current_task: &CurrentTask,
1836    oldfd: FdNumber,
1837    newfd: FdNumber,
1838    flags: u32,
1839) -> Result<FdNumber, Errno> {
1840    if oldfd == newfd {
1841        return error!(EINVAL);
1842    }
1843    if flags & !O_CLOEXEC != 0 {
1844        return error!(EINVAL);
1845    }
1846    let fd_flags = get_fd_flags(flags);
1847    current_task.live().files.duplicate(
1848        locked,
1849        current_task,
1850        oldfd,
1851        TargetFdNumber::Specific(newfd),
1852        fd_flags,
1853    )?;
1854    Ok(newfd)
1855}
1856
1857/// A memfd file descriptor cannot have a name longer than 250 bytes, including
1858/// the null terminator.
1859///
1860/// See Errors section of https://man7.org/linux/man-pages/man2/memfd_create.2.html
1861const MEMFD_NAME_MAX_LEN: usize = 250;
1862
1863pub fn sys_memfd_create(
1864    locked: &mut Locked<Unlocked>,
1865    current_task: &CurrentTask,
1866    user_name: UserCString,
1867    flags: u32,
1868) -> Result<FdNumber, Errno> {
1869    const HUGE_SHIFTED_MASK: u32 = MFD_HUGE_MASK << MFD_HUGE_SHIFT;
1870
1871    if flags
1872        & !(MFD_CLOEXEC
1873            | MFD_ALLOW_SEALING
1874            | MFD_HUGETLB
1875            | HUGE_SHIFTED_MASK
1876            | MFD_NOEXEC_SEAL
1877            | MFD_EXEC)
1878        != 0
1879    {
1880        track_stub!(TODO("https://fxbug.dev/322875665"), "memfd_create unknown flags", flags);
1881        return error!(EINVAL);
1882    }
1883
1884    let _huge_page_size = if flags & MFD_HUGETLB != 0 {
1885        Some(flags & HUGE_SHIFTED_MASK)
1886    } else {
1887        if flags & HUGE_SHIFTED_MASK != 0 {
1888            return error!(EINVAL);
1889        }
1890        None
1891    };
1892
1893    let name = current_task
1894        .read_c_string_to_vec(user_name, MEMFD_NAME_MAX_LEN)
1895        .map_err(|e| if e == ENAMETOOLONG { errno!(EINVAL) } else { e })?;
1896
1897    // This behavior matches MEMFD_NOEXEC_SCOPE_EXEC, which states:
1898    //   > memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_EXEC was set.
1899    //
1900    // This behavior can be changed on Linux via sysctl vm.memfd_noexec, which is pid namespaced.
1901    // We do not currently support changing this behavior.
1902    let seals = if flags & MFD_NOEXEC_SEAL != 0 {
1903        SealFlags::NO_EXEC
1904    } else if flags & MFD_ALLOW_SEALING != 0 {
1905        SealFlags::empty()
1906    } else {
1907        // Forbid sealing, by sealing the seal operation.
1908        SealFlags::SEAL
1909    };
1910
1911    let file = new_memfd(locked, current_task, name, seals, OpenFlags::RDWR)?;
1912
1913    let mut fd_flags = FdFlags::empty();
1914    if flags & MFD_CLOEXEC != 0 {
1915        fd_flags |= FdFlags::CLOEXEC;
1916    }
1917    let fd = current_task.add_file(locked, file, fd_flags)?;
1918    Ok(fd)
1919}
1920
1921pub fn sys_mount(
1922    locked: &mut Locked<Unlocked>,
1923    current_task: &CurrentTask,
1924    source_addr: UserCString,
1925    target_addr: UserCString,
1926    filesystemtype_addr: UserCString,
1927    flags: u32,
1928    data_addr: UserCString,
1929) -> Result<(), Errno> {
1930    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1931
1932    let flags = MountFlags::from_bits(flags).ok_or_else(|| {
1933        track_stub!(
1934            TODO("https://fxbug.dev/322875327"),
1935            "mount unknown flags",
1936            flags & !MountFlags::from_bits_truncate(flags).bits()
1937        );
1938        errno!(EINVAL)
1939    })?;
1940
1941    let target =
1942        lookup_at(locked, current_task, FdNumber::AT_FDCWD, target_addr, LookupFlags::default())?;
1943
1944    security::sb_mount(current_task, &target, flags)?;
1945
1946    if flags.contains(MountFlags::REMOUNT) {
1947        do_mount_remount(current_task, target, flags, data_addr)
1948    } else if flags.contains(MountFlags::BIND) {
1949        do_mount_bind(locked, current_task, source_addr, target, flags)
1950    } else if flags.intersects(MountFlags::SHARED | MountFlags::PRIVATE | MountFlags::DOWNSTREAM) {
1951        do_mount_change_propagation_type(current_task, target, flags)
1952    } else if flags.contains(MountFlags::MOVE) {
1953        do_mount_move(locked, current_task, source_addr, target)
1954    } else {
1955        do_mount_create(
1956            locked,
1957            current_task,
1958            source_addr,
1959            target,
1960            filesystemtype_addr,
1961            data_addr,
1962            flags,
1963        )
1964    }
1965}
1966
1967fn do_mount_remount(
1968    current_task: &CurrentTask,
1969    target: NamespaceNode,
1970    flags: MountFlags,
1971    data_addr: UserCString,
1972) -> Result<(), Errno> {
1973    if !data_addr.is_null() {
1974        track_stub!(TODO("https://fxbug.dev/322875506"), "MS_REMOUNT: Updating data");
1975    }
1976    let mount = target.mount_if_root()?;
1977
1978    let data = current_task.read_path_if_non_null(data_addr)?;
1979    let mount_options =
1980        security::sb_eat_lsm_opts(current_task.kernel(), &mut MountParams::parse(data.as_ref())?)?;
1981
1982    // From <https://man7.org/linux/man-pages/man2/mount.2.html>
1983    //
1984    //   Since Linux 2.6.26, the MS_REMOUNT flag can be used with MS_BIND
1985    //   to modify only the per-mount-point flags.  This is particularly
1986    //   useful for setting or clearing the "read-only" flag on a mount
1987    //   without changing the underlying filesystem.
1988    if !flags.contains(MountFlags::BIND) {
1989        security::sb_remount(current_task, &mount, mount_options)?;
1990        mount.reconfigure_fs(current_task, flags.file_system_flags())?;
1991    }
1992
1993    let updated_flags = flags & MountFlags::CHANGEABLE_WITH_REMOUNT;
1994    mount.update_flags(updated_flags.mountpoint_flags());
1995
1996    Ok(())
1997}
1998
1999fn do_mount_bind(
2000    locked: &mut Locked<Unlocked>,
2001    current_task: &CurrentTask,
2002    source_addr: UserCString,
2003    target: NamespaceNode,
2004    flags: MountFlags,
2005) -> Result<(), Errno> {
2006    let source =
2007        lookup_at(locked, current_task, FdNumber::AT_FDCWD, source_addr, LookupFlags::default())?;
2008    log_trace!(
2009        source:% = source.path(&current_task.fs()),
2010        target:% = target.path(&current_task.fs()),
2011        flags:?;
2012        "do_mount_bind",
2013    );
2014    target.mount(WhatToMount::Bind(source), flags.mountpoint_flags())
2015}
2016
2017fn do_mount_change_propagation_type(
2018    current_task: &CurrentTask,
2019    target: NamespaceNode,
2020    flags: MountFlags,
2021) -> Result<(), Errno> {
2022    log_trace!(
2023        target:% = target.path(&current_task.fs()),
2024        flags:?;
2025        "do_mount_change_propagation_type",
2026    );
2027
2028    // Flag validation. Of the three propagation type flags, exactly one must be passed. The only
2029    // valid flags other than propagation type are MS_SILENT and MS_REC.
2030    //
2031    // Use if statements to find the first propagation type flag, then check for valid flags using
2032    // only the first propagation flag and MS_REC / MS_SILENT as valid flags.
2033    let propagation_flag = if flags.contains(MountFlags::SHARED) {
2034        MountFlags::SHARED
2035    } else if flags.contains(MountFlags::PRIVATE) {
2036        MountFlags::PRIVATE
2037    } else if flags.contains(MountFlags::DOWNSTREAM) {
2038        MountFlags::DOWNSTREAM
2039    } else {
2040        return error!(EINVAL);
2041    };
2042    if flags.intersects(!(propagation_flag | MountFlags::REC | MountFlags::SILENT)) {
2043        return error!(EINVAL);
2044    }
2045
2046    let mount = target.mount_if_root()?;
2047    mount.change_propagation(propagation_flag, flags.contains(MountFlags::REC));
2048    Ok(())
2049}
2050
2051fn do_mount_move(
2052    locked: &mut Locked<Unlocked>,
2053    current_task: &CurrentTask,
2054    source_addr: UserCString,
2055    target: NamespaceNode,
2056) -> Result<(), Errno> {
2057    let source =
2058        lookup_at(locked, current_task, FdNumber::AT_FDCWD, source_addr, LookupFlags::default())?;
2059    let source_mount = source.mount_if_root()?;
2060    Mount::move_mount(source_mount, target.mount.as_ref().expect(""), &target.entry)
2061}
2062
2063fn do_mount_create(
2064    locked: &mut Locked<Unlocked>,
2065    current_task: &CurrentTask,
2066    source_addr: UserCString,
2067    target: NamespaceNode,
2068    filesystemtype_addr: UserCString,
2069    data_addr: UserCString,
2070    flags: MountFlags,
2071) -> Result<(), Errno> {
2072    let source = current_task.read_path_if_non_null(source_addr)?;
2073    let fs_type = current_task.read_path(filesystemtype_addr)?;
2074    let data = current_task.read_path_if_non_null(data_addr)?;
2075    log_trace!(
2076        source:%,
2077        target:% = target.path(&current_task.fs()),
2078        fs_type:%,
2079        data:%;
2080        "do_mount_create",
2081    );
2082
2083    let options = FileSystemOptions {
2084        source: source.into(),
2085        flags: flags.file_system_flags().into(),
2086        params: MountParams::parse(data.as_ref())?,
2087    };
2088
2089    let fs = current_task.create_filesystem(locked, fs_type.as_ref(), options)?;
2090
2091    security::sb_kern_mount(current_task, &fs)?;
2092    target.mount(WhatToMount::Fs(fs), flags.mountpoint_flags())
2093}
2094
2095pub fn sys_umount2(
2096    locked: &mut Locked<Unlocked>,
2097    current_task: &CurrentTask,
2098    target_addr: UserCString,
2099    flags: u32,
2100) -> Result<(), Errno> {
2101    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
2102
2103    let unmount_flags = UnmountFlags::from_bits(flags).ok_or_else(|| {
2104        track_stub!(
2105            TODO("https://fxbug.dev/322875327"),
2106            "unmount unknown flags",
2107            flags & !UnmountFlags::from_bits_truncate(flags).bits()
2108        );
2109        errno!(EINVAL)
2110    })?;
2111
2112    if unmount_flags.contains(UnmountFlags::EXPIRE)
2113        && (unmount_flags.contains(UnmountFlags::FORCE)
2114            || unmount_flags.contains(UnmountFlags::DETACH))
2115    {
2116        return error!(EINVAL);
2117    }
2118
2119    let lookup_flags = if unmount_flags.contains(UnmountFlags::NOFOLLOW) {
2120        LookupFlags::no_follow()
2121    } else {
2122        LookupFlags::default()
2123    };
2124    let target = lookup_at(locked, current_task, FdNumber::AT_FDCWD, target_addr, lookup_flags)?;
2125
2126    security::sb_umount(current_task, &target, unmount_flags)?;
2127
2128    target.unmount(unmount_flags)
2129}
2130
2131pub fn sys_eventfd2(
2132    locked: &mut Locked<Unlocked>,
2133    current_task: &CurrentTask,
2134    value: u32,
2135    flags: u32,
2136) -> Result<FdNumber, Errno> {
2137    if flags & !(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE) != 0 {
2138        return error!(EINVAL);
2139    }
2140    let blocking = (flags & EFD_NONBLOCK) == 0;
2141    let eventfd_type =
2142        if (flags & EFD_SEMAPHORE) == 0 { EventFdType::Counter } else { EventFdType::Semaphore };
2143    let file = new_eventfd(locked, current_task, value, eventfd_type, blocking);
2144    let fd_flags = if flags & EFD_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() };
2145    let fd = current_task.add_file(locked, file, fd_flags)?;
2146    Ok(fd)
2147}
2148
2149pub fn sys_pidfd_open(
2150    locked: &mut Locked<Unlocked>,
2151    current_task: &CurrentTask,
2152    pid: pid_t,
2153    flags: u32,
2154) -> Result<FdNumber, Errno> {
2155    if flags & !PIDFD_NONBLOCK != 0 {
2156        return error!(EINVAL);
2157    }
2158    if pid <= 0 {
2159        return error!(EINVAL);
2160    }
2161
2162    let file = {
2163        let pid_table = current_task.kernel().pids.read();
2164
2165        let blocking = (flags & PIDFD_NONBLOCK) == 0;
2166        let open_flags = if blocking { OpenFlags::empty() } else { OpenFlags::NONBLOCK };
2167
2168        // Validate that a process (and not just a task) entry exists for the PID.
2169        let task = pid_table.get_task(pid).ok();
2170        let file = match (pid_table.get_process(pid), task) {
2171            (Some(ProcessEntryRef::Process(proc)), Some(task)) => {
2172                new_pidfd(locked, current_task, &proc, &*task.mm()?, open_flags)
2173            }
2174            (Some(ProcessEntryRef::Zombie(_)), _) => {
2175                new_zombie_pidfd(locked, current_task, open_flags)
2176            }
2177            (None, Some(_)) => return error!(EINVAL),
2178            _ => return error!(ESRCH),
2179        };
2180        file
2181    };
2182
2183    current_task.add_file(locked, file, FdFlags::CLOEXEC)
2184}
2185
2186pub fn sys_pidfd_getfd(
2187    locked: &mut Locked<Unlocked>,
2188    current_task: &CurrentTask,
2189    pidfd: FdNumber,
2190    targetfd: FdNumber,
2191    flags: u32,
2192) -> Result<FdNumber, Errno> {
2193    if flags != 0 {
2194        return error!(EINVAL);
2195    }
2196
2197    let file = current_task.get_file(pidfd)?;
2198    let tg = file.as_thread_group_key()?;
2199    let tg = tg.upgrade().ok_or_else(|| errno!(ESRCH))?;
2200    let task = tg.read().get_live_task()?;
2201
2202    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &task)?;
2203
2204    let target_file = task.live()?.files.get(targetfd)?;
2205    current_task.add_file(locked, target_file, FdFlags::CLOEXEC)
2206}
2207
2208pub fn sys_timerfd_create(
2209    locked: &mut Locked<Unlocked>,
2210    current_task: &CurrentTask,
2211    clock_id: u32,
2212    flags: u32,
2213) -> Result<FdNumber, Errno> {
2214    let timeline = match clock_id {
2215        CLOCK_MONOTONIC => Timeline::Monotonic,
2216        CLOCK_BOOTTIME | CLOCK_BOOTTIME_ALARM => Timeline::BootInstant,
2217        CLOCK_REALTIME | CLOCK_REALTIME_ALARM => Timeline::RealTime,
2218        _ => return error!(EINVAL),
2219    };
2220    let timer_type = match clock_id {
2221        CLOCK_MONOTONIC | CLOCK_BOOTTIME | CLOCK_REALTIME => TimerWakeup::Regular,
2222        CLOCK_BOOTTIME_ALARM | CLOCK_REALTIME_ALARM => {
2223            security::check_task_capable(current_task, CAP_WAKE_ALARM)?;
2224            TimerWakeup::Alarm
2225        }
2226        _ => return error!(EINVAL),
2227    };
2228    if flags & !(TFD_NONBLOCK | TFD_CLOEXEC) != 0 {
2229        track_stub!(TODO("https://fxbug.dev/322875488"), "timerfd_create unknown flags", flags);
2230        return error!(EINVAL);
2231    }
2232    log_trace!("timerfd_create(clock_id={:?}, flags={:#x})", clock_id, flags);
2233
2234    let mut open_flags = OpenFlags::RDWR;
2235    if flags & TFD_NONBLOCK != 0 {
2236        open_flags |= OpenFlags::NONBLOCK;
2237    }
2238
2239    let mut fd_flags = FdFlags::empty();
2240    if flags & TFD_CLOEXEC != 0 {
2241        fd_flags |= FdFlags::CLOEXEC;
2242    };
2243
2244    let timer = TimerFile::new_file(locked, current_task, timer_type, timeline, open_flags)?;
2245    let fd = current_task.add_file(locked, timer, fd_flags)?;
2246    Ok(fd)
2247}
2248
2249pub fn sys_timerfd_gettime(
2250    _locked: &mut Locked<Unlocked>,
2251    current_task: &CurrentTask,
2252    fd: FdNumber,
2253    user_current_value: ITimerSpecPtr,
2254) -> Result<(), Errno> {
2255    let file = current_task.get_file(fd)?;
2256    let timer_file = file.downcast_file::<TimerFile>().ok_or_else(|| errno!(EINVAL))?;
2257    let timer_info = timer_file.current_timer_spec();
2258    log_trace!("timerfd_gettime(fd={:?}, current_value={:?})", fd, timer_info);
2259    current_task.write_multi_arch_object(user_current_value, timer_info)?;
2260    Ok(())
2261}
2262
2263pub fn sys_timerfd_settime(
2264    _locked: &mut Locked<Unlocked>,
2265    current_task: &CurrentTask,
2266    fd: FdNumber,
2267    flags: u32,
2268    user_new_value: ITimerSpecPtr,
2269    user_old_value: ITimerSpecPtr,
2270) -> Result<(), Errno> {
2271    if flags & !(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) != 0 {
2272        track_stub!(TODO("https://fxbug.dev/322874722"), "timerfd_settime unknown flags", flags);
2273        return error!(EINVAL);
2274    }
2275
2276    let file = current_task.get_file(fd)?;
2277    let timer_file = file.downcast_file::<TimerFile>().ok_or_else(|| errno!(EINVAL))?;
2278
2279    let new_timer_spec = current_task.read_multi_arch_object(user_new_value)?;
2280    let old_timer_spec = timer_file.set_timer_spec(current_task, &file, new_timer_spec, flags)?;
2281    log_trace!(
2282        "timerfd_settime(fd={:?}, flags={:#x}, new_value={:?}, current_value={:?})",
2283        fd,
2284        flags,
2285        new_timer_spec,
2286        old_timer_spec
2287    );
2288    if !user_old_value.is_null() {
2289        current_task.write_multi_arch_object(user_old_value, old_timer_spec)?;
2290    }
2291    Ok(())
2292}
2293
2294fn deadline_after_timespec(
2295    current_task: &CurrentTask,
2296    user_timespec: TimeSpecPtr,
2297) -> Result<zx::MonotonicInstant, Errno> {
2298    if user_timespec.is_null() {
2299        Ok(zx::MonotonicInstant::INFINITE)
2300    } else {
2301        let timespec = current_task.read_multi_arch_object(user_timespec)?;
2302        Ok(zx::MonotonicInstant::after(duration_from_timespec(timespec)?))
2303    }
2304}
2305
2306static_assertions::assert_eq_size!(uapi::__kernel_fd_set, uapi::arch32::__kernel_fd_set);
2307
2308fn select(
2309    locked: &mut Locked<Unlocked>,
2310    current_task: &mut CurrentTask,
2311    nfds: u32,
2312    readfds_addr: UserRef<__kernel_fd_set>,
2313    writefds_addr: UserRef<__kernel_fd_set>,
2314    exceptfds_addr: UserRef<__kernel_fd_set>,
2315    deadline: zx::MonotonicInstant,
2316    sigmask_addr: UserRef<pselect6_sigmask>,
2317) -> Result<i32, Errno> {
2318    const BITS_PER_BYTE: usize = 8;
2319
2320    fn sizeof<T>(_: &T) -> usize {
2321        BITS_PER_BYTE * std::mem::size_of::<T>()
2322    }
2323    fn is_fd_set(set: &__kernel_fd_set, fd: usize) -> bool {
2324        let index = fd / sizeof(&set.fds_bits[0]);
2325        let remainder = fd % sizeof(&set.fds_bits[0]);
2326        set.fds_bits[index] & (1 << remainder) > 0
2327    }
2328    fn add_fd_to_set(set: &mut __kernel_fd_set, fd: usize) {
2329        let index = fd / sizeof(&set.fds_bits[0]);
2330        let remainder = fd % sizeof(&set.fds_bits[0]);
2331
2332        set.fds_bits[index] |= 1 << remainder;
2333    }
2334    let read_fd_set = |addr: UserRef<__kernel_fd_set>| {
2335        if addr.is_null() { Ok(Default::default()) } else { current_task.read_object(addr) }
2336    };
2337
2338    if nfds as usize > BITS_PER_BYTE * std::mem::size_of::<__kernel_fd_set>() {
2339        return error!(EINVAL);
2340    }
2341
2342    let read_events =
2343        FdEvents::from_bits_truncate(POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR);
2344    let write_events = FdEvents::from_bits_truncate(POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR);
2345    let except_events = FdEvents::from_bits_truncate(POLLPRI);
2346
2347    let readfds = read_fd_set(readfds_addr)?;
2348    let writefds = read_fd_set(writefds_addr)?;
2349    let exceptfds = read_fd_set(exceptfds_addr)?;
2350
2351    let sets = &[(read_events, &readfds), (write_events, &writefds), (except_events, &exceptfds)];
2352    let waiter = FileWaiter::<FdNumber>::default();
2353
2354    for fd in 0..nfds {
2355        let mut aggregated_events = FdEvents::empty();
2356        for (events, fds) in sets.iter() {
2357            if is_fd_set(fds, fd as usize) {
2358                aggregated_events |= *events;
2359            }
2360        }
2361        if !aggregated_events.is_empty() {
2362            let fd = FdNumber::from_raw(fd as i32);
2363            let file = current_task.get_file(fd)?;
2364            waiter.add(locked, current_task, fd, Some(&file), aggregated_events)?;
2365        }
2366    }
2367
2368    let mask = if !sigmask_addr.is_null() {
2369        let sigmask = current_task.read_object(sigmask_addr)?;
2370        let mask = if sigmask.ss.is_null() {
2371            current_task.read().signal_mask()
2372        } else {
2373            if sigmask.ss_len < std::mem::size_of::<sigset_t>() {
2374                return error!(EINVAL);
2375            }
2376            current_task.read_object(sigmask.ss.into())?
2377        };
2378        Some(mask)
2379    } else {
2380        None
2381    };
2382
2383    waiter.wait(locked, current_task, mask, deadline)?;
2384
2385    let mut num_fds = 0;
2386    let mut readfds_out: __kernel_fd_set = Default::default();
2387    let mut writefds_out: __kernel_fd_set = Default::default();
2388    let mut exceptfds_out: __kernel_fd_set = Default::default();
2389    let mut sets = [
2390        (read_events, &readfds, &mut readfds_out),
2391        (write_events, &writefds, &mut writefds_out),
2392        (except_events, &exceptfds, &mut exceptfds_out),
2393    ];
2394    let mut ready_items = waiter.ready_items.lock();
2395    for ReadyItem { key: ready_key, events: ready_events } in ready_items.drain(..) {
2396        let ready_key = assert_matches::assert_matches!(
2397            ready_key,
2398            ReadyItemKey::FdNumber(v) => v
2399        );
2400
2401        sets.iter_mut().for_each(|(events, fds, fds_out)| {
2402            let fd = ready_key.raw() as usize;
2403            if events.intersects(ready_events) && is_fd_set(fds, fd) {
2404                add_fd_to_set(fds_out, fd);
2405                num_fds += 1;
2406            }
2407        });
2408    }
2409
2410    let write_fd_set =
2411        |addr: UserRef<__kernel_fd_set>, value: __kernel_fd_set| -> Result<(), Errno> {
2412            if !addr.is_null() {
2413                current_task.write_object(addr, &value)?;
2414            }
2415            Ok(())
2416        };
2417    write_fd_set(readfds_addr, readfds_out)?;
2418    write_fd_set(writefds_addr, writefds_out)?;
2419    write_fd_set(exceptfds_addr, exceptfds_out)?;
2420    Ok(num_fds)
2421}
2422
2423pub fn sys_pselect6(
2424    locked: &mut Locked<Unlocked>,
2425    current_task: &mut CurrentTask,
2426    nfds: u32,
2427    readfds_addr: UserRef<__kernel_fd_set>,
2428    writefds_addr: UserRef<__kernel_fd_set>,
2429    exceptfds_addr: UserRef<__kernel_fd_set>,
2430    timeout_addr: TimeSpecPtr,
2431    sigmask_addr: UserRef<pselect6_sigmask>,
2432) -> Result<i32, Errno> {
2433    let deadline = deadline_after_timespec(current_task, timeout_addr)?;
2434
2435    let num_fds = select(
2436        locked,
2437        current_task,
2438        nfds,
2439        readfds_addr,
2440        writefds_addr,
2441        exceptfds_addr,
2442        deadline,
2443        sigmask_addr,
2444    )?;
2445
2446    if !timeout_addr.is_null()
2447        && !current_task
2448            .thread_group()
2449            .read()
2450            .personality
2451            .contains(PersonalityFlags::STICKY_TIMEOUTS)
2452    {
2453        let now = zx::MonotonicInstant::get();
2454        let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0));
2455        current_task.write_multi_arch_object(timeout_addr, timespec_from_duration(remaining))?;
2456    }
2457
2458    Ok(num_fds)
2459}
2460
2461pub fn sys_select(
2462    locked: &mut Locked<Unlocked>,
2463    current_task: &mut CurrentTask,
2464    nfds: u32,
2465    readfds_addr: UserRef<__kernel_fd_set>,
2466    writefds_addr: UserRef<__kernel_fd_set>,
2467    exceptfds_addr: UserRef<__kernel_fd_set>,
2468    timeout_addr: TimeValPtr,
2469) -> Result<i32, Errno> {
2470    let start_time = zx::MonotonicInstant::get();
2471
2472    let deadline = if timeout_addr.is_null() {
2473        zx::MonotonicInstant::INFINITE
2474    } else {
2475        let timeval = current_task.read_multi_arch_object(timeout_addr)?;
2476        start_time + starnix_types::time::duration_from_timeval(timeval)?
2477    };
2478
2479    let num_fds = select(
2480        locked,
2481        current_task,
2482        nfds,
2483        readfds_addr,
2484        writefds_addr,
2485        exceptfds_addr,
2486        deadline,
2487        UserRef::<pselect6_sigmask>::default(),
2488    )?;
2489
2490    if !timeout_addr.is_null()
2491        && !current_task
2492            .thread_group()
2493            .read()
2494            .personality
2495            .contains(PersonalityFlags::STICKY_TIMEOUTS)
2496    {
2497        let now = zx::MonotonicInstant::get();
2498        let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0));
2499        current_task.write_multi_arch_object(
2500            timeout_addr,
2501            starnix_types::time::timeval_from_duration(remaining),
2502        )?;
2503    }
2504
2505    Ok(num_fds)
2506}
2507
2508pub fn sys_epoll_create1(
2509    locked: &mut Locked<Unlocked>,
2510    current_task: &CurrentTask,
2511    flags: u32,
2512) -> Result<FdNumber, Errno> {
2513    if flags & !EPOLL_CLOEXEC != 0 {
2514        return error!(EINVAL);
2515    }
2516    let ep_file = EpollFileObject::new_file(locked, current_task);
2517    let fd_flags = if flags & EPOLL_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() };
2518    let fd = current_task.add_file(locked, ep_file, fd_flags)?;
2519    Ok(fd)
2520}
2521
2522pub fn sys_epoll_ctl(
2523    locked: &mut Locked<Unlocked>,
2524    current_task: &CurrentTask,
2525    epfd: FdNumber,
2526    op: u32,
2527    fd: FdNumber,
2528    event: UserRef<EpollEvent>,
2529) -> Result<(), Errno> {
2530    let file = current_task.get_file(epfd)?;
2531    let epoll_file = file.downcast_file::<EpollFileObject>().ok_or_else(|| errno!(EINVAL))?;
2532    let operand_file = current_task.get_file(fd)?;
2533
2534    if Arc::ptr_eq(&file, &operand_file) {
2535        return error!(EINVAL);
2536    }
2537
2538    let epoll_event = match current_task.read_object(event) {
2539        Ok(mut epoll_event) => {
2540            // If EPOLLWAKEUP is specified in flags, but the caller does not have the CAP_BLOCK_SUSPEND
2541            // capability, then the EPOLLWAKEUP flag is silently ignored.
2542            // See https://man7.org/linux/man-pages/man2/epoll_ctl.2.html
2543            if epoll_event.events().contains(FdEvents::EPOLLWAKEUP) {
2544                if !security::is_task_capable_noaudit(current_task, CAP_BLOCK_SUSPEND) {
2545                    epoll_event.ignore(FdEvents::EPOLLWAKEUP);
2546                }
2547            }
2548            Ok(epoll_event)
2549        }
2550        result => result,
2551    };
2552
2553    match op {
2554        EPOLL_CTL_ADD => {
2555            epoll_file.add(locked, current_task, &operand_file, &file, epoll_event?)?;
2556            operand_file.register_epfd(&file);
2557        }
2558        EPOLL_CTL_MOD => {
2559            epoll_file.modify(locked, current_task, &operand_file, epoll_event?)?;
2560        }
2561        EPOLL_CTL_DEL => {
2562            epoll_file.delete(current_task, &operand_file)?;
2563            operand_file.unregister_epfd(&file);
2564        }
2565        _ => return error!(EINVAL),
2566    }
2567    Ok(())
2568}
2569
2570// Backend for sys_epoll_pwait and sys_epoll_pwait2 that takes an already-decoded deadline.
2571fn do_epoll_pwait(
2572    locked: &mut Locked<Unlocked>,
2573    current_task: &mut CurrentTask,
2574    epfd: FdNumber,
2575    events: UserRef<EpollEvent>,
2576    unvalidated_max_events: i32,
2577    deadline: zx::MonotonicInstant,
2578    user_sigmask: UserRef<SigSet>,
2579) -> Result<usize, Errno> {
2580    let file = current_task.get_file(epfd)?;
2581    let epoll_file = file.downcast_file::<EpollFileObject>().ok_or_else(|| errno!(EINVAL))?;
2582
2583    // Max_events must be greater than 0.
2584    let max_events: usize = unvalidated_max_events.try_into().map_err(|_| errno!(EINVAL))?;
2585    if max_events == 0 {
2586        return error!(EINVAL);
2587    }
2588
2589    // Return early if the user passes an obviously invalid pointer. This avoids dropping events
2590    // for common pointer errors. When we catch bad pointers after the wait is complete when the
2591    // memory is actually written, the events will be lost. This check is not a guarantee.
2592    current_task
2593        .mm()?
2594        .check_plausible(events.addr(), max_events * std::mem::size_of::<EpollEvent>())?;
2595
2596    let active_events = if !user_sigmask.is_null() {
2597        let signal_mask = current_task.read_object(user_sigmask)?;
2598        current_task.wait_with_temporary_mask(locked, signal_mask, |locked, current_task| {
2599            epoll_file.wait(locked, current_task, max_events, deadline)
2600        })?
2601    } else {
2602        epoll_file.wait(locked, current_task, max_events, deadline)?
2603    };
2604
2605    current_task.write_objects(events, &active_events)?;
2606    Ok(active_events.len())
2607}
2608
2609pub fn sys_epoll_pwait(
2610    locked: &mut Locked<Unlocked>,
2611    current_task: &mut CurrentTask,
2612    epfd: FdNumber,
2613    events: UserRef<EpollEvent>,
2614    max_events: i32,
2615    timeout: i32,
2616    user_sigmask: UserRef<SigSet>,
2617) -> Result<usize, Errno> {
2618    let deadline = zx::MonotonicInstant::after(duration_from_poll_timeout(timeout)?);
2619    do_epoll_pwait(locked, current_task, epfd, events, max_events, deadline, user_sigmask)
2620}
2621
2622pub fn sys_epoll_pwait2(
2623    locked: &mut Locked<Unlocked>,
2624    current_task: &mut CurrentTask,
2625    epfd: FdNumber,
2626    events: UserRef<EpollEvent>,
2627    max_events: i32,
2628    user_timespec: TimeSpecPtr,
2629    user_sigmask: UserRef<SigSet>,
2630) -> Result<usize, Errno> {
2631    let deadline = deadline_after_timespec(current_task, user_timespec)?;
2632    do_epoll_pwait(locked, current_task, epfd, events, max_events, deadline, user_sigmask)
2633}
2634
2635struct FileWaiter<Key: Into<ReadyItemKey>> {
2636    waiter: Waiter,
2637    ready_items: Arc<Mutex<VecDeque<ReadyItem>>>,
2638    _marker: PhantomData<Key>,
2639}
2640
2641impl<Key: Into<ReadyItemKey>> Default for FileWaiter<Key> {
2642    fn default() -> Self {
2643        Self { waiter: Waiter::new(), ready_items: Default::default(), _marker: PhantomData }
2644    }
2645}
2646
2647impl<Key: Into<ReadyItemKey>> FileWaiter<Key> {
2648    fn add<L>(
2649        &self,
2650        locked: &mut Locked<L>,
2651        current_task: &CurrentTask,
2652        key: Key,
2653        file: Option<&FileHandle>,
2654        requested_events: FdEvents,
2655    ) -> Result<(), Errno>
2656    where
2657        L: LockEqualOrBefore<FileOpsCore>,
2658    {
2659        let key = key.into();
2660
2661        if let Some(file) = file {
2662            let sought_events = requested_events | FdEvents::POLLERR | FdEvents::POLLHUP;
2663
2664            let handler =
2665                EventHandler::Enqueue { key, queue: self.ready_items.clone(), sought_events };
2666            file.wait_async(locked, current_task, &self.waiter, sought_events, handler);
2667            let current_events = file.query_events(locked, current_task)? & sought_events;
2668            if !current_events.is_empty() {
2669                self.ready_items.lock().push_back(ReadyItem { key, events: current_events });
2670            }
2671        } else {
2672            self.ready_items.lock().push_back(ReadyItem { key, events: FdEvents::POLLNVAL });
2673        }
2674        Ok(())
2675    }
2676
2677    fn wait<L>(
2678        &self,
2679        locked: &mut Locked<L>,
2680        current_task: &mut CurrentTask,
2681        signal_mask: Option<SigSet>,
2682        deadline: zx::MonotonicInstant,
2683    ) -> Result<(), Errno>
2684    where
2685        L: LockEqualOrBefore<FileOpsCore>,
2686    {
2687        if self.ready_items.lock().is_empty() {
2688            // When wait_until() returns Ok() it means there was a wake up; however there may not
2689            // be a ready item, for example if waiting on a sync file with multiple sync points.
2690            // Keep waiting until there's at least one ready item.
2691            let signal_mask = signal_mask.unwrap_or_else(|| current_task.read().signal_mask());
2692            let mut result = current_task.wait_with_temporary_mask(
2693                locked,
2694                signal_mask,
2695                |locked, current_task| self.waiter.wait_until(locked, current_task, deadline),
2696            );
2697            loop {
2698                match result {
2699                    Err(err) if err == ETIMEDOUT => return Ok(()),
2700                    Ok(()) => {
2701                        if !self.ready_items.lock().is_empty() {
2702                            break;
2703                        }
2704                    }
2705                    result => result?,
2706                };
2707                result = self.waiter.wait_until(locked, current_task, deadline);
2708            }
2709        }
2710        Ok(())
2711    }
2712}
2713
2714pub fn poll(
2715    locked: &mut Locked<Unlocked>,
2716    current_task: &mut CurrentTask,
2717    user_pollfds: UserRef<pollfd>,
2718    num_fds: i32,
2719    mask: Option<SigSet>,
2720    deadline: zx::MonotonicInstant,
2721) -> Result<usize, Errno> {
2722    if num_fds < 0
2723        || num_fds as u64 > current_task.thread_group().get_rlimit(locked, Resource::NOFILE)
2724    {
2725        return error!(EINVAL);
2726    }
2727
2728    let mut pollfds = vec![pollfd::default(); num_fds as usize];
2729    let waiter = FileWaiter::<usize>::default();
2730
2731    for (index, poll_descriptor) in pollfds.iter_mut().enumerate() {
2732        *poll_descriptor = current_task.read_object(user_pollfds.at(index)?)?;
2733        poll_descriptor.revents = 0;
2734        if poll_descriptor.fd < 0 {
2735            continue;
2736        }
2737        let file = current_task.get_file(FdNumber::from_raw(poll_descriptor.fd)).ok();
2738        waiter.add(
2739            locked,
2740            current_task,
2741            index,
2742            file.as_ref(),
2743            FdEvents::from_bits_truncate(poll_descriptor.events as u32),
2744        )?;
2745    }
2746
2747    waiter.wait(locked, current_task, mask, deadline)?;
2748
2749    let mut ready_items = waiter.ready_items.lock();
2750    let mut unique_ready_items =
2751        bit_vec::BitVec::from_elem(usize::try_from(num_fds).unwrap(), false);
2752    for ReadyItem { key: ready_key, events: ready_events } in ready_items.drain(..) {
2753        let ready_key = assert_matches::assert_matches!(
2754            ready_key,
2755            ReadyItemKey::Usize(v) => v
2756        );
2757        let interested_events = FdEvents::from_bits_truncate(pollfds[ready_key].events as u32)
2758            | FdEvents::POLLERR
2759            | FdEvents::POLLHUP
2760            | FdEvents::POLLNVAL;
2761        let return_events = (interested_events & ready_events).bits();
2762        pollfds[ready_key].revents = return_events as i16;
2763        unique_ready_items.set(ready_key, true);
2764    }
2765
2766    for (index, poll_descriptor) in pollfds.iter().enumerate() {
2767        current_task.write_object(user_pollfds.at(index)?, poll_descriptor)?;
2768    }
2769
2770    Ok(unique_ready_items.into_iter().filter(Clone::clone).count())
2771}
2772
2773pub fn sys_ppoll(
2774    locked: &mut Locked<Unlocked>,
2775    current_task: &mut CurrentTask,
2776    user_fds: UserRef<pollfd>,
2777    num_fds: i32,
2778    user_timespec: TimeSpecPtr,
2779    user_mask: UserRef<SigSet>,
2780    sigset_size: usize,
2781) -> Result<usize, Errno> {
2782    let start_time = zx::MonotonicInstant::get();
2783
2784    let timeout = if user_timespec.is_null() {
2785        // Passing -1 to poll is equivalent to an infinite timeout.
2786        -1
2787    } else {
2788        let ts = current_task.read_multi_arch_object(user_timespec)?;
2789        duration_from_timespec::<zx::MonotonicTimeline>(ts)?.into_millis() as i32
2790    };
2791
2792    let deadline = start_time + duration_from_poll_timeout(timeout)?;
2793
2794    let mask = if !user_mask.is_null() {
2795        if sigset_size != std::mem::size_of::<SigSet>() {
2796            return error!(EINVAL);
2797        }
2798        let mask = current_task.read_object(user_mask)?;
2799        Some(mask)
2800    } else {
2801        None
2802    };
2803
2804    let poll_result = poll(locked, current_task, user_fds, num_fds, mask, deadline);
2805
2806    if user_timespec.is_null() {
2807        return poll_result;
2808    }
2809
2810    let now = zx::MonotonicInstant::get();
2811    let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0));
2812    let remaining_timespec = timespec_from_duration(remaining);
2813
2814    // From gVisor: "ppoll is normally restartable if interrupted by something other than a signal
2815    // handled by the application (i.e. returns ERESTARTNOHAND). However, if
2816    // [copy out] failed, then the restarted ppoll would use the wrong timeout, so the
2817    // error should be left as EINTR."
2818    match (current_task.write_multi_arch_object(user_timespec, remaining_timespec), poll_result) {
2819        // If write was ok, and poll was ok, return poll result.
2820        (Ok(_), Ok(num_events)) => Ok(num_events),
2821        (Ok(_), Err(e)) if e == EINTR => {
2822            error!(ERESTARTNOHAND)
2823        }
2824        (Ok(_), poll_result) => poll_result,
2825        // If write was a failure, return the poll result unchanged.
2826        (Err(_), poll_result) => poll_result,
2827    }
2828}
2829
2830pub fn sys_flock(
2831    locked: &mut Locked<Unlocked>,
2832    current_task: &CurrentTask,
2833    fd: FdNumber,
2834    operation: u32,
2835) -> Result<(), Errno> {
2836    let file = current_task.get_file(fd)?;
2837    let operation = FlockOperation::from_flags(operation)?;
2838    security::check_file_lock_access(current_task, &file)?;
2839    file.flock(locked, current_task, operation)
2840}
2841
2842pub fn sys_sync(locked: &mut Locked<Unlocked>, current_task: &CurrentTask) -> Result<(), Errno> {
2843    current_task.kernel().mounts.sync_all(locked, current_task)
2844}
2845
2846pub fn sys_syncfs(
2847    locked: &mut Locked<Unlocked>,
2848    current_task: &CurrentTask,
2849    fd: FdNumber,
2850) -> Result<(), Errno> {
2851    let file = current_task.get_file(fd)?;
2852    file.fs.sync(locked, current_task)
2853}
2854
2855pub fn sys_fsync(
2856    _locked: &mut Locked<Unlocked>,
2857    current_task: &CurrentTask,
2858    fd: FdNumber,
2859) -> Result<(), Errno> {
2860    let file = current_task.get_file(fd)?;
2861    file.sync(current_task)
2862}
2863
2864pub fn sys_fdatasync(
2865    _locked: &mut Locked<Unlocked>,
2866    current_task: &CurrentTask,
2867    fd: FdNumber,
2868) -> Result<(), Errno> {
2869    let file = current_task.get_file(fd)?;
2870    file.data_sync(current_task)
2871}
2872
2873pub fn sys_sync_file_range(
2874    _locked: &mut Locked<Unlocked>,
2875    current_task: &CurrentTask,
2876    fd: FdNumber,
2877    offset: off_t,
2878    length: off_t,
2879    flags: u32,
2880) -> Result<(), Errno> {
2881    const KNOWN_FLAGS: u32 = uapi::SYNC_FILE_RANGE_WAIT_BEFORE
2882        | uapi::SYNC_FILE_RANGE_WRITE
2883        | uapi::SYNC_FILE_RANGE_WAIT_AFTER;
2884    if flags & !KNOWN_FLAGS != 0 {
2885        return error!(EINVAL);
2886    }
2887
2888    let file = current_task.get_file(fd)?;
2889
2890    if offset < 0 || length < 0 {
2891        return error!(EINVAL);
2892    }
2893
2894    checked_add_offset_and_length(offset as usize, length as usize)?;
2895
2896    // From <https://linux.die.net/man/2/sync_file_range>:
2897    //
2898    //   fd refers to something other than a regular file, a block device, a directory, or a symbolic link.
2899    let mode = file.node().info().mode;
2900    if !mode.is_reg() && !mode.is_blk() && !mode.is_dir() && !mode.is_lnk() {
2901        return error!(ESPIPE);
2902    }
2903
2904    if flags == 0 {
2905        return Ok(());
2906    }
2907
2908    // Syncing the whole file is much more than we need for sync_file_range, which only needs to
2909    // sync the specified data range.
2910    file.data_sync(current_task)
2911}
2912
2913pub fn sys_fadvise64(
2914    _locked: &mut Locked<Unlocked>,
2915    current_task: &CurrentTask,
2916    fd: FdNumber,
2917    offset: off_t,
2918    len: off_t,
2919    advice: u32,
2920) -> Result<(), Errno> {
2921    match advice {
2922        POSIX_FADV_NORMAL => track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_NORMAL"),
2923        POSIX_FADV_RANDOM => track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_RANDOM"),
2924        POSIX_FADV_SEQUENTIAL => {
2925            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_SEQUENTIAL")
2926        }
2927        POSIX_FADV_WILLNEED => {
2928            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_WILLNEED")
2929        }
2930        POSIX_FADV_DONTNEED => {
2931            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_DONTNEED")
2932        }
2933        POSIX_FADV_NOREUSE => {
2934            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_NOREUSE")
2935        }
2936        _ => {
2937            track_stub!(TODO("https://fxbug.dev/322875684"), "fadvise64 unknown advice", advice);
2938            return error!(EINVAL);
2939        }
2940    }
2941
2942    if offset < 0 || len < 0 {
2943        return error!(EINVAL);
2944    }
2945
2946    let file = current_task.get_file(fd)?;
2947    // fadvise does not work on pipes.
2948    if file.downcast_file::<PipeFileObject>().is_some() {
2949        return error!(ESPIPE);
2950    }
2951
2952    // fadvise does not work on paths.
2953    if file.flags().contains(OpenFlags::PATH) {
2954        return error!(EBADF);
2955    }
2956
2957    Ok(())
2958}
2959
2960pub fn sys_fallocate(
2961    locked: &mut Locked<Unlocked>,
2962    current_task: &CurrentTask,
2963    fd: FdNumber,
2964    mode: u32,
2965    offset: off_t,
2966    len: off_t,
2967) -> Result<(), Errno> {
2968    let file = current_task.get_file(fd)?;
2969
2970    // Offset must not be less than 0.
2971    // Length must not be less than or equal to 0.
2972    // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2973    if offset < 0 || len <= 0 {
2974        return error!(EINVAL);
2975    }
2976
2977    let mode = FallocMode::from_bits(mode).ok_or_else(|| errno!(EINVAL))?;
2978    file.fallocate(locked, current_task, mode, offset as u64, len as u64)?;
2979
2980    Ok(())
2981}
2982
2983pub fn sys_inotify_init1(
2984    locked: &mut Locked<Unlocked>,
2985    current_task: &CurrentTask,
2986    flags: u32,
2987) -> Result<FdNumber, Errno> {
2988    if flags & !(IN_NONBLOCK | IN_CLOEXEC) != 0 {
2989        return error!(EINVAL);
2990    }
2991    let non_blocking = flags & IN_NONBLOCK != 0;
2992    let close_on_exec = flags & IN_CLOEXEC != 0;
2993    let inotify_file = InotifyFileObject::new_file(locked, current_task, non_blocking);
2994    let fd_flags = if close_on_exec { FdFlags::CLOEXEC } else { FdFlags::empty() };
2995    current_task.add_file(locked, inotify_file, fd_flags)
2996}
2997
2998pub fn sys_inotify_add_watch(
2999    locked: &mut Locked<Unlocked>,
3000    current_task: &CurrentTask,
3001    fd: FdNumber,
3002    user_path: UserCString,
3003    mask: u32,
3004) -> Result<WdNumber, Errno> {
3005    let mask = InotifyMask::from_bits(mask).ok_or_else(|| errno!(EINVAL))?;
3006    if !mask.intersects(InotifyMask::ALL_EVENTS) {
3007        // Mask must include at least 1 event.
3008        return error!(EINVAL);
3009    }
3010    let file = current_task.get_file(fd)?;
3011    let inotify_file = file.downcast_file::<InotifyFileObject>().ok_or_else(|| errno!(EINVAL))?;
3012    let options = if mask.contains(InotifyMask::DONT_FOLLOW) {
3013        LookupFlags::no_follow()
3014    } else {
3015        LookupFlags::default()
3016    };
3017    let watched_node = lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, options)?;
3018    if mask.contains(InotifyMask::ONLYDIR) && !watched_node.entry.node.is_dir() {
3019        return error!(ENOTDIR);
3020    }
3021    inotify_file.add_watch(watched_node.entry, mask, &file)
3022}
3023
3024pub fn sys_inotify_rm_watch(
3025    _locked: &mut Locked<Unlocked>,
3026    current_task: &CurrentTask,
3027    fd: FdNumber,
3028    watch_id: WdNumber,
3029) -> Result<(), Errno> {
3030    let file = current_task.get_file(fd)?;
3031    let inotify_file = file.downcast_file::<InotifyFileObject>().ok_or_else(|| errno!(EINVAL))?;
3032    inotify_file.remove_watch(watch_id, &file)
3033}
3034
3035pub fn sys_utimensat(
3036    locked: &mut Locked<Unlocked>,
3037    current_task: &CurrentTask,
3038    dir_fd: FdNumber,
3039    user_path: UserCString,
3040    user_times: TimeSpecPtr,
3041    flags: u32,
3042) -> Result<(), Errno> {
3043    let (atime, mtime) = if user_times.addr().is_null() {
3044        // If user_times is null, the timestamps are updated to the current time.
3045        (TimeUpdateType::Now, TimeUpdateType::Now)
3046    } else {
3047        let ts = current_task.read_multi_arch_objects_to_vec(user_times, 2)?;
3048        let atime = ts[0];
3049        let mtime = ts[1];
3050        let parse_timespec = |spec: timespec| match spec.tv_nsec {
3051            UTIME_NOW => Ok(TimeUpdateType::Now),
3052            UTIME_OMIT => Ok(TimeUpdateType::Omit),
3053            _ => time_from_timespec(spec).map(TimeUpdateType::Time),
3054        };
3055        (parse_timespec(atime)?, parse_timespec(mtime)?)
3056    };
3057
3058    if let (TimeUpdateType::Omit, TimeUpdateType::Omit) = (atime, mtime) {
3059        return Ok(());
3060    };
3061
3062    // Non-standard feature: if user_path is null, the timestamps are updated on the file referred
3063    // to by dir_fd.
3064    // See https://man7.org/linux/man-pages/man2/utimensat.2.html
3065    let name = if user_path.addr().is_null() {
3066        if dir_fd == FdNumber::AT_FDCWD {
3067            return error!(EFAULT);
3068        }
3069        let (node, _) = current_task.resolve_dir_fd(
3070            locked,
3071            dir_fd,
3072            Default::default(),
3073            ResolveFlags::empty(),
3074        )?;
3075        node
3076    } else {
3077        let lookup_flags = LookupFlags::from_bits(flags, AT_SYMLINK_NOFOLLOW)?;
3078        lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?
3079    };
3080    name.entry.node.update_atime_mtime(locked, current_task, &name.mount, atime, mtime)?;
3081    let event_mask = match (atime, mtime) {
3082        (_, TimeUpdateType::Omit) => InotifyMask::ACCESS,
3083        (TimeUpdateType::Omit, _) => InotifyMask::MODIFY,
3084        (_, _) => InotifyMask::ATTRIB,
3085    };
3086    name.entry.notify_ignoring_excl_unlink(event_mask);
3087    Ok(())
3088}
3089
/// `splice` syscall entry point.
///
/// Forwards every argument unchanged to `splice::splice` and returns its result.
pub fn sys_splice(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd_in: FdNumber,
    off_in: OffsetPtr,
    fd_out: FdNumber,
    off_out: OffsetPtr,
    len: usize,
    flags: u32,
) -> Result<usize, Errno> {
    splice::splice(locked, current_task, fd_in, off_in, fd_out, off_out, len, flags)
}
3102
/// `vmsplice` syscall entry point.
///
/// Forwards every argument unchanged to `splice::vmsplice` and returns its result.
pub fn sys_vmsplice(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd: FdNumber,
    iovec_addr: IOVecPtr,
    iovec_count: UserValue<i32>,
    flags: u32,
) -> Result<usize, Errno> {
    splice::vmsplice(locked, current_task, fd, iovec_addr, iovec_count, flags)
}
3113
/// `copy_file_range` syscall entry point.
///
/// Forwards every argument unchanged to `splice::copy_file_range` and returns its result.
pub fn sys_copy_file_range(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd_in: FdNumber,
    off_in: OffsetPtr,
    fd_out: FdNumber,
    off_out: OffsetPtr,
    len: usize,
    flags: u32,
) -> Result<usize, Errno> {
    splice::copy_file_range(locked, current_task, fd_in, off_in, fd_out, off_out, len, flags)
}
3126
/// `tee` syscall entry point.
///
/// Forwards every argument unchanged to `splice::tee` and returns its result.
pub fn sys_tee(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd_in: FdNumber,
    fd_out: FdNumber,
    len: usize,
    flags: u32,
) -> Result<usize, Errno> {
    splice::tee(locked, current_task, fd_in, fd_out, len, flags)
}
3137
3138pub fn sys_readahead(
3139    _locked: &mut Locked<Unlocked>,
3140    current_task: &CurrentTask,
3141    fd: FdNumber,
3142    offset: off_t,
3143    length: usize,
3144) -> Result<(), Errno> {
3145    let file = current_task.get_file(fd)?;
3146    // Allow only non-negative values of `offset`. Some versions of Linux allow it to be negative,
3147    // but GVisor tests require `readahead()` to fail in this case.
3148    let offset: usize = offset.try_into().map_err(|_| errno!(EINVAL))?;
3149    file.readahead(current_task, offset, length)
3150}
3151
3152pub fn sys_io_setup(
3153    _locked: &mut Locked<Unlocked>,
3154    current_task: &CurrentTask,
3155    user_nr_events: UserValue<u32>,
3156    user_ctx_idp: MultiArchUserRef<uapi::aio_context_t, uapi::arch32::aio_context_t>,
3157) -> Result<(), Errno> {
3158    // From https://man7.org/linux/man-pages/man2/io_setup.2.html:
3159    //
3160    //   EINVAL ctx_idp is not initialized, or the specified nr_events
3161    //   exceeds internal limits.  nr_events should be greater than
3162    //   0.
3163    //
3164    // TODO: Determine what "internal limits" means.
3165    let max_operations =
3166        user_nr_events.validate(0..(i32::MAX as u32)).ok_or_else(|| errno!(EINVAL))? as usize;
3167    if current_task.read_multi_arch_object(user_ctx_idp)? != 0 {
3168        return error!(EINVAL);
3169    }
3170    let ctx_id = AioContext::create(current_task, max_operations)?;
3171    current_task.write_multi_arch_object(user_ctx_idp, ctx_id).map_err(|e| {
3172        let _ = current_task
3173            .mm()
3174            .expect("previous sys_io_setup code verified mm exists")
3175            .destroy_aio_context(ctx_id.into());
3176        e
3177    })?;
3178    Ok(())
3179}
3180
3181pub fn sys_io_submit(
3182    _locked: &mut Locked<Unlocked>,
3183    current_task: &CurrentTask,
3184    ctx_id: aio_context_t,
3185    user_nr: UserValue<i32>,
3186    mut iocb_addrs: IocbPtrPtr,
3187) -> Result<i32, Errno> {
3188    let nr = user_nr.validate(0..i32::MAX).ok_or_else(|| errno!(EINVAL))?;
3189    if nr == 0 {
3190        return Ok(0);
3191    }
3192    let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?;
3193
3194    // `iocbpp` is an array of addresses to iocb's.
3195    let mut num_submitted: i32 = 0;
3196    loop {
3197        let iocb_ref = current_task.read_multi_arch_ptr(iocb_addrs)?;
3198        let control_block = current_task.read_multi_arch_object(iocb_ref)?;
3199
3200        match (num_submitted, ctx.submit(current_task, control_block, iocb_ref)) {
3201            (0, Err(e)) => return Err(e),
3202            (_, Err(_)) => break,
3203            (_, Ok(())) => {
3204                num_submitted += 1;
3205                if num_submitted == nr {
3206                    break;
3207                }
3208            }
3209        };
3210
3211        iocb_addrs = iocb_addrs.next()?;
3212    }
3213
3214    Ok(num_submitted)
3215}
3216
3217pub fn sys_io_getevents(
3218    _locked: &mut Locked<Unlocked>,
3219    current_task: &CurrentTask,
3220    ctx_id: aio_context_t,
3221    min_nr: i64,
3222    nr: i64,
3223    events_ref: UserRef<io_event>,
3224    user_timeout: TimeSpecPtr,
3225) -> Result<i32, Errno> {
3226    if min_nr < 0 || min_nr > nr || nr < 0 {
3227        return error!(EINVAL);
3228    }
3229    let min_results = min_nr as usize;
3230    let max_results = nr as usize;
3231    let deadline = deadline_after_timespec(current_task, user_timeout)?;
3232
3233    let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?;
3234    let events = ctx.get_events(current_task, min_results, max_results, deadline)?;
3235    current_task.write_objects(events_ref, &events)?;
3236
3237    Ok(events.len() as i32)
3238}
3239
3240pub fn sys_io_cancel(
3241    _locked: &mut Locked<Unlocked>,
3242    current_task: &CurrentTask,
3243    ctx_id: aio_context_t,
3244    user_iocb: IocbPtr,
3245    _result: UserRef<io_event>,
3246) -> Result<(), Errno> {
3247    let iocb = current_task.read_multi_arch_object(user_iocb)?;
3248    let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?;
3249
3250    ctx.cancel(current_task, iocb, user_iocb)?;
3251    // TODO: Correctly handle return. If the operation is successfully canceled, the event should be copied into the memory pointed to by result without being placed into the completion queue.
3252    track_stub!(TODO("https://fxbug.dev/297433877"), "io_cancel");
3253    Ok(())
3254}
3255
3256pub fn sys_io_destroy(
3257    _locked: &mut Locked<Unlocked>,
3258    current_task: &CurrentTask,
3259    ctx_id: aio_context_t,
3260) -> Result<(), Errno> {
3261    let aio_context = current_task.mm()?.destroy_aio_context(ctx_id.into())?;
3262    std::mem::drop(aio_context);
3263    Ok(())
3264}
3265
3266pub fn sys_io_uring_setup(
3267    locked: &mut Locked<Unlocked>,
3268    current_task: &CurrentTask,
3269    user_entries: UserValue<u32>,
3270    user_params: UserRef<io_uring_params>,
3271) -> Result<FdNumber, Errno> {
3272    // TODO: https://fxbug.dev/397186254 - we will want to do a no-audit CAP_IPC_LOCK capability
3273    // check; see "If not granted CAP_IPC_LOCK io_uring operations are accounted against the user's
3274    // RLIMIT_MEMLOCK limit" at
3275    // https://github.com/SELinuxProject/selinux-notebook/blob/main/src/auditing.md#capability-audit-exemptions
3276
3277    if !current_task.kernel().features.io_uring {
3278        return error!(ENOSYS);
3279    }
3280
3281    // Apply policy from /proc/sys/kernel/io_uring_disabled
3282    let limits = &current_task.kernel().system_limits;
3283    match limits.io_uring_disabled.load(atomic::Ordering::Relaxed) {
3284        0 => (),
3285        1 => {
3286            let io_uring_group = limits.io_uring_group.load(atomic::Ordering::Relaxed).try_into();
3287            if io_uring_group.is_err()
3288                || !current_task.current_creds().is_in_group(io_uring_group.unwrap())
3289            {
3290                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
3291            }
3292        }
3293        _ => {
3294            return error!(EPERM);
3295        }
3296    }
3297
3298    let entries = user_entries.validate(1..IORING_MAX_ENTRIES).ok_or_else(|| errno!(EINVAL))?;
3299
3300    let mut params = current_task.read_object(user_params)?;
3301    for byte in params.resv {
3302        if byte != 0 {
3303            return error!(EINVAL);
3304        }
3305    }
3306
3307    let file = IoUringFileObject::new_file(locked, current_task, entries, &mut params)?;
3308
3309    // io_uring file descriptors are always created with CLOEXEC.
3310    let fd = current_task.add_file(locked, file, FdFlags::CLOEXEC)?;
3311    current_task.write_object(user_params, &params)?;
3312    Ok(fd)
3313}
3314
3315pub fn sys_io_uring_enter(
3316    locked: &mut Locked<Unlocked>,
3317    current_task: &CurrentTask,
3318    fd: FdNumber,
3319    to_submit: u32,
3320    min_complete: u32,
3321    flags: u32,
3322    _sig: UserRef<SigSet>,
3323    sigset_size: usize,
3324) -> Result<u32, Errno> {
3325    if !current_task.kernel().features.io_uring {
3326        return error!(ENOSYS);
3327    }
3328    if !_sig.is_null() {
3329        if sigset_size != std::mem::size_of::<SigSet>() {
3330            return error!(EINVAL);
3331        }
3332    }
3333    let file = current_task.get_file(fd)?;
3334    let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?;
3335    // TODO(https://fxbug.dev/297431387): Use `_sig` to change the signal mask for `current_task`.
3336    io_uring.enter(locked, current_task, to_submit, min_complete, flags)
3337}
3338
3339pub fn sys_io_uring_register(
3340    locked: &mut Locked<Unlocked>,
3341    current_task: &CurrentTask,
3342    fd: FdNumber,
3343    opcode: u32,
3344    arg: UserAddress,
3345    nr_args: UserValue<u32>,
3346) -> Result<SyscallResult, Errno> {
3347    if !current_task.kernel().features.io_uring {
3348        return error!(ENOSYS);
3349    }
3350    let file = current_task.get_file(fd)?;
3351    let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?;
3352    match opcode {
3353        IORING_REGISTER_BUFFERS => {
3354            // TODO(https://fxbug.dev/297431387): Check nr_args for zero and return EINVAL here.
3355            let iovec = IOVecPtr::new(current_task, arg);
3356            let buffers = current_task.read_iovec(iovec, nr_args)?;
3357            io_uring.register_buffers(locked, buffers);
3358            return Ok(SUCCESS);
3359        }
3360        IORING_UNREGISTER_BUFFERS => {
3361            if !arg.is_null() {
3362                return error!(EINVAL);
3363            }
3364            io_uring.unregister_buffers(locked);
3365            return Ok(SUCCESS);
3366        }
3367        IORING_REGISTER_IOWQ_MAX_WORKERS => {
3368            track_stub!(
3369                TODO("https://fxbug.dev/297431387"),
3370                "io_uring_register IORING_REGISTER_IOWQ_MAX_WORKERS",
3371                opcode
3372            );
3373            // The current implementation only ever use 1 worker for read and 1 for write.
3374            return Ok(SUCCESS);
3375        }
3376        IORING_REGISTER_RING_FDS => {
3377            track_stub!(
3378                TODO("https://fxbug.dev/297431387"),
3379                "io_uring_register IORING_REGISTER_RING_FDS",
3380                opcode
3381            );
3382            // The current implementation doesn't use any thread local specific identifier for
3383            // performance. Instead, when registering a fd, just return the passed fd as the value
3384            // to use.
3385            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3386            if nr_args > 16 {
3387                return error!(EINVAL);
3388            }
3389            let updates_addr = UserRef::<uapi::io_uring_rsrc_update>::from(arg);
3390            let mut updates = current_task
3391                .read_objects_to_smallvec::<uapi::io_uring_rsrc_update, 1>(updates_addr, nr_args)?;
3392            let mut result = 0;
3393            for update in updates.iter_mut() {
3394                if update.offset == u32::MAX {
3395                    update.offset = update.data.try_into().map_err(|_| errno!(EINVAL))?;
3396                    result += 1;
3397                }
3398            }
3399            current_task.write_objects(updates_addr, &updates)?;
3400            return Ok(result.into());
3401        }
3402        IORING_UNREGISTER_RING_FDS => {
3403            track_stub!(
3404                TODO("https://fxbug.dev/297431387"),
3405                "io_uring_register IORING_UNREGISTER_RING_FDS",
3406                opcode
3407            );
3408            // Because registering a fd doesn't use any resource currently, unregistering is free.
3409            return Ok(SUCCESS);
3410        }
3411        IORING_REGISTER_PBUF_RING => {
3412            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3413            if nr_args != 1 {
3414                return error!(EINVAL);
3415            }
3416            let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?;
3417            io_uring.register_ring_buffers(locked, buffer_definition)?;
3418            return Ok(SUCCESS);
3419        }
3420
3421        IORING_UNREGISTER_PBUF_RING => {
3422            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3423            if nr_args != 1 {
3424                return error!(EINVAL);
3425            }
3426            let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?;
3427            io_uring.unregister_ring_buffers(locked, buffer_definition)?;
3428            return Ok(SUCCESS);
3429        }
3430
3431        IORING_REGISTER_PBUF_STATUS => {
3432            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3433            if nr_args != 1 {
3434                return error!(EINVAL);
3435            }
3436            let buffer_status_addr = UserRef::<uapi::io_uring_buf_status>::from(arg);
3437            let mut buffer_status: uapi::io_uring_buf_status =
3438                current_task.read_object(buffer_status_addr)?;
3439            io_uring.ring_buffer_status(locked, &mut buffer_status)?;
3440            current_task.write_object(buffer_status_addr, &buffer_status)?;
3441            return Ok(SUCCESS);
3442        }
3443
3444        _ => {
3445            track_stub!(
3446                TODO("https://fxbug.dev/297431387"),
3447                "io_uring_register unknown op",
3448                opcode
3449            );
3450            return error!(EINVAL);
3451        }
3452    }
3453}
3454
3455// Syscalls for arch32 usage
3456#[cfg(target_arch = "aarch64")]
3457mod arch32 {
3458    use crate::mm::MemoryAccessorExt;
3459    use crate::task::CurrentTask;
3460    use crate::vfs::syscalls::{
3461        LookupFlags, OpenFlags, lookup_at, sys_dup3, sys_faccessat, sys_fallocate, sys_lseek,
3462        sys_mkdirat, sys_openat, sys_readlinkat, sys_unlinkat,
3463    };
3464    use crate::vfs::{FdNumber, FsNode};
3465    use linux_uapi::off_t;
3466    use starnix_sync::{Locked, Unlocked};
3467    use starnix_syscalls::SyscallArg;
3468    use starnix_types::time::duration_from_poll_timeout;
3469    use starnix_uapi::errors::Errno;
3470    use starnix_uapi::file_mode::FileMode;
3471    use starnix_uapi::signals::SigSet;
3472    use starnix_uapi::user_address::{MultiArchUserRef, UserAddress, UserCString, UserRef};
3473    use starnix_uapi::vfs::EpollEvent;
3474    use starnix_uapi::{AT_REMOVEDIR, errno, error, uapi};
3475
3476    type StatFs64Ptr = MultiArchUserRef<uapi::statfs, uapi::arch32::statfs64>;
3477
3478    fn merge_low_and_high(low: u32, high: u32) -> off_t {
3479        ((high as off_t) << 32) | (low as off_t)
3480    }
3481
    /// arch32 `open` syscall.
    ///
    /// Calls `sys_openat` with `FdNumber::AT_FDCWD` as the directory descriptor and the
    /// remaining arguments unchanged.
    pub fn sys_arch32_open(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_path: UserCString,
        flags: u32,
        mode: FileMode,
    ) -> Result<FdNumber, Errno> {
        sys_openat(locked, current_task, FdNumber::AT_FDCWD, user_path, flags, mode)
    }
3491
    /// arch32 `access` syscall.
    ///
    /// Calls `sys_faccessat` with `FdNumber::AT_FDCWD` as the directory descriptor and the
    /// remaining arguments unchanged.
    pub fn sys_arch32_access(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_path: UserCString,
        mode: u32,
    ) -> Result<(), Errno> {
        sys_faccessat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
    }
3500    pub fn stat64(
3501        locked: &mut Locked<Unlocked>,
3502        current_task: &CurrentTask,
3503        node: &FsNode,
3504        arch32_stat_buf: UserRef<uapi::arch32::stat64>,
3505    ) -> Result<(), Errno> {
3506        let stat_buffer = node.stat(locked, current_task)?;
3507        let result: uapi::arch32::stat64 = stat_buffer.try_into().map_err(|_| errno!(EINVAL))?;
3508        // Now we copy to the arch32 version and write.
3509        current_task.write_object(arch32_stat_buf, &result)?;
3510        Ok(())
3511    }
3512
3513    pub fn sys_arch32_fstat64(
3514        locked: &mut Locked<Unlocked>,
3515        current_task: &CurrentTask,
3516        fd: FdNumber,
3517        arch32_stat_buf: UserRef<uapi::arch32::stat64>,
3518    ) -> Result<(), Errno> {
3519        let file = current_task.get_file_allowing_opath(fd)?;
3520        stat64(locked, current_task, file.node(), arch32_stat_buf)
3521    }
3522
3523    pub fn sys_arch32_fallocate(
3524        locked: &mut Locked<Unlocked>,
3525        current_task: &CurrentTask,
3526        fd: FdNumber,
3527        mode: u32,
3528        offset_low: u32,
3529        offset_high: u32,
3530        len_low: u32,
3531        len_high: u32,
3532    ) -> Result<(), Errno> {
3533        let offset = merge_low_and_high(offset_low, offset_high);
3534        let len = merge_low_and_high(len_low, len_high);
3535        sys_fallocate(locked, current_task, fd, mode, offset, len)
3536    }
3537
3538    pub fn sys_arch32_stat64(
3539        locked: &mut Locked<Unlocked>,
3540        current_task: &CurrentTask,
3541        user_path: UserCString,
3542        arch32_stat_buf: UserRef<uapi::arch32::stat64>,
3543    ) -> Result<(), Errno> {
3544        let name =
3545            lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
3546        stat64(locked, current_task, &name.entry.node, arch32_stat_buf)
3547    }
3548
3549    pub fn sys_arch32_readlink(
3550        locked: &mut Locked<Unlocked>,
3551        current_task: &CurrentTask,
3552        user_path: UserCString,
3553        buffer: UserAddress,
3554        buffer_size: usize,
3555    ) -> Result<usize, Errno> {
3556        sys_readlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, buffer, buffer_size)
3557    }
3558
3559    pub fn sys_arch32_mkdir(
3560        locked: &mut Locked<Unlocked>,
3561        current_task: &CurrentTask,
3562        user_path: UserCString,
3563        mode: FileMode,
3564    ) -> Result<(), Errno> {
3565        sys_mkdirat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
3566    }
3567
3568    pub fn sys_arch32_rmdir(
3569        locked: &mut Locked<Unlocked>,
3570        current_task: &CurrentTask,
3571        user_path: UserCString,
3572    ) -> Result<(), Errno> {
3573        sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, AT_REMOVEDIR)
3574    }
3575
3576    #[allow(non_snake_case)]
3577    pub fn sys_arch32__llseek(
3578        locked: &mut Locked<Unlocked>,
3579        current_task: &CurrentTask,
3580        fd: FdNumber,
3581        offset_high: u32,
3582        offset_low: u32,
3583        result: UserRef<off_t>,
3584        whence: u32,
3585    ) -> Result<(), Errno> {
3586        let offset = merge_low_and_high(offset_low, offset_high);
3587        let result_value = sys_lseek(locked, current_task, fd, offset, whence)?;
3588        current_task.write_object(result, &result_value).map(|_| ())
3589    }
3590
3591    pub fn sys_arch32_dup2(
3592        locked: &mut Locked<Unlocked>,
3593        current_task: &CurrentTask,
3594        oldfd: FdNumber,
3595        newfd: FdNumber,
3596    ) -> Result<FdNumber, Errno> {
3597        if oldfd == newfd {
3598            // O_PATH allowed for:
3599            //
3600            //  Duplicating the file descriptor (dup(2), fcntl(2)
3601            //  F_DUPFD, etc.).
3602            //
3603            // See https://man7.org/linux/man-pages/man2/open.2.html
3604            current_task.get_file_allowing_opath(oldfd)?;
3605            return Ok(newfd);
3606        }
3607        sys_dup3(locked, current_task, oldfd, newfd, 0)
3608    }
3609
3610    pub fn sys_arch32_unlink(
3611        locked: &mut Locked<Unlocked>,
3612        current_task: &CurrentTask,
3613        user_path: UserCString,
3614    ) -> Result<(), Errno> {
3615        sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, 0)
3616    }
3617
3618    pub fn sys_arch32_pread64(
3619        locked: &mut Locked<Unlocked>,
3620        current_task: &CurrentTask,
3621        fd: FdNumber,
3622        address: UserAddress,
3623        length: usize,
3624        _: SyscallArg,
3625        offset_low: u32,
3626        offset_high: u32,
3627    ) -> Result<usize, Errno> {
3628        super::sys_pread64(
3629            locked,
3630            current_task,
3631            fd,
3632            address,
3633            length,
3634            merge_low_and_high(offset_low, offset_high),
3635        )
3636    }
3637
3638    pub fn sys_arch32_pwrite64(
3639        locked: &mut Locked<Unlocked>,
3640        current_task: &CurrentTask,
3641        fd: FdNumber,
3642        address: UserAddress,
3643        length: usize,
3644        _: SyscallArg,
3645        offset_low: u32,
3646        offset_high: u32,
3647    ) -> Result<usize, Errno> {
3648        super::sys_pwrite64(
3649            locked,
3650            current_task,
3651            fd,
3652            address,
3653            length,
3654            merge_low_and_high(offset_low, offset_high),
3655        )
3656    }
3657
3658    pub fn sys_arch32_truncate64(
3659        locked: &mut Locked<Unlocked>,
3660        current_task: &CurrentTask,
3661        user_path: UserCString,
3662        _unused: SyscallArg,
3663        length_low: u32,
3664        length_high: u32,
3665    ) -> Result<(), Errno> {
3666        super::sys_truncate(
3667            locked,
3668            current_task,
3669            user_path,
3670            merge_low_and_high(length_low, length_high),
3671        )
3672    }
3673
3674    pub fn sys_arch32_ftruncate64(
3675        locked: &mut Locked<Unlocked>,
3676        current_task: &CurrentTask,
3677        fd: FdNumber,
3678        _: SyscallArg,
3679        length_low: u32,
3680        length_high: u32,
3681    ) -> Result<(), Errno> {
3682        super::sys_ftruncate(locked, current_task, fd, merge_low_and_high(length_low, length_high))
3683    }
3684
3685    pub fn sys_arch32_chmod(
3686        locked: &mut Locked<Unlocked>,
3687        current_task: &CurrentTask,
3688        user_path: UserCString,
3689        mode: FileMode,
3690    ) -> Result<(), Errno> {
3691        super::sys_fchmodat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
3692    }
3693
3694    pub fn sys_arch32_chown32(
3695        locked: &mut Locked<Unlocked>,
3696        current_task: &CurrentTask,
3697        user_path: UserCString,
3698        owner: uapi::arch32::__kernel_uid32_t,
3699        group: uapi::arch32::__kernel_uid32_t,
3700    ) -> Result<(), Errno> {
3701        super::sys_fchownat(locked, current_task, FdNumber::AT_FDCWD, user_path, owner, group, 0)
3702    }
3703
3704    pub fn sys_arch32_poll(
3705        locked: &mut Locked<Unlocked>,
3706        current_task: &mut CurrentTask,
3707        user_fds: UserRef<uapi::pollfd>,
3708        num_fds: i32,
3709        timeout: i32,
3710    ) -> Result<usize, Errno> {
3711        let deadline = zx::MonotonicInstant::after(duration_from_poll_timeout(timeout)?);
3712        super::poll(locked, current_task, user_fds, num_fds, None, deadline)
3713    }
3714
3715    pub fn sys_arch32_epoll_create(
3716        locked: &mut Locked<Unlocked>,
3717        current_task: &CurrentTask,
3718        size: i32,
3719    ) -> Result<FdNumber, Errno> {
3720        if size < 1 {
3721            // The man page for epoll_create says the size was used in a previous implementation as
3722            // a hint but no longer does anything. But it's still required to be >= 1 to ensure
3723            // programs are backwards-compatible.
3724            return error!(EINVAL);
3725        }
3726        super::sys_epoll_create1(locked, current_task, 0)
3727    }
3728
3729    pub fn sys_arch32_epoll_wait(
3730        locked: &mut Locked<Unlocked>,
3731        current_task: &mut CurrentTask,
3732        epfd: FdNumber,
3733        events: UserRef<EpollEvent>,
3734        max_events: i32,
3735        timeout: i32,
3736    ) -> Result<usize, Errno> {
3737        super::sys_epoll_pwait(
3738            locked,
3739            current_task,
3740            epfd,
3741            events,
3742            max_events,
3743            timeout,
3744            UserRef::<SigSet>::default(),
3745        )
3746    }
3747
3748    pub fn sys_arch32_rename(
3749        locked: &mut Locked<Unlocked>,
3750        current_task: &CurrentTask,
3751        old_user_path: UserCString,
3752        new_user_path: UserCString,
3753    ) -> Result<(), Errno> {
3754        super::sys_renameat2(
3755            locked,
3756            current_task,
3757            FdNumber::AT_FDCWD,
3758            old_user_path,
3759            FdNumber::AT_FDCWD,
3760            new_user_path,
3761            0,
3762        )
3763    }
3764
3765    pub fn sys_arch32_creat(
3766        locked: &mut Locked<Unlocked>,
3767        current_task: &CurrentTask,
3768        user_path: UserCString,
3769        mode: FileMode,
3770    ) -> Result<FdNumber, Errno> {
3771        super::sys_openat(
3772            locked,
3773            current_task,
3774            FdNumber::AT_FDCWD,
3775            user_path,
3776            (OpenFlags::WRONLY | OpenFlags::CREAT | OpenFlags::TRUNC).bits(),
3777            mode,
3778        )
3779    }
3780
3781    pub fn sys_arch32_symlink(
3782        locked: &mut Locked<Unlocked>,
3783        current_task: &CurrentTask,
3784        user_target: UserCString,
3785        user_path: UserCString,
3786    ) -> Result<(), Errno> {
3787        super::sys_symlinkat(locked, current_task, user_target, FdNumber::AT_FDCWD, user_path)
3788    }
3789
3790    pub fn sys_arch32_eventfd(
3791        locked: &mut Locked<Unlocked>,
3792        current_task: &CurrentTask,
3793        value: u32,
3794    ) -> Result<FdNumber, Errno> {
3795        super::sys_eventfd2(locked, current_task, value, 0)
3796    }
3797
3798    pub fn sys_arch32_inotify_init(
3799        locked: &mut Locked<Unlocked>,
3800        current_task: &CurrentTask,
3801    ) -> Result<FdNumber, Errno> {
3802        super::sys_inotify_init1(locked, current_task, 0)
3803    }
3804
3805    pub fn sys_arch32_link(
3806        locked: &mut Locked<Unlocked>,
3807        current_task: &CurrentTask,
3808        old_user_path: UserCString,
3809        new_user_path: UserCString,
3810    ) -> Result<(), Errno> {
3811        super::sys_linkat(
3812            locked,
3813            current_task,
3814            FdNumber::AT_FDCWD,
3815            old_user_path,
3816            FdNumber::AT_FDCWD,
3817            new_user_path,
3818            0,
3819        )
3820    }
3821
3822    pub fn sys_arch32_fstatfs64(
3823        locked: &mut Locked<Unlocked>,
3824        current_task: &CurrentTask,
3825        fd: FdNumber,
3826        user_buf_len: u32,
3827        user_buf: StatFs64Ptr,
3828    ) -> Result<(), Errno> {
3829        if (user_buf_len as usize) < std::mem::size_of::<uapi::arch32::statfs64>() {
3830            return error!(EINVAL);
3831        }
3832        super::fstatfs(locked, current_task, fd, user_buf)
3833    }
3834
3835    pub fn sys_arch32_statfs64(
3836        locked: &mut Locked<Unlocked>,
3837        current_task: &CurrentTask,
3838        user_path: UserCString,
3839        user_buf_len: u32,
3840        user_buf: StatFs64Ptr,
3841    ) -> Result<(), Errno> {
3842        if (user_buf_len as usize) < std::mem::size_of::<uapi::arch32::statfs64>() {
3843            return error!(EINVAL);
3844        }
3845        super::statfs(locked, current_task, user_path, user_buf)
3846    }
3847
3848    pub fn sys_arch32_arm_fadvise64_64(
3849        locked: &mut Locked<Unlocked>,
3850        current_task: &CurrentTask,
3851        fd: FdNumber,
3852        advice: u32,
3853        offset_low: u32,
3854        offset_high: u32,
3855        len_low: u32,
3856        len_high: u32,
3857    ) -> Result<(), Errno> {
3858        let offset = merge_low_and_high(offset_low, offset_high);
3859        let len = merge_low_and_high(len_low, len_high);
3860        super::sys_fadvise64(locked, current_task, fd, offset, len, advice)
3861    }
3862
3863    pub fn sys_arch32_sendfile64(
3864        locked: &mut Locked<Unlocked>,
3865        current_task: &CurrentTask,
3866        out_fd: FdNumber,
3867        in_fd: FdNumber,
3868        user_offset: UserRef<uapi::off_t>,
3869        count: i32,
3870    ) -> Result<usize, Errno> {
3871        super::sys_sendfile(locked, current_task, out_fd, in_fd, user_offset.into(), count)
3872    }
3873
3874    pub use super::{
3875        sys_chdir as sys_arch32_chdir, sys_chroot as sys_arch32_chroot,
3876        sys_copy_file_range as sys_arch32_copy_file_range, sys_dup3 as sys_arch32_dup3,
3877        sys_epoll_create1 as sys_arch32_epoll_create1, sys_epoll_ctl as sys_arch32_epoll_ctl,
3878        sys_epoll_pwait as sys_arch32_epoll_pwait, sys_epoll_pwait2 as sys_arch32_epoll_pwait2,
3879        sys_eventfd2 as sys_arch32_eventfd2, sys_fchmod as sys_arch32_fchmod,
3880        sys_fchmodat as sys_arch32_fchmodat, sys_fchown as sys_arch32_fchown32,
3881        sys_fchown as sys_arch32_fchown, sys_fchownat as sys_arch32_fchownat,
3882        sys_fdatasync as sys_arch32_fdatasync, sys_flock as sys_arch32_flock,
3883        sys_fsetxattr as sys_arch32_fsetxattr, sys_fstatat64 as sys_arch32_fstatat64,
3884        sys_fstatfs as sys_arch32_fstatfs, sys_fsync as sys_arch32_fsync,
3885        sys_ftruncate as sys_arch32_ftruncate,
3886        sys_inotify_add_watch as sys_arch32_inotify_add_watch,
3887        sys_inotify_init1 as sys_arch32_inotify_init1,
3888        sys_inotify_rm_watch as sys_arch32_inotify_rm_watch, sys_io_cancel as sys_arch32_io_cancel,
3889        sys_io_destroy as sys_arch32_io_destroy, sys_io_getevents as sys_arch32_io_getevents,
3890        sys_io_setup as sys_arch32_io_setup, sys_io_submit as sys_arch32_io_submit,
3891        sys_io_uring_enter as sys_arch32_io_uring_enter,
3892        sys_io_uring_register as sys_arch32_io_uring_register,
3893        sys_io_uring_setup as sys_arch32_io_uring_setup, sys_lgetxattr as sys_arch32_lgetxattr,
3894        sys_linkat as sys_arch32_linkat, sys_listxattr as sys_arch32_listxattr,
3895        sys_llistxattr as sys_arch32_llistxattr, sys_lsetxattr as sys_arch32_lsetxattr,
3896        sys_mkdirat as sys_arch32_mkdirat, sys_mknodat as sys_arch32_mknodat,
3897        sys_pidfd_getfd as sys_arch32_pidfd_getfd, sys_pidfd_open as sys_arch32_pidfd_open,
3898        sys_ppoll as sys_arch32_ppoll, sys_preadv as sys_arch32_preadv,
3899        sys_pselect6 as sys_arch32_pselect6, sys_readv as sys_arch32_readv,
3900        sys_removexattr as sys_arch32_removexattr, sys_renameat2 as sys_arch32_renameat2,
3901        sys_select as sys_arch32__newselect, sys_sendfile as sys_arch32_sendfile,
3902        sys_setxattr as sys_arch32_setxattr, sys_splice as sys_arch32_splice,
3903        sys_statfs as sys_arch32_statfs, sys_statx as sys_arch32_statx,
3904        sys_symlinkat as sys_arch32_symlinkat, sys_sync as sys_arch32_sync,
3905        sys_syncfs as sys_arch32_syncfs, sys_tee as sys_arch32_tee,
3906        sys_timerfd_create as sys_arch32_timerfd_create,
3907        sys_timerfd_gettime as sys_arch32_timerfd_gettime,
3908        sys_timerfd_settime as sys_arch32_timerfd_settime, sys_truncate as sys_arch32_truncate,
3909        sys_umask as sys_arch32_umask, sys_utimensat as sys_arch32_utimensat,
3910        sys_vmsplice as sys_arch32_vmsplice,
3911    };
3912}
3913
3914#[cfg(target_arch = "aarch64")]
3915pub use arch32::*;
3916
3917#[cfg(test)]
3918mod tests {
3919    use super::*;
3920    use crate::task::KernelFeatures;
3921    use crate::testing::*;
3922    use starnix_types::vfs::default_statfs;
3923    use starnix_uapi::{O_RDONLY, SEEK_CUR, SEEK_END, SEEK_SET};
3924    use zerocopy::IntoBytes;
3925
3926    #[::fuchsia::test]
3927    async fn test_sys_lseek() -> Result<(), Errno> {
3928        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
3929            let fd = FdNumber::from_raw(10);
3930            let file_handle =
3931                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3932            let file_size = file_handle.node().stat(locked, current_task).unwrap().st_size;
3933            current_task.live().files.insert(locked, current_task, fd, file_handle).unwrap();
3934
3935            assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_CUR)?, 0);
3936            assert_eq!(sys_lseek(locked, current_task, fd, 1, SEEK_CUR)?, 1);
3937            assert_eq!(sys_lseek(locked, current_task, fd, 3, SEEK_SET)?, 3);
3938            assert_eq!(sys_lseek(locked, current_task, fd, -3, SEEK_CUR)?, 0);
3939            assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_END)?, file_size);
3940            assert_eq!(sys_lseek(locked, current_task, fd, -5, SEEK_SET), error!(EINVAL));
3941
3942            // Make sure that the failed call above did not change the offset.
3943            assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_CUR)?, file_size);
3944
3945            // Prepare for an overflow.
3946            assert_eq!(sys_lseek(locked, current_task, fd, 3, SEEK_SET)?, 3);
3947
3948            // Check for overflow.
3949            assert_eq!(sys_lseek(locked, current_task, fd, i64::MAX, SEEK_CUR), error!(EINVAL));
3950
3951            Ok(())
3952        })
3953        .await
3954    }
3955
3956    #[::fuchsia::test]
3957    async fn test_sys_dup() -> Result<(), Errno> {
3958        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
3959            let file_handle =
3960                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3961            let oldfd = current_task.add_file(locked, file_handle, FdFlags::empty())?;
3962            let newfd = sys_dup(locked, current_task, oldfd)?;
3963
3964            assert_ne!(oldfd, newfd);
3965            let files = &current_task.live().files;
3966            assert!(Arc::ptr_eq(&files.get(oldfd).unwrap(), &files.get(newfd).unwrap()));
3967
3968            assert_eq!(sys_dup(locked, current_task, FdNumber::from_raw(3)), error!(EBADF));
3969
3970            Ok(())
3971        })
3972        .await
3973    }
3974
3975    #[::fuchsia::test]
3976    async fn test_sys_dup3() -> Result<(), Errno> {
3977        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
3978            let file_handle =
3979                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3980            let oldfd = current_task.add_file(locked, file_handle, FdFlags::empty())?;
3981            let newfd = FdNumber::from_raw(2);
3982            sys_dup3(locked, current_task, oldfd, newfd, O_CLOEXEC)?;
3983
3984            assert_ne!(oldfd, newfd);
3985            let files = &current_task.live().files;
3986            assert!(Arc::ptr_eq(&files.get(oldfd).unwrap(), &files.get(newfd).unwrap()));
3987            assert_eq!(files.get_fd_flags_allowing_opath(oldfd).unwrap(), FdFlags::empty());
3988            assert_eq!(files.get_fd_flags_allowing_opath(newfd).unwrap(), FdFlags::CLOEXEC);
3989
3990            assert_eq!(sys_dup3(locked, current_task, oldfd, oldfd, O_CLOEXEC), error!(EINVAL));
3991
3992            // Pass invalid flags.
3993            let invalid_flags = 1234;
3994            assert_eq!(sys_dup3(locked, current_task, oldfd, newfd, invalid_flags), error!(EINVAL));
3995
3996            // Makes sure that dup closes the old file handle before the fd points
3997            // to the new file handle.
3998            let second_file_handle =
3999                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
4000            let different_file_fd =
4001                current_task.add_file(locked, second_file_handle, FdFlags::empty())?;
4002            assert!(!Arc::ptr_eq(
4003                &files.get(oldfd).unwrap(),
4004                &files.get(different_file_fd).unwrap()
4005            ));
4006            sys_dup3(locked, current_task, oldfd, different_file_fd, O_CLOEXEC)?;
4007            assert!(Arc::ptr_eq(
4008                &files.get(oldfd).unwrap(),
4009                &files.get(different_file_fd).unwrap()
4010            ));
4011
4012            Ok(())
4013        })
4014        .await
4015    }
4016
4017    #[::fuchsia::test]
4018    async fn test_sys_open_cloexec() -> Result<(), Errno> {
4019        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
4020            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4021            let path = b"data/testfile.txt\0";
4022            current_task.write_memory(path_addr, path)?;
4023            let fd = sys_openat(
4024                locked,
4025                &current_task,
4026                FdNumber::AT_FDCWD,
4027                UserCString::new(current_task, path_addr),
4028                O_RDONLY | O_CLOEXEC,
4029                FileMode::default(),
4030            )?;
4031            assert!(
4032                current_task
4033                    .live()
4034                    .files
4035                    .get_fd_flags_allowing_opath(fd)?
4036                    .contains(FdFlags::CLOEXEC)
4037            );
4038            Ok(())
4039        })
4040        .await
4041    }
4042
4043    #[::fuchsia::test]
4044    async fn test_sys_epoll() -> Result<(), Errno> {
4045        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
4046            let epoll_fd =
4047                sys_epoll_create1(locked, current_task, 0).expect("sys_epoll_create1 failed");
4048            sys_close(locked, current_task, epoll_fd).expect("sys_close failed");
4049
4050            Ok(())
4051        })
4052        .await
4053    }
4054
4055    #[::fuchsia::test]
4056    async fn test_fstat_tmp_file() {
4057        spawn_kernel_and_run(async |locked, current_task| {
4058            // Create the file that will be used to stat.
4059            let file_path = "testfile.txt";
4060            let _file_handle = current_task
4061                .open_file_at(
4062                    locked,
4063                    FdNumber::AT_FDCWD,
4064                    file_path.into(),
4065                    OpenFlags::RDWR | OpenFlags::CREAT,
4066                    FileMode::ALLOW_ALL,
4067                    ResolveFlags::empty(),
4068                    AccessCheck::default(),
4069                )
4070                .unwrap();
4071
4072            // Write the path to user memory.
4073            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4074            current_task
4075                .write_memory(path_addr, file_path.as_bytes())
4076                .expect("failed to clear struct");
4077
4078            let memory_len = (path_addr + file_path.len()).expect("OOB memory allocation!");
4079            let user_stat = UserRef::new(memory_len);
4080            current_task
4081                .write_object(user_stat, &default_statfs(0))
4082                .expect("failed to clear struct");
4083
4084            let user_path = UserCString::new(current_task, path_addr);
4085
4086            assert_eq!(sys_statfs(locked, current_task, user_path, user_stat.into()), Ok(()));
4087
4088            let returned_stat = current_task.read_object(user_stat).expect("failed to read struct");
4089            let expected_stat = starnix_uapi::statfs {
4090                f_blocks: 0x100000000,
4091                f_bavail: 0x100000000,
4092                f_bfree: 0x100000000,
4093                f_flags: starnix_uapi::MS_RELATIME as i64,
4094                ..default_statfs(starnix_uapi::TMPFS_MAGIC)
4095            };
4096            assert!(
4097                returned_stat.as_bytes() == expected_stat.as_bytes(),
4098                "Expected {:?}, got {:?}",
4099                expected_stat,
4100                returned_stat
4101            );
4102        })
4103        .await;
4104    }
4105
4106    #[::fuchsia::test]
4107    async fn test_unlinkat_dir() {
4108        spawn_kernel_and_run(async |locked, current_task| {
4109            // Create the dir that we will attempt to unlink later.
4110            let no_slash_path = b"testdir";
4111            let no_slash_path_addr =
4112                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
4113            current_task
4114                .write_memory(no_slash_path_addr, no_slash_path)
4115                .expect("failed to write path");
4116            let no_slash_user_path = UserCString::new(current_task, no_slash_path_addr);
4117            sys_mkdirat(
4118                locked,
4119                &current_task,
4120                FdNumber::AT_FDCWD,
4121                no_slash_user_path,
4122                FileMode::ALLOW_ALL.with_type(FileMode::IFDIR),
4123            )
4124            .unwrap();
4125
4126            let slash_path = b"testdir/";
4127            let slash_path_addr =
4128                map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4129            current_task.write_memory(slash_path_addr, slash_path).expect("failed to write path");
4130            let slash_user_path = UserCString::new(current_task, slash_path_addr);
4131
4132            // Try to remove a directory without specifying AT_REMOVEDIR.
4133            // This should fail with EISDIR, irrespective of the terminating slash.
4134            let error = sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, slash_user_path, 0)
4135                .unwrap_err();
4136            assert_eq!(error, errno!(EISDIR));
4137            let error =
4138                sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, no_slash_user_path, 0)
4139                    .unwrap_err();
4140            assert_eq!(error, errno!(EISDIR));
4141
4142            // Success with AT_REMOVEDIR.
4143            sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, slash_user_path, AT_REMOVEDIR)
4144                .unwrap();
4145        })
4146        .await;
4147    }
4148
4149    #[::fuchsia::test]
4150    async fn test_rename_noreplace() {
4151        spawn_kernel_and_run(async |locked, current_task| {
4152            // Create the file that will be renamed.
4153            let old_user_path = "testfile.txt";
4154            let _old_file_handle = current_task
4155                .open_file_at(
4156                    locked,
4157                    FdNumber::AT_FDCWD,
4158                    old_user_path.into(),
4159                    OpenFlags::RDWR | OpenFlags::CREAT,
4160                    FileMode::ALLOW_ALL,
4161                    ResolveFlags::empty(),
4162                    AccessCheck::default(),
4163                )
4164                .unwrap();
4165
4166            // Write the path to user memory.
4167            let old_path_addr =
4168                map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4169            current_task
4170                .write_memory(old_path_addr, old_user_path.as_bytes())
4171                .expect("failed to clear struct");
4172
4173            // Create a second file that we will attempt to rename to.
4174            let new_user_path = "testfile2.txt";
4175            let _new_file_handle = current_task
4176                .open_file_at(
4177                    locked,
4178                    FdNumber::AT_FDCWD,
4179                    new_user_path.into(),
4180                    OpenFlags::RDWR | OpenFlags::CREAT,
4181                    FileMode::ALLOW_ALL,
4182                    ResolveFlags::empty(),
4183                    AccessCheck::default(),
4184                )
4185                .unwrap();
4186
4187            // Write the path to user memory.
4188            let new_path_addr =
4189                map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4190            current_task
4191                .write_memory(new_path_addr, new_user_path.as_bytes())
4192                .expect("failed to clear struct");
4193
4194            // Try to rename first file to second file's name with RENAME_NOREPLACE flag.
4195            // This should fail with EEXIST.
4196            let error = sys_renameat2(
4197                locked,
4198                &current_task,
4199                FdNumber::AT_FDCWD,
4200                UserCString::new(current_task, old_path_addr),
4201                FdNumber::AT_FDCWD,
4202                UserCString::new(current_task, new_path_addr),
4203                RenameFlags::NOREPLACE.bits(),
4204            )
4205            .unwrap_err();
4206            assert_eq!(error, errno!(EEXIST));
4207        })
4208        .await;
4209    }
4210
4211    #[::fuchsia::test]
4212    async fn test_sys_sync() -> Result<(), Errno> {
4213        spawn_kernel_and_run(async |locked, current_task| {
4214            sys_sync(locked, current_task)?;
4215            Ok(())
4216        })
4217        .await
4218    }
4219
4220    #[::fuchsia::test]
4221    async fn test_sys_syncfs() -> Result<(), Errno> {
4222        spawn_kernel_and_run(async |locked, current_task| {
4223            let file_handle = current_task.open_file(locked, ".".into(), OpenFlags::RDONLY)?;
4224            let fd = current_task.add_file(locked, file_handle, FdFlags::empty())?;
4225            sys_syncfs(locked, current_task, fd)?;
4226            Ok(())
4227        })
4228        .await
4229    }
4230
4231    // TODO(https://fxbug.dev/485370648) remove when unnecessary
4232    #[::fuchsia::test]
4233    async fn test_fake_ion_stat() {
4234        // Test with fake_ion disabled (default).
4235        spawn_kernel_and_run(async |locked, current_task| {
4236            let ion_path = b"/dev/ion\0";
4237            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4238            current_task.write_memory(path_addr, ion_path).expect("failed to write path");
4239            let user_path = UserCString::new(current_task, path_addr);
4240
4241            let stat_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4242            let stat_ptr = StatPtr::new(current_task, stat_addr);
4243
4244            let error =
4245                sys_fstatat64(locked, current_task, FdNumber::AT_FDCWD, user_path, stat_ptr, 0)
4246                    .unwrap_err();
4247            assert_eq!(error, errno!(ENOENT));
4248        })
4249        .await;
4250
4251        // Test with fake_ion enabled.
4252        let mut features = KernelFeatures::default();
4253        features.fake_ion = true;
4254        spawn_kernel_with_features_and_run(
4255            async |locked, current_task| {
4256                let ion_path = b"/dev/ion\0";
4257                let path_addr =
4258                    map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4259                current_task.write_memory(path_addr, ion_path).expect("failed to write path");
4260                let user_path = UserCString::new(current_task, path_addr);
4261
4262                let stat_addr =
4263                    map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4264                let stat_ptr = StatPtr::new(current_task, stat_addr);
4265
4266                sys_fstatat64(locked, current_task, FdNumber::AT_FDCWD, user_path, stat_ptr, 0)
4267                    .expect("sys_fstatat64 should succeed with fake_ion");
4268
4269                let stat_result: uapi::stat =
4270                    current_task.read_object(stat_addr.into()).expect("failed to read stat");
4271                assert_eq!(stat_result.st_mode, uapi::S_IFCHR | 0o666);
4272                assert_eq!(stat_result.st_rdev, DeviceId::new(10, 59).bits());
4273
4274                // Test statx as well.
4275                let statx_addr =
4276                    map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4277                let statx_ptr = UserRef::new(statx_addr);
4278                sys_statx(
4279                    locked,
4280                    current_task,
4281                    FdNumber::AT_FDCWD,
4282                    user_path,
4283                    0,
4284                    uapi::STATX_BASIC_STATS,
4285                    statx_ptr,
4286                )
4287                .expect("sys_statx should succeed with fake_ion");
4288
4289                let statx_result: statx =
4290                    current_task.read_object(statx_ptr).expect("failed to read statx");
4291                assert_eq!(statx_result.stx_mode, (uapi::S_IFCHR | 0o666) as u16);
4292                assert_eq!(statx_result.stx_rdev_major, 10);
4293                assert_eq!(statx_result.stx_rdev_minor, 59);
4294            },
4295            features,
4296        )
4297        .await;
4298    }
4299}