Skip to main content

starnix_core/vfs/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::{IOVecPtr, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE};
6use crate::power::WakeupSourceOrigin;
7use crate::security;
8use crate::syscalls::time::{ITimerSpecPtr, TimeSpecPtr, TimeValPtr};
9use crate::task::{CurrentTask, EventHandler, ProcessEntryRef, ReadyItem, ReadyItemKey, Waiter};
10use crate::time::{Timeline, TimerWakeup};
11use crate::vfs::aio::AioContext;
12use crate::vfs::buffers::{UserBuffersInputBuffer, UserBuffersOutputBuffer};
13use crate::vfs::eventfd::{EventFdType, new_eventfd};
14use crate::vfs::fs_args::MountParams;
15use crate::vfs::inotify::InotifyFileObject;
16use crate::vfs::io_uring::{IORING_MAX_ENTRIES, IoUringFileObject};
17use crate::vfs::pidfd::new_pidfd;
18use crate::vfs::pipe::{PipeFileObject, new_pipe};
19use crate::vfs::timer::TimerFile;
20use crate::vfs::{
21    CheckAccessReason, DirentSink64, EpollFileObject, FallocMode, FdFlags, FdNumber,
22    FileAsyncOwner, FileHandle, FileSystemOptions, FlockOperation, FsStr, FsString, LookupContext,
23    NamespaceNode, PathWithReachability, RecordLockCommand, RenameFlags, SeekTarget, StatxFlags,
24    SymlinkMode, SymlinkTarget, TargetFdNumber, TimeUpdateType, UnlinkKind, ValueOrSize, WdNumber,
25    WhatToMount, XattrOp, checked_add_offset_and_length, new_memfd, new_zombie_pidfd, splice,
26    wakeup_source_name_for_epoll,
27};
28use starnix_logging::{log_trace, track_stub};
29use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Mutex, Unlocked};
30use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
31use starnix_types::ownership::TempRef;
32use starnix_types::time::{
33    duration_from_poll_timeout, duration_from_timespec, time_from_timespec, timespec_from_duration,
34};
35use starnix_types::user_buffer::UserBuffer;
36use starnix_uapi::auth::{
37    CAP_BLOCK_SUSPEND, CAP_DAC_READ_SEARCH, CAP_LEASE, CAP_SYS_ADMIN, CAP_WAKE_ALARM, Credentials,
38    PTRACE_MODE_ATTACH_REALCREDS,
39};
40use starnix_uapi::device_type::DeviceType;
41use starnix_uapi::errors::{
42    EFAULT, EINTR, ENAMETOOLONG, ENOTSUP, ETIMEDOUT, Errno, ErrnoResultExt,
43};
44use starnix_uapi::file_lease::FileLeaseType;
45use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
46use starnix_uapi::inotify_mask::InotifyMask;
47use starnix_uapi::mount_flags::MountFlags;
48use starnix_uapi::open_flags::OpenFlags;
49use starnix_uapi::personality::PersonalityFlags;
50use starnix_uapi::resource_limits::Resource;
51use starnix_uapi::seal_flags::SealFlags;
52use starnix_uapi::signals::SigSet;
53use starnix_uapi::unmount_flags::UnmountFlags;
54use starnix_uapi::user_address::{MultiArchUserRef, UserAddress, UserCString, UserRef};
55use starnix_uapi::user_value::UserValue;
56use starnix_uapi::vfs::{EpollEvent, FdEvents, ResolveFlags};
57use starnix_uapi::{
58    __kernel_fd_set, AT_EACCESS, AT_EMPTY_PATH, AT_NO_AUTOMOUNT, AT_REMOVEDIR, AT_SYMLINK_FOLLOW,
59    AT_SYMLINK_NOFOLLOW, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM, CLOCK_MONOTONIC, CLOCK_REALTIME,
60    CLOCK_REALTIME_ALARM, CLOSE_RANGE_CLOEXEC, CLOSE_RANGE_UNSHARE, EFD_CLOEXEC, EFD_NONBLOCK,
61    EFD_SEMAPHORE, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD, F_ADD_SEALS,
62    F_DUPFD, F_DUPFD_CLOEXEC, F_GET_SEALS, F_GETFD, F_GETFL, F_GETLEASE, F_GETLK, F_GETLK64,
63    F_GETOWN, F_GETOWN_EX, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW, F_OWNER_PGRP, F_OWNER_PID,
64    F_OWNER_TID, F_SETFD, F_SETFL, F_SETLEASE, F_SETLK, F_SETLK64, F_SETLKW, F_SETLKW64, F_SETOWN,
65    F_SETOWN_EX, F_SETSIG, FIOCLEX, FIONCLEX, IN_CLOEXEC, IN_NONBLOCK, MFD_ALLOW_SEALING,
66    MFD_CLOEXEC, MFD_EXEC, MFD_HUGE_MASK, MFD_HUGE_SHIFT, MFD_HUGETLB, MFD_NOEXEC_SEAL, NAME_MAX,
67    O_CLOEXEC, O_CREAT, O_NOFOLLOW, O_PATH, O_TMPFILE, PIDFD_NONBLOCK, POLLERR, POLLHUP, POLLIN,
68    POLLOUT, POLLPRI, POLLRDBAND, POLLRDNORM, POLLWRBAND, POLLWRNORM, POSIX_FADV_DONTNEED,
69    POSIX_FADV_NOREUSE, POSIX_FADV_NORMAL, POSIX_FADV_RANDOM, POSIX_FADV_SEQUENTIAL,
70    POSIX_FADV_WILLNEED, RWF_SUPPORTED, TFD_CLOEXEC, TFD_NONBLOCK, TFD_TIMER_ABSTIME,
71    TFD_TIMER_CANCEL_ON_SET, XATTR_CREATE, XATTR_NAME_MAX, XATTR_REPLACE, aio_context_t, errno,
72    error, f_owner_ex, io_event, io_uring_params,
73    io_uring_register_op_IORING_REGISTER_BUFFERS as IORING_REGISTER_BUFFERS,
74    io_uring_register_op_IORING_REGISTER_IOWQ_MAX_WORKERS as IORING_REGISTER_IOWQ_MAX_WORKERS,
75    io_uring_register_op_IORING_REGISTER_PBUF_RING as IORING_REGISTER_PBUF_RING,
76    io_uring_register_op_IORING_REGISTER_PBUF_STATUS as IORING_REGISTER_PBUF_STATUS,
77    io_uring_register_op_IORING_REGISTER_RING_FDS as IORING_REGISTER_RING_FDS,
78    io_uring_register_op_IORING_UNREGISTER_BUFFERS as IORING_UNREGISTER_BUFFERS,
79    io_uring_register_op_IORING_UNREGISTER_PBUF_RING as IORING_UNREGISTER_PBUF_RING,
80    io_uring_register_op_IORING_UNREGISTER_RING_FDS as IORING_UNREGISTER_RING_FDS, iocb, off_t,
81    pid_t, pollfd, pselect6_sigmask, sigset_t, statx, timespec, uapi, uid_t,
82};
83use std::cmp::Ordering;
84use std::collections::VecDeque;
85use std::marker::PhantomData;
86use std::sync::{Arc, atomic};
87use std::usize;
88use zerocopy::{Immutable, IntoBytes};
89
// Passes a set of UAPI structs, each with an explicit field list, to
// `uapi::check_arch_independent_layout!`.
// NOTE(review): going by the macro name, this asserts that each listed struct has the same
// layout on every supported architecture — confirm against the macro definition.
uapi::check_arch_independent_layout! {
    pollfd {
        fd,
        events,
        revents,
    }

    io_event {
        data,
        obj,
        res,
        res2,
    }

    iocb {
        aio_data,
        aio_key,
        aio_rw_flags,
        aio_lio_opcode,
        aio_reqprio,
        aio_fildes,
        aio_buf,
        aio_nbytes,
        aio_offset,
        aio_reserved2,
        aio_flags,
        aio_resfd,
    }

    statx_timestamp {
        tv_sec,
        tv_nsec,
    }

    statx {
        stx_mask,
        stx_blksize,
        stx_attributes,
        stx_nlink,
        stx_uid,
        stx_gid,
        stx_mode,
        stx_ino,
        stx_size,
        stx_blocks,
        stx_attributes_mask,
        stx_atime,
        stx_btime,
        stx_ctime,
        stx_mtime,
        stx_rdev_major,
        stx_rdev_minor,
        stx_dev_major,
        stx_dev_minor,
        stx_mnt_id,
        stx_dio_mem_align,
        stx_dio_offset_align,
        stx_subvol,
        stx_atomic_write_unit_min,
        stx_atomic_write_unit_max,
        stx_atomic_write_segments_max,
    }

    io_sqring_offsets {
        head,
        tail,
        ring_mask,
        ring_entries,
        flags,
        dropped,
        array,
        resv1,
        user_addr,
    }

    io_cqring_offsets {
        head,
        tail,
        ring_mask,
        ring_entries,
        overflow,
        cqes,
        flags,
        resv1,
        user_addr,
    }

    io_uring_params {
        sq_entries,
        cq_entries,
        flags,
        sq_thread_cpu,
        sq_thread_idle,
        features,
        wq_fd,
        resv,
        sq_off,
        cq_off,
    }

    io_uring_rsrc_update {
        offset,
        resv,
        data,
    }

    io_uring_buf_reg {
        ring_addr,
        ring_entries,
        bgid,
        flags,
        resv,
    }
}
204
// Constants from bionic/libc/include/sys/stat.h
// NOTE(review): presumably the special `tv_nsec` values understood by utimensat(2); their users
// are outside this part of the file — confirm.
const UTIME_NOW: i64 = 0x3fffffff;
const UTIME_OMIT: i64 = 0x3ffffffe;

/// User pointer to an `off_t`; the second type parameter names the `arch32` variant of the type.
pub type OffsetPtr = MultiArchUserRef<uapi::off_t, uapi::arch32::off_t>;
/// User pointer to an `iocb`; the same struct type is used for both type parameters.
pub type IocbPtr = MultiArchUserRef<iocb, iocb>;
/// User pointer to an `IocbPtr`.
pub type IocbPtrPtr = MultiArchUserRef<IocbPtr, IocbPtr>;
212
213pub fn sys_read(
214    locked: &mut Locked<Unlocked>,
215    current_task: &CurrentTask,
216    fd: FdNumber,
217    address: UserAddress,
218    length: usize,
219) -> Result<usize, Errno> {
220    let file = current_task.files.get(fd)?;
221    file.read(
222        locked,
223        current_task,
224        &mut UserBuffersOutputBuffer::unified_new_at(current_task, address, length)?,
225    )
226    .map_eintr(|| errno!(ERESTARTSYS))
227}
228
229pub fn sys_write(
230    locked: &mut Locked<Unlocked>,
231    current_task: &CurrentTask,
232    fd: FdNumber,
233    address: UserAddress,
234    length: usize,
235) -> Result<usize, Errno> {
236    let file = current_task.files.get(fd)?;
237    file.write(
238        locked,
239        current_task,
240        &mut UserBuffersInputBuffer::unified_new_at(current_task, address, length)?,
241    )
242    .map_eintr(|| errno!(ERESTARTSYS))
243}
244
245pub fn sys_close(
246    _locked: &mut Locked<Unlocked>,
247    current_task: &CurrentTask,
248    fd: FdNumber,
249) -> Result<(), Errno> {
250    current_task.files.close(fd)?;
251    Ok(())
252}
253
254pub fn sys_close_range(
255    locked: &mut Locked<Unlocked>,
256    current_task: &CurrentTask,
257    first: u32,
258    last: u32,
259    flags: u32,
260) -> Result<(), Errno> {
261    if first > last || flags & !(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC) != 0 {
262        return error!(EINVAL);
263    }
264    if flags & CLOSE_RANGE_UNSHARE != 0 {
265        current_task.files.unshare();
266    }
267    let in_range = |fd: FdNumber| fd.raw() as u32 >= first && fd.raw() as u32 <= last;
268    if flags & CLOSE_RANGE_CLOEXEC != 0 {
269        current_task.files.retain(locked, current_task, |fd, flags| {
270            if in_range(fd) {
271                *flags |= FdFlags::CLOEXEC;
272            }
273            true
274        });
275    } else {
276        current_task.files.retain(locked, current_task, |fd, _| !in_range(fd));
277    }
278    Ok(())
279}
280
281pub fn sys_lseek(
282    locked: &mut Locked<Unlocked>,
283    current_task: &CurrentTask,
284    fd: FdNumber,
285    offset: off_t,
286    whence: u32,
287) -> Result<off_t, Errno> {
288    let file = current_task.files.get(fd)?;
289    file.seek(locked, current_task, SeekTarget::from_raw(whence, offset)?)
290}
291
/// Implements the `fcntl` syscall: dispatches on `cmd` and performs the requested operation on
/// `fd`. `arg` is interpreted per command (an integer, flag bits, or a user address).
///
/// The `check_file_fcntl_access` security hook runs up front for every command except
/// `F_SETOWN`, `F_SETOWN_EX`, `F_ADD_SEALS` and `F_SETLEASE`; those four call it themselves after
/// their own validation. Commands not matched here are forwarded to `file.fcntl`.
pub fn sys_fcntl(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd: FdNumber,
    cmd: u32,
    arg: u64,
) -> Result<SyscallResult, Errno> {
    // Only the descriptor-level commands below use the lookup that allows O_PATH descriptors;
    // everything else goes through the regular `get`.
    let file = match cmd {
        F_DUPFD | F_DUPFD_CLOEXEC | F_GETFD | F_SETFD | F_GETFL => {
            current_task.files.get_allowing_opath(fd)?
        }
        _ => current_task.files.get(fd)?,
    };

    match cmd {
        // For the following values of cmd we need to perform more checks before running the
        // `check_file_fcntl_access` LSM hook.
        F_SETOWN | F_SETOWN_EX | F_ADD_SEALS | F_SETLEASE => {}
        _ => {
            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
        }
    };

    match cmd {
        // Duplicate `fd` onto the lowest available descriptor that is >= `arg`.
        F_DUPFD | F_DUPFD_CLOEXEC => {
            let fd_number = arg as i32;
            let flags = if cmd == F_DUPFD_CLOEXEC { FdFlags::CLOEXEC } else { FdFlags::empty() };
            let newfd = current_task.files.duplicate(
                locked,
                current_task,
                fd,
                TargetFdNumber::Minimum(FdNumber::from_raw(fd_number)),
                flags,
            )?;
            Ok(newfd.into())
        }
        // Report the async owner as a single integer: 0 when unowned, the id for a thread or
        // process, and the negated id for a process group.
        F_GETOWN => match file.get_async_owner() {
            FileAsyncOwner::Unowned => Ok(0.into()),
            FileAsyncOwner::Thread(tid) => Ok(tid.into()),
            FileAsyncOwner::Process(pid) => Ok(pid.into()),
            FileAsyncOwner::ProcessGroup(pgid) => Ok((-pgid).into()),
        },
        // Report the async owner as an `f_owner_ex` written to the user address in `arg`.
        // Nothing is written when the file is unowned.
        F_GETOWN_EX => {
            let maybe_owner = match file.get_async_owner() {
                FileAsyncOwner::Unowned => None,
                FileAsyncOwner::Thread(tid) => {
                    Some(uapi::f_owner_ex { type_: F_OWNER_TID as i32, pid: tid })
                }
                FileAsyncOwner::Process(pid) => {
                    Some(uapi::f_owner_ex { type_: F_OWNER_PID as i32, pid })
                }
                FileAsyncOwner::ProcessGroup(pgid) => {
                    Some(uapi::f_owner_ex { type_: F_OWNER_PGRP as i32, pid: pgid })
                }
            };
            if let Some(owner) = maybe_owner {
                let user_owner: UserRef<f_owner_ex> =
                    UserRef::<uapi::f_owner_ex>::new(UserAddress::from(arg));
                current_task.write_object(user_owner, &owner)?;
            }
            Ok(SUCCESS)
        }
        // Set the async owner from a signed integer: 0 clears it, a positive value names a
        // process, a negative value names a process group by its negation.
        F_SETOWN => {
            // Only the low 32 bits of `arg` are used, reinterpreted as a signed value.
            let pid = (arg as u32) as i32;
            let owner = match pid.cmp(&0) {
                Ordering::Equal => FileAsyncOwner::Unowned,
                Ordering::Greater => FileAsyncOwner::Process(pid),
                Ordering::Less => {
                    // `checked_neg` fails for `i32::MIN`, which is reported as EINVAL.
                    FileAsyncOwner::ProcessGroup(pid.checked_neg().ok_or_else(|| errno!(EINVAL))?)
                }
            };
            owner.validate(current_task)?;
            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
            file.set_async_owner(owner);
            Ok(SUCCESS)
        }
        // Set the async owner from an `f_owner_ex` read from the user address in `arg`.
        F_SETOWN_EX => {
            let user_owner = UserRef::<uapi::f_owner_ex>::new(UserAddress::from(arg));
            let requested_owner = current_task.read_object(user_owner)?;
            let mut owner = match requested_owner.type_ as u32 {
                F_OWNER_TID => FileAsyncOwner::Thread(requested_owner.pid),
                F_OWNER_PID => FileAsyncOwner::Process(requested_owner.pid),
                F_OWNER_PGRP => FileAsyncOwner::ProcessGroup(requested_owner.pid),
                _ => return error!(EINVAL),
            };
            // A zero id clears the owner, but only after the type has been validated above.
            if requested_owner.pid == 0 {
                owner = FileAsyncOwner::Unowned;
            }
            owner.validate(current_task)?;
            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
            file.set_async_owner(owner);
            Ok(SUCCESS)
        }
        F_GETFD => Ok(current_task.files.get_fd_flags_allowing_opath(fd)?.into()),
        F_SETFD => {
            // Unknown bits in `arg` are silently dropped by `from_bits_truncate`.
            current_task
                .files
                .set_fd_flags_allowing_opath(fd, FdFlags::from_bits_truncate(arg as u32))?;
            Ok(SUCCESS)
        }
        F_GETFL => {
            // O_PATH allowed for:
            //
            //   Retrieving open file status flags using the fcntl(2)
            //   F_GETFL operation: the returned flags will include the
            //   bit O_PATH.
            //
            // See https://man7.org/linux/man-pages/man2/open.2.html
            Ok(file.flags().into())
        }
        F_SETFL => {
            // Only these status flags can be changed; every other bit of `arg` is ignored.
            let settable_flags = OpenFlags::APPEND
                | OpenFlags::DIRECT
                | OpenFlags::NOATIME
                | OpenFlags::NONBLOCK
                | OpenFlags::ASYNC;
            let requested_flags =
                OpenFlags::from_bits_truncate((arg as u32) & settable_flags.bits());

            // If `NOATIME` flag is being set then check that it's allowed.
            if requested_flags.contains(OpenFlags::NOATIME)
                && !file.flags().contains(OpenFlags::NOATIME)
            {
                file.name.check_o_noatime_allowed(current_task)?;
            }

            file.update_file_flags(requested_flags, settable_flags);
            Ok(SUCCESS)
        }
        // Record locks described by a `flock`; the 32-bit variant is `arch32::flock`.
        // When `record_lock` returns a `flock`, it is written back to the same user address.
        F_SETLK | F_SETLKW | F_GETLK => {
            let flock_ref =
                MultiArchUserRef::<uapi::flock, uapi::arch32::flock>::new(current_task, arg);
            let flock = current_task.read_multi_arch_object(flock_ref)?;
            let cmd = RecordLockCommand::from_raw(cmd).ok_or_else(|| errno!(EINVAL))?;
            if let Some(flock) = file.record_lock(locked, current_task, cmd, flock)? {
                current_task.write_multi_arch_object(flock_ref, flock)?;
            }
            Ok(SUCCESS)
        }
        // Same as above, except that the 32-bit variant of the struct is `arch32::flock64`.
        F_SETLK64 | F_SETLKW64 | F_GETLK64 | F_OFD_GETLK | F_OFD_SETLK | F_OFD_SETLKW => {
            let flock_ref =
                MultiArchUserRef::<uapi::flock, uapi::arch32::flock64>::new(current_task, arg);
            let flock = current_task.read_multi_arch_object(flock_ref)?;
            let cmd = RecordLockCommand::from_raw(cmd).ok_or_else(|| errno!(EINVAL))?;
            if let Some(flock) = file.record_lock(locked, current_task, cmd, flock)? {
                current_task.write_multi_arch_object(flock_ref, flock)?;
            }
            Ok(SUCCESS)
        }
        F_ADD_SEALS => {
            if !file.can_write() {
                // Cannot add seals if the file is not writable
                return error!(EPERM);
            }
            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
            let mut state = file.name.entry.node.write_guard_state.lock();
            // Unknown seal bits in `arg` are dropped by `from_bits_truncate`.
            let flags = SealFlags::from_bits_truncate(arg as u32);
            state.try_add_seal(flags)?;
            Ok(SUCCESS)
        }
        F_GET_SEALS => {
            let state = file.name.entry.node.write_guard_state.lock();
            Ok(state.get_seals()?.into())
        }
        F_SETLEASE => {
            // A caller whose fsuid differs from the node's uid must pass the CAP_LEASE check.
            let fsuid = current_task.current_creds().fsuid;
            if fsuid != file.node().info().uid {
                security::check_task_capable(current_task, CAP_LEASE)?;
            }
            let lease = FileLeaseType::from_bits(arg as u32)?;
            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
            file.set_lease(current_task, lease)?;
            Ok(SUCCESS)
        }
        F_GETLEASE => Ok(file.get_lease(current_task).into()),
        // Not implemented: recorded as a stub and rejected with EOPNOTSUPP.
        F_SETSIG => {
            track_stub!(TODO("https://fxbug.dev/437972675"), "F_SETSIG");
            return error!(EOPNOTSUPP);
        }
        // Any other command is handed to the file's own `fcntl`.
        _ => file.fcntl(current_task, cmd, arg),
    }
}
474
475pub fn sys_pread64(
476    locked: &mut Locked<Unlocked>,
477    current_task: &CurrentTask,
478    fd: FdNumber,
479    address: UserAddress,
480    length: usize,
481    offset: off_t,
482) -> Result<usize, Errno> {
483    let file = current_task.files.get(fd)?;
484    let offset = offset.try_into().map_err(|_| errno!(EINVAL))?;
485    file.read_at(
486        locked,
487        current_task,
488        offset,
489        &mut UserBuffersOutputBuffer::unified_new_at(current_task, address, length)?,
490    )
491}
492
493pub fn sys_pwrite64(
494    locked: &mut Locked<Unlocked>,
495    current_task: &CurrentTask,
496    fd: FdNumber,
497    address: UserAddress,
498    length: usize,
499    offset: off_t,
500) -> Result<usize, Errno> {
501    let file = current_task.files.get(fd)?;
502    let offset = offset.try_into().map_err(|_| errno!(EINVAL))?;
503    file.write_at(
504        locked,
505        current_task,
506        offset,
507        &mut UserBuffersInputBuffer::unified_new_at(current_task, address, length)?,
508    )
509}
510
511fn do_readv(
512    locked: &mut Locked<Unlocked>,
513    current_task: &CurrentTask,
514    fd: FdNumber,
515    iovec_addr: IOVecPtr,
516    iovec_count: UserValue<i32>,
517    offset: Option<off_t>,
518    flags: u32,
519) -> Result<usize, Errno> {
520    if flags & !RWF_SUPPORTED != 0 {
521        return error!(EOPNOTSUPP);
522    }
523    if flags != 0 {
524        track_stub!(TODO("https://fxbug.dev/322875072"), "preadv2 flags", flags);
525    }
526    let file = current_task.files.get(fd)?;
527    let iovec = current_task.read_iovec(iovec_addr, iovec_count)?;
528    let mut data = UserBuffersOutputBuffer::unified_new(current_task, iovec)?;
529    if let Some(offset) = offset {
530        file.read_at(
531            locked,
532            current_task,
533            offset.try_into().map_err(|_| errno!(EINVAL))?,
534            &mut data,
535        )
536    } else {
537        file.read(locked, current_task, &mut data)
538    }
539}
540
541pub fn sys_readv(
542    locked: &mut Locked<Unlocked>,
543    current_task: &CurrentTask,
544    fd: FdNumber,
545    iovec_addr: IOVecPtr,
546    iovec_count: UserValue<i32>,
547) -> Result<usize, Errno> {
548    do_readv(locked, current_task, fd, iovec_addr, iovec_count, None, 0)
549}
550
551pub fn sys_preadv(
552    locked: &mut Locked<Unlocked>,
553    current_task: &CurrentTask,
554    fd: FdNumber,
555    iovec_addr: IOVecPtr,
556    iovec_count: UserValue<i32>,
557    offset: off_t,
558) -> Result<usize, Errno> {
559    do_readv(locked, current_task, fd, iovec_addr, iovec_count, Some(offset), 0)
560}
561
562pub fn sys_preadv2(
563    locked: &mut Locked<Unlocked>,
564    current_task: &CurrentTask,
565    fd: FdNumber,
566    iovec_addr: IOVecPtr,
567    iovec_count: UserValue<i32>,
568    offset: off_t,
569    _unused: SyscallArg, // On 32-bit systems, holds the upper 32 bits of offset.
570    flags: u32,
571) -> Result<usize, Errno> {
572    let offset = if offset == -1 { None } else { Some(offset) };
573    do_readv(locked, current_task, fd, iovec_addr, iovec_count, offset, flags)
574}
575
576fn do_writev(
577    locked: &mut Locked<Unlocked>,
578    current_task: &CurrentTask,
579    fd: FdNumber,
580    iovec_addr: IOVecPtr,
581    iovec_count: UserValue<i32>,
582    offset: Option<off_t>,
583    flags: u32,
584) -> Result<usize, Errno> {
585    if flags & !RWF_SUPPORTED != 0 {
586        return error!(EOPNOTSUPP);
587    }
588    if flags != 0 {
589        track_stub!(TODO("https://fxbug.dev/322874523"), "pwritev2 flags", flags);
590    }
591
592    let file = current_task.files.get(fd)?;
593    let iovec = current_task.read_iovec(iovec_addr, iovec_count)?;
594    let mut data = UserBuffersInputBuffer::unified_new(current_task, iovec)?;
595    let res = if let Some(offset) = offset {
596        file.write_at(
597            locked,
598            current_task,
599            offset.try_into().map_err(|_| errno!(EINVAL))?,
600            &mut data,
601        )
602    } else {
603        file.write(locked, current_task, &mut data)
604    };
605
606    match &res {
607        Err(e) if e.code == EFAULT => {
608            track_stub!(TODO("https://fxbug.dev/297370529"), "allow partial writes")
609        }
610        _ => (),
611    }
612
613    res
614}
615
616pub fn sys_writev(
617    locked: &mut Locked<Unlocked>,
618    current_task: &CurrentTask,
619    fd: FdNumber,
620    iovec_addr: IOVecPtr,
621    iovec_count: UserValue<i32>,
622) -> Result<usize, Errno> {
623    do_writev(locked, current_task, fd, iovec_addr, iovec_count, None, 0)
624}
625
626pub fn sys_pwritev(
627    locked: &mut Locked<Unlocked>,
628    current_task: &CurrentTask,
629    fd: FdNumber,
630    iovec_addr: IOVecPtr,
631    iovec_count: UserValue<i32>,
632    offset: off_t,
633) -> Result<usize, Errno> {
634    do_writev(locked, current_task, fd, iovec_addr, iovec_count, Some(offset), 0)
635}
636
637pub fn sys_pwritev2(
638    locked: &mut Locked<Unlocked>,
639    current_task: &CurrentTask,
640    fd: FdNumber,
641    iovec_addr: IOVecPtr,
642    iovec_count: UserValue<i32>,
643    offset: off_t,
644    _unused: SyscallArg, // On 32-bit systems, holds the upper 32 bits of offset.
645    flags: u32,
646) -> Result<usize, Errno> {
647    let offset = if offset == -1 { None } else { Some(offset) };
648    do_writev(locked, current_task, fd, iovec_addr, iovec_count, offset, flags)
649}
650
651type StatFsPtr = MultiArchUserRef<uapi::statfs, uapi::arch32::statfs>;
652
653pub fn fstatfs<T32: IntoBytes + Immutable + TryFrom<uapi::statfs>>(
654    locked: &mut Locked<Unlocked>,
655    current_task: &CurrentTask,
656    fd: FdNumber,
657    user_buf: MultiArchUserRef<uapi::statfs, T32>,
658) -> Result<(), Errno> {
659    // O_PATH allowed for:
660    //
661    //   fstatfs(2) (since Linux 3.12).
662    //
663    // See https://man7.org/linux/man-pages/man2/open.2.html
664    let file = current_task.files.get_allowing_opath(fd)?;
665    let mut stat = file.fs.statfs(locked, current_task)?;
666    stat.f_flags |= file.name.mount.flags().bits() as i64;
667    current_task.write_multi_arch_object(user_buf, stat)?;
668    Ok(())
669}
670
671pub fn sys_fstatfs(
672    locked: &mut Locked<Unlocked>,
673    current_task: &CurrentTask,
674    fd: FdNumber,
675    user_buf: StatFsPtr,
676) -> Result<(), Errno> {
677    fstatfs(locked, current_task, fd, user_buf)
678}
679
680fn statfs<T32: IntoBytes + Immutable + TryFrom<uapi::statfs>>(
681    locked: &mut Locked<Unlocked>,
682    current_task: &CurrentTask,
683    user_path: UserCString,
684    user_buf: MultiArchUserRef<uapi::statfs, T32>,
685) -> Result<(), Errno> {
686    let name =
687        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
688    let fs = name.entry.node.fs();
689    let mut stat = fs.statfs(locked, current_task)?;
690    stat.f_flags |= name.mount.flags().bits() as i64;
691    current_task.write_multi_arch_object(user_buf, stat)?;
692    Ok(())
693}
694
695pub fn sys_statfs(
696    locked: &mut Locked<Unlocked>,
697    current_task: &CurrentTask,
698    user_path: UserCString,
699    user_buf: StatFsPtr,
700) -> Result<(), Errno> {
701    statfs(locked, current_task, user_path, user_buf)
702}
703
704pub fn sys_sendfile(
705    locked: &mut Locked<Unlocked>,
706    current_task: &CurrentTask,
707    out_fd: FdNumber,
708    in_fd: FdNumber,
709    user_offset: OffsetPtr,
710    count: i32,
711) -> Result<usize, Errno> {
712    splice::sendfile(locked, current_task, out_fd, in_fd, user_offset, count)
713}
714
715/// A convenient wrapper for Task::open_file_at.
716///
717/// Reads user_path from user memory and then calls through to Task::open_file_at.
718fn open_file_at(
719    locked: &mut Locked<Unlocked>,
720    current_task: &CurrentTask,
721    dir_fd: FdNumber,
722    user_path: UserCString,
723    flags: u32,
724    mode: FileMode,
725    resolve_flags: ResolveFlags,
726) -> Result<FileHandle, Errno> {
727    let path = current_task.read_path(user_path)?;
728    log_trace!(dir_fd:%, path:%; "open_file_at");
729    current_task.open_file_at(
730        locked,
731        dir_fd,
732        path.as_ref(),
733        OpenFlags::from_bits_truncate(flags),
734        mode,
735        resolve_flags,
736        AccessCheck::default(),
737    )
738}
739
740fn lookup_parent_at<T, F>(
741    locked: &mut Locked<Unlocked>,
742    current_task: &CurrentTask,
743    dir_fd: FdNumber,
744    user_path: UserCString,
745    callback: F,
746) -> Result<T, Errno>
747where
748    F: Fn(&mut Locked<Unlocked>, LookupContext, NamespaceNode, &FsStr) -> Result<T, Errno>,
749{
750    let path = current_task.read_path(user_path)?;
751    log_trace!(dir_fd:%, path:%; "lookup_parent_at");
752    if path.is_empty() {
753        return error!(ENOENT);
754    }
755    let mut context = LookupContext::default();
756    let (parent, basename) =
757        current_task.lookup_parent_at(locked, &mut context, dir_fd, path.as_ref())?;
758    callback(locked, context, parent, basename)
759}
760
761/// Options for lookup_at.
762#[derive(Debug, Default, Copy, Clone)]
763pub struct LookupFlags {
764    /// Whether AT_EMPTY_PATH was supplied.
765    allow_empty_path: bool,
766
767    /// Used to implement AT_SYMLINK_NOFOLLOW.
768    symlink_mode: SymlinkMode,
769
770    /// Automount directories on the path.
771    // TODO(https://fxbug.dev/297370602): Support the `AT_NO_AUTOMOUNT` flag.
772    #[allow(dead_code)]
773    automount: bool,
774}
775
776impl LookupFlags {
777    fn no_follow() -> Self {
778        Self { symlink_mode: SymlinkMode::NoFollow, ..Default::default() }
779    }
780
781    fn from_bits(flags: u32, allowed_flags: u32) -> Result<Self, Errno> {
782        if flags & !allowed_flags != 0 {
783            return error!(EINVAL);
784        }
785        let follow_symlinks = if allowed_flags & AT_SYMLINK_FOLLOW != 0 {
786            flags & AT_SYMLINK_FOLLOW != 0
787        } else {
788            flags & AT_SYMLINK_NOFOLLOW == 0
789        };
790        let automount =
791            if allowed_flags & AT_NO_AUTOMOUNT != 0 { flags & AT_NO_AUTOMOUNT == 0 } else { false };
792        if automount {
793            track_stub!(TODO("https://fxbug.dev/297370602"), "LookupFlags::automount");
794        }
795        Ok(LookupFlags {
796            allow_empty_path: (flags & AT_EMPTY_PATH != 0)
797                || (flags & O_PATH != 0 && flags & O_NOFOLLOW != 0),
798            symlink_mode: if follow_symlinks { SymlinkMode::Follow } else { SymlinkMode::NoFollow },
799            automount,
800        })
801    }
802}
803
804impl From<StatxFlags> for LookupFlags {
805    fn from(flags: StatxFlags) -> Self {
806        let lookup_flags = StatxFlags::AT_SYMLINK_NOFOLLOW
807            | StatxFlags::AT_EMPTY_PATH
808            | StatxFlags::AT_NO_AUTOMOUNT;
809        Self::from_bits((flags & lookup_flags).bits(), lookup_flags.bits()).unwrap()
810    }
811}
812
813pub fn lookup_at<L>(
814    locked: &mut Locked<L>,
815    current_task: &CurrentTask,
816    dir_fd: FdNumber,
817    user_path: UserCString,
818    options: LookupFlags,
819) -> Result<NamespaceNode, Errno>
820where
821    L: LockEqualOrBefore<FileOpsCore>,
822{
823    let path = current_task.read_path(user_path)?;
824    log_trace!(dir_fd:%, path:%; "lookup_at");
825    if path.is_empty() {
826        if options.allow_empty_path {
827            let (node, _) = current_task.resolve_dir_fd(
828                locked,
829                dir_fd,
830                path.as_ref(),
831                ResolveFlags::empty(),
832            )?;
833            return Ok(node);
834        }
835        return error!(ENOENT);
836    }
837
838    let mut parent_context = LookupContext::default();
839    let (parent, basename) =
840        current_task.lookup_parent_at(locked, &mut parent_context, dir_fd, path.as_ref())?;
841
842    let mut child_context = if parent_context.must_be_directory {
843        // The child must resolve to a directory. This is because a trailing slash
844        // was found in the path. If the child is a symlink, we should follow it.
845        // See https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap03.html#tag_21_03_00_75
846        parent_context.with(SymlinkMode::Follow)
847    } else {
848        parent_context.with(options.symlink_mode)
849    };
850
851    parent.lookup_child(locked, current_task, &mut child_context, basename)
852}
853
854fn do_openat(
855    locked: &mut Locked<Unlocked>,
856    current_task: &CurrentTask,
857    dir_fd: FdNumber,
858    user_path: UserCString,
859    flags: u32,
860    mode: FileMode,
861    resolve_flags: ResolveFlags,
862) -> Result<FdNumber, Errno> {
863    let file = open_file_at(locked, current_task, dir_fd, user_path, flags, mode, resolve_flags)?;
864    let fd_flags = get_fd_flags(flags);
865    current_task.add_file(locked, file, fd_flags)
866}
867
868pub fn sys_openat(
869    locked: &mut Locked<Unlocked>,
870    current_task: &CurrentTask,
871    dir_fd: FdNumber,
872    user_path: UserCString,
873    flags: u32,
874    mode: FileMode,
875) -> Result<FdNumber, Errno> {
876    do_openat(locked, current_task, dir_fd, user_path, flags, mode, ResolveFlags::empty())
877}
878
879pub fn sys_openat2(
880    locked: &mut Locked<Unlocked>,
881    current_task: &CurrentTask,
882    dir_fd: FdNumber,
883    user_path: UserCString,
884    how_ref: UserRef<uapi::open_how>,
885    size: usize,
886) -> Result<FdNumber, Errno> {
887    const EXPECTED_SIZE: usize = std::mem::size_of::<uapi::open_how>();
888    if size < EXPECTED_SIZE {
889        return error!(EINVAL);
890    }
891
892    let how = current_task.read_object(how_ref)?;
893
894    // If the `size` is greater than expected, then we need to check that any extra bytes after
895    // `open_how` are set to 0. This is needed to properly handle the case when `open_how` is
896    // extended with new fields in the future. There is no upper limit on the buffer size, so we
897    // limit size of each read to one page.
898    let mut pos = EXPECTED_SIZE;
899    while pos < size {
900        let length = std::cmp::min(size - pos, *PAGE_SIZE as usize);
901        let extra_bytes =
902            current_task.read_buffer(&UserBuffer { address: (how_ref.addr() + pos)?, length })?;
903        for b in extra_bytes {
904            if b != 0 {
905                return error!(E2BIG);
906            }
907        }
908        pos += length;
909    }
910
911    let flags: u32 = how.flags.try_into().map_err(|_| errno!(EINVAL))?;
912
913    // `mode` can be specified only with `O_CREAT` or `O_TMPFILE`.
914    let allowed_mode_flags = if (flags & (O_CREAT | O_TMPFILE)) > 0 { 0o7777 } else { 0 };
915    if (how.mode & !allowed_mode_flags) != 0 {
916        return error!(EINVAL);
917    }
918
919    let mode = FileMode::from_bits(how.mode.try_into().map_err(|_| errno!(EINVAL))?);
920    let resolve_flags =
921        ResolveFlags::from_bits(how.resolve.try_into().map_err(|_| errno!(EINVAL))?)
922            .ok_or_else(|| errno!(EINVAL))?;
923
924    if resolve_flags.contains(ResolveFlags::CACHED) {
925        track_stub!(TODO("https://fxbug.dev/326474574"), "openat2: RESOLVE_CACHED");
926        return error!(EAGAIN);
927    }
928
929    do_openat(locked, current_task, dir_fd, user_path, flags, mode, resolve_flags)
930}
931
932pub fn sys_faccessat(
933    locked: &mut Locked<Unlocked>,
934    current_task: &CurrentTask,
935    dir_fd: FdNumber,
936    user_path: UserCString,
937    mode: u32,
938) -> Result<(), Errno> {
939    sys_faccessat2(locked, current_task, dir_fd, user_path, mode, 0)
940}
941
942pub fn sys_faccessat2(
943    locked: &mut Locked<Unlocked>,
944    current_task: &CurrentTask,
945    dir_fd: FdNumber,
946    user_path: UserCString,
947    mode: u32,
948    flags: u32,
949) -> Result<(), Errno> {
950    let mut access_check = || {
951        let mode = Access::try_from(mode)?;
952        let lookup_flags = LookupFlags::from_bits(flags, AT_SYMLINK_NOFOLLOW | AT_EACCESS)?;
953        let name = lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?;
954        name.check_access(locked, current_task, mode, CheckAccessReason::Access)
955    };
956    // Unless `AT_ACCESS` is set, perform lookup & access-checking using real UID & GID.
957    if flags & AT_EACCESS == 0 {
958        let mut temporary_creds = Credentials::clone(&current_task.current_creds());
959        temporary_creds.fsuid = temporary_creds.uid;
960        temporary_creds.fsgid = temporary_creds.gid;
961        current_task.override_creds(temporary_creds.into(), access_check)
962    } else {
963        access_check()
964    }
965}
966
967pub fn sys_getdents64(
968    locked: &mut Locked<Unlocked>,
969    current_task: &CurrentTask,
970    fd: FdNumber,
971    user_buffer: UserAddress,
972    user_capacity: usize,
973) -> Result<usize, Errno> {
974    let file = current_task.files.get(fd)?;
975    let mut offset = file.offset.lock();
976    let mut sink = DirentSink64::new(current_task, &mut offset, user_buffer, user_capacity);
977    let result = file.readdir(locked, current_task, &mut sink);
978    sink.map_result_with_actual(result)
979}
980
981pub fn sys_chroot(
982    locked: &mut Locked<Unlocked>,
983    current_task: &CurrentTask,
984    user_path: UserCString,
985) -> Result<(), Errno> {
986    let name =
987        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
988    if !name.entry.node.is_dir() {
989        return error!(ENOTDIR);
990    }
991
992    current_task.fs().chroot(locked, current_task, name)?;
993    Ok(())
994}
995
996pub fn sys_chdir(
997    locked: &mut Locked<Unlocked>,
998    current_task: &CurrentTask,
999    user_path: UserCString,
1000) -> Result<(), Errno> {
1001    let name =
1002        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
1003    if !name.entry.node.is_dir() {
1004        return error!(ENOTDIR);
1005    }
1006    current_task.fs().chdir(locked, current_task, name)
1007}
1008
1009pub fn sys_fchdir(
1010    locked: &mut Locked<Unlocked>,
1011    current_task: &CurrentTask,
1012    fd: FdNumber,
1013) -> Result<(), Errno> {
1014    // O_PATH allowed for:
1015    //
1016    //   fchdir(2), if the file descriptor refers to a directory
1017    //   (since Linux 3.5).
1018    //
1019    // See https://man7.org/linux/man-pages/man2/open.2.html
1020    let file = current_task.files.get_allowing_opath(fd)?;
1021    if !file.name.entry.node.is_dir() {
1022        return error!(ENOTDIR);
1023    }
1024    current_task.fs().chdir(locked, current_task, file.name.to_passive())
1025}
1026
1027pub fn sys_fstat(
1028    locked: &mut Locked<Unlocked>,
1029    current_task: &CurrentTask,
1030    fd: FdNumber,
1031    buffer: UserRef<uapi::stat>,
1032) -> Result<(), Errno> {
1033    // O_PATH allowed for:
1034    //
1035    //   fstat(2) (since Linux 3.6).
1036    //
1037    // See https://man7.org/linux/man-pages/man2/open.2.html
1038    let file = current_task.files.get_allowing_opath(fd)?;
1039    let result = file.node().stat(locked, current_task)?;
1040    current_task.write_object(buffer, &result)?;
1041    Ok(())
1042}
1043
/// User pointer to a stat buffer whose layout is either `uapi::stat` or
/// `uapi::arch32::stat64`.
// NOTE(review): presumably the layout is chosen from the calling task's architecture by
// `MultiArchUserRef` -- confirm against its definition.
type StatPtr = MultiArchUserRef<uapi::stat, uapi::arch32::stat64>;
1045
1046// TODO(https://fxbug.dev/485370648) remove when unnecessary
1047fn get_fake_ion_stat() -> uapi::stat {
1048    uapi::stat {
1049        st_mode: uapi::S_IFCHR | 0o666,
1050        st_rdev: DeviceType::new(10, 59).bits(),
1051        st_nlink: 1,
1052        st_blksize: 4096,
1053        ..Default::default()
1054    }
1055}
1056
1057// TODO(https://fxbug.dev/485370648) remove when unnecessary
1058fn get_fake_ion_statx() -> statx {
1059    statx {
1060        stx_mask: uapi::STATX_BASIC_STATS,
1061        stx_mode: (uapi::S_IFCHR | 0o666) as u16,
1062        stx_rdev_major: 10,
1063        stx_rdev_minor: 59,
1064        stx_nlink: 1,
1065        stx_blksize: 4096,
1066        ..Default::default()
1067    }
1068}
1069
1070pub fn sys_fstatat64(
1071    locked: &mut Locked<Unlocked>,
1072    current_task: &CurrentTask,
1073    dir_fd: FdNumber,
1074    user_path: UserCString,
1075    buffer: StatPtr,
1076    flags: u32,
1077) -> Result<(), Errno> {
1078    let lookup_flags =
1079        LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)?;
1080    let result = match lookup_at(locked, current_task, dir_fd, user_path, lookup_flags) {
1081        Ok(name) => name.entry.node.stat(locked, current_task)?,
1082        // TODO(https://fxbug.dev/485370648) remove when unnecessary
1083        Err(e) if e == errno!(ENOENT) && current_task.kernel().features.fake_ion => {
1084            let path = current_task.read_path(user_path)?;
1085            if path == b"/dev/ion" {
1086                get_fake_ion_stat()
1087            } else {
1088                return Err(e);
1089            }
1090        }
1091        Err(e) => return Err(e),
1092    };
1093    current_task.write_multi_arch_object(buffer, result)?;
1094    Ok(())
1095}
1096
// `newfstatat` is served by the same implementation as `fstatat64`.
pub use sys_fstatat64 as sys_newfstatat;
1098
1099pub fn sys_statx(
1100    locked: &mut Locked<Unlocked>,
1101    current_task: &CurrentTask,
1102    dir_fd: FdNumber,
1103    user_path: UserCString,
1104    flags: u32,
1105    mask: u32,
1106    statxbuf: UserRef<statx>,
1107) -> Result<(), Errno> {
1108    let statx_flags = StatxFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
1109    if statx_flags & (StatxFlags::AT_STATX_FORCE_SYNC | StatxFlags::AT_STATX_DONT_SYNC)
1110        == (StatxFlags::AT_STATX_FORCE_SYNC | StatxFlags::AT_STATX_DONT_SYNC)
1111    {
1112        return error!(EINVAL);
1113    }
1114
1115    let result =
1116        match lookup_at(locked, current_task, dir_fd, user_path, LookupFlags::from(statx_flags)) {
1117            Ok(name) => name.entry.node.statx(locked, current_task, statx_flags, mask)?,
1118            // TODO(https://fxbug.dev/485370648) remove when unnecessary
1119            Err(e) if e == errno!(ENOENT) && current_task.kernel().features.fake_ion => {
1120                let path = current_task.read_path(user_path)?;
1121                if path == b"/dev/ion" {
1122                    get_fake_ion_statx()
1123                } else {
1124                    return Err(e);
1125                }
1126            }
1127            Err(e) => return Err(e),
1128        };
1129    current_task.write_object(statxbuf, &result)?;
1130    Ok(())
1131}
1132
1133pub fn sys_readlinkat(
1134    locked: &mut Locked<Unlocked>,
1135    current_task: &CurrentTask,
1136    dir_fd: FdNumber,
1137    user_path: UserCString,
1138    buffer: UserAddress,
1139    buffer_size: usize,
1140) -> Result<usize, Errno> {
1141    let path = current_task.read_path(user_path)?;
1142    let lookup_flags = if path.is_empty() {
1143        if dir_fd == FdNumber::AT_FDCWD {
1144            return error!(ENOENT);
1145        }
1146        LookupFlags {
1147            allow_empty_path: true,
1148            symlink_mode: SymlinkMode::NoFollow,
1149            ..Default::default()
1150        }
1151    } else {
1152        LookupFlags::no_follow()
1153    };
1154    let name = lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?;
1155
1156    let target = match name.readlink(locked, current_task)? {
1157        SymlinkTarget::Path(path) => path,
1158        SymlinkTarget::Node(node) => node.path(current_task),
1159    };
1160
1161    if buffer_size == 0 {
1162        return error!(EINVAL);
1163    }
1164    // Cap the returned length at buffer_size.
1165    let length = std::cmp::min(buffer_size, target.len());
1166    current_task.write_memory(buffer, &target[..length])?;
1167    Ok(length)
1168}
1169
1170pub fn sys_truncate(
1171    locked: &mut Locked<Unlocked>,
1172    current_task: &CurrentTask,
1173    user_path: UserCString,
1174    length: off_t,
1175) -> Result<(), Errno> {
1176    let length = length.try_into().map_err(|_| errno!(EINVAL))?;
1177    let name =
1178        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
1179    name.truncate(locked, current_task, length)?;
1180    Ok(())
1181}
1182
1183pub fn sys_ftruncate(
1184    locked: &mut Locked<Unlocked>,
1185    current_task: &CurrentTask,
1186    fd: FdNumber,
1187    length: off_t,
1188) -> Result<(), Errno> {
1189    let length = length.try_into().map_err(|_| errno!(EINVAL))?;
1190    let file = current_task.files.get(fd)?;
1191    file.ftruncate(locked, current_task, length)?;
1192    Ok(())
1193}
1194
1195pub fn sys_mkdirat(
1196    locked: &mut Locked<Unlocked>,
1197    current_task: &CurrentTask,
1198    dir_fd: FdNumber,
1199    user_path: UserCString,
1200    mode: FileMode,
1201) -> Result<(), Errno> {
1202    let path = current_task.read_path(user_path)?;
1203
1204    if path.is_empty() {
1205        return error!(ENOENT);
1206    }
1207    let (parent, basename) = current_task.lookup_parent_at(
1208        locked,
1209        &mut LookupContext::default(),
1210        dir_fd,
1211        path.as_ref(),
1212    )?;
1213    parent.create_node(
1214        locked,
1215        current_task,
1216        basename,
1217        mode.with_type(FileMode::IFDIR),
1218        DeviceType::NONE,
1219    )?;
1220    Ok(())
1221}
1222
1223pub fn sys_mknodat(
1224    locked: &mut Locked<Unlocked>,
1225    current_task: &CurrentTask,
1226    dir_fd: FdNumber,
1227    user_path: UserCString,
1228    mode: FileMode,
1229    dev: DeviceType,
1230) -> Result<(), Errno> {
1231    let file_type = match mode.fmt() {
1232        FileMode::IFREG
1233        | FileMode::IFCHR
1234        | FileMode::IFBLK
1235        | FileMode::IFIFO
1236        | FileMode::IFSOCK => mode.fmt(),
1237        FileMode::EMPTY => FileMode::IFREG,
1238        _ => return error!(EINVAL),
1239    };
1240    lookup_parent_at(locked, current_task, dir_fd, user_path, |locked, _, parent, basename| {
1241        parent.create_node(locked, current_task, basename, mode.with_type(file_type), dev)
1242    })?;
1243    Ok(())
1244}
1245
1246pub fn sys_linkat(
1247    locked: &mut Locked<Unlocked>,
1248    current_task: &CurrentTask,
1249    old_dir_fd: FdNumber,
1250    old_user_path: UserCString,
1251    new_dir_fd: FdNumber,
1252    new_user_path: UserCString,
1253    flags: u32,
1254) -> Result<(), Errno> {
1255    if flags & !(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH) != 0 {
1256        track_stub!(TODO("https://fxbug.dev/322875706"), "linkat unknown flags", flags);
1257        return error!(EINVAL);
1258    }
1259
1260    if flags & AT_EMPTY_PATH != 0 {
1261        security::check_task_capable(current_task, CAP_DAC_READ_SEARCH)
1262            .map_err(|_| errno!(ENOENT))?;
1263    }
1264
1265    let flags = LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_FOLLOW)?;
1266    let target = lookup_at(locked, current_task, old_dir_fd, old_user_path, flags)?;
1267    lookup_parent_at(
1268        locked,
1269        current_task,
1270        new_dir_fd,
1271        new_user_path,
1272        |locked, context, parent, basename| {
1273            // The path to a new link cannot end in `/`. That would imply that we are dereferencing
1274            // the link to a directory.
1275            if context.must_be_directory {
1276                return error!(ENOENT);
1277            }
1278            if target.mount != parent.mount {
1279                return error!(EXDEV);
1280            }
1281            parent.link(locked, current_task, basename, &target.entry.node)
1282        },
1283    )?;
1284
1285    Ok(())
1286}
1287
1288pub fn sys_unlinkat(
1289    locked: &mut Locked<Unlocked>,
1290    current_task: &CurrentTask,
1291    dir_fd: FdNumber,
1292    user_path: UserCString,
1293    flags: u32,
1294) -> Result<(), Errno> {
1295    if flags & !AT_REMOVEDIR != 0 {
1296        return error!(EINVAL);
1297    }
1298    let kind =
1299        if flags & AT_REMOVEDIR != 0 { UnlinkKind::Directory } else { UnlinkKind::NonDirectory };
1300    lookup_parent_at(
1301        locked,
1302        current_task,
1303        dir_fd,
1304        user_path,
1305        |locked, context, parent, basename| {
1306            parent.unlink(locked, current_task, basename, kind, context.must_be_directory)
1307        },
1308    )?;
1309    Ok(())
1310}
1311
1312pub fn sys_renameat2(
1313    locked: &mut Locked<Unlocked>,
1314    current_task: &CurrentTask,
1315    old_dir_fd: FdNumber,
1316    old_user_path: UserCString,
1317    new_dir_fd: FdNumber,
1318    new_user_path: UserCString,
1319    flags: u32,
1320) -> Result<(), Errno> {
1321    let flags = RenameFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
1322    if flags.intersects(RenameFlags::INTERNAL) {
1323        return error!(EINVAL);
1324    };
1325
1326    // RENAME_EXCHANGE cannot be combined with the other flags.
1327    if flags.contains(RenameFlags::EXCHANGE)
1328        && flags.intersects(RenameFlags::NOREPLACE | RenameFlags::WHITEOUT)
1329    {
1330        return error!(EINVAL);
1331    }
1332
1333    // RENAME_WHITEOUT is not supported.
1334    if flags.contains(RenameFlags::WHITEOUT) {
1335        track_stub!(TODO("https://fxbug.dev/322875416"), "RENAME_WHITEOUT");
1336        return error!(ENOSYS);
1337    };
1338
1339    let mut lookup = |dir_fd, user_path| {
1340        lookup_parent_at(locked, current_task, dir_fd, user_path, |_, _, parent, basename| {
1341            Ok((parent, basename.to_owned()))
1342        })
1343    };
1344
1345    let (old_parent, old_basename) = lookup(old_dir_fd, old_user_path)?;
1346    let (new_parent, new_basename) = lookup(new_dir_fd, new_user_path)?;
1347
1348    if new_basename.len() > NAME_MAX as usize {
1349        return error!(ENAMETOOLONG);
1350    }
1351
1352    NamespaceNode::rename(
1353        locked,
1354        current_task,
1355        &old_parent,
1356        old_basename.as_ref(),
1357        &new_parent,
1358        new_basename.as_ref(),
1359        flags,
1360    )
1361}
1362
1363pub fn sys_fchmod(
1364    locked: &mut Locked<Unlocked>,
1365    current_task: &CurrentTask,
1366    fd: FdNumber,
1367    mode: FileMode,
1368) -> Result<(), Errno> {
1369    // Remove the filetype from the mode.
1370    let mode = mode & FileMode::PERMISSIONS;
1371    let file = current_task.files.get(fd)?;
1372    file.name.entry.node.chmod(locked, current_task, &file.name.mount, mode)?;
1373    file.name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1374    Ok(())
1375}
1376
1377pub fn sys_fchmodat(
1378    locked: &mut Locked<Unlocked>,
1379    current_task: &CurrentTask,
1380    dir_fd: FdNumber,
1381    user_path: UserCString,
1382    mode: FileMode,
1383) -> Result<(), Errno> {
1384    // Remove the filetype from the mode.
1385    let mode = mode & FileMode::PERMISSIONS;
1386    let name = lookup_at(locked, current_task, dir_fd, user_path, LookupFlags::default())?;
1387    name.entry.node.chmod(locked, current_task, &name.mount, mode)?;
1388    name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1389    Ok(())
1390}
1391
1392fn maybe_uid(id: u32) -> Option<uid_t> {
1393    if id == u32::MAX { None } else { Some(id) }
1394}
1395
1396pub fn sys_fchown(
1397    locked: &mut Locked<Unlocked>,
1398    current_task: &CurrentTask,
1399    fd: FdNumber,
1400    owner: u32,
1401    group: u32,
1402) -> Result<(), Errno> {
1403    let file = current_task.files.get(fd)?;
1404    file.name.entry.node.chown(
1405        locked,
1406        current_task,
1407        &file.name.mount,
1408        maybe_uid(owner),
1409        maybe_uid(group),
1410    )?;
1411    file.name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1412    Ok(())
1413}
1414
1415pub fn sys_fchownat(
1416    locked: &mut Locked<Unlocked>,
1417    current_task: &CurrentTask,
1418    dir_fd: FdNumber,
1419    user_path: UserCString,
1420    owner: u32,
1421    group: u32,
1422    flags: u32,
1423) -> Result<(), Errno> {
1424    let flags = LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW)?;
1425    let name = lookup_at(locked, current_task, dir_fd, user_path, flags)?;
1426    name.entry.node.chown(locked, current_task, &name.mount, maybe_uid(owner), maybe_uid(group))?;
1427    name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1428    Ok(())
1429}
1430
1431fn read_xattr_name(current_task: &CurrentTask, name_addr: UserCString) -> Result<FsString, Errno> {
1432    let name = current_task
1433        .read_c_string_to_vec(name_addr, XATTR_NAME_MAX as usize + 1)
1434        .map_err(|e| if e == ENAMETOOLONG { errno!(ERANGE) } else { e })?;
1435    if name.is_empty() {
1436        return error!(ERANGE);
1437    }
1438    let dot_index = memchr::memchr(b'.', &name).ok_or_else(|| errno!(ENOTSUP))?;
1439    if name[dot_index + 1..].is_empty() {
1440        return error!(EINVAL);
1441    }
1442    match &name[..dot_index] {
1443        b"user" | b"security" | b"trusted" | b"system" => {}
1444        _ => return error!(ENOTSUP),
1445    }
1446    Ok(name)
1447}
1448
1449fn do_getxattr(
1450    locked: &mut Locked<Unlocked>,
1451    current_task: &CurrentTask,
1452    node: &NamespaceNode,
1453    name_addr: UserCString,
1454    value_addr: UserAddress,
1455    size: usize,
1456) -> Result<usize, Errno> {
1457    let name = read_xattr_name(current_task, name_addr)?;
1458    let value =
1459        match node.entry.node.get_xattr(locked, current_task, &node.mount, name.as_ref(), size)? {
1460            ValueOrSize::Size(s) => return Ok(s),
1461            ValueOrSize::Value(v) => v,
1462        };
1463    if size == 0 {
1464        return Ok(value.len());
1465    }
1466    if size < value.len() {
1467        return error!(ERANGE);
1468    }
1469    current_task.write_memory(value_addr, &value)
1470}
1471
1472pub fn sys_getxattr(
1473    locked: &mut Locked<Unlocked>,
1474    current_task: &CurrentTask,
1475    path_addr: UserCString,
1476    name_addr: UserCString,
1477    value_addr: UserAddress,
1478    size: usize,
1479) -> Result<usize, Errno> {
1480    let node =
1481        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1482    do_getxattr(locked, current_task, &node, name_addr, value_addr, size)
1483}
1484
1485pub fn sys_fgetxattr(
1486    locked: &mut Locked<Unlocked>,
1487    current_task: &CurrentTask,
1488    fd: FdNumber,
1489    name_addr: UserCString,
1490    value_addr: UserAddress,
1491    size: usize,
1492) -> Result<usize, Errno> {
1493    let file = current_task.files.get(fd)?;
1494    do_getxattr(locked, current_task, &file.name, name_addr, value_addr, size)
1495}
1496
1497pub fn sys_lgetxattr(
1498    locked: &mut Locked<Unlocked>,
1499    current_task: &CurrentTask,
1500    path_addr: UserCString,
1501    name_addr: UserCString,
1502    value_addr: UserAddress,
1503    size: usize,
1504) -> Result<usize, Errno> {
1505    let node =
1506        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1507    do_getxattr(locked, current_task, &node, name_addr, value_addr, size)
1508}
1509
1510fn do_setxattr(
1511    locked: &mut Locked<Unlocked>,
1512    current_task: &CurrentTask,
1513    node: &NamespaceNode,
1514    name_addr: UserCString,
1515    value_addr: UserAddress,
1516    size: usize,
1517    flags: u32,
1518) -> Result<(), Errno> {
1519    if size > XATTR_NAME_MAX as usize {
1520        return error!(E2BIG);
1521    }
1522
1523    let op = match flags {
1524        0 => XattrOp::Set,
1525        XATTR_CREATE => XattrOp::Create,
1526        XATTR_REPLACE => XattrOp::Replace,
1527        _ => return error!(EINVAL),
1528    };
1529    let name = read_xattr_name(current_task, name_addr)?;
1530    let value = FsString::from(current_task.read_memory_to_vec(value_addr, size)?);
1531    node.entry.node.set_xattr(locked, current_task, &node.mount, name.as_ref(), value.as_ref(), op)
1532}
1533
1534pub fn sys_fsetxattr(
1535    locked: &mut Locked<Unlocked>,
1536    current_task: &CurrentTask,
1537    fd: FdNumber,
1538    name_addr: UserCString,
1539    value_addr: UserAddress,
1540    size: usize,
1541    flags: u32,
1542) -> Result<(), Errno> {
1543    let file = current_task.files.get(fd)?;
1544    do_setxattr(locked, current_task, &file.name, name_addr, value_addr, size, flags)
1545}
1546
1547pub fn sys_lsetxattr(
1548    locked: &mut Locked<Unlocked>,
1549    current_task: &CurrentTask,
1550    path_addr: UserCString,
1551    name_addr: UserCString,
1552    value_addr: UserAddress,
1553    size: usize,
1554    flags: u32,
1555) -> Result<(), Errno> {
1556    let node =
1557        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1558    do_setxattr(locked, current_task, &node, name_addr, value_addr, size, flags)
1559}
1560
1561pub fn sys_setxattr(
1562    locked: &mut Locked<Unlocked>,
1563    current_task: &CurrentTask,
1564    path_addr: UserCString,
1565    name_addr: UserCString,
1566    value_addr: UserAddress,
1567    size: usize,
1568    flags: u32,
1569) -> Result<(), Errno> {
1570    let node =
1571        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1572    do_setxattr(locked, current_task, &node, name_addr, value_addr, size, flags)
1573}
1574
1575fn do_removexattr(
1576    locked: &mut Locked<Unlocked>,
1577    current_task: &CurrentTask,
1578    node: &NamespaceNode,
1579    name_addr: UserCString,
1580) -> Result<(), Errno> {
1581    let mode = node.entry.node.info().mode;
1582    if mode.is_chr() || mode.is_fifo() {
1583        return error!(EPERM);
1584    }
1585    let name = read_xattr_name(current_task, name_addr)?;
1586    node.entry.node.remove_xattr(locked, current_task, &node.mount, name.as_ref())
1587}
1588
1589pub fn sys_removexattr(
1590    locked: &mut Locked<Unlocked>,
1591    current_task: &CurrentTask,
1592    path_addr: UserCString,
1593    name_addr: UserCString,
1594) -> Result<(), Errno> {
1595    let node =
1596        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1597    do_removexattr(locked, current_task, &node, name_addr)
1598}
1599
1600pub fn sys_lremovexattr(
1601    locked: &mut Locked<Unlocked>,
1602    current_task: &CurrentTask,
1603    path_addr: UserCString,
1604    name_addr: UserCString,
1605) -> Result<(), Errno> {
1606    let node =
1607        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1608    do_removexattr(locked, current_task, &node, name_addr)
1609}
1610
1611pub fn sys_fremovexattr(
1612    locked: &mut Locked<Unlocked>,
1613    current_task: &CurrentTask,
1614    fd: FdNumber,
1615    name_addr: UserCString,
1616) -> Result<(), Errno> {
1617    let file = current_task.files.get(fd)?;
1618    do_removexattr(locked, current_task, &file.name, name_addr)
1619}
1620
1621fn do_listxattr(
1622    locked: &mut Locked<Unlocked>,
1623    current_task: &CurrentTask,
1624    node: &NamespaceNode,
1625    list_addr: UserAddress,
1626    size: usize,
1627) -> Result<usize, Errno> {
1628    let security_xattr = security::fs_node_listsecurity(current_task, &node.entry.node);
1629    let xattrs = match node.entry.node.list_xattrs(locked, current_task, size) {
1630        Ok(ValueOrSize::Size(s)) => return Ok(s + security_xattr.map_or(0, |s| s.len() + 1)),
1631        Ok(ValueOrSize::Value(mut v)) => {
1632            if let Some(security_value) = security_xattr {
1633                if !v.contains(&security_value) {
1634                    v.push(security_value);
1635                }
1636            }
1637            v
1638        }
1639        Err(e) => {
1640            if e.code != ENOTSUP || security_xattr.is_none() {
1641                return Err(e);
1642            }
1643            vec![security_xattr.unwrap()]
1644        }
1645    };
1646
1647    let mut list = vec![];
1648    for name in xattrs.iter() {
1649        list.extend_from_slice(name);
1650        list.push(b'\0');
1651    }
1652    if size == 0 {
1653        return Ok(list.len());
1654    }
1655    if size < list.len() {
1656        return error!(ERANGE);
1657    }
1658    current_task.write_memory(list_addr, &list)
1659}
1660
1661pub fn sys_listxattr(
1662    locked: &mut Locked<Unlocked>,
1663    current_task: &CurrentTask,
1664    path_addr: UserCString,
1665    list_addr: UserAddress,
1666    size: usize,
1667) -> Result<usize, Errno> {
1668    let node =
1669        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1670    do_listxattr(locked, current_task, &node, list_addr, size)
1671}
1672
1673pub fn sys_llistxattr(
1674    locked: &mut Locked<Unlocked>,
1675    current_task: &CurrentTask,
1676    path_addr: UserCString,
1677    list_addr: UserAddress,
1678    size: usize,
1679) -> Result<usize, Errno> {
1680    let node =
1681        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1682    do_listxattr(locked, current_task, &node, list_addr, size)
1683}
1684
1685pub fn sys_flistxattr(
1686    locked: &mut Locked<Unlocked>,
1687    current_task: &CurrentTask,
1688    fd: FdNumber,
1689    list_addr: UserAddress,
1690    size: usize,
1691) -> Result<usize, Errno> {
1692    let file = current_task.files.get(fd)?;
1693    do_listxattr(locked, current_task, &file.name, list_addr, size)
1694}
1695
1696pub fn sys_getcwd(
1697    _locked: &mut Locked<Unlocked>,
1698    current_task: &CurrentTask,
1699    buf: UserAddress,
1700    size: usize,
1701) -> Result<usize, Errno> {
1702    let root = current_task.fs().root();
1703    let cwd = current_task.fs().cwd();
1704    let mut user_cwd = match cwd.path_from_root(Some(&root)) {
1705        PathWithReachability::Reachable(path) => path,
1706        PathWithReachability::Unreachable(mut path) => {
1707            let mut combined = vec![];
1708            combined.extend_from_slice(b"(unreachable)");
1709            combined.append(&mut path);
1710            combined.into()
1711        }
1712    };
1713    user_cwd.push(b'\0');
1714    if user_cwd.len() > size {
1715        return error!(ERANGE);
1716    }
1717    current_task.write_memory(buf, &user_cwd)?;
1718    Ok(user_cwd.len())
1719}
1720
1721pub fn sys_umask(
1722    _locked: &mut Locked<Unlocked>,
1723    current_task: &CurrentTask,
1724    umask: FileMode,
1725) -> Result<FileMode, Errno> {
1726    Ok(current_task.fs().set_umask(umask))
1727}
1728
1729fn get_fd_flags(flags: u32) -> FdFlags {
1730    if flags & O_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() }
1731}
1732
1733pub fn sys_pipe2(
1734    locked: &mut Locked<Unlocked>,
1735    current_task: &CurrentTask,
1736    user_pipe: UserRef<FdNumber>,
1737    flags: u32,
1738) -> Result<(), Errno> {
1739    let supported_file_flags = OpenFlags::NONBLOCK | OpenFlags::DIRECT;
1740    if flags & !(O_CLOEXEC | supported_file_flags.bits()) != 0 {
1741        return error!(EINVAL);
1742    }
1743    let (read, write) = new_pipe(locked, current_task)?;
1744
1745    let file_flags = OpenFlags::from_bits_truncate(flags & supported_file_flags.bits());
1746    read.update_file_flags(file_flags, supported_file_flags);
1747    write.update_file_flags(file_flags, supported_file_flags);
1748
1749    let fd_flags = get_fd_flags(flags);
1750    let fd_read = current_task.add_file(locked, read, fd_flags)?;
1751    let fd_write = current_task.add_file(locked, write, fd_flags)?;
1752    log_trace!("pipe2 -> [{:#x}, {:#x}]", fd_read.raw(), fd_write.raw());
1753
1754    current_task.write_object(user_pipe, &fd_read)?;
1755    let user_pipe = user_pipe.next()?;
1756    current_task.write_object(user_pipe, &fd_write)?;
1757
1758    Ok(())
1759}
1760
1761pub fn sys_ioctl(
1762    locked: &mut Locked<Unlocked>,
1763    current_task: &CurrentTask,
1764    fd: FdNumber,
1765    request: u32,
1766    arg: SyscallArg,
1767) -> Result<SyscallResult, Errno> {
1768    match request {
1769        FIOCLEX | FIONCLEX => {
1770            current_task.files.ioctl_fd_flags(current_task, fd, request)?;
1771            Ok(SUCCESS)
1772        }
1773        _ => {
1774            let file = current_task.files.get(fd)?;
1775            file.ioctl(locked, current_task, request, arg)
1776        }
1777    }
1778}
1779
1780pub fn sys_symlinkat(
1781    locked: &mut Locked<Unlocked>,
1782    current_task: &CurrentTask,
1783    user_target: UserCString,
1784    new_dir_fd: FdNumber,
1785    user_path: UserCString,
1786) -> Result<(), Errno> {
1787    let target = current_task.read_path(user_target)?;
1788    if target.is_empty() {
1789        return error!(ENOENT);
1790    }
1791
1792    let path = current_task.read_path(user_path)?;
1793    // TODO: This check could probably be moved into parent.symlink(..).
1794    if path.is_empty() {
1795        return error!(ENOENT);
1796    }
1797
1798    let res = lookup_parent_at(
1799        locked,
1800        current_task,
1801        new_dir_fd,
1802        user_path,
1803        |locked, context, parent, basename| {
1804            // The path to a new symlink cannot end in `/`. That would imply that we are dereferencing
1805            // the symlink to a directory.
1806            //
1807            // See https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap03.html#tag_21_03_00_75
1808            if context.must_be_directory {
1809                return error!(ENOENT);
1810            }
1811            parent.create_symlink(locked, current_task, basename, target.as_ref())
1812        },
1813    );
1814    res?;
1815    Ok(())
1816}
1817
1818pub fn sys_dup(
1819    locked: &mut Locked<Unlocked>,
1820    current_task: &CurrentTask,
1821    oldfd: FdNumber,
1822) -> Result<FdNumber, Errno> {
1823    current_task.files.duplicate(
1824        locked,
1825        current_task,
1826        oldfd,
1827        TargetFdNumber::Default,
1828        FdFlags::empty(),
1829    )
1830}
1831
1832pub fn sys_dup3(
1833    locked: &mut Locked<Unlocked>,
1834    current_task: &CurrentTask,
1835    oldfd: FdNumber,
1836    newfd: FdNumber,
1837    flags: u32,
1838) -> Result<FdNumber, Errno> {
1839    if oldfd == newfd {
1840        return error!(EINVAL);
1841    }
1842    if flags & !O_CLOEXEC != 0 {
1843        return error!(EINVAL);
1844    }
1845    let fd_flags = get_fd_flags(flags);
1846    current_task.files.duplicate(
1847        locked,
1848        current_task,
1849        oldfd,
1850        TargetFdNumber::Specific(newfd),
1851        fd_flags,
1852    )?;
1853    Ok(newfd)
1854}
1855
1856/// A memfd file descriptor cannot have a name longer than 250 bytes, including
1857/// the null terminator.
1858///
1859/// See Errors section of https://man7.org/linux/man-pages/man2/memfd_create.2.html
1860const MEMFD_NAME_MAX_LEN: usize = 250;
1861
1862pub fn sys_memfd_create(
1863    locked: &mut Locked<Unlocked>,
1864    current_task: &CurrentTask,
1865    user_name: UserCString,
1866    flags: u32,
1867) -> Result<FdNumber, Errno> {
1868    const HUGE_SHIFTED_MASK: u32 = MFD_HUGE_MASK << MFD_HUGE_SHIFT;
1869
1870    if flags
1871        & !(MFD_CLOEXEC
1872            | MFD_ALLOW_SEALING
1873            | MFD_HUGETLB
1874            | HUGE_SHIFTED_MASK
1875            | MFD_NOEXEC_SEAL
1876            | MFD_EXEC)
1877        != 0
1878    {
1879        track_stub!(TODO("https://fxbug.dev/322875665"), "memfd_create unknown flags", flags);
1880        return error!(EINVAL);
1881    }
1882
1883    let _huge_page_size = if flags & MFD_HUGETLB != 0 {
1884        Some(flags & HUGE_SHIFTED_MASK)
1885    } else {
1886        if flags & HUGE_SHIFTED_MASK != 0 {
1887            return error!(EINVAL);
1888        }
1889        None
1890    };
1891
1892    let name = current_task
1893        .read_c_string_to_vec(user_name, MEMFD_NAME_MAX_LEN)
1894        .map_err(|e| if e == ENAMETOOLONG { errno!(EINVAL) } else { e })?;
1895
1896    // This behavior matches MEMFD_NOEXEC_SCOPE_EXEC, which states:
1897    //   > memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_EXEC was set.
1898    //
1899    // This behavior can be changed on Linux via sysctl vm.memfd_noexec, which is pid namespaced.
1900    // We do not currently support changing this behavior.
1901    let seals = if flags & MFD_NOEXEC_SEAL != 0 {
1902        SealFlags::NO_EXEC
1903    } else if flags & MFD_ALLOW_SEALING != 0 {
1904        SealFlags::empty()
1905    } else {
1906        // Forbid sealing, by sealing the seal operation.
1907        SealFlags::SEAL
1908    };
1909
1910    let file = new_memfd(locked, current_task, name, seals, OpenFlags::RDWR)?;
1911
1912    let mut fd_flags = FdFlags::empty();
1913    if flags & MFD_CLOEXEC != 0 {
1914        fd_flags |= FdFlags::CLOEXEC;
1915    }
1916    let fd = current_task.add_file(locked, file, fd_flags)?;
1917    Ok(fd)
1918}
1919
1920pub fn sys_mount(
1921    locked: &mut Locked<Unlocked>,
1922    current_task: &CurrentTask,
1923    source_addr: UserCString,
1924    target_addr: UserCString,
1925    filesystemtype_addr: UserCString,
1926    flags: u32,
1927    data_addr: UserCString,
1928) -> Result<(), Errno> {
1929    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1930
1931    let flags = MountFlags::from_bits(flags).ok_or_else(|| {
1932        track_stub!(
1933            TODO("https://fxbug.dev/322875327"),
1934            "mount unknown flags",
1935            flags & !MountFlags::from_bits_truncate(flags).bits()
1936        );
1937        errno!(EINVAL)
1938    })?;
1939
1940    let target =
1941        lookup_at(locked, current_task, FdNumber::AT_FDCWD, target_addr, LookupFlags::default())?;
1942
1943    security::sb_mount(current_task, &target, flags)?;
1944
1945    if flags.contains(MountFlags::REMOUNT) {
1946        do_mount_remount(current_task, target, flags, data_addr)
1947    } else if flags.contains(MountFlags::BIND) {
1948        do_mount_bind(locked, current_task, source_addr, target, flags)
1949    } else if flags.intersects(MountFlags::SHARED | MountFlags::PRIVATE | MountFlags::DOWNSTREAM) {
1950        do_mount_change_propagation_type(current_task, target, flags)
1951    } else {
1952        do_mount_create(
1953            locked,
1954            current_task,
1955            source_addr,
1956            target,
1957            filesystemtype_addr,
1958            data_addr,
1959            flags,
1960        )
1961    }
1962}
1963
1964fn do_mount_remount(
1965    current_task: &CurrentTask,
1966    target: NamespaceNode,
1967    flags: MountFlags,
1968    data_addr: UserCString,
1969) -> Result<(), Errno> {
1970    if !data_addr.is_null() {
1971        track_stub!(TODO("https://fxbug.dev/322875506"), "MS_REMOUNT: Updating data");
1972    }
1973    let mount = target.mount_if_root()?;
1974
1975    let data = current_task.read_path_if_non_null(data_addr)?;
1976    let mount_options =
1977        security::sb_eat_lsm_opts(current_task.kernel(), &mut MountParams::parse(data.as_ref())?)?;
1978    security::sb_remount(current_task, &mount, mount_options)?;
1979    let updated_flags = flags & MountFlags::CHANGEABLE_WITH_REMOUNT;
1980    mount.update_flags(updated_flags);
1981    if !flags.contains(MountFlags::BIND) {
1982        // From <https://man7.org/linux/man-pages/man2/mount.2.html>
1983        //
1984        //   Since Linux 2.6.26, the MS_REMOUNT flag can be used with MS_BIND
1985        //   to modify only the per-mount-point flags.  This is particularly
1986        //   useful for setting or clearing the "read-only" flag on a mount
1987        //   without changing the underlying filesystem.
1988        track_stub!(TODO("https://fxbug.dev/322875215"), "MS_REMOUNT: Updating superblock flags");
1989    }
1990    Ok(())
1991}
1992
1993fn do_mount_bind(
1994    locked: &mut Locked<Unlocked>,
1995    current_task: &CurrentTask,
1996    source_addr: UserCString,
1997    target: NamespaceNode,
1998    flags: MountFlags,
1999) -> Result<(), Errno> {
2000    let source =
2001        lookup_at(locked, current_task, FdNumber::AT_FDCWD, source_addr, LookupFlags::default())?;
2002    log_trace!(
2003        source:% = source.path(current_task),
2004        target:% = target.path(current_task),
2005        flags:?;
2006        "do_mount_bind",
2007    );
2008    target.mount(WhatToMount::Bind(source), flags)
2009}
2010
2011fn do_mount_change_propagation_type(
2012    current_task: &CurrentTask,
2013    target: NamespaceNode,
2014    flags: MountFlags,
2015) -> Result<(), Errno> {
2016    log_trace!(
2017        target:% = target.path(current_task),
2018        flags:?;
2019        "do_mount_change_propagation_type",
2020    );
2021
2022    // Flag validation. Of the three propagation type flags, exactly one must be passed. The only
2023    // valid flags other than propagation type are MS_SILENT and MS_REC.
2024    //
2025    // Use if statements to find the first propagation type flag, then check for valid flags using
2026    // only the first propagation flag and MS_REC / MS_SILENT as valid flags.
2027    let propagation_flag = if flags.contains(MountFlags::SHARED) {
2028        MountFlags::SHARED
2029    } else if flags.contains(MountFlags::PRIVATE) {
2030        MountFlags::PRIVATE
2031    } else if flags.contains(MountFlags::DOWNSTREAM) {
2032        MountFlags::DOWNSTREAM
2033    } else {
2034        return error!(EINVAL);
2035    };
2036    if flags.intersects(!(propagation_flag | MountFlags::REC | MountFlags::SILENT)) {
2037        return error!(EINVAL);
2038    }
2039
2040    let mount = target.mount_if_root()?;
2041    mount.change_propagation(propagation_flag, flags.contains(MountFlags::REC));
2042    Ok(())
2043}
2044
2045fn do_mount_create(
2046    locked: &mut Locked<Unlocked>,
2047    current_task: &CurrentTask,
2048    source_addr: UserCString,
2049    target: NamespaceNode,
2050    filesystemtype_addr: UserCString,
2051    data_addr: UserCString,
2052    flags: MountFlags,
2053) -> Result<(), Errno> {
2054    let source = current_task.read_path_if_non_null(source_addr)?;
2055    let fs_type = current_task.read_path(filesystemtype_addr)?;
2056    let data = current_task.read_path_if_non_null(data_addr)?;
2057    log_trace!(
2058        source:%,
2059        target:% = target.path(current_task),
2060        fs_type:%,
2061        data:%;
2062        "do_mount_create",
2063    );
2064
2065    let options = FileSystemOptions {
2066        source: source.into(),
2067        flags: flags & MountFlags::STORED_ON_FILESYSTEM,
2068        params: MountParams::parse(data.as_ref())?,
2069    };
2070
2071    let fs = current_task.create_filesystem(locked, fs_type.as_ref(), options)?;
2072
2073    security::sb_kern_mount(current_task, &fs)?;
2074    target.mount(WhatToMount::Fs(fs), flags)
2075}
2076
2077pub fn sys_umount2(
2078    locked: &mut Locked<Unlocked>,
2079    current_task: &CurrentTask,
2080    target_addr: UserCString,
2081    flags: u32,
2082) -> Result<(), Errno> {
2083    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
2084
2085    let unmount_flags = UnmountFlags::from_bits(flags).ok_or_else(|| {
2086        track_stub!(
2087            TODO("https://fxbug.dev/322875327"),
2088            "unmount unknown flags",
2089            flags & !UnmountFlags::from_bits_truncate(flags).bits()
2090        );
2091        errno!(EINVAL)
2092    })?;
2093
2094    if unmount_flags.contains(UnmountFlags::EXPIRE)
2095        && (unmount_flags.contains(UnmountFlags::FORCE)
2096            || unmount_flags.contains(UnmountFlags::DETACH))
2097    {
2098        return error!(EINVAL);
2099    }
2100
2101    let lookup_flags = if unmount_flags.contains(UnmountFlags::NOFOLLOW) {
2102        LookupFlags::no_follow()
2103    } else {
2104        LookupFlags::default()
2105    };
2106    let target = lookup_at(locked, current_task, FdNumber::AT_FDCWD, target_addr, lookup_flags)?;
2107
2108    security::sb_umount(current_task, &target, unmount_flags)?;
2109
2110    target.unmount(unmount_flags)
2111}
2112
2113pub fn sys_eventfd2(
2114    locked: &mut Locked<Unlocked>,
2115    current_task: &CurrentTask,
2116    value: u32,
2117    flags: u32,
2118) -> Result<FdNumber, Errno> {
2119    if flags & !(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE) != 0 {
2120        return error!(EINVAL);
2121    }
2122    let blocking = (flags & EFD_NONBLOCK) == 0;
2123    let eventfd_type =
2124        if (flags & EFD_SEMAPHORE) == 0 { EventFdType::Counter } else { EventFdType::Semaphore };
2125    let file = new_eventfd(locked, current_task, value, eventfd_type, blocking);
2126    let fd_flags = if flags & EFD_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() };
2127    let fd = current_task.add_file(locked, file, fd_flags)?;
2128    Ok(fd)
2129}
2130
2131pub fn sys_pidfd_open(
2132    locked: &mut Locked<Unlocked>,
2133    current_task: &CurrentTask,
2134    pid: pid_t,
2135    flags: u32,
2136) -> Result<FdNumber, Errno> {
2137    if flags & !PIDFD_NONBLOCK != 0 {
2138        return error!(EINVAL);
2139    }
2140    if pid <= 0 {
2141        return error!(EINVAL);
2142    }
2143
2144    let file = {
2145        let pid_table = current_task.kernel().pids.read();
2146
2147        let blocking = (flags & PIDFD_NONBLOCK) == 0;
2148        let open_flags = if blocking { OpenFlags::empty() } else { OpenFlags::NONBLOCK };
2149
2150        // Validate that a process (and not just a task) entry exists for the PID.
2151        let task = pid_table.get_task(pid);
2152        let file = match (pid_table.get_process(pid), task.upgrade()) {
2153            (Some(ProcessEntryRef::Process(proc)), Some(task)) => {
2154                new_pidfd(locked, current_task, &proc, &*task.mm()?, open_flags)
2155            }
2156            (Some(ProcessEntryRef::Zombie(_)), _) => {
2157                new_zombie_pidfd(locked, current_task, open_flags)
2158            }
2159            (None, Some(_)) => return error!(EINVAL),
2160            _ => return error!(ESRCH),
2161        };
2162        file
2163    };
2164
2165    current_task.add_file(locked, file, FdFlags::CLOEXEC)
2166}
2167
2168pub fn sys_pidfd_getfd(
2169    locked: &mut Locked<Unlocked>,
2170    current_task: &CurrentTask,
2171    pidfd: FdNumber,
2172    targetfd: FdNumber,
2173    flags: u32,
2174) -> Result<FdNumber, Errno> {
2175    if flags != 0 {
2176        return error!(EINVAL);
2177    }
2178
2179    let file = current_task.files.get(pidfd)?;
2180    let tg = file.as_thread_group_key()?;
2181    let tg = tg.upgrade().ok_or_else(|| errno!(ESRCH))?;
2182    let task = TempRef::into_static(tg.read().tasks().next().ok_or_else(|| errno!(ESRCH))?);
2183
2184    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &task)?;
2185
2186    let target_file = task.files.get(targetfd)?;
2187    current_task.add_file(locked, target_file, FdFlags::CLOEXEC)
2188}
2189
2190pub fn sys_timerfd_create(
2191    locked: &mut Locked<Unlocked>,
2192    current_task: &CurrentTask,
2193    clock_id: u32,
2194    flags: u32,
2195) -> Result<FdNumber, Errno> {
2196    let timeline = match clock_id {
2197        CLOCK_MONOTONIC => Timeline::Monotonic,
2198        CLOCK_BOOTTIME | CLOCK_BOOTTIME_ALARM => Timeline::BootInstant,
2199        CLOCK_REALTIME | CLOCK_REALTIME_ALARM => Timeline::RealTime,
2200        _ => return error!(EINVAL),
2201    };
2202    let timer_type = match clock_id {
2203        CLOCK_MONOTONIC | CLOCK_BOOTTIME | CLOCK_REALTIME => TimerWakeup::Regular,
2204        CLOCK_BOOTTIME_ALARM | CLOCK_REALTIME_ALARM => {
2205            security::check_task_capable(current_task, CAP_WAKE_ALARM)?;
2206            TimerWakeup::Alarm
2207        }
2208        _ => return error!(EINVAL),
2209    };
2210    if flags & !(TFD_NONBLOCK | TFD_CLOEXEC) != 0 {
2211        track_stub!(TODO("https://fxbug.dev/322875488"), "timerfd_create unknown flags", flags);
2212        return error!(EINVAL);
2213    }
2214    log_trace!("timerfd_create(clock_id={:?}, flags={:#x})", clock_id, flags);
2215
2216    let mut open_flags = OpenFlags::RDWR;
2217    if flags & TFD_NONBLOCK != 0 {
2218        open_flags |= OpenFlags::NONBLOCK;
2219    }
2220
2221    let mut fd_flags = FdFlags::empty();
2222    if flags & TFD_CLOEXEC != 0 {
2223        fd_flags |= FdFlags::CLOEXEC;
2224    };
2225
2226    let timer = TimerFile::new_file(locked, current_task, timer_type, timeline, open_flags)?;
2227    let fd = current_task.add_file(locked, timer, fd_flags)?;
2228    Ok(fd)
2229}
2230
2231pub fn sys_timerfd_gettime(
2232    _locked: &mut Locked<Unlocked>,
2233    current_task: &CurrentTask,
2234    fd: FdNumber,
2235    user_current_value: ITimerSpecPtr,
2236) -> Result<(), Errno> {
2237    let file = current_task.files.get(fd)?;
2238    let timer_file = file.downcast_file::<TimerFile>().ok_or_else(|| errno!(EINVAL))?;
2239    let timer_info = timer_file.current_timer_spec();
2240    log_trace!("timerfd_gettime(fd={:?}, current_value={:?})", fd, timer_info);
2241    current_task.write_multi_arch_object(user_current_value, timer_info)?;
2242    Ok(())
2243}
2244
2245pub fn sys_timerfd_settime(
2246    _locked: &mut Locked<Unlocked>,
2247    current_task: &CurrentTask,
2248    fd: FdNumber,
2249    flags: u32,
2250    user_new_value: ITimerSpecPtr,
2251    user_old_value: ITimerSpecPtr,
2252) -> Result<(), Errno> {
2253    if flags & !(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) != 0 {
2254        track_stub!(TODO("https://fxbug.dev/322874722"), "timerfd_settime unknown flags", flags);
2255        return error!(EINVAL);
2256    }
2257
2258    let file = current_task.files.get(fd)?;
2259    let timer_file = file.downcast_file::<TimerFile>().ok_or_else(|| errno!(EINVAL))?;
2260
2261    let new_timer_spec = current_task.read_multi_arch_object(user_new_value)?;
2262    let old_timer_spec = timer_file.set_timer_spec(current_task, &file, new_timer_spec, flags)?;
2263    log_trace!(
2264        "timerfd_settime(fd={:?}, flags={:#x}, new_value={:?}, current_value={:?})",
2265        fd,
2266        flags,
2267        new_timer_spec,
2268        old_timer_spec
2269    );
2270    if !user_old_value.is_null() {
2271        current_task.write_multi_arch_object(user_old_value, old_timer_spec)?;
2272    }
2273    Ok(())
2274}
2275
2276fn deadline_after_timespec(
2277    current_task: &CurrentTask,
2278    user_timespec: TimeSpecPtr,
2279) -> Result<zx::MonotonicInstant, Errno> {
2280    if user_timespec.is_null() {
2281        Ok(zx::MonotonicInstant::INFINITE)
2282    } else {
2283        let timespec = current_task.read_multi_arch_object(user_timespec)?;
2284        Ok(zx::MonotonicInstant::after(duration_from_timespec(timespec)?))
2285    }
2286}
2287
2288static_assertions::assert_eq_size!(uapi::__kernel_fd_set, uapi::arch32::__kernel_fd_set);
2289
2290fn select(
2291    locked: &mut Locked<Unlocked>,
2292    current_task: &mut CurrentTask,
2293    nfds: u32,
2294    readfds_addr: UserRef<__kernel_fd_set>,
2295    writefds_addr: UserRef<__kernel_fd_set>,
2296    exceptfds_addr: UserRef<__kernel_fd_set>,
2297    deadline: zx::MonotonicInstant,
2298    sigmask_addr: UserRef<pselect6_sigmask>,
2299) -> Result<i32, Errno> {
2300    const BITS_PER_BYTE: usize = 8;
2301
2302    fn sizeof<T>(_: &T) -> usize {
2303        BITS_PER_BYTE * std::mem::size_of::<T>()
2304    }
2305    fn is_fd_set(set: &__kernel_fd_set, fd: usize) -> bool {
2306        let index = fd / sizeof(&set.fds_bits[0]);
2307        let remainder = fd % sizeof(&set.fds_bits[0]);
2308        set.fds_bits[index] & (1 << remainder) > 0
2309    }
2310    fn add_fd_to_set(set: &mut __kernel_fd_set, fd: usize) {
2311        let index = fd / sizeof(&set.fds_bits[0]);
2312        let remainder = fd % sizeof(&set.fds_bits[0]);
2313
2314        set.fds_bits[index] |= 1 << remainder;
2315    }
2316    let read_fd_set = |addr: UserRef<__kernel_fd_set>| {
2317        if addr.is_null() { Ok(Default::default()) } else { current_task.read_object(addr) }
2318    };
2319
2320    if nfds as usize > BITS_PER_BYTE * std::mem::size_of::<__kernel_fd_set>() {
2321        return error!(EINVAL);
2322    }
2323
2324    let read_events =
2325        FdEvents::from_bits_truncate(POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR);
2326    let write_events = FdEvents::from_bits_truncate(POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR);
2327    let except_events = FdEvents::from_bits_truncate(POLLPRI);
2328
2329    let readfds = read_fd_set(readfds_addr)?;
2330    let writefds = read_fd_set(writefds_addr)?;
2331    let exceptfds = read_fd_set(exceptfds_addr)?;
2332
2333    let sets = &[(read_events, &readfds), (write_events, &writefds), (except_events, &exceptfds)];
2334    let waiter = FileWaiter::<FdNumber>::default();
2335
2336    for fd in 0..nfds {
2337        let mut aggregated_events = FdEvents::empty();
2338        for (events, fds) in sets.iter() {
2339            if is_fd_set(fds, fd as usize) {
2340                aggregated_events |= *events;
2341            }
2342        }
2343        if !aggregated_events.is_empty() {
2344            let fd = FdNumber::from_raw(fd as i32);
2345            let file = current_task.files.get(fd)?;
2346            waiter.add(locked, current_task, fd, Some(&file), aggregated_events)?;
2347        }
2348    }
2349
2350    let mask = if !sigmask_addr.is_null() {
2351        let sigmask = current_task.read_object(sigmask_addr)?;
2352        let mask = if sigmask.ss.is_null() {
2353            current_task.read().signal_mask()
2354        } else {
2355            if sigmask.ss_len < std::mem::size_of::<sigset_t>() {
2356                return error!(EINVAL);
2357            }
2358            current_task.read_object(sigmask.ss.into())?
2359        };
2360        Some(mask)
2361    } else {
2362        None
2363    };
2364
2365    waiter.wait(locked, current_task, mask, deadline)?;
2366
2367    let mut num_fds = 0;
2368    let mut readfds_out: __kernel_fd_set = Default::default();
2369    let mut writefds_out: __kernel_fd_set = Default::default();
2370    let mut exceptfds_out: __kernel_fd_set = Default::default();
2371    let mut sets = [
2372        (read_events, &readfds, &mut readfds_out),
2373        (write_events, &writefds, &mut writefds_out),
2374        (except_events, &exceptfds, &mut exceptfds_out),
2375    ];
2376    let mut ready_items = waiter.ready_items.lock();
2377    for ReadyItem { key: ready_key, events: ready_events } in ready_items.drain(..) {
2378        let ready_key = assert_matches::assert_matches!(
2379            ready_key,
2380            ReadyItemKey::FdNumber(v) => v
2381        );
2382
2383        sets.iter_mut().for_each(|(events, fds, fds_out)| {
2384            let fd = ready_key.raw() as usize;
2385            if events.intersects(ready_events) && is_fd_set(fds, fd) {
2386                add_fd_to_set(fds_out, fd);
2387                num_fds += 1;
2388            }
2389        });
2390    }
2391
2392    let write_fd_set =
2393        |addr: UserRef<__kernel_fd_set>, value: __kernel_fd_set| -> Result<(), Errno> {
2394            if !addr.is_null() {
2395                current_task.write_object(addr, &value)?;
2396            }
2397            Ok(())
2398        };
2399    write_fd_set(readfds_addr, readfds_out)?;
2400    write_fd_set(writefds_addr, writefds_out)?;
2401    write_fd_set(exceptfds_addr, exceptfds_out)?;
2402    Ok(num_fds)
2403}
2404
2405pub fn sys_pselect6(
2406    locked: &mut Locked<Unlocked>,
2407    current_task: &mut CurrentTask,
2408    nfds: u32,
2409    readfds_addr: UserRef<__kernel_fd_set>,
2410    writefds_addr: UserRef<__kernel_fd_set>,
2411    exceptfds_addr: UserRef<__kernel_fd_set>,
2412    timeout_addr: TimeSpecPtr,
2413    sigmask_addr: UserRef<pselect6_sigmask>,
2414) -> Result<i32, Errno> {
2415    let deadline = deadline_after_timespec(current_task, timeout_addr)?;
2416
2417    let num_fds = select(
2418        locked,
2419        current_task,
2420        nfds,
2421        readfds_addr,
2422        writefds_addr,
2423        exceptfds_addr,
2424        deadline,
2425        sigmask_addr,
2426    )?;
2427
2428    if !timeout_addr.is_null()
2429        && !current_task
2430            .thread_group()
2431            .read()
2432            .personality
2433            .contains(PersonalityFlags::STICKY_TIMEOUTS)
2434    {
2435        let now = zx::MonotonicInstant::get();
2436        let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0));
2437        current_task.write_multi_arch_object(timeout_addr, timespec_from_duration(remaining))?;
2438    }
2439
2440    Ok(num_fds)
2441}
2442
2443pub fn sys_select(
2444    locked: &mut Locked<Unlocked>,
2445    current_task: &mut CurrentTask,
2446    nfds: u32,
2447    readfds_addr: UserRef<__kernel_fd_set>,
2448    writefds_addr: UserRef<__kernel_fd_set>,
2449    exceptfds_addr: UserRef<__kernel_fd_set>,
2450    timeout_addr: TimeValPtr,
2451) -> Result<i32, Errno> {
2452    let start_time = zx::MonotonicInstant::get();
2453
2454    let deadline = if timeout_addr.is_null() {
2455        zx::MonotonicInstant::INFINITE
2456    } else {
2457        let timeval = current_task.read_multi_arch_object(timeout_addr)?;
2458        start_time + starnix_types::time::duration_from_timeval(timeval)?
2459    };
2460
2461    let num_fds = select(
2462        locked,
2463        current_task,
2464        nfds,
2465        readfds_addr,
2466        writefds_addr,
2467        exceptfds_addr,
2468        deadline,
2469        UserRef::<pselect6_sigmask>::default(),
2470    )?;
2471
2472    if !timeout_addr.is_null()
2473        && !current_task
2474            .thread_group()
2475            .read()
2476            .personality
2477            .contains(PersonalityFlags::STICKY_TIMEOUTS)
2478    {
2479        let now = zx::MonotonicInstant::get();
2480        let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0));
2481        current_task.write_multi_arch_object(
2482            timeout_addr,
2483            starnix_types::time::timeval_from_duration(remaining),
2484        )?;
2485    }
2486
2487    Ok(num_fds)
2488}
2489
2490pub fn sys_epoll_create1(
2491    locked: &mut Locked<Unlocked>,
2492    current_task: &CurrentTask,
2493    flags: u32,
2494) -> Result<FdNumber, Errno> {
2495    if flags & !EPOLL_CLOEXEC != 0 {
2496        return error!(EINVAL);
2497    }
2498    let ep_file = EpollFileObject::new_file(locked, current_task);
2499    let fd_flags = if flags & EPOLL_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() };
2500    let fd = current_task.add_file(locked, ep_file, fd_flags)?;
2501    Ok(fd)
2502}
2503
2504pub fn sys_epoll_ctl(
2505    locked: &mut Locked<Unlocked>,
2506    current_task: &CurrentTask,
2507    epfd: FdNumber,
2508    op: u32,
2509    fd: FdNumber,
2510    event: UserRef<EpollEvent>,
2511) -> Result<(), Errno> {
2512    let file = current_task.files.get(epfd)?;
2513    let epoll_file = file.downcast_file::<EpollFileObject>().ok_or_else(|| errno!(EINVAL))?;
2514    let operand_file = current_task.files.get(fd)?;
2515
2516    if Arc::ptr_eq(&file, &operand_file) {
2517        return error!(EINVAL);
2518    }
2519
2520    let epoll_event = match current_task.read_object(event) {
2521        Ok(mut epoll_event) => {
2522            // If EPOLLWAKEUP is specified in flags, but the caller does not have the CAP_BLOCK_SUSPEND
2523            // capability, then the EPOLLWAKEUP flag is silently ignored.
2524            // See https://man7.org/linux/man-pages/man2/epoll_ctl.2.html
2525            if epoll_event.events().contains(FdEvents::EPOLLWAKEUP) {
2526                if !security::is_task_capable_noaudit(current_task, CAP_BLOCK_SUSPEND) {
2527                    epoll_event.ignore(FdEvents::EPOLLWAKEUP);
2528                }
2529            }
2530            Ok(epoll_event)
2531        }
2532        result => result,
2533    };
2534
2535    match op {
2536        EPOLL_CTL_ADD => {
2537            epoll_file.add(locked, current_task, &operand_file, &file, epoll_event?)?;
2538            operand_file.register_epfd(&file);
2539        }
2540        EPOLL_CTL_MOD => {
2541            epoll_file.modify(locked, current_task, &operand_file, epoll_event?)?;
2542        }
2543        EPOLL_CTL_DEL => {
2544            epoll_file.delete(&operand_file)?;
2545            current_task.kernel().suspend_resume_manager.deactivate_wakeup_source(
2546                &WakeupSourceOrigin::Epoll(wakeup_source_name_for_epoll(
2547                    current_task,
2548                    operand_file.id.as_epoll_key(),
2549                )),
2550            );
2551            operand_file.unregister_epfd(&file);
2552        }
2553        _ => return error!(EINVAL),
2554    }
2555    Ok(())
2556}
2557
2558// Backend for sys_epoll_pwait and sys_epoll_pwait2 that takes an already-decoded deadline.
2559fn do_epoll_pwait(
2560    locked: &mut Locked<Unlocked>,
2561    current_task: &mut CurrentTask,
2562    epfd: FdNumber,
2563    events: UserRef<EpollEvent>,
2564    unvalidated_max_events: i32,
2565    deadline: zx::MonotonicInstant,
2566    user_sigmask: UserRef<SigSet>,
2567) -> Result<usize, Errno> {
2568    let file = current_task.files.get(epfd)?;
2569    let epoll_file = file.downcast_file::<EpollFileObject>().ok_or_else(|| errno!(EINVAL))?;
2570
2571    // Max_events must be greater than 0.
2572    let max_events: usize = unvalidated_max_events.try_into().map_err(|_| errno!(EINVAL))?;
2573    if max_events == 0 {
2574        return error!(EINVAL);
2575    }
2576
2577    // Return early if the user passes an obviously invalid pointer. This avoids dropping events
2578    // for common pointer errors. When we catch bad pointers after the wait is complete when the
2579    // memory is actually written, the events will be lost. This check is not a guarantee.
2580    current_task
2581        .mm()?
2582        .check_plausible(events.addr(), max_events * std::mem::size_of::<EpollEvent>())?;
2583
2584    let active_events = if !user_sigmask.is_null() {
2585        let signal_mask = current_task.read_object(user_sigmask)?;
2586        current_task.wait_with_temporary_mask(locked, signal_mask, |locked, current_task| {
2587            epoll_file.wait(locked, current_task, max_events, deadline)
2588        })?
2589    } else {
2590        epoll_file.wait(locked, current_task, max_events, deadline)?
2591    };
2592
2593    current_task.write_objects(events, &active_events)?;
2594    Ok(active_events.len())
2595}
2596
2597pub fn sys_epoll_pwait(
2598    locked: &mut Locked<Unlocked>,
2599    current_task: &mut CurrentTask,
2600    epfd: FdNumber,
2601    events: UserRef<EpollEvent>,
2602    max_events: i32,
2603    timeout: i32,
2604    user_sigmask: UserRef<SigSet>,
2605) -> Result<usize, Errno> {
2606    let deadline = zx::MonotonicInstant::after(duration_from_poll_timeout(timeout)?);
2607    do_epoll_pwait(locked, current_task, epfd, events, max_events, deadline, user_sigmask)
2608}
2609
2610pub fn sys_epoll_pwait2(
2611    locked: &mut Locked<Unlocked>,
2612    current_task: &mut CurrentTask,
2613    epfd: FdNumber,
2614    events: UserRef<EpollEvent>,
2615    max_events: i32,
2616    user_timespec: TimeSpecPtr,
2617    user_sigmask: UserRef<SigSet>,
2618) -> Result<usize, Errno> {
2619    let deadline = deadline_after_timespec(current_task, user_timespec)?;
2620    do_epoll_pwait(locked, current_task, epfd, events, max_events, deadline, user_sigmask)
2621}
2622
2623struct FileWaiter<Key: Into<ReadyItemKey>> {
2624    waiter: Waiter,
2625    ready_items: Arc<Mutex<VecDeque<ReadyItem>>>,
2626    _marker: PhantomData<Key>,
2627}
2628
2629impl<Key: Into<ReadyItemKey>> Default for FileWaiter<Key> {
2630    fn default() -> Self {
2631        Self { waiter: Waiter::new(), ready_items: Default::default(), _marker: PhantomData }
2632    }
2633}
2634
2635impl<Key: Into<ReadyItemKey>> FileWaiter<Key> {
2636    fn add<L>(
2637        &self,
2638        locked: &mut Locked<L>,
2639        current_task: &CurrentTask,
2640        key: Key,
2641        file: Option<&FileHandle>,
2642        requested_events: FdEvents,
2643    ) -> Result<(), Errno>
2644    where
2645        L: LockEqualOrBefore<FileOpsCore>,
2646    {
2647        let key = key.into();
2648
2649        if let Some(file) = file {
2650            let sought_events = requested_events | FdEvents::POLLERR | FdEvents::POLLHUP;
2651
2652            let handler =
2653                EventHandler::Enqueue { key, queue: self.ready_items.clone(), sought_events };
2654            file.wait_async(locked, current_task, &self.waiter, sought_events, handler);
2655            let current_events = file.query_events(locked, current_task)? & sought_events;
2656            if !current_events.is_empty() {
2657                self.ready_items.lock().push_back(ReadyItem { key, events: current_events });
2658            }
2659        } else {
2660            self.ready_items.lock().push_back(ReadyItem { key, events: FdEvents::POLLNVAL });
2661        }
2662        Ok(())
2663    }
2664
2665    fn wait<L>(
2666        &self,
2667        locked: &mut Locked<L>,
2668        current_task: &mut CurrentTask,
2669        signal_mask: Option<SigSet>,
2670        deadline: zx::MonotonicInstant,
2671    ) -> Result<(), Errno>
2672    where
2673        L: LockEqualOrBefore<FileOpsCore>,
2674    {
2675        if self.ready_items.lock().is_empty() {
2676            // When wait_until() returns Ok() it means there was a wake up; however there may not
2677            // be a ready item, for example if waiting on a sync file with multiple sync points.
2678            // Keep waiting until there's at least one ready item.
2679            let signal_mask = signal_mask.unwrap_or_else(|| current_task.read().signal_mask());
2680            let mut result = current_task.wait_with_temporary_mask(
2681                locked,
2682                signal_mask,
2683                |locked, current_task| self.waiter.wait_until(locked, current_task, deadline),
2684            );
2685            loop {
2686                match result {
2687                    Err(err) if err == ETIMEDOUT => return Ok(()),
2688                    Ok(()) => {
2689                        if !self.ready_items.lock().is_empty() {
2690                            break;
2691                        }
2692                    }
2693                    result => result?,
2694                };
2695                result = self.waiter.wait_until(locked, current_task, deadline);
2696            }
2697        }
2698        Ok(())
2699    }
2700}
2701
2702pub fn poll(
2703    locked: &mut Locked<Unlocked>,
2704    current_task: &mut CurrentTask,
2705    user_pollfds: UserRef<pollfd>,
2706    num_fds: i32,
2707    mask: Option<SigSet>,
2708    deadline: zx::MonotonicInstant,
2709) -> Result<usize, Errno> {
2710    if num_fds < 0
2711        || num_fds as u64 > current_task.thread_group().get_rlimit(locked, Resource::NOFILE)
2712    {
2713        return error!(EINVAL);
2714    }
2715
2716    let mut pollfds = vec![pollfd::default(); num_fds as usize];
2717    let waiter = FileWaiter::<usize>::default();
2718
2719    for (index, poll_descriptor) in pollfds.iter_mut().enumerate() {
2720        *poll_descriptor = current_task.read_object(user_pollfds.at(index)?)?;
2721        poll_descriptor.revents = 0;
2722        if poll_descriptor.fd < 0 {
2723            continue;
2724        }
2725        let file = current_task.files.get(FdNumber::from_raw(poll_descriptor.fd)).ok();
2726        waiter.add(
2727            locked,
2728            current_task,
2729            index,
2730            file.as_ref(),
2731            FdEvents::from_bits_truncate(poll_descriptor.events as u32),
2732        )?;
2733    }
2734
2735    waiter.wait(locked, current_task, mask, deadline)?;
2736
2737    let mut ready_items = waiter.ready_items.lock();
2738    let mut unique_ready_items =
2739        bit_vec::BitVec::from_elem(usize::try_from(num_fds).unwrap(), false);
2740    for ReadyItem { key: ready_key, events: ready_events } in ready_items.drain(..) {
2741        let ready_key = assert_matches::assert_matches!(
2742            ready_key,
2743            ReadyItemKey::Usize(v) => v
2744        );
2745        let interested_events = FdEvents::from_bits_truncate(pollfds[ready_key].events as u32)
2746            | FdEvents::POLLERR
2747            | FdEvents::POLLHUP
2748            | FdEvents::POLLNVAL;
2749        let return_events = (interested_events & ready_events).bits();
2750        pollfds[ready_key].revents = return_events as i16;
2751        unique_ready_items.set(ready_key, true);
2752    }
2753
2754    for (index, poll_descriptor) in pollfds.iter().enumerate() {
2755        current_task.write_object(user_pollfds.at(index)?, poll_descriptor)?;
2756    }
2757
2758    Ok(unique_ready_items.into_iter().filter(Clone::clone).count())
2759}
2760
2761pub fn sys_ppoll(
2762    locked: &mut Locked<Unlocked>,
2763    current_task: &mut CurrentTask,
2764    user_fds: UserRef<pollfd>,
2765    num_fds: i32,
2766    user_timespec: TimeSpecPtr,
2767    user_mask: UserRef<SigSet>,
2768    sigset_size: usize,
2769) -> Result<usize, Errno> {
2770    let start_time = zx::MonotonicInstant::get();
2771
2772    let timeout = if user_timespec.is_null() {
2773        // Passing -1 to poll is equivalent to an infinite timeout.
2774        -1
2775    } else {
2776        let ts = current_task.read_multi_arch_object(user_timespec)?;
2777        duration_from_timespec::<zx::MonotonicTimeline>(ts)?.into_millis() as i32
2778    };
2779
2780    let deadline = start_time + duration_from_poll_timeout(timeout)?;
2781
2782    let mask = if !user_mask.is_null() {
2783        if sigset_size != std::mem::size_of::<SigSet>() {
2784            return error!(EINVAL);
2785        }
2786        let mask = current_task.read_object(user_mask)?;
2787        Some(mask)
2788    } else {
2789        None
2790    };
2791
2792    let poll_result = poll(locked, current_task, user_fds, num_fds, mask, deadline);
2793
2794    if user_timespec.is_null() {
2795        return poll_result;
2796    }
2797
2798    let now = zx::MonotonicInstant::get();
2799    let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0));
2800    let remaining_timespec = timespec_from_duration(remaining);
2801
2802    // From gVisor: "ppoll is normally restartable if interrupted by something other than a signal
2803    // handled by the application (i.e. returns ERESTARTNOHAND). However, if
2804    // [copy out] failed, then the restarted ppoll would use the wrong timeout, so the
2805    // error should be left as EINTR."
2806    match (current_task.write_multi_arch_object(user_timespec, remaining_timespec), poll_result) {
2807        // If write was ok, and poll was ok, return poll result.
2808        (Ok(_), Ok(num_events)) => Ok(num_events),
2809        (Ok(_), Err(e)) if e == EINTR => {
2810            error!(ERESTARTNOHAND)
2811        }
2812        (Ok(_), poll_result) => poll_result,
2813        // If write was a failure, return the poll result unchanged.
2814        (Err(_), poll_result) => poll_result,
2815    }
2816}
2817
2818pub fn sys_flock(
2819    locked: &mut Locked<Unlocked>,
2820    current_task: &CurrentTask,
2821    fd: FdNumber,
2822    operation: u32,
2823) -> Result<(), Errno> {
2824    let file = current_task.files.get(fd)?;
2825    let operation = FlockOperation::from_flags(operation)?;
2826    security::check_file_lock_access(current_task, &file)?;
2827    file.flock(locked, current_task, operation)
2828}
2829
2830pub fn sys_sync(locked: &mut Locked<Unlocked>, current_task: &CurrentTask) -> Result<(), Errno> {
2831    current_task.kernel().mounts.sync_all(locked, current_task)
2832}
2833
2834pub fn sys_syncfs(
2835    locked: &mut Locked<Unlocked>,
2836    current_task: &CurrentTask,
2837    fd: FdNumber,
2838) -> Result<(), Errno> {
2839    let file = current_task.files.get(fd)?;
2840    file.fs.sync(locked, current_task)
2841}
2842
2843pub fn sys_fsync(
2844    _locked: &mut Locked<Unlocked>,
2845    current_task: &CurrentTask,
2846    fd: FdNumber,
2847) -> Result<(), Errno> {
2848    let file = current_task.files.get(fd)?;
2849    file.sync(current_task)
2850}
2851
2852pub fn sys_fdatasync(
2853    _locked: &mut Locked<Unlocked>,
2854    current_task: &CurrentTask,
2855    fd: FdNumber,
2856) -> Result<(), Errno> {
2857    let file = current_task.files.get(fd)?;
2858    file.data_sync(current_task)
2859}
2860
2861pub fn sys_sync_file_range(
2862    _locked: &mut Locked<Unlocked>,
2863    current_task: &CurrentTask,
2864    fd: FdNumber,
2865    offset: off_t,
2866    length: off_t,
2867    flags: u32,
2868) -> Result<(), Errno> {
2869    const KNOWN_FLAGS: u32 = uapi::SYNC_FILE_RANGE_WAIT_BEFORE
2870        | uapi::SYNC_FILE_RANGE_WRITE
2871        | uapi::SYNC_FILE_RANGE_WAIT_AFTER;
2872    if flags & !KNOWN_FLAGS != 0 {
2873        return error!(EINVAL);
2874    }
2875
2876    let file = current_task.files.get(fd)?;
2877
2878    if offset < 0 || length < 0 {
2879        return error!(EINVAL);
2880    }
2881
2882    checked_add_offset_and_length(offset as usize, length as usize)?;
2883
2884    // From <https://linux.die.net/man/2/sync_file_range>:
2885    //
2886    //   fd refers to something other than a regular file, a block device, a directory, or a symbolic link.
2887    let mode = file.node().info().mode;
2888    if !mode.is_reg() && !mode.is_blk() && !mode.is_dir() && !mode.is_lnk() {
2889        return error!(ESPIPE);
2890    }
2891
2892    if flags == 0 {
2893        return Ok(());
2894    }
2895
2896    // Syncing the whole file is much more than we need for sync_file_range, which only needs to
2897    // sync the specified data range.
2898    file.data_sync(current_task)
2899}
2900
2901pub fn sys_fadvise64(
2902    _locked: &mut Locked<Unlocked>,
2903    current_task: &CurrentTask,
2904    fd: FdNumber,
2905    offset: off_t,
2906    len: off_t,
2907    advice: u32,
2908) -> Result<(), Errno> {
2909    match advice {
2910        POSIX_FADV_NORMAL => track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_NORMAL"),
2911        POSIX_FADV_RANDOM => track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_RANDOM"),
2912        POSIX_FADV_SEQUENTIAL => {
2913            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_SEQUENTIAL")
2914        }
2915        POSIX_FADV_WILLNEED => {
2916            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_WILLNEED")
2917        }
2918        POSIX_FADV_DONTNEED => {
2919            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_DONTNEED")
2920        }
2921        POSIX_FADV_NOREUSE => {
2922            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_NOREUSE")
2923        }
2924        _ => {
2925            track_stub!(TODO("https://fxbug.dev/322875684"), "fadvise64 unknown advice", advice);
2926            return error!(EINVAL);
2927        }
2928    }
2929
2930    if offset < 0 || len < 0 {
2931        return error!(EINVAL);
2932    }
2933
2934    let file = current_task.files.get(fd)?;
2935    // fadvise does not work on pipes.
2936    if file.downcast_file::<PipeFileObject>().is_some() {
2937        return error!(ESPIPE);
2938    }
2939
2940    // fadvise does not work on paths.
2941    if file.flags().contains(OpenFlags::PATH) {
2942        return error!(EBADF);
2943    }
2944
2945    Ok(())
2946}
2947
2948pub fn sys_fallocate(
2949    locked: &mut Locked<Unlocked>,
2950    current_task: &CurrentTask,
2951    fd: FdNumber,
2952    mode: u32,
2953    offset: off_t,
2954    len: off_t,
2955) -> Result<(), Errno> {
2956    let file = current_task.files.get(fd)?;
2957
2958    // Offset must not be less than 0.
2959    // Length must not be less than or equal to 0.
2960    // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2961    if offset < 0 || len <= 0 {
2962        return error!(EINVAL);
2963    }
2964
2965    let mode = FallocMode::from_bits(mode).ok_or_else(|| errno!(EINVAL))?;
2966    file.fallocate(locked, current_task, mode, offset as u64, len as u64)?;
2967
2968    Ok(())
2969}
2970
2971pub fn sys_inotify_init1(
2972    locked: &mut Locked<Unlocked>,
2973    current_task: &CurrentTask,
2974    flags: u32,
2975) -> Result<FdNumber, Errno> {
2976    if flags & !(IN_NONBLOCK | IN_CLOEXEC) != 0 {
2977        return error!(EINVAL);
2978    }
2979    let non_blocking = flags & IN_NONBLOCK != 0;
2980    let close_on_exec = flags & IN_CLOEXEC != 0;
2981    let inotify_file = InotifyFileObject::new_file(locked, current_task, non_blocking);
2982    let fd_flags = if close_on_exec { FdFlags::CLOEXEC } else { FdFlags::empty() };
2983    current_task.add_file(locked, inotify_file, fd_flags)
2984}
2985
2986pub fn sys_inotify_add_watch(
2987    locked: &mut Locked<Unlocked>,
2988    current_task: &CurrentTask,
2989    fd: FdNumber,
2990    user_path: UserCString,
2991    mask: u32,
2992) -> Result<WdNumber, Errno> {
2993    let mask = InotifyMask::from_bits(mask).ok_or_else(|| errno!(EINVAL))?;
2994    if !mask.intersects(InotifyMask::ALL_EVENTS) {
2995        // Mask must include at least 1 event.
2996        return error!(EINVAL);
2997    }
2998    let file = current_task.files.get(fd)?;
2999    let inotify_file = file.downcast_file::<InotifyFileObject>().ok_or_else(|| errno!(EINVAL))?;
3000    let options = if mask.contains(InotifyMask::DONT_FOLLOW) {
3001        LookupFlags::no_follow()
3002    } else {
3003        LookupFlags::default()
3004    };
3005    let watched_node = lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, options)?;
3006    if mask.contains(InotifyMask::ONLYDIR) && !watched_node.entry.node.is_dir() {
3007        return error!(ENOTDIR);
3008    }
3009    inotify_file.add_watch(watched_node.entry, mask, &file)
3010}
3011
3012pub fn sys_inotify_rm_watch(
3013    _locked: &mut Locked<Unlocked>,
3014    current_task: &CurrentTask,
3015    fd: FdNumber,
3016    watch_id: WdNumber,
3017) -> Result<(), Errno> {
3018    let file = current_task.files.get(fd)?;
3019    let inotify_file = file.downcast_file::<InotifyFileObject>().ok_or_else(|| errno!(EINVAL))?;
3020    inotify_file.remove_watch(watch_id, &file)
3021}
3022
3023pub fn sys_utimensat(
3024    locked: &mut Locked<Unlocked>,
3025    current_task: &CurrentTask,
3026    dir_fd: FdNumber,
3027    user_path: UserCString,
3028    user_times: TimeSpecPtr,
3029    flags: u32,
3030) -> Result<(), Errno> {
3031    let (atime, mtime) = if user_times.addr().is_null() {
3032        // If user_times is null, the timestamps are updated to the current time.
3033        (TimeUpdateType::Now, TimeUpdateType::Now)
3034    } else {
3035        let ts = current_task.read_multi_arch_objects_to_vec(user_times, 2)?;
3036        let atime = ts[0];
3037        let mtime = ts[1];
3038        let parse_timespec = |spec: timespec| match spec.tv_nsec {
3039            UTIME_NOW => Ok(TimeUpdateType::Now),
3040            UTIME_OMIT => Ok(TimeUpdateType::Omit),
3041            _ => time_from_timespec(spec).map(TimeUpdateType::Time),
3042        };
3043        (parse_timespec(atime)?, parse_timespec(mtime)?)
3044    };
3045
3046    if let (TimeUpdateType::Omit, TimeUpdateType::Omit) = (atime, mtime) {
3047        return Ok(());
3048    };
3049
3050    // Non-standard feature: if user_path is null, the timestamps are updated on the file referred
3051    // to by dir_fd.
3052    // See https://man7.org/linux/man-pages/man2/utimensat.2.html
3053    let name = if user_path.addr().is_null() {
3054        if dir_fd == FdNumber::AT_FDCWD {
3055            return error!(EFAULT);
3056        }
3057        let (node, _) = current_task.resolve_dir_fd(
3058            locked,
3059            dir_fd,
3060            Default::default(),
3061            ResolveFlags::empty(),
3062        )?;
3063        node
3064    } else {
3065        let lookup_flags = LookupFlags::from_bits(flags, AT_SYMLINK_NOFOLLOW)?;
3066        lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?
3067    };
3068    name.entry.node.update_atime_mtime(locked, current_task, &name.mount, atime, mtime)?;
3069    let event_mask = match (atime, mtime) {
3070        (_, TimeUpdateType::Omit) => InotifyMask::ACCESS,
3071        (TimeUpdateType::Omit, _) => InotifyMask::MODIFY,
3072        (_, _) => InotifyMask::ATTRIB,
3073    };
3074    name.entry.notify_ignoring_excl_unlink(event_mask);
3075    Ok(())
3076}
3077
3078pub fn sys_splice(
3079    locked: &mut Locked<Unlocked>,
3080    current_task: &CurrentTask,
3081    fd_in: FdNumber,
3082    off_in: OffsetPtr,
3083    fd_out: FdNumber,
3084    off_out: OffsetPtr,
3085    len: usize,
3086    flags: u32,
3087) -> Result<usize, Errno> {
3088    splice::splice(locked, current_task, fd_in, off_in, fd_out, off_out, len, flags)
3089}
3090
3091pub fn sys_vmsplice(
3092    locked: &mut Locked<Unlocked>,
3093    current_task: &CurrentTask,
3094    fd: FdNumber,
3095    iovec_addr: IOVecPtr,
3096    iovec_count: UserValue<i32>,
3097    flags: u32,
3098) -> Result<usize, Errno> {
3099    splice::vmsplice(locked, current_task, fd, iovec_addr, iovec_count, flags)
3100}
3101
3102pub fn sys_copy_file_range(
3103    locked: &mut Locked<Unlocked>,
3104    current_task: &CurrentTask,
3105    fd_in: FdNumber,
3106    off_in: OffsetPtr,
3107    fd_out: FdNumber,
3108    off_out: OffsetPtr,
3109    len: usize,
3110    flags: u32,
3111) -> Result<usize, Errno> {
3112    splice::copy_file_range(locked, current_task, fd_in, off_in, fd_out, off_out, len, flags)
3113}
3114
3115pub fn sys_tee(
3116    locked: &mut Locked<Unlocked>,
3117    current_task: &CurrentTask,
3118    fd_in: FdNumber,
3119    fd_out: FdNumber,
3120    len: usize,
3121    flags: u32,
3122) -> Result<usize, Errno> {
3123    splice::tee(locked, current_task, fd_in, fd_out, len, flags)
3124}
3125
3126pub fn sys_readahead(
3127    _locked: &mut Locked<Unlocked>,
3128    current_task: &CurrentTask,
3129    fd: FdNumber,
3130    offset: off_t,
3131    length: usize,
3132) -> Result<(), Errno> {
3133    let file = current_task.files.get(fd)?;
3134    // Allow only non-negative values of `offset`. Some versions of Linux allow it to be negative,
3135    // but GVisor tests require `readahead()` to fail in this case.
3136    let offset: usize = offset.try_into().map_err(|_| errno!(EINVAL))?;
3137    file.readahead(current_task, offset, length)
3138}
3139
3140pub fn sys_io_setup(
3141    _locked: &mut Locked<Unlocked>,
3142    current_task: &CurrentTask,
3143    user_nr_events: UserValue<u32>,
3144    user_ctx_idp: MultiArchUserRef<uapi::aio_context_t, uapi::arch32::aio_context_t>,
3145) -> Result<(), Errno> {
3146    // From https://man7.org/linux/man-pages/man2/io_setup.2.html:
3147    //
3148    //   EINVAL ctx_idp is not initialized, or the specified nr_events
3149    //   exceeds internal limits.  nr_events should be greater than
3150    //   0.
3151    //
3152    // TODO: Determine what "internal limits" means.
3153    let max_operations =
3154        user_nr_events.validate(0..(i32::MAX as u32)).ok_or_else(|| errno!(EINVAL))? as usize;
3155    if current_task.read_multi_arch_object(user_ctx_idp)? != 0 {
3156        return error!(EINVAL);
3157    }
3158    let ctx_id = AioContext::create(current_task, max_operations)?;
3159    current_task.write_multi_arch_object(user_ctx_idp, ctx_id).map_err(|e| {
3160        let _ = current_task
3161            .mm()
3162            .expect("previous sys_io_setup code verified mm exists")
3163            .destroy_aio_context(ctx_id.into());
3164        e
3165    })?;
3166    Ok(())
3167}
3168
3169pub fn sys_io_submit(
3170    _locked: &mut Locked<Unlocked>,
3171    current_task: &CurrentTask,
3172    ctx_id: aio_context_t,
3173    user_nr: UserValue<i32>,
3174    mut iocb_addrs: IocbPtrPtr,
3175) -> Result<i32, Errno> {
3176    let nr = user_nr.validate(0..i32::MAX).ok_or_else(|| errno!(EINVAL))?;
3177    if nr == 0 {
3178        return Ok(0);
3179    }
3180    let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?;
3181
3182    // `iocbpp` is an array of addresses to iocb's.
3183    let mut num_submitted: i32 = 0;
3184    loop {
3185        let iocb_ref = current_task.read_multi_arch_ptr(iocb_addrs)?;
3186        let control_block = current_task.read_multi_arch_object(iocb_ref)?;
3187
3188        match (num_submitted, ctx.submit(current_task, control_block, iocb_ref)) {
3189            (0, Err(e)) => return Err(e),
3190            (_, Err(_)) => break,
3191            (_, Ok(())) => {
3192                num_submitted += 1;
3193                if num_submitted == nr {
3194                    break;
3195                }
3196            }
3197        };
3198
3199        iocb_addrs = iocb_addrs.next()?;
3200    }
3201
3202    Ok(num_submitted)
3203}
3204
3205pub fn sys_io_getevents(
3206    _locked: &mut Locked<Unlocked>,
3207    current_task: &CurrentTask,
3208    ctx_id: aio_context_t,
3209    min_nr: i64,
3210    nr: i64,
3211    events_ref: UserRef<io_event>,
3212    user_timeout: TimeSpecPtr,
3213) -> Result<i32, Errno> {
3214    if min_nr < 0 || min_nr > nr || nr < 0 {
3215        return error!(EINVAL);
3216    }
3217    let min_results = min_nr as usize;
3218    let max_results = nr as usize;
3219    let deadline = deadline_after_timespec(current_task, user_timeout)?;
3220
3221    let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?;
3222    let events = ctx.get_events(current_task, min_results, max_results, deadline)?;
3223    current_task.write_objects(events_ref, &events)?;
3224
3225    Ok(events.len() as i32)
3226}
3227
3228pub fn sys_io_cancel(
3229    _locked: &mut Locked<Unlocked>,
3230    current_task: &CurrentTask,
3231    ctx_id: aio_context_t,
3232    user_iocb: IocbPtr,
3233    _result: UserRef<io_event>,
3234) -> Result<(), Errno> {
3235    let iocb = current_task.read_multi_arch_object(user_iocb)?;
3236    let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?;
3237
3238    ctx.cancel(current_task, iocb, user_iocb)?;
3239    // TODO: Correctly handle return. If the operation is successfully canceled, the event should be copied into the memory pointed to by result without being placed into the completion queue.
3240    track_stub!(TODO("https://fxbug.dev/297433877"), "io_cancel");
3241    Ok(())
3242}
3243
3244pub fn sys_io_destroy(
3245    _locked: &mut Locked<Unlocked>,
3246    current_task: &CurrentTask,
3247    ctx_id: aio_context_t,
3248) -> Result<(), Errno> {
3249    let aio_context = current_task.mm()?.destroy_aio_context(ctx_id.into())?;
3250    std::mem::drop(aio_context);
3251    Ok(())
3252}
3253
3254pub fn sys_io_uring_setup(
3255    locked: &mut Locked<Unlocked>,
3256    current_task: &CurrentTask,
3257    user_entries: UserValue<u32>,
3258    user_params: UserRef<io_uring_params>,
3259) -> Result<FdNumber, Errno> {
3260    // TODO: https://fxbug.dev/397186254 - we will want to do a no-audit CAP_IPC_LOCK capability
3261    // check; see "If not granted CAP_IPC_LOCK io_uring operations are accounted against the user's
3262    // RLIMIT_MEMLOCK limit" at
3263    // https://github.com/SELinuxProject/selinux-notebook/blob/main/src/auditing.md#capability-audit-exemptions
3264
3265    if !current_task.kernel().features.io_uring {
3266        return error!(ENOSYS);
3267    }
3268
3269    // Apply policy from /proc/sys/kernel/io_uring_disabled
3270    let limits = &current_task.kernel().system_limits;
3271    match limits.io_uring_disabled.load(atomic::Ordering::Relaxed) {
3272        0 => (),
3273        1 => {
3274            let io_uring_group = limits.io_uring_group.load(atomic::Ordering::Relaxed).try_into();
3275            if io_uring_group.is_err()
3276                || !current_task.current_creds().is_in_group(io_uring_group.unwrap())
3277            {
3278                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
3279            }
3280        }
3281        _ => {
3282            return error!(EPERM);
3283        }
3284    }
3285
3286    let entries = user_entries.validate(1..IORING_MAX_ENTRIES).ok_or_else(|| errno!(EINVAL))?;
3287
3288    let mut params = current_task.read_object(user_params)?;
3289    for byte in params.resv {
3290        if byte != 0 {
3291            return error!(EINVAL);
3292        }
3293    }
3294
3295    let file = IoUringFileObject::new_file(locked, current_task, entries, &mut params)?;
3296
3297    // io_uring file descriptors are always created with CLOEXEC.
3298    let fd = current_task.add_file(locked, file, FdFlags::CLOEXEC)?;
3299    current_task.write_object(user_params, &params)?;
3300    Ok(fd)
3301}
3302
3303pub fn sys_io_uring_enter(
3304    locked: &mut Locked<Unlocked>,
3305    current_task: &CurrentTask,
3306    fd: FdNumber,
3307    to_submit: u32,
3308    min_complete: u32,
3309    flags: u32,
3310    _sig: UserRef<SigSet>,
3311    sigset_size: usize,
3312) -> Result<u32, Errno> {
3313    if !current_task.kernel().features.io_uring {
3314        return error!(ENOSYS);
3315    }
3316    if !_sig.is_null() {
3317        if sigset_size != std::mem::size_of::<SigSet>() {
3318            return error!(EINVAL);
3319        }
3320    }
3321    let file = current_task.files.get(fd)?;
3322    let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?;
3323    // TODO(https://fxbug.dev/297431387): Use `_sig` to change the signal mask for `current_task`.
3324    io_uring.enter(locked, current_task, to_submit, min_complete, flags)
3325}
3326
3327pub fn sys_io_uring_register(
3328    locked: &mut Locked<Unlocked>,
3329    current_task: &CurrentTask,
3330    fd: FdNumber,
3331    opcode: u32,
3332    arg: UserAddress,
3333    nr_args: UserValue<u32>,
3334) -> Result<SyscallResult, Errno> {
3335    if !current_task.kernel().features.io_uring {
3336        return error!(ENOSYS);
3337    }
3338    let file = current_task.files.get(fd)?;
3339    let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?;
3340    match opcode {
3341        IORING_REGISTER_BUFFERS => {
3342            // TODO(https://fxbug.dev/297431387): Check nr_args for zero and return EINVAL here.
3343            let iovec = IOVecPtr::new(current_task, arg);
3344            let buffers = current_task.read_iovec(iovec, nr_args)?;
3345            io_uring.register_buffers(locked, buffers);
3346            return Ok(SUCCESS);
3347        }
3348        IORING_UNREGISTER_BUFFERS => {
3349            if !arg.is_null() {
3350                return error!(EINVAL);
3351            }
3352            io_uring.unregister_buffers(locked);
3353            return Ok(SUCCESS);
3354        }
3355        IORING_REGISTER_IOWQ_MAX_WORKERS => {
3356            track_stub!(
3357                TODO("https://fxbug.dev/297431387"),
3358                "io_uring_register IORING_REGISTER_IOWQ_MAX_WORKERS",
3359                opcode
3360            );
3361            // The current implementation only ever use 1 worker for read and 1 for write.
3362            return Ok(SUCCESS);
3363        }
3364        IORING_REGISTER_RING_FDS => {
3365            track_stub!(
3366                TODO("https://fxbug.dev/297431387"),
3367                "io_uring_register IORING_REGISTER_RING_FDS",
3368                opcode
3369            );
3370            // The current implementation doesn't use any thread local specific identifier for
3371            // performance. Instead, when registering a fd, just return the passed fd as the value
3372            // to use.
3373            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3374            if nr_args > 16 {
3375                return error!(EINVAL);
3376            }
3377            let updates_addr = UserRef::<uapi::io_uring_rsrc_update>::from(arg);
3378            let mut updates = current_task
3379                .read_objects_to_smallvec::<uapi::io_uring_rsrc_update, 1>(updates_addr, nr_args)?;
3380            let mut result = 0;
3381            for update in updates.iter_mut() {
3382                if update.offset == u32::MAX {
3383                    update.offset = update.data.try_into().map_err(|_| errno!(EINVAL))?;
3384                    result += 1;
3385                }
3386            }
3387            current_task.write_objects(updates_addr, &updates)?;
3388            return Ok(result.into());
3389        }
3390        IORING_UNREGISTER_RING_FDS => {
3391            track_stub!(
3392                TODO("https://fxbug.dev/297431387"),
3393                "io_uring_register IORING_UNREGISTER_RING_FDS",
3394                opcode
3395            );
3396            // Because registering a fd doesn't use any resource currently, unregistering is free.
3397            return Ok(SUCCESS);
3398        }
3399        IORING_REGISTER_PBUF_RING => {
3400            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3401            if nr_args != 1 {
3402                return error!(EINVAL);
3403            }
3404            let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?;
3405            io_uring.register_ring_buffers(locked, buffer_definition)?;
3406            return Ok(SUCCESS);
3407        }
3408
3409        IORING_UNREGISTER_PBUF_RING => {
3410            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3411            if nr_args != 1 {
3412                return error!(EINVAL);
3413            }
3414            let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?;
3415            io_uring.unregister_ring_buffers(locked, buffer_definition)?;
3416            return Ok(SUCCESS);
3417        }
3418
3419        IORING_REGISTER_PBUF_STATUS => {
3420            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3421            if nr_args != 1 {
3422                return error!(EINVAL);
3423            }
3424            let buffer_status_addr = UserRef::<uapi::io_uring_buf_status>::from(arg);
3425            let mut buffer_status: uapi::io_uring_buf_status =
3426                current_task.read_object(buffer_status_addr)?;
3427            io_uring.ring_buffer_status(locked, &mut buffer_status)?;
3428            current_task.write_object(buffer_status_addr, &buffer_status)?;
3429            return Ok(SUCCESS);
3430        }
3431
3432        _ => {
3433            track_stub!(
3434                TODO("https://fxbug.dev/297431387"),
3435                "io_uring_register unknown op",
3436                opcode
3437            );
3438            return error!(EINVAL);
3439        }
3440    }
3441}
3442
3443// Syscalls for arch32 usage
3444#[cfg(target_arch = "aarch64")]
3445mod arch32 {
3446    use crate::mm::MemoryAccessorExt;
3447    use crate::task::CurrentTask;
3448    use crate::vfs::syscalls::{
3449        LookupFlags, OpenFlags, lookup_at, sys_dup3, sys_faccessat, sys_fallocate, sys_lseek,
3450        sys_mkdirat, sys_openat, sys_readlinkat, sys_unlinkat,
3451    };
3452    use crate::vfs::{FdNumber, FsNode};
3453    use linux_uapi::off_t;
3454    use starnix_sync::{Locked, Unlocked};
3455    use starnix_syscalls::SyscallArg;
3456    use starnix_types::time::duration_from_poll_timeout;
3457    use starnix_uapi::errors::Errno;
3458    use starnix_uapi::file_mode::FileMode;
3459    use starnix_uapi::signals::SigSet;
3460    use starnix_uapi::user_address::{MultiArchUserRef, UserAddress, UserCString, UserRef};
3461    use starnix_uapi::vfs::EpollEvent;
3462    use starnix_uapi::{AT_REMOVEDIR, errno, error, uapi};
3463
3464    type StatFs64Ptr = MultiArchUserRef<uapi::statfs, uapi::arch32::statfs64>;
3465
3466    fn merge_low_and_high(low: u32, high: u32) -> off_t {
3467        ((high as off_t) << 32) | (low as off_t)
3468    }
3469
3470    pub fn sys_arch32_open(
3471        locked: &mut Locked<Unlocked>,
3472        current_task: &CurrentTask,
3473        user_path: UserCString,
3474        flags: u32,
3475        mode: FileMode,
3476    ) -> Result<FdNumber, Errno> {
3477        sys_openat(locked, current_task, FdNumber::AT_FDCWD, user_path, flags, mode)
3478    }
3479
3480    pub fn sys_arch32_access(
3481        locked: &mut Locked<Unlocked>,
3482        current_task: &CurrentTask,
3483        user_path: UserCString,
3484        mode: u32,
3485    ) -> Result<(), Errno> {
3486        sys_faccessat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
3487    }
3488    pub fn stat64(
3489        locked: &mut Locked<Unlocked>,
3490        current_task: &CurrentTask,
3491        node: &FsNode,
3492        arch32_stat_buf: UserRef<uapi::arch32::stat64>,
3493    ) -> Result<(), Errno> {
3494        let stat_buffer = node.stat(locked, current_task)?;
3495        let result: uapi::arch32::stat64 = stat_buffer.try_into().map_err(|_| errno!(EINVAL))?;
3496        // Now we copy to the arch32 version and write.
3497        current_task.write_object(arch32_stat_buf, &result)?;
3498        Ok(())
3499    }
3500
3501    pub fn sys_arch32_fstat64(
3502        locked: &mut Locked<Unlocked>,
3503        current_task: &CurrentTask,
3504        fd: FdNumber,
3505        arch32_stat_buf: UserRef<uapi::arch32::stat64>,
3506    ) -> Result<(), Errno> {
3507        let file = current_task.files.get_allowing_opath(fd)?;
3508        stat64(locked, current_task, file.node(), arch32_stat_buf)
3509    }
3510
3511    pub fn sys_arch32_fallocate(
3512        locked: &mut Locked<Unlocked>,
3513        current_task: &CurrentTask,
3514        fd: FdNumber,
3515        mode: u32,
3516        offset_low: u32,
3517        offset_high: u32,
3518        len_low: u32,
3519        len_high: u32,
3520    ) -> Result<(), Errno> {
3521        let offset = merge_low_and_high(offset_low, offset_high);
3522        let len = merge_low_and_high(len_low, len_high);
3523        sys_fallocate(locked, current_task, fd, mode, offset, len)
3524    }
3525
3526    pub fn sys_arch32_stat64(
3527        locked: &mut Locked<Unlocked>,
3528        current_task: &CurrentTask,
3529        user_path: UserCString,
3530        arch32_stat_buf: UserRef<uapi::arch32::stat64>,
3531    ) -> Result<(), Errno> {
3532        let name =
3533            lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
3534        stat64(locked, current_task, &name.entry.node, arch32_stat_buf)
3535    }
3536
3537    pub fn sys_arch32_readlink(
3538        locked: &mut Locked<Unlocked>,
3539        current_task: &CurrentTask,
3540        user_path: UserCString,
3541        buffer: UserAddress,
3542        buffer_size: usize,
3543    ) -> Result<usize, Errno> {
3544        sys_readlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, buffer, buffer_size)
3545    }
3546
3547    pub fn sys_arch32_mkdir(
3548        locked: &mut Locked<Unlocked>,
3549        current_task: &CurrentTask,
3550        user_path: UserCString,
3551        mode: FileMode,
3552    ) -> Result<(), Errno> {
3553        sys_mkdirat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
3554    }
3555
3556    pub fn sys_arch32_rmdir(
3557        locked: &mut Locked<Unlocked>,
3558        current_task: &CurrentTask,
3559        user_path: UserCString,
3560    ) -> Result<(), Errno> {
3561        sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, AT_REMOVEDIR)
3562    }
3563
3564    #[allow(non_snake_case)]
3565    pub fn sys_arch32__llseek(
3566        locked: &mut Locked<Unlocked>,
3567        current_task: &CurrentTask,
3568        fd: FdNumber,
3569        offset_high: u32,
3570        offset_low: u32,
3571        result: UserRef<off_t>,
3572        whence: u32,
3573    ) -> Result<(), Errno> {
3574        let offset = merge_low_and_high(offset_low, offset_high);
3575        let result_value = sys_lseek(locked, current_task, fd, offset, whence)?;
3576        current_task.write_object(result, &result_value).map(|_| ())
3577    }
3578
3579    pub fn sys_arch32_dup2(
3580        locked: &mut Locked<Unlocked>,
3581        current_task: &CurrentTask,
3582        oldfd: FdNumber,
3583        newfd: FdNumber,
3584    ) -> Result<FdNumber, Errno> {
3585        if oldfd == newfd {
3586            // O_PATH allowed for:
3587            //
3588            //  Duplicating the file descriptor (dup(2), fcntl(2)
3589            //  F_DUPFD, etc.).
3590            //
3591            // See https://man7.org/linux/man-pages/man2/open.2.html
3592            current_task.files.get_allowing_opath(oldfd)?;
3593            return Ok(newfd);
3594        }
3595        sys_dup3(locked, current_task, oldfd, newfd, 0)
3596    }
3597
3598    pub fn sys_arch32_unlink(
3599        locked: &mut Locked<Unlocked>,
3600        current_task: &CurrentTask,
3601        user_path: UserCString,
3602    ) -> Result<(), Errno> {
3603        sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, 0)
3604    }
3605
3606    pub fn sys_arch32_pread64(
3607        locked: &mut Locked<Unlocked>,
3608        current_task: &CurrentTask,
3609        fd: FdNumber,
3610        address: UserAddress,
3611        length: usize,
3612        _: SyscallArg,
3613        offset_low: u32,
3614        offset_high: u32,
3615    ) -> Result<usize, Errno> {
3616        super::sys_pread64(
3617            locked,
3618            current_task,
3619            fd,
3620            address,
3621            length,
3622            merge_low_and_high(offset_low, offset_high),
3623        )
3624    }
3625
3626    pub fn sys_arch32_pwrite64(
3627        locked: &mut Locked<Unlocked>,
3628        current_task: &CurrentTask,
3629        fd: FdNumber,
3630        address: UserAddress,
3631        length: usize,
3632        _: SyscallArg,
3633        offset_low: u32,
3634        offset_high: u32,
3635    ) -> Result<usize, Errno> {
3636        super::sys_pwrite64(
3637            locked,
3638            current_task,
3639            fd,
3640            address,
3641            length,
3642            merge_low_and_high(offset_low, offset_high),
3643        )
3644    }
3645
3646    pub fn sys_arch32_truncate64(
3647        locked: &mut Locked<Unlocked>,
3648        current_task: &CurrentTask,
3649        user_path: UserCString,
3650        _unused: SyscallArg,
3651        length_low: u32,
3652        length_high: u32,
3653    ) -> Result<(), Errno> {
3654        super::sys_truncate(
3655            locked,
3656            current_task,
3657            user_path,
3658            merge_low_and_high(length_low, length_high),
3659        )
3660    }
3661
3662    pub fn sys_arch32_ftruncate64(
3663        locked: &mut Locked<Unlocked>,
3664        current_task: &CurrentTask,
3665        fd: FdNumber,
3666        _: SyscallArg,
3667        length_low: u32,
3668        length_high: u32,
3669    ) -> Result<(), Errno> {
3670        super::sys_ftruncate(locked, current_task, fd, merge_low_and_high(length_low, length_high))
3671    }
3672
3673    pub fn sys_arch32_chmod(
3674        locked: &mut Locked<Unlocked>,
3675        current_task: &CurrentTask,
3676        user_path: UserCString,
3677        mode: FileMode,
3678    ) -> Result<(), Errno> {
3679        super::sys_fchmodat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
3680    }
3681
3682    pub fn sys_arch32_chown32(
3683        locked: &mut Locked<Unlocked>,
3684        current_task: &CurrentTask,
3685        user_path: UserCString,
3686        owner: uapi::arch32::__kernel_uid32_t,
3687        group: uapi::arch32::__kernel_uid32_t,
3688    ) -> Result<(), Errno> {
3689        super::sys_fchownat(locked, current_task, FdNumber::AT_FDCWD, user_path, owner, group, 0)
3690    }
3691
3692    pub fn sys_arch32_poll(
3693        locked: &mut Locked<Unlocked>,
3694        current_task: &mut CurrentTask,
3695        user_fds: UserRef<uapi::pollfd>,
3696        num_fds: i32,
3697        timeout: i32,
3698    ) -> Result<usize, Errno> {
3699        let deadline = zx::MonotonicInstant::after(duration_from_poll_timeout(timeout)?);
3700        super::poll(locked, current_task, user_fds, num_fds, None, deadline)
3701    }
3702
3703    pub fn sys_arch32_epoll_create(
3704        locked: &mut Locked<Unlocked>,
3705        current_task: &CurrentTask,
3706        size: i32,
3707    ) -> Result<FdNumber, Errno> {
3708        if size < 1 {
3709            // The man page for epoll_create says the size was used in a previous implementation as
3710            // a hint but no longer does anything. But it's still required to be >= 1 to ensure
3711            // programs are backwards-compatible.
3712            return error!(EINVAL);
3713        }
3714        super::sys_epoll_create1(locked, current_task, 0)
3715    }
3716
3717    pub fn sys_arch32_epoll_wait(
3718        locked: &mut Locked<Unlocked>,
3719        current_task: &mut CurrentTask,
3720        epfd: FdNumber,
3721        events: UserRef<EpollEvent>,
3722        max_events: i32,
3723        timeout: i32,
3724    ) -> Result<usize, Errno> {
3725        super::sys_epoll_pwait(
3726            locked,
3727            current_task,
3728            epfd,
3729            events,
3730            max_events,
3731            timeout,
3732            UserRef::<SigSet>::default(),
3733        )
3734    }
3735
3736    pub fn sys_arch32_rename(
3737        locked: &mut Locked<Unlocked>,
3738        current_task: &CurrentTask,
3739        old_user_path: UserCString,
3740        new_user_path: UserCString,
3741    ) -> Result<(), Errno> {
3742        super::sys_renameat2(
3743            locked,
3744            current_task,
3745            FdNumber::AT_FDCWD,
3746            old_user_path,
3747            FdNumber::AT_FDCWD,
3748            new_user_path,
3749            0,
3750        )
3751    }
3752
3753    pub fn sys_arch32_creat(
3754        locked: &mut Locked<Unlocked>,
3755        current_task: &CurrentTask,
3756        user_path: UserCString,
3757        mode: FileMode,
3758    ) -> Result<FdNumber, Errno> {
3759        super::sys_openat(
3760            locked,
3761            current_task,
3762            FdNumber::AT_FDCWD,
3763            user_path,
3764            (OpenFlags::WRONLY | OpenFlags::CREAT | OpenFlags::TRUNC).bits(),
3765            mode,
3766        )
3767    }
3768
3769    pub fn sys_arch32_symlink(
3770        locked: &mut Locked<Unlocked>,
3771        current_task: &CurrentTask,
3772        user_target: UserCString,
3773        user_path: UserCString,
3774    ) -> Result<(), Errno> {
3775        super::sys_symlinkat(locked, current_task, user_target, FdNumber::AT_FDCWD, user_path)
3776    }
3777
3778    pub fn sys_arch32_eventfd(
3779        locked: &mut Locked<Unlocked>,
3780        current_task: &CurrentTask,
3781        value: u32,
3782    ) -> Result<FdNumber, Errno> {
3783        super::sys_eventfd2(locked, current_task, value, 0)
3784    }
3785
3786    pub fn sys_arch32_inotify_init(
3787        locked: &mut Locked<Unlocked>,
3788        current_task: &CurrentTask,
3789    ) -> Result<FdNumber, Errno> {
3790        super::sys_inotify_init1(locked, current_task, 0)
3791    }
3792
3793    pub fn sys_arch32_link(
3794        locked: &mut Locked<Unlocked>,
3795        current_task: &CurrentTask,
3796        old_user_path: UserCString,
3797        new_user_path: UserCString,
3798    ) -> Result<(), Errno> {
3799        super::sys_linkat(
3800            locked,
3801            current_task,
3802            FdNumber::AT_FDCWD,
3803            old_user_path,
3804            FdNumber::AT_FDCWD,
3805            new_user_path,
3806            0,
3807        )
3808    }
3809
3810    pub fn sys_arch32_fstatfs64(
3811        locked: &mut Locked<Unlocked>,
3812        current_task: &CurrentTask,
3813        fd: FdNumber,
3814        user_buf_len: u32,
3815        user_buf: StatFs64Ptr,
3816    ) -> Result<(), Errno> {
3817        if (user_buf_len as usize) < std::mem::size_of::<uapi::arch32::statfs64>() {
3818            return error!(EINVAL);
3819        }
3820        super::fstatfs(locked, current_task, fd, user_buf)
3821    }
3822
3823    pub fn sys_arch32_statfs64(
3824        locked: &mut Locked<Unlocked>,
3825        current_task: &CurrentTask,
3826        user_path: UserCString,
3827        user_buf_len: u32,
3828        user_buf: StatFs64Ptr,
3829    ) -> Result<(), Errno> {
3830        if (user_buf_len as usize) < std::mem::size_of::<uapi::arch32::statfs64>() {
3831            return error!(EINVAL);
3832        }
3833        super::statfs(locked, current_task, user_path, user_buf)
3834    }
3835
3836    pub fn sys_arch32_arm_fadvise64_64(
3837        locked: &mut Locked<Unlocked>,
3838        current_task: &CurrentTask,
3839        fd: FdNumber,
3840        advice: u32,
3841        offset_low: u32,
3842        offset_high: u32,
3843        len_low: u32,
3844        len_high: u32,
3845    ) -> Result<(), Errno> {
3846        let offset = merge_low_and_high(offset_low, offset_high);
3847        let len = merge_low_and_high(len_low, len_high);
3848        super::sys_fadvise64(locked, current_task, fd, offset, len, advice)
3849    }
3850
3851    pub fn sys_arch32_sendfile64(
3852        locked: &mut Locked<Unlocked>,
3853        current_task: &CurrentTask,
3854        out_fd: FdNumber,
3855        in_fd: FdNumber,
3856        user_offset: UserRef<uapi::off_t>,
3857        count: i32,
3858    ) -> Result<usize, Errno> {
3859        super::sys_sendfile(locked, current_task, out_fd, in_fd, user_offset.into(), count)
3860    }
3861
3862    pub use super::{
3863        sys_chdir as sys_arch32_chdir, sys_chroot as sys_arch32_chroot,
3864        sys_copy_file_range as sys_arch32_copy_file_range, sys_dup3 as sys_arch32_dup3,
3865        sys_epoll_create1 as sys_arch32_epoll_create1, sys_epoll_ctl as sys_arch32_epoll_ctl,
3866        sys_epoll_pwait as sys_arch32_epoll_pwait, sys_epoll_pwait2 as sys_arch32_epoll_pwait2,
3867        sys_eventfd2 as sys_arch32_eventfd2, sys_fchmod as sys_arch32_fchmod,
3868        sys_fchmodat as sys_arch32_fchmodat, sys_fchown as sys_arch32_fchown32,
3869        sys_fchown as sys_arch32_fchown, sys_fchownat as sys_arch32_fchownat,
3870        sys_fdatasync as sys_arch32_fdatasync, sys_flock as sys_arch32_flock,
3871        sys_fsetxattr as sys_arch32_fsetxattr, sys_fstatat64 as sys_arch32_fstatat64,
3872        sys_fstatfs as sys_arch32_fstatfs, sys_fsync as sys_arch32_fsync,
3873        sys_ftruncate as sys_arch32_ftruncate,
3874        sys_inotify_add_watch as sys_arch32_inotify_add_watch,
3875        sys_inotify_init1 as sys_arch32_inotify_init1,
3876        sys_inotify_rm_watch as sys_arch32_inotify_rm_watch, sys_io_cancel as sys_arch32_io_cancel,
3877        sys_io_destroy as sys_arch32_io_destroy, sys_io_getevents as sys_arch32_io_getevents,
3878        sys_io_setup as sys_arch32_io_setup, sys_io_submit as sys_arch32_io_submit,
3879        sys_io_uring_enter as sys_arch32_io_uring_enter,
3880        sys_io_uring_register as sys_arch32_io_uring_register,
3881        sys_io_uring_setup as sys_arch32_io_uring_setup, sys_lgetxattr as sys_arch32_lgetxattr,
3882        sys_linkat as sys_arch32_linkat, sys_listxattr as sys_arch32_listxattr,
3883        sys_llistxattr as sys_arch32_llistxattr, sys_lsetxattr as sys_arch32_lsetxattr,
3884        sys_mkdirat as sys_arch32_mkdirat, sys_mknodat as sys_arch32_mknodat,
3885        sys_pidfd_getfd as sys_arch32_pidfd_getfd, sys_pidfd_open as sys_arch32_pidfd_open,
3886        sys_ppoll as sys_arch32_ppoll, sys_preadv as sys_arch32_preadv,
3887        sys_pselect6 as sys_arch32_pselect6, sys_readv as sys_arch32_readv,
3888        sys_removexattr as sys_arch32_removexattr, sys_renameat2 as sys_arch32_renameat2,
3889        sys_select as sys_arch32__newselect, sys_sendfile as sys_arch32_sendfile,
3890        sys_setxattr as sys_arch32_setxattr, sys_splice as sys_arch32_splice,
3891        sys_statfs as sys_arch32_statfs, sys_statx as sys_arch32_statx,
3892        sys_symlinkat as sys_arch32_symlinkat, sys_sync as sys_arch32_sync,
3893        sys_syncfs as sys_arch32_syncfs, sys_tee as sys_arch32_tee,
3894        sys_timerfd_create as sys_arch32_timerfd_create,
3895        sys_timerfd_gettime as sys_arch32_timerfd_gettime,
3896        sys_timerfd_settime as sys_arch32_timerfd_settime, sys_truncate as sys_arch32_truncate,
3897        sys_umask as sys_arch32_umask, sys_utimensat as sys_arch32_utimensat,
3898        sys_vmsplice as sys_arch32_vmsplice,
3899    };
3900}
3901
3902#[cfg(target_arch = "aarch64")]
3903pub use arch32::*;
3904
3905#[cfg(test)]
3906mod tests {
3907    use super::*;
3908    use crate::task::KernelFeatures;
3909    use crate::testing::*;
3910    use starnix_types::vfs::default_statfs;
3911    use starnix_uapi::{O_RDONLY, SEEK_CUR, SEEK_END, SEEK_SET};
3912    use zerocopy::IntoBytes;
3913
3914    #[::fuchsia::test]
3915    async fn test_sys_lseek() -> Result<(), Errno> {
3916        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
3917            let fd = FdNumber::from_raw(10);
3918            let file_handle =
3919                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3920            let file_size = file_handle.node().stat(locked, current_task).unwrap().st_size;
3921            current_task.files.insert(locked, current_task, fd, file_handle).unwrap();
3922
3923            assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_CUR)?, 0);
3924            assert_eq!(sys_lseek(locked, current_task, fd, 1, SEEK_CUR)?, 1);
3925            assert_eq!(sys_lseek(locked, current_task, fd, 3, SEEK_SET)?, 3);
3926            assert_eq!(sys_lseek(locked, current_task, fd, -3, SEEK_CUR)?, 0);
3927            assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_END)?, file_size);
3928            assert_eq!(sys_lseek(locked, current_task, fd, -5, SEEK_SET), error!(EINVAL));
3929
3930            // Make sure that the failed call above did not change the offset.
3931            assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_CUR)?, file_size);
3932
3933            // Prepare for an overflow.
3934            assert_eq!(sys_lseek(locked, current_task, fd, 3, SEEK_SET)?, 3);
3935
3936            // Check for overflow.
3937            assert_eq!(sys_lseek(locked, current_task, fd, i64::MAX, SEEK_CUR), error!(EINVAL));
3938
3939            Ok(())
3940        })
3941        .await
3942    }
3943
3944    #[::fuchsia::test]
3945    async fn test_sys_dup() -> Result<(), Errno> {
3946        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
3947            let file_handle =
3948                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3949            let oldfd = current_task.add_file(locked, file_handle, FdFlags::empty())?;
3950            let newfd = sys_dup(locked, current_task, oldfd)?;
3951
3952            assert_ne!(oldfd, newfd);
3953            let files = &current_task.files;
3954            assert!(Arc::ptr_eq(&files.get(oldfd).unwrap(), &files.get(newfd).unwrap()));
3955
3956            assert_eq!(sys_dup(locked, current_task, FdNumber::from_raw(3)), error!(EBADF));
3957
3958            Ok(())
3959        })
3960        .await
3961    }
3962
3963    #[::fuchsia::test]
3964    async fn test_sys_dup3() -> Result<(), Errno> {
3965        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
3966            let file_handle =
3967                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3968            let oldfd = current_task.add_file(locked, file_handle, FdFlags::empty())?;
3969            let newfd = FdNumber::from_raw(2);
3970            sys_dup3(locked, current_task, oldfd, newfd, O_CLOEXEC)?;
3971
3972            assert_ne!(oldfd, newfd);
3973            let files = &current_task.files;
3974            assert!(Arc::ptr_eq(&files.get(oldfd).unwrap(), &files.get(newfd).unwrap()));
3975            assert_eq!(files.get_fd_flags_allowing_opath(oldfd).unwrap(), FdFlags::empty());
3976            assert_eq!(files.get_fd_flags_allowing_opath(newfd).unwrap(), FdFlags::CLOEXEC);
3977
3978            assert_eq!(sys_dup3(locked, current_task, oldfd, oldfd, O_CLOEXEC), error!(EINVAL));
3979
3980            // Pass invalid flags.
3981            let invalid_flags = 1234;
3982            assert_eq!(sys_dup3(locked, current_task, oldfd, newfd, invalid_flags), error!(EINVAL));
3983
3984            // Makes sure that dup closes the old file handle before the fd points
3985            // to the new file handle.
3986            let second_file_handle =
3987                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3988            let different_file_fd =
3989                current_task.add_file(locked, second_file_handle, FdFlags::empty())?;
3990            assert!(!Arc::ptr_eq(
3991                &files.get(oldfd).unwrap(),
3992                &files.get(different_file_fd).unwrap()
3993            ));
3994            sys_dup3(locked, current_task, oldfd, different_file_fd, O_CLOEXEC)?;
3995            assert!(Arc::ptr_eq(
3996                &files.get(oldfd).unwrap(),
3997                &files.get(different_file_fd).unwrap()
3998            ));
3999
4000            Ok(())
4001        })
4002        .await
4003    }
4004
4005    #[::fuchsia::test]
4006    async fn test_sys_open_cloexec() -> Result<(), Errno> {
4007        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
4008            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4009            let path = b"data/testfile.txt\0";
4010            current_task.write_memory(path_addr, path)?;
4011            let fd = sys_openat(
4012                locked,
4013                &current_task,
4014                FdNumber::AT_FDCWD,
4015                UserCString::new(current_task, path_addr),
4016                O_RDONLY | O_CLOEXEC,
4017                FileMode::default(),
4018            )?;
4019            assert!(current_task.files.get_fd_flags_allowing_opath(fd)?.contains(FdFlags::CLOEXEC));
4020            Ok(())
4021        })
4022        .await
4023    }
4024
4025    #[::fuchsia::test]
4026    async fn test_sys_epoll() -> Result<(), Errno> {
4027        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
4028            let epoll_fd =
4029                sys_epoll_create1(locked, current_task, 0).expect("sys_epoll_create1 failed");
4030            sys_close(locked, current_task, epoll_fd).expect("sys_close failed");
4031
4032            Ok(())
4033        })
4034        .await
4035    }
4036
4037    #[::fuchsia::test]
4038    async fn test_fstat_tmp_file() {
4039        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
4040            // Create the file that will be used to stat.
4041            let file_path = "data/testfile.txt";
4042            let _file_handle =
4043                current_task.open_file(locked, file_path.into(), OpenFlags::RDONLY).unwrap();
4044
4045            // Write the path to user memory.
4046            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4047            current_task
4048                .write_memory(path_addr, file_path.as_bytes())
4049                .expect("failed to clear struct");
4050
4051            let memory_len = (path_addr + file_path.len()).expect("OOB memory allocation!");
4052            let user_stat = UserRef::new(memory_len);
4053            current_task
4054                .write_object(user_stat, &default_statfs(0))
4055                .expect("failed to clear struct");
4056
4057            let user_path = UserCString::new(current_task, path_addr);
4058
4059            assert_eq!(sys_statfs(locked, current_task, user_path, user_stat.into()), Ok(()));
4060
4061            let returned_stat = current_task.read_object(user_stat).expect("failed to read struct");
4062            assert_eq!(
4063                returned_stat.as_bytes(),
4064                default_statfs(u32::from_be_bytes(*b"f.io")).as_bytes()
4065            );
4066        })
4067        .await;
4068    }
4069
4070    #[::fuchsia::test]
4071    async fn test_unlinkat_dir() {
4072        spawn_kernel_and_run(async |locked, current_task| {
4073            // Create the dir that we will attempt to unlink later.
4074            let no_slash_path = b"testdir";
4075            let no_slash_path_addr =
4076                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
4077            current_task
4078                .write_memory(no_slash_path_addr, no_slash_path)
4079                .expect("failed to write path");
4080            let no_slash_user_path = UserCString::new(current_task, no_slash_path_addr);
4081            sys_mkdirat(
4082                locked,
4083                &current_task,
4084                FdNumber::AT_FDCWD,
4085                no_slash_user_path,
4086                FileMode::ALLOW_ALL.with_type(FileMode::IFDIR),
4087            )
4088            .unwrap();
4089
4090            let slash_path = b"testdir/";
4091            let slash_path_addr =
4092                map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4093            current_task.write_memory(slash_path_addr, slash_path).expect("failed to write path");
4094            let slash_user_path = UserCString::new(current_task, slash_path_addr);
4095
4096            // Try to remove a directory without specifying AT_REMOVEDIR.
4097            // This should fail with EISDIR, irrespective of the terminating slash.
4098            let error = sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, slash_user_path, 0)
4099                .unwrap_err();
4100            assert_eq!(error, errno!(EISDIR));
4101            let error =
4102                sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, no_slash_user_path, 0)
4103                    .unwrap_err();
4104            assert_eq!(error, errno!(EISDIR));
4105
4106            // Success with AT_REMOVEDIR.
4107            sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, slash_user_path, AT_REMOVEDIR)
4108                .unwrap();
4109        })
4110        .await;
4111    }
4112
4113    #[::fuchsia::test]
4114    async fn test_rename_noreplace() {
4115        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
4116            // Create the file that will be renamed.
4117            let old_user_path = "data/testfile.txt";
4118            let _old_file_handle =
4119                current_task.open_file(locked, old_user_path.into(), OpenFlags::RDONLY).unwrap();
4120
4121            // Write the path to user memory.
4122            let old_path_addr =
4123                map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4124            current_task
4125                .write_memory(old_path_addr, old_user_path.as_bytes())
4126                .expect("failed to clear struct");
4127
4128            // Create a second file that we will attempt to rename to.
4129            let new_user_path = "data/testfile2.txt";
4130            let _new_file_handle =
4131                current_task.open_file(locked, new_user_path.into(), OpenFlags::RDONLY).unwrap();
4132
4133            // Write the path to user memory.
4134            let new_path_addr =
4135                map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4136            current_task
4137                .write_memory(new_path_addr, new_user_path.as_bytes())
4138                .expect("failed to clear struct");
4139
4140            // Try to rename first file to second file's name with RENAME_NOREPLACE flag.
4141            // This should fail with EEXIST.
4142            let error = sys_renameat2(
4143                locked,
4144                &current_task,
4145                FdNumber::AT_FDCWD,
4146                UserCString::new(current_task, old_path_addr),
4147                FdNumber::AT_FDCWD,
4148                UserCString::new(current_task, new_path_addr),
4149                RenameFlags::NOREPLACE.bits(),
4150            )
4151            .unwrap_err();
4152            assert_eq!(error, errno!(EEXIST));
4153        })
4154        .await;
4155    }
4156
4157    #[::fuchsia::test]
4158    async fn test_sys_sync() -> Result<(), Errno> {
4159        spawn_kernel_and_run(async |locked, current_task| {
4160            sys_sync(locked, current_task)?;
4161            Ok(())
4162        })
4163        .await
4164    }
4165
4166    #[::fuchsia::test]
4167    async fn test_sys_syncfs() -> Result<(), Errno> {
4168        spawn_kernel_and_run(async |locked, current_task| {
4169            let file_handle = current_task.open_file(locked, ".".into(), OpenFlags::RDONLY)?;
4170            let fd = current_task.add_file(locked, file_handle, FdFlags::empty())?;
4171            sys_syncfs(locked, current_task, fd)?;
4172            Ok(())
4173        })
4174        .await
4175    }
4176
4177    // TODO(https://fxbug.dev/485370648) remove when unnecessary
4178    #[::fuchsia::test]
4179    async fn test_fake_ion_stat() {
4180        // Test with fake_ion disabled (default).
4181        spawn_kernel_and_run(async |locked, current_task| {
4182            let ion_path = b"/dev/ion\0";
4183            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4184            current_task.write_memory(path_addr, ion_path).expect("failed to write path");
4185            let user_path = UserCString::new(current_task, path_addr);
4186
4187            let stat_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4188            let stat_ptr = StatPtr::new(current_task, stat_addr);
4189
4190            let error =
4191                sys_fstatat64(locked, current_task, FdNumber::AT_FDCWD, user_path, stat_ptr, 0)
4192                    .unwrap_err();
4193            assert_eq!(error, errno!(ENOENT));
4194        })
4195        .await;
4196
4197        // Test with fake_ion enabled.
4198        let mut features = KernelFeatures::default();
4199        features.fake_ion = true;
4200        spawn_kernel_with_features_and_run(
4201            async |locked, current_task| {
4202                let ion_path = b"/dev/ion\0";
4203                let path_addr =
4204                    map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4205                current_task.write_memory(path_addr, ion_path).expect("failed to write path");
4206                let user_path = UserCString::new(current_task, path_addr);
4207
4208                let stat_addr =
4209                    map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4210                let stat_ptr = StatPtr::new(current_task, stat_addr);
4211
4212                sys_fstatat64(locked, current_task, FdNumber::AT_FDCWD, user_path, stat_ptr, 0)
4213                    .expect("sys_fstatat64 should succeed with fake_ion");
4214
4215                let stat_result: uapi::stat =
4216                    current_task.read_object(stat_addr.into()).expect("failed to read stat");
4217                assert_eq!(stat_result.st_mode, uapi::S_IFCHR | 0o666);
4218                assert_eq!(stat_result.st_rdev, DeviceType::new(10, 59).bits());
4219
4220                // Test statx as well.
4221                let statx_addr =
4222                    map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4223                let statx_ptr = UserRef::new(statx_addr);
4224                sys_statx(
4225                    locked,
4226                    current_task,
4227                    FdNumber::AT_FDCWD,
4228                    user_path,
4229                    0,
4230                    uapi::STATX_BASIC_STATS,
4231                    statx_ptr,
4232                )
4233                .expect("sys_statx should succeed with fake_ion");
4234
4235                let statx_result: statx =
4236                    current_task.read_object(statx_ptr).expect("failed to read statx");
4237                assert_eq!(statx_result.stx_mode, (uapi::S_IFCHR | 0o666) as u16);
4238                assert_eq!(statx_result.stx_rdev_major, 10);
4239                assert_eq!(statx_result.stx_rdev_minor, 59);
4240            },
4241            features,
4242        )
4243        .await;
4244    }
4245}