Skip to main content

starnix_core/vfs/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::{IOVecPtr, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE};
6use crate::security;
7use crate::syscalls::time::{ITimerSpecPtr, TimeSpecPtr, TimeValPtr};
8use crate::task::{CurrentTask, EventHandler, ProcessEntryRef, ReadyItem, ReadyItemKey, Waiter};
9use crate::time::{Timeline, TimerWakeup};
10use crate::vfs::aio::AioContext;
11use crate::vfs::buffers::{UserBuffersInputBuffer, UserBuffersOutputBuffer};
12use crate::vfs::eventfd::{EventFdType, new_eventfd};
13use crate::vfs::fs_args::MountParams;
14use crate::vfs::inotify::InotifyFileObject;
15use crate::vfs::io_uring::{IORING_MAX_ENTRIES, IoUringFileObject};
16use crate::vfs::pidfd::new_pidfd;
17use crate::vfs::pipe::{PipeFileObject, new_pipe};
18use crate::vfs::timer::TimerFile;
19use crate::vfs::{
20    CheckAccessReason, DirentSink64, EpollFileObject, FallocMode, FdFlags, FdNumber,
21    FileAsyncOwner, FileHandle, FileSystemOptions, FlockOperation, FsStr, FsString, LookupContext,
22    NamespaceNode, PathWithReachability, RecordLockCommand, RenameFlags, SeekTarget, StatxFlags,
23    SymlinkMode, SymlinkTarget, TargetFdNumber, TimeUpdateType, UnlinkKind, ValueOrSize, WdNumber,
24    WhatToMount, XattrOp, checked_add_offset_and_length, new_memfd, new_zombie_pidfd, splice,
25};
26use starnix_logging::{log_trace, track_stub};
27use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Mutex, Unlocked};
28use starnix_syscalls::{SUCCESS, SyscallArg, SyscallResult};
29use starnix_types::ownership::TempRef;
30use starnix_types::time::{
31    duration_from_poll_timeout, duration_from_timespec, time_from_timespec, timespec_from_duration,
32};
33use starnix_types::user_buffer::UserBuffer;
34use starnix_uapi::auth::{
35    CAP_BLOCK_SUSPEND, CAP_DAC_READ_SEARCH, CAP_LEASE, CAP_SYS_ADMIN, CAP_WAKE_ALARM, Credentials,
36    PTRACE_MODE_ATTACH_REALCREDS,
37};
38use starnix_uapi::device_id::DeviceId;
39use starnix_uapi::errors::{
40    EFAULT, EINTR, ENAMETOOLONG, ENOTSUP, ETIMEDOUT, Errno, ErrnoResultExt,
41};
42use starnix_uapi::file_lease::FileLeaseType;
43use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
44use starnix_uapi::inotify_mask::InotifyMask;
45use starnix_uapi::mount_flags::{FileSystemFlags, MountFlags};
46use starnix_uapi::open_flags::OpenFlags;
47use starnix_uapi::personality::PersonalityFlags;
48use starnix_uapi::resource_limits::Resource;
49use starnix_uapi::seal_flags::SealFlags;
50use starnix_uapi::signals::SigSet;
51use starnix_uapi::unmount_flags::UnmountFlags;
52use starnix_uapi::user_address::{MultiArchUserRef, UserAddress, UserCString, UserRef};
53use starnix_uapi::user_value::UserValue;
54use starnix_uapi::vfs::{EpollEvent, FdEvents, ResolveFlags};
55use starnix_uapi::{
56    __kernel_fd_set, AT_EACCESS, AT_EMPTY_PATH, AT_NO_AUTOMOUNT, AT_REMOVEDIR, AT_SYMLINK_FOLLOW,
57    AT_SYMLINK_NOFOLLOW, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM, CLOCK_MONOTONIC, CLOCK_REALTIME,
58    CLOCK_REALTIME_ALARM, CLOSE_RANGE_CLOEXEC, CLOSE_RANGE_UNSHARE, EFD_CLOEXEC, EFD_NONBLOCK,
59    EFD_SEMAPHORE, EPOLL_CLOEXEC, EPOLL_CTL_ADD, EPOLL_CTL_DEL, EPOLL_CTL_MOD, F_ADD_SEALS,
60    F_DUPFD, F_DUPFD_CLOEXEC, F_GET_SEALS, F_GETFD, F_GETFL, F_GETLEASE, F_GETLK, F_GETLK64,
61    F_GETOWN, F_GETOWN_EX, F_OFD_GETLK, F_OFD_SETLK, F_OFD_SETLKW, F_OWNER_PGRP, F_OWNER_PID,
62    F_OWNER_TID, F_SETFD, F_SETFL, F_SETLEASE, F_SETLK, F_SETLK64, F_SETLKW, F_SETLKW64, F_SETOWN,
63    F_SETOWN_EX, F_SETSIG, FIOCLEX, FIONCLEX, IN_CLOEXEC, IN_NONBLOCK, MFD_ALLOW_SEALING,
64    MFD_CLOEXEC, MFD_EXEC, MFD_HUGE_MASK, MFD_HUGE_SHIFT, MFD_HUGETLB, MFD_NOEXEC_SEAL, NAME_MAX,
65    O_CLOEXEC, O_CREAT, O_NOFOLLOW, O_PATH, O_TMPFILE, PIDFD_NONBLOCK, POLLERR, POLLHUP, POLLIN,
66    POLLOUT, POLLPRI, POLLRDBAND, POLLRDNORM, POLLWRBAND, POLLWRNORM, POSIX_FADV_DONTNEED,
67    POSIX_FADV_NOREUSE, POSIX_FADV_NORMAL, POSIX_FADV_RANDOM, POSIX_FADV_SEQUENTIAL,
68    POSIX_FADV_WILLNEED, RWF_SUPPORTED, TFD_CLOEXEC, TFD_NONBLOCK, TFD_TIMER_ABSTIME,
69    TFD_TIMER_CANCEL_ON_SET, XATTR_CREATE, XATTR_NAME_MAX, XATTR_REPLACE, aio_context_t, errno,
70    error, f_owner_ex, io_event, io_uring_params,
71    io_uring_register_op_IORING_REGISTER_BUFFERS as IORING_REGISTER_BUFFERS,
72    io_uring_register_op_IORING_REGISTER_IOWQ_MAX_WORKERS as IORING_REGISTER_IOWQ_MAX_WORKERS,
73    io_uring_register_op_IORING_REGISTER_PBUF_RING as IORING_REGISTER_PBUF_RING,
74    io_uring_register_op_IORING_REGISTER_PBUF_STATUS as IORING_REGISTER_PBUF_STATUS,
75    io_uring_register_op_IORING_REGISTER_RING_FDS as IORING_REGISTER_RING_FDS,
76    io_uring_register_op_IORING_UNREGISTER_BUFFERS as IORING_UNREGISTER_BUFFERS,
77    io_uring_register_op_IORING_UNREGISTER_PBUF_RING as IORING_UNREGISTER_PBUF_RING,
78    io_uring_register_op_IORING_UNREGISTER_RING_FDS as IORING_UNREGISTER_RING_FDS, iocb, off_t,
79    pid_t, pollfd, pselect6_sigmask, sigset_t, statx, timespec, uapi, uid_t,
80};
81use std::cmp::Ordering;
82use std::collections::VecDeque;
83use std::marker::PhantomData;
84use std::sync::{Arc, atomic};
85use std::usize;
86use zerocopy::{Immutable, IntoBytes};
87
// Layout check for the uapi structs this file exchanges with user space: `pollfd`, `io_event`,
// `iocb`, `statx_timestamp`, `statx`, the io_uring offset/params structs, `io_uring_rsrc_update`
// and `io_uring_buf_reg`. Each entry names a struct followed by the fields to check.
//
// NOTE(review): going by the macro's name, this presumably asserts at compile time that every
// listed field has the same layout in the native and arch32 bindings, which is what lets code
// here use a single type for both (e.g. `IocbPtr = MultiArchUserRef<iocb, iocb>`). Confirm
// against the definition of `uapi::check_arch_independent_layout!`.
88uapi::check_arch_independent_layout! {
89    pollfd {
90        fd,
91        events,
92        revents,
93    }
94
95    io_event {
96        data,
97        obj,
98        res,
99        res2,
100    }
101
102    iocb {
103        aio_data,
104        aio_key,
105        aio_rw_flags,
106        aio_lio_opcode,
107        aio_reqprio,
108        aio_fildes,
109        aio_buf,
110        aio_nbytes,
111        aio_offset,
112        aio_reserved2,
113        aio_flags,
114        aio_resfd,
115    }
116
117    statx_timestamp {
118        tv_sec,
119        tv_nsec,
120    }
121
122    statx {
123        stx_mask,
124        stx_blksize,
125        stx_attributes,
126        stx_nlink,
127        stx_uid,
128        stx_gid,
129        stx_mode,
130        stx_ino,
131        stx_size,
132        stx_blocks,
133        stx_attributes_mask,
134        stx_atime,
135        stx_btime,
136        stx_ctime,
137        stx_mtime,
138        stx_rdev_major,
139        stx_rdev_minor,
140        stx_dev_major,
141        stx_dev_minor,
142        stx_mnt_id,
143        stx_dio_mem_align,
144        stx_dio_offset_align,
145        stx_subvol,
146        stx_atomic_write_unit_min,
147        stx_atomic_write_unit_max,
148        stx_atomic_write_segments_max,
149    }
150
151    io_sqring_offsets {
152        head,
153        tail,
154        ring_mask,
155        ring_entries,
156        flags,
157        dropped,
158        array,
159        resv1,
160        user_addr,
161    }
162
163    io_cqring_offsets {
164        head,
165        tail,
166        ring_mask,
167        ring_entries,
168        overflow,
169        cqes,
170        flags,
171        resv1,
172        user_addr,
173    }
174
175    io_uring_params {
176        sq_entries,
177        cq_entries,
178        flags,
179        sq_thread_cpu,
180        sq_thread_idle,
181        features,
182        wq_fd,
183        resv,
184        sq_off,
185        cq_off,
186    }
187
188    io_uring_rsrc_update {
189        offset,
190        resv,
191        data,
192    }
193
194    io_uring_buf_reg {
195        ring_addr,
196        ring_entries,
197        bgid,
198        flags,
199        resv,
200    }
201}
202
// Constants from bionic/libc/include/sys/stat.h, spelled the way the Linux headers define them
// (`(1 << 30) - 1` and `(1 << 30) - 2`).
const UTIME_NOW: i64 = (1 << 30) - 1;
const UTIME_OMIT: i64 = (1 << 30) - 2;
206
/// User pointer to an `off_t`, parameterized with `uapi::off_t` and `uapi::arch32::off_t`.
207pub type OffsetPtr = MultiArchUserRef<uapi::off_t, uapi::arch32::off_t>;
/// User pointer to an `iocb`; the same `iocb` type is used for both type parameters.
208pub type IocbPtr = MultiArchUserRef<iocb, iocb>;
/// User pointer to an `IocbPtr`, i.e. a pointer to a pointer to an `iocb`.
209pub type IocbPtrPtr = MultiArchUserRef<IocbPtr, IocbPtr>;
210
211pub fn sys_read(
212    locked: &mut Locked<Unlocked>,
213    current_task: &CurrentTask,
214    fd: FdNumber,
215    address: UserAddress,
216    length: usize,
217) -> Result<usize, Errno> {
218    let file = current_task.get_file(fd)?;
219    file.read(
220        locked,
221        current_task,
222        &mut UserBuffersOutputBuffer::unified_new_at(current_task, address, length)?,
223    )
224    .map_eintr(|| errno!(ERESTARTSYS))
225}
226
227pub fn sys_write(
228    locked: &mut Locked<Unlocked>,
229    current_task: &CurrentTask,
230    fd: FdNumber,
231    address: UserAddress,
232    length: usize,
233) -> Result<usize, Errno> {
234    let file = current_task.get_file(fd)?;
235    file.write(
236        locked,
237        current_task,
238        &mut UserBuffersInputBuffer::unified_new_at(current_task, address, length)?,
239    )
240    .map_eintr(|| errno!(ERESTARTSYS))
241}
242
243pub fn sys_close(
244    _locked: &mut Locked<Unlocked>,
245    current_task: &CurrentTask,
246    fd: FdNumber,
247) -> Result<(), Errno> {
248    current_task.live().files.close(fd)?;
249    Ok(())
250}
251
252pub fn sys_close_range(
253    locked: &mut Locked<Unlocked>,
254    current_task: &CurrentTask,
255    first: u32,
256    last: u32,
257    flags: u32,
258) -> Result<(), Errno> {
259    if first > last || flags & !(CLOSE_RANGE_UNSHARE | CLOSE_RANGE_CLOEXEC) != 0 {
260        return error!(EINVAL);
261    }
262    let live_task = current_task.live();
263    if flags & CLOSE_RANGE_UNSHARE != 0 {
264        live_task.files.unshare();
265    }
266    let in_range = |fd: FdNumber| fd.raw() as u32 >= first && fd.raw() as u32 <= last;
267    if flags & CLOSE_RANGE_CLOEXEC != 0 {
268        live_task.files.retain(locked, current_task, |fd, flags| {
269            if in_range(fd) {
270                *flags |= FdFlags::CLOEXEC;
271            }
272            true
273        });
274    } else {
275        live_task.files.retain(locked, current_task, |fd, _| !in_range(fd));
276    }
277    Ok(())
278}
279
280pub fn sys_lseek(
281    locked: &mut Locked<Unlocked>,
282    current_task: &CurrentTask,
283    fd: FdNumber,
284    offset: off_t,
285    whence: u32,
286) -> Result<off_t, Errno> {
287    let file = current_task.get_file(fd)?;
288    file.seek(locked, current_task, SeekTarget::from_raw(whence, offset)?)
289}
290
291pub fn sys_fcntl(
292    locked: &mut Locked<Unlocked>,
293    current_task: &CurrentTask,
294    fd: FdNumber,
295    cmd: u32,
296    arg: u64,
297) -> Result<SyscallResult, Errno> {
298    let file = match cmd {
299        F_DUPFD | F_DUPFD_CLOEXEC | F_GETFD | F_SETFD | F_GETFL => {
300            current_task.get_file_allowing_opath(fd)?
301        }
302        _ => current_task.get_file(fd)?,
303    };
304
305    match cmd {
306        // For the following values of cmd we need to perform more checks before running the
307        // `check_file_fcntl_access` LSM hook.
308        F_SETOWN | F_SETOWN_EX | F_ADD_SEALS | F_SETLEASE => {}
309        _ => {
310            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
311        }
312    };
313
314    match cmd {
315        F_DUPFD | F_DUPFD_CLOEXEC => {
316            let fd_number = arg as i32;
317            let flags = if cmd == F_DUPFD_CLOEXEC { FdFlags::CLOEXEC } else { FdFlags::empty() };
318            let newfd = current_task.live().files.duplicate(
319                locked,
320                current_task,
321                fd,
322                TargetFdNumber::Minimum(FdNumber::from_raw(fd_number)),
323                flags,
324            )?;
325            Ok(newfd.into())
326        }
327        F_GETOWN => match file.get_async_owner() {
328            FileAsyncOwner::Unowned => Ok(0.into()),
329            FileAsyncOwner::Thread(tid) => Ok(tid.into()),
330            FileAsyncOwner::Process(pid) => Ok(pid.into()),
331            FileAsyncOwner::ProcessGroup(pgid) => Ok((-pgid).into()),
332        },
333        F_GETOWN_EX => {
334            let maybe_owner = match file.get_async_owner() {
335                FileAsyncOwner::Unowned => None,
336                FileAsyncOwner::Thread(tid) => {
337                    Some(uapi::f_owner_ex { type_: F_OWNER_TID as i32, pid: tid })
338                }
339                FileAsyncOwner::Process(pid) => {
340                    Some(uapi::f_owner_ex { type_: F_OWNER_PID as i32, pid })
341                }
342                FileAsyncOwner::ProcessGroup(pgid) => {
343                    Some(uapi::f_owner_ex { type_: F_OWNER_PGRP as i32, pid: pgid })
344                }
345            };
346            if let Some(owner) = maybe_owner {
347                let user_owner: UserRef<f_owner_ex> =
348                    UserRef::<uapi::f_owner_ex>::new(UserAddress::from(arg));
349                current_task.write_object(user_owner, &owner)?;
350            }
351            Ok(SUCCESS)
352        }
353        F_SETOWN => {
354            let pid = (arg as u32) as i32;
355            let owner = match pid.cmp(&0) {
356                Ordering::Equal => FileAsyncOwner::Unowned,
357                Ordering::Greater => FileAsyncOwner::Process(pid),
358                Ordering::Less => {
359                    FileAsyncOwner::ProcessGroup(pid.checked_neg().ok_or_else(|| errno!(EINVAL))?)
360                }
361            };
362            owner.validate(current_task)?;
363            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
364            file.set_async_owner(owner);
365            Ok(SUCCESS)
366        }
367        F_SETOWN_EX => {
368            let user_owner = UserRef::<uapi::f_owner_ex>::new(UserAddress::from(arg));
369            let requested_owner = current_task.read_object(user_owner)?;
370            let mut owner = match requested_owner.type_ as u32 {
371                F_OWNER_TID => FileAsyncOwner::Thread(requested_owner.pid),
372                F_OWNER_PID => FileAsyncOwner::Process(requested_owner.pid),
373                F_OWNER_PGRP => FileAsyncOwner::ProcessGroup(requested_owner.pid),
374                _ => return error!(EINVAL),
375            };
376            if requested_owner.pid == 0 {
377                owner = FileAsyncOwner::Unowned;
378            }
379            owner.validate(current_task)?;
380            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
381            file.set_async_owner(owner);
382            Ok(SUCCESS)
383        }
384        F_GETFD => Ok(current_task.live().files.get_fd_flags_allowing_opath(fd)?.into()),
385        F_SETFD => {
386            current_task
387                .live()
388                .files
389                .set_fd_flags_allowing_opath(fd, FdFlags::from_bits_truncate(arg as u32))?;
390            Ok(SUCCESS)
391        }
392        F_GETFL => {
393            // O_PATH allowed for:
394            //
395            //   Retrieving open file status flags using the fcntl(2)
396            //   F_GETFL operation: the returned flags will include the
397            //   bit O_PATH.
398            //
399            // See https://man7.org/linux/man-pages/man2/open.2.html
400            Ok(file.flags().into())
401        }
402        F_SETFL => {
403            let settable_flags = OpenFlags::APPEND
404                | OpenFlags::DIRECT
405                | OpenFlags::NOATIME
406                | OpenFlags::NONBLOCK
407                | OpenFlags::ASYNC;
408            let requested_flags =
409                OpenFlags::from_bits_truncate((arg as u32) & settable_flags.bits());
410
411            // If `NOATIME` flag is being set then check that it's allowed.
412            if requested_flags.contains(OpenFlags::NOATIME)
413                && !file.flags().contains(OpenFlags::NOATIME)
414            {
415                file.name.check_o_noatime_allowed(current_task)?;
416            }
417
418            file.update_file_flags(requested_flags, settable_flags);
419            Ok(SUCCESS)
420        }
421        F_SETLK | F_SETLKW | F_GETLK => {
422            let flock_ref =
423                MultiArchUserRef::<uapi::flock, uapi::arch32::flock>::new(current_task, arg);
424            let flock = current_task.read_multi_arch_object(flock_ref)?;
425            let cmd = RecordLockCommand::from_raw(cmd).ok_or_else(|| errno!(EINVAL))?;
426            if let Some(flock) = file.record_lock(locked, current_task, cmd, flock)? {
427                current_task.write_multi_arch_object(flock_ref, flock)?;
428            }
429            Ok(SUCCESS)
430        }
431        F_SETLK64 | F_SETLKW64 | F_GETLK64 | F_OFD_GETLK | F_OFD_SETLK | F_OFD_SETLKW => {
432            let flock_ref =
433                MultiArchUserRef::<uapi::flock, uapi::arch32::flock64>::new(current_task, arg);
434            let flock = current_task.read_multi_arch_object(flock_ref)?;
435            let cmd = RecordLockCommand::from_raw(cmd).ok_or_else(|| errno!(EINVAL))?;
436            if let Some(flock) = file.record_lock(locked, current_task, cmd, flock)? {
437                current_task.write_multi_arch_object(flock_ref, flock)?;
438            }
439            Ok(SUCCESS)
440        }
441        F_ADD_SEALS => {
442            if !file.can_write() {
443                // Cannot add seals if the file is not writable
444                return error!(EPERM);
445            }
446            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
447            let mut state = file.name.entry.node.write_guard_state.lock();
448            let flags = SealFlags::from_bits_truncate(arg as u32);
449            state.try_add_seal(flags)?;
450            Ok(SUCCESS)
451        }
452        F_GET_SEALS => {
453            let state = file.name.entry.node.write_guard_state.lock();
454            Ok(state.get_seals()?.into())
455        }
456        F_SETLEASE => {
457            let fsuid = current_task.current_creds().fsuid;
458            if fsuid != file.node().info().uid {
459                security::check_task_capable(current_task, CAP_LEASE)?;
460            }
461            let lease = FileLeaseType::from_bits(arg as u32)?;
462            security::check_file_fcntl_access(current_task, &file, cmd, arg)?;
463            file.set_lease(current_task, lease)?;
464            Ok(SUCCESS)
465        }
466        F_GETLEASE => Ok(file.get_lease(current_task).into()),
467        F_SETSIG => {
468            track_stub!(TODO("https://fxbug.dev/437972675"), "F_SETSIG");
469            return error!(EOPNOTSUPP);
470        }
471        _ => file.fcntl(current_task, cmd, arg),
472    }
473}
474
475pub fn sys_pread64(
476    locked: &mut Locked<Unlocked>,
477    current_task: &CurrentTask,
478    fd: FdNumber,
479    address: UserAddress,
480    length: usize,
481    offset: off_t,
482) -> Result<usize, Errno> {
483    let file = current_task.get_file(fd)?;
484    let offset = offset.try_into().map_err(|_| errno!(EINVAL))?;
485    file.read_at(
486        locked,
487        current_task,
488        offset,
489        &mut UserBuffersOutputBuffer::unified_new_at(current_task, address, length)?,
490    )
491}
492
493pub fn sys_pwrite64(
494    locked: &mut Locked<Unlocked>,
495    current_task: &CurrentTask,
496    fd: FdNumber,
497    address: UserAddress,
498    length: usize,
499    offset: off_t,
500) -> Result<usize, Errno> {
501    let file = current_task.get_file(fd)?;
502    let offset = offset.try_into().map_err(|_| errno!(EINVAL))?;
503    file.write_at(
504        locked,
505        current_task,
506        offset,
507        &mut UserBuffersInputBuffer::unified_new_at(current_task, address, length)?,
508    )
509}
510
511fn do_readv(
512    locked: &mut Locked<Unlocked>,
513    current_task: &CurrentTask,
514    fd: FdNumber,
515    iovec_addr: IOVecPtr,
516    iovec_count: UserValue<i32>,
517    offset: Option<off_t>,
518    flags: u32,
519) -> Result<usize, Errno> {
520    if flags & !RWF_SUPPORTED != 0 {
521        return error!(EOPNOTSUPP);
522    }
523    if flags != 0 {
524        track_stub!(TODO("https://fxbug.dev/322875072"), "preadv2 flags", flags);
525    }
526    let file = current_task.get_file(fd)?;
527    let iovec = current_task.read_iovec(iovec_addr, iovec_count)?;
528    let mut data = UserBuffersOutputBuffer::unified_new(current_task, iovec)?;
529    if let Some(offset) = offset {
530        file.read_at(
531            locked,
532            current_task,
533            offset.try_into().map_err(|_| errno!(EINVAL))?,
534            &mut data,
535        )
536    } else {
537        file.read(locked, current_task, &mut data)
538    }
539}
540
541pub fn sys_readv(
542    locked: &mut Locked<Unlocked>,
543    current_task: &CurrentTask,
544    fd: FdNumber,
545    iovec_addr: IOVecPtr,
546    iovec_count: UserValue<i32>,
547) -> Result<usize, Errno> {
548    do_readv(locked, current_task, fd, iovec_addr, iovec_count, None, 0)
549}
550
551pub fn sys_preadv(
552    locked: &mut Locked<Unlocked>,
553    current_task: &CurrentTask,
554    fd: FdNumber,
555    iovec_addr: IOVecPtr,
556    iovec_count: UserValue<i32>,
557    offset: off_t,
558) -> Result<usize, Errno> {
559    do_readv(locked, current_task, fd, iovec_addr, iovec_count, Some(offset), 0)
560}
561
562pub fn sys_preadv2(
563    locked: &mut Locked<Unlocked>,
564    current_task: &CurrentTask,
565    fd: FdNumber,
566    iovec_addr: IOVecPtr,
567    iovec_count: UserValue<i32>,
568    offset: off_t,
569    _unused: SyscallArg, // On 32-bit systems, holds the upper 32 bits of offset.
570    flags: u32,
571) -> Result<usize, Errno> {
572    let offset = if offset == -1 { None } else { Some(offset) };
573    do_readv(locked, current_task, fd, iovec_addr, iovec_count, offset, flags)
574}
575
576fn do_writev(
577    locked: &mut Locked<Unlocked>,
578    current_task: &CurrentTask,
579    fd: FdNumber,
580    iovec_addr: IOVecPtr,
581    iovec_count: UserValue<i32>,
582    offset: Option<off_t>,
583    flags: u32,
584) -> Result<usize, Errno> {
585    if flags & !RWF_SUPPORTED != 0 {
586        return error!(EOPNOTSUPP);
587    }
588    if flags != 0 {
589        track_stub!(TODO("https://fxbug.dev/322874523"), "pwritev2 flags", flags);
590    }
591
592    let file = current_task.get_file(fd)?;
593    let iovec = current_task.read_iovec(iovec_addr, iovec_count)?;
594    let mut data = UserBuffersInputBuffer::unified_new(current_task, iovec)?;
595    let res = if let Some(offset) = offset {
596        file.write_at(
597            locked,
598            current_task,
599            offset.try_into().map_err(|_| errno!(EINVAL))?,
600            &mut data,
601        )
602    } else {
603        file.write(locked, current_task, &mut data)
604    };
605
606    match &res {
607        Err(e) if e.code == EFAULT => {
608            track_stub!(TODO("https://fxbug.dev/297370529"), "allow partial writes")
609        }
610        _ => (),
611    }
612
613    res
614}
615
616pub fn sys_writev(
617    locked: &mut Locked<Unlocked>,
618    current_task: &CurrentTask,
619    fd: FdNumber,
620    iovec_addr: IOVecPtr,
621    iovec_count: UserValue<i32>,
622) -> Result<usize, Errno> {
623    do_writev(locked, current_task, fd, iovec_addr, iovec_count, None, 0)
624}
625
626pub fn sys_pwritev(
627    locked: &mut Locked<Unlocked>,
628    current_task: &CurrentTask,
629    fd: FdNumber,
630    iovec_addr: IOVecPtr,
631    iovec_count: UserValue<i32>,
632    offset: off_t,
633) -> Result<usize, Errno> {
634    do_writev(locked, current_task, fd, iovec_addr, iovec_count, Some(offset), 0)
635}
636
637pub fn sys_pwritev2(
638    locked: &mut Locked<Unlocked>,
639    current_task: &CurrentTask,
640    fd: FdNumber,
641    iovec_addr: IOVecPtr,
642    iovec_count: UserValue<i32>,
643    offset: off_t,
644    _unused: SyscallArg, // On 32-bit systems, holds the upper 32 bits of offset.
645    flags: u32,
646) -> Result<usize, Errno> {
647    let offset = if offset == -1 { None } else { Some(offset) };
648    do_writev(locked, current_task, fd, iovec_addr, iovec_count, offset, flags)
649}
650
/// User pointer to a `statfs` buffer, parameterized with `uapi::statfs` and
/// `uapi::arch32::statfs`; used by `sys_fstatfs` and `sys_statfs`.
651type StatFsPtr = MultiArchUserRef<uapi::statfs, uapi::arch32::statfs>;
652
653pub fn fstatfs<T32: IntoBytes + Immutable + TryFrom<uapi::statfs>>(
654    locked: &mut Locked<Unlocked>,
655    current_task: &CurrentTask,
656    fd: FdNumber,
657    user_buf: MultiArchUserRef<uapi::statfs, T32>,
658) -> Result<(), Errno> {
659    // O_PATH allowed for:
660    //
661    //   fstatfs(2) (since Linux 3.12).
662    //
663    // See https://man7.org/linux/man-pages/man2/open.2.html
664    let file = current_task.get_file_allowing_opath(fd)?;
665    let mut stat = file.fs.statfs(locked, current_task)?;
666    stat.f_flags |= file.name.mount.flags().bits() as i64;
667    current_task.write_multi_arch_object(user_buf, stat)?;
668    Ok(())
669}
670
671pub fn sys_fstatfs(
672    locked: &mut Locked<Unlocked>,
673    current_task: &CurrentTask,
674    fd: FdNumber,
675    user_buf: StatFsPtr,
676) -> Result<(), Errno> {
677    fstatfs(locked, current_task, fd, user_buf)
678}
679
680fn statfs<T32: IntoBytes + Immutable + TryFrom<uapi::statfs>>(
681    locked: &mut Locked<Unlocked>,
682    current_task: &CurrentTask,
683    user_path: UserCString,
684    user_buf: MultiArchUserRef<uapi::statfs, T32>,
685) -> Result<(), Errno> {
686    let name =
687        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
688    let fs = name.entry.node.fs();
689    let mut stat = fs.statfs(locked, current_task)?;
690    stat.f_flags |= name.mount.flags().bits() as i64;
691    current_task.write_multi_arch_object(user_buf, stat)?;
692    Ok(())
693}
694
695pub fn sys_statfs(
696    locked: &mut Locked<Unlocked>,
697    current_task: &CurrentTask,
698    user_path: UserCString,
699    user_buf: StatFsPtr,
700) -> Result<(), Errno> {
701    statfs(locked, current_task, user_path, user_buf)
702}
703
704pub fn sys_sendfile(
705    locked: &mut Locked<Unlocked>,
706    current_task: &CurrentTask,
707    out_fd: FdNumber,
708    in_fd: FdNumber,
709    user_offset: OffsetPtr,
710    count: i32,
711) -> Result<usize, Errno> {
712    splice::sendfile(locked, current_task, out_fd, in_fd, user_offset, count)
713}
714
715/// A convenient wrapper for Task::open_file_at.
716///
717/// Reads user_path from user memory and then calls through to Task::open_file_at.
718fn open_file_at(
719    locked: &mut Locked<Unlocked>,
720    current_task: &CurrentTask,
721    dir_fd: FdNumber,
722    user_path: UserCString,
723    flags: u32,
724    mode: FileMode,
725    resolve_flags: ResolveFlags,
726) -> Result<FileHandle, Errno> {
727    let path = current_task.read_path(user_path)?;
728    log_trace!(dir_fd:%, path:%; "open_file_at");
729    current_task.open_file_at(
730        locked,
731        dir_fd,
732        path.as_ref(),
733        OpenFlags::from_bits_truncate(flags),
734        mode,
735        resolve_flags,
736        AccessCheck::default(),
737    )
738}
739
740fn lookup_parent_at<T, F>(
741    locked: &mut Locked<Unlocked>,
742    current_task: &CurrentTask,
743    dir_fd: FdNumber,
744    user_path: UserCString,
745    callback: F,
746) -> Result<T, Errno>
747where
748    F: Fn(&mut Locked<Unlocked>, LookupContext, NamespaceNode, &FsStr) -> Result<T, Errno>,
749{
750    let path = current_task.read_path(user_path)?;
751    log_trace!(dir_fd:%, path:%; "lookup_parent_at");
752    if path.is_empty() {
753        return error!(ENOENT);
754    }
755    let mut context = LookupContext::default();
756    let (parent, basename) =
757        current_task.lookup_parent_at(locked, &mut context, dir_fd, path.as_ref())?;
758    callback(locked, context, parent, basename)
759}
760
761/// Options for lookup_at.
762#[derive(Debug, Default, Copy, Clone)]
763pub struct LookupFlags {
764    /// Whether AT_EMPTY_PATH was supplied.
765    allow_empty_path: bool,
766
767    /// Used to implement AT_SYMLINK_NOFOLLOW.
768    symlink_mode: SymlinkMode,
769
770    /// Automount directories on the path.
771    // TODO(https://fxbug.dev/297370602): Support the `AT_NO_AUTOMOUNT` flag.
772    #[allow(dead_code)]
773    automount: bool,
774}
775
776impl LookupFlags {
777    fn no_follow() -> Self {
778        Self { symlink_mode: SymlinkMode::NoFollow, ..Default::default() }
779    }
780
781    fn from_bits(flags: u32, allowed_flags: u32) -> Result<Self, Errno> {
782        if flags & !allowed_flags != 0 {
783            return error!(EINVAL);
784        }
785        let follow_symlinks = if allowed_flags & AT_SYMLINK_FOLLOW != 0 {
786            flags & AT_SYMLINK_FOLLOW != 0
787        } else {
788            flags & AT_SYMLINK_NOFOLLOW == 0
789        };
790        let automount =
791            if allowed_flags & AT_NO_AUTOMOUNT != 0 { flags & AT_NO_AUTOMOUNT == 0 } else { false };
792        if automount {
793            track_stub!(TODO("https://fxbug.dev/297370602"), "LookupFlags::automount");
794        }
795        Ok(LookupFlags {
796            allow_empty_path: (flags & AT_EMPTY_PATH != 0)
797                || (flags & O_PATH != 0 && flags & O_NOFOLLOW != 0),
798            symlink_mode: if follow_symlinks { SymlinkMode::Follow } else { SymlinkMode::NoFollow },
799            automount,
800        })
801    }
802}
803
804impl From<StatxFlags> for LookupFlags {
805    fn from(flags: StatxFlags) -> Self {
806        let lookup_flags = StatxFlags::AT_SYMLINK_NOFOLLOW
807            | StatxFlags::AT_EMPTY_PATH
808            | StatxFlags::AT_NO_AUTOMOUNT;
809        Self::from_bits((flags & lookup_flags).bits(), lookup_flags.bits()).unwrap()
810    }
811}
812
813pub fn lookup_at<L>(
814    locked: &mut Locked<L>,
815    current_task: &CurrentTask,
816    dir_fd: FdNumber,
817    user_path: UserCString,
818    options: LookupFlags,
819) -> Result<NamespaceNode, Errno>
820where
821    L: LockEqualOrBefore<FileOpsCore>,
822{
823    let path = current_task.read_path(user_path)?;
824    log_trace!(dir_fd:%, path:%; "lookup_at");
825    if path.is_empty() {
826        if options.allow_empty_path {
827            let (node, _) = current_task.resolve_dir_fd(
828                locked,
829                dir_fd,
830                path.as_ref(),
831                ResolveFlags::empty(),
832            )?;
833            return Ok(node);
834        }
835        return error!(ENOENT);
836    }
837
838    let mut parent_context = LookupContext::default();
839    let (parent, basename) =
840        current_task.lookup_parent_at(locked, &mut parent_context, dir_fd, path.as_ref())?;
841
842    let mut child_context = if parent_context.must_be_directory {
843        // The child must resolve to a directory. This is because a trailing slash
844        // was found in the path. If the child is a symlink, we should follow it.
845        // See https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap03.html#tag_21_03_00_75
846        parent_context.with(SymlinkMode::Follow)
847    } else {
848        parent_context.with(options.symlink_mode)
849    };
850
851    parent.lookup_child(locked, current_task, &mut child_context, basename)
852}
853
854fn do_openat(
855    locked: &mut Locked<Unlocked>,
856    current_task: &CurrentTask,
857    dir_fd: FdNumber,
858    user_path: UserCString,
859    flags: u32,
860    mode: FileMode,
861    resolve_flags: ResolveFlags,
862) -> Result<FdNumber, Errno> {
863    let file = open_file_at(locked, current_task, dir_fd, user_path, flags, mode, resolve_flags)?;
864    let fd_flags = get_fd_flags(flags);
865    current_task.add_file(locked, file, fd_flags)
866}
867
868pub fn sys_openat(
869    locked: &mut Locked<Unlocked>,
870    current_task: &CurrentTask,
871    dir_fd: FdNumber,
872    user_path: UserCString,
873    flags: u32,
874    mode: FileMode,
875) -> Result<FdNumber, Errno> {
876    do_openat(locked, current_task, dir_fd, user_path, flags, mode, ResolveFlags::empty())
877}
878
879pub fn sys_openat2(
880    locked: &mut Locked<Unlocked>,
881    current_task: &CurrentTask,
882    dir_fd: FdNumber,
883    user_path: UserCString,
884    how_ref: UserRef<uapi::open_how>,
885    size: usize,
886) -> Result<FdNumber, Errno> {
887    const EXPECTED_SIZE: usize = std::mem::size_of::<uapi::open_how>();
888    if size < EXPECTED_SIZE {
889        return error!(EINVAL);
890    }
891
892    let how = current_task.read_object(how_ref)?;
893
894    // If the `size` is greater than expected, then we need to check that any extra bytes after
895    // `open_how` are set to 0. This is needed to properly handle the case when `open_how` is
896    // extended with new fields in the future. There is no upper limit on the buffer size, so we
897    // limit size of each read to one page.
898    let mut pos = EXPECTED_SIZE;
899    while pos < size {
900        let length = std::cmp::min(size - pos, *PAGE_SIZE as usize);
901        let extra_bytes =
902            current_task.read_buffer(&UserBuffer { address: (how_ref.addr() + pos)?, length })?;
903        for b in extra_bytes {
904            if b != 0 {
905                return error!(E2BIG);
906            }
907        }
908        pos += length;
909    }
910
911    let flags: u32 = how.flags.try_into().map_err(|_| errno!(EINVAL))?;
912
913    // `mode` can be specified only with `O_CREAT` or `O_TMPFILE`.
914    let allowed_mode_flags = if (flags & (O_CREAT | O_TMPFILE)) > 0 { 0o7777 } else { 0 };
915    if (how.mode & !allowed_mode_flags) != 0 {
916        return error!(EINVAL);
917    }
918
919    let mode = FileMode::from_bits(how.mode.try_into().map_err(|_| errno!(EINVAL))?);
920    let resolve_flags =
921        ResolveFlags::from_bits(how.resolve.try_into().map_err(|_| errno!(EINVAL))?)
922            .ok_or_else(|| errno!(EINVAL))?;
923
924    if resolve_flags.contains(ResolveFlags::CACHED) {
925        track_stub!(TODO("https://fxbug.dev/326474574"), "openat2: RESOLVE_CACHED");
926        return error!(EAGAIN);
927    }
928
929    do_openat(locked, current_task, dir_fd, user_path, flags, mode, resolve_flags)
930}
931
932pub fn sys_faccessat(
933    locked: &mut Locked<Unlocked>,
934    current_task: &CurrentTask,
935    dir_fd: FdNumber,
936    user_path: UserCString,
937    mode: u32,
938) -> Result<(), Errno> {
939    sys_faccessat2(locked, current_task, dir_fd, user_path, mode, 0)
940}
941
942pub fn sys_faccessat2(
943    locked: &mut Locked<Unlocked>,
944    current_task: &CurrentTask,
945    dir_fd: FdNumber,
946    user_path: UserCString,
947    mode: u32,
948    flags: u32,
949) -> Result<(), Errno> {
950    let mut access_check = || {
951        let mode = Access::try_from(mode)?;
952        let lookup_flags = LookupFlags::from_bits(flags, AT_SYMLINK_NOFOLLOW | AT_EACCESS)?;
953        let name = lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?;
954        name.check_access(locked, current_task, mode, CheckAccessReason::Access)
955    };
956    // Unless `AT_ACCESS` is set, perform lookup & access-checking using real UID & GID.
957    if flags & AT_EACCESS == 0 {
958        let mut temporary_creds = Credentials::clone(&current_task.current_creds());
959        temporary_creds.fsuid = temporary_creds.uid;
960        temporary_creds.fsgid = temporary_creds.gid;
961        current_task.override_creds(temporary_creds.into(), access_check)
962    } else {
963        access_check()
964    }
965}
966
967pub fn sys_getdents64(
968    locked: &mut Locked<Unlocked>,
969    current_task: &CurrentTask,
970    fd: FdNumber,
971    user_buffer: UserAddress,
972    user_capacity: usize,
973) -> Result<usize, Errno> {
974    let file = current_task.get_file(fd)?;
975    let mut offset = file.offset.copy();
976    let mut sink = DirentSink64::new(current_task, &mut *offset, user_buffer, user_capacity);
977    let result = file.readdir(locked, current_task, &mut sink);
978    let ret = sink.map_result_with_actual(result);
979    offset.update();
980    ret
981}
982
983pub fn sys_chroot(
984    locked: &mut Locked<Unlocked>,
985    current_task: &CurrentTask,
986    user_path: UserCString,
987) -> Result<(), Errno> {
988    let name =
989        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
990    if !name.entry.node.is_dir() {
991        return error!(ENOTDIR);
992    }
993
994    current_task.fs().chroot(locked, current_task, name)?;
995    Ok(())
996}
997
998pub fn sys_chdir(
999    locked: &mut Locked<Unlocked>,
1000    current_task: &CurrentTask,
1001    user_path: UserCString,
1002) -> Result<(), Errno> {
1003    let name =
1004        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
1005    if !name.entry.node.is_dir() {
1006        return error!(ENOTDIR);
1007    }
1008    current_task.fs().chdir(locked, current_task, name)
1009}
1010
1011pub fn sys_fchdir(
1012    locked: &mut Locked<Unlocked>,
1013    current_task: &CurrentTask,
1014    fd: FdNumber,
1015) -> Result<(), Errno> {
1016    // O_PATH allowed for:
1017    //
1018    //   fchdir(2), if the file descriptor refers to a directory
1019    //   (since Linux 3.5).
1020    //
1021    // See https://man7.org/linux/man-pages/man2/open.2.html
1022    let file = current_task.get_file_allowing_opath(fd)?;
1023    if !file.name.entry.node.is_dir() {
1024        return error!(ENOTDIR);
1025    }
1026    current_task.fs().chdir(locked, current_task, file.name.to_passive())
1027}
1028
1029pub fn sys_fstat(
1030    locked: &mut Locked<Unlocked>,
1031    current_task: &CurrentTask,
1032    fd: FdNumber,
1033    buffer: UserRef<uapi::stat>,
1034) -> Result<(), Errno> {
1035    // O_PATH allowed for:
1036    //
1037    //   fstat(2) (since Linux 3.6).
1038    //
1039    // See https://man7.org/linux/man-pages/man2/open.2.html
1040    let file = current_task.get_file_allowing_opath(fd)?;
1041    let result = file.node().stat(locked, current_task)?;
1042    current_task.write_object(buffer, &result)?;
1043    Ok(())
1044}
1045
// User pointer to a stat buffer that is either a `uapi::stat` or a `uapi::arch32::stat64`.
// NOTE(review): presumably selected by the calling task's architecture -- confirm against
// `MultiArchUserRef`.
type StatPtr = MultiArchUserRef<uapi::stat, uapi::arch32::stat64>;
1047
1048// TODO(https://fxbug.dev/485370648) remove when unnecessary
1049fn get_fake_ion_stat() -> uapi::stat {
1050    uapi::stat {
1051        st_mode: uapi::S_IFCHR | 0o666,
1052        st_rdev: DeviceId::new(10, 59).bits(),
1053        st_nlink: 1,
1054        st_blksize: 4096,
1055        ..Default::default()
1056    }
1057}
1058
1059// TODO(https://fxbug.dev/485370648) remove when unnecessary
1060fn get_fake_ion_statx() -> statx {
1061    statx {
1062        stx_mask: uapi::STATX_BASIC_STATS,
1063        stx_mode: (uapi::S_IFCHR | 0o666) as u16,
1064        stx_rdev_major: 10,
1065        stx_rdev_minor: 59,
1066        stx_nlink: 1,
1067        stx_blksize: 4096,
1068        ..Default::default()
1069    }
1070}
1071
1072pub fn sys_fstatat64(
1073    locked: &mut Locked<Unlocked>,
1074    current_task: &CurrentTask,
1075    dir_fd: FdNumber,
1076    user_path: UserCString,
1077    buffer: StatPtr,
1078    flags: u32,
1079) -> Result<(), Errno> {
1080    let lookup_flags =
1081        LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT)?;
1082    let result = match lookup_at(locked, current_task, dir_fd, user_path, lookup_flags) {
1083        Ok(name) => name.entry.node.stat(locked, current_task)?,
1084        // TODO(https://fxbug.dev/485370648) remove when unnecessary
1085        Err(e) if e == errno!(ENOENT) && current_task.kernel().features.fake_ion => {
1086            let path = current_task.read_path(user_path)?;
1087            if path == b"/dev/ion" {
1088                get_fake_ion_stat()
1089            } else {
1090                return Err(e);
1091            }
1092        }
1093        Err(e) => return Err(e),
1094    };
1095    current_task.write_multi_arch_object(buffer, result)?;
1096    Ok(())
1097}
1098
1099pub use sys_fstatat64 as sys_newfstatat;
1100
1101pub fn sys_statx(
1102    locked: &mut Locked<Unlocked>,
1103    current_task: &CurrentTask,
1104    dir_fd: FdNumber,
1105    user_path: UserCString,
1106    flags: u32,
1107    mask: u32,
1108    statxbuf: UserRef<statx>,
1109) -> Result<(), Errno> {
1110    let statx_flags = StatxFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
1111    if statx_flags & (StatxFlags::AT_STATX_FORCE_SYNC | StatxFlags::AT_STATX_DONT_SYNC)
1112        == (StatxFlags::AT_STATX_FORCE_SYNC | StatxFlags::AT_STATX_DONT_SYNC)
1113    {
1114        return error!(EINVAL);
1115    }
1116
1117    let result =
1118        match lookup_at(locked, current_task, dir_fd, user_path, LookupFlags::from(statx_flags)) {
1119            Ok(name) => name.entry.node.statx(locked, current_task, statx_flags, mask)?,
1120            // TODO(https://fxbug.dev/485370648) remove when unnecessary
1121            Err(e) if e == errno!(ENOENT) && current_task.kernel().features.fake_ion => {
1122                let path = current_task.read_path(user_path)?;
1123                if path == b"/dev/ion" {
1124                    get_fake_ion_statx()
1125                } else {
1126                    return Err(e);
1127                }
1128            }
1129            Err(e) => return Err(e),
1130        };
1131    current_task.write_object(statxbuf, &result)?;
1132    Ok(())
1133}
1134
1135pub fn sys_readlinkat(
1136    locked: &mut Locked<Unlocked>,
1137    current_task: &CurrentTask,
1138    dir_fd: FdNumber,
1139    user_path: UserCString,
1140    buffer: UserAddress,
1141    buffer_size: usize,
1142) -> Result<usize, Errno> {
1143    let path = current_task.read_path(user_path)?;
1144    let lookup_flags = if path.is_empty() {
1145        if dir_fd == FdNumber::AT_FDCWD {
1146            return error!(ENOENT);
1147        }
1148        LookupFlags {
1149            allow_empty_path: true,
1150            symlink_mode: SymlinkMode::NoFollow,
1151            ..Default::default()
1152        }
1153    } else {
1154        LookupFlags::no_follow()
1155    };
1156    let name = lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?;
1157
1158    let target = match name.readlink(locked, current_task)? {
1159        SymlinkTarget::Path(path) => path,
1160        SymlinkTarget::Node(node) => node.path(&current_task.fs()),
1161    };
1162
1163    if buffer_size == 0 {
1164        return error!(EINVAL);
1165    }
1166    // Cap the returned length at buffer_size.
1167    let length = std::cmp::min(buffer_size, target.len());
1168    current_task.write_memory(buffer, &target[..length])?;
1169    Ok(length)
1170}
1171
1172pub fn sys_truncate(
1173    locked: &mut Locked<Unlocked>,
1174    current_task: &CurrentTask,
1175    user_path: UserCString,
1176    length: off_t,
1177) -> Result<(), Errno> {
1178    let length = length.try_into().map_err(|_| errno!(EINVAL))?;
1179    let name =
1180        lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
1181    name.truncate(locked, current_task, length)?;
1182    Ok(())
1183}
1184
1185pub fn sys_ftruncate(
1186    locked: &mut Locked<Unlocked>,
1187    current_task: &CurrentTask,
1188    fd: FdNumber,
1189    length: off_t,
1190) -> Result<(), Errno> {
1191    let length = length.try_into().map_err(|_| errno!(EINVAL))?;
1192    let file = current_task.get_file(fd)?;
1193    file.ftruncate(locked, current_task, length)?;
1194    Ok(())
1195}
1196
1197pub fn sys_mkdirat(
1198    locked: &mut Locked<Unlocked>,
1199    current_task: &CurrentTask,
1200    dir_fd: FdNumber,
1201    user_path: UserCString,
1202    mode: FileMode,
1203) -> Result<(), Errno> {
1204    let path = current_task.read_path(user_path)?;
1205
1206    if path.is_empty() {
1207        return error!(ENOENT);
1208    }
1209    let (parent, basename) = current_task.lookup_parent_at(
1210        locked,
1211        &mut LookupContext::default(),
1212        dir_fd,
1213        path.as_ref(),
1214    )?;
1215    parent.create_node(
1216        locked,
1217        current_task,
1218        basename,
1219        mode.with_type(FileMode::IFDIR),
1220        DeviceId::NONE,
1221    )?;
1222    Ok(())
1223}
1224
1225pub fn sys_mknodat(
1226    locked: &mut Locked<Unlocked>,
1227    current_task: &CurrentTask,
1228    dir_fd: FdNumber,
1229    user_path: UserCString,
1230    mode: FileMode,
1231    dev: DeviceId,
1232) -> Result<(), Errno> {
1233    let file_type = match mode.fmt() {
1234        FileMode::IFREG
1235        | FileMode::IFCHR
1236        | FileMode::IFBLK
1237        | FileMode::IFIFO
1238        | FileMode::IFSOCK => mode.fmt(),
1239        FileMode::EMPTY => FileMode::IFREG,
1240        _ => return error!(EINVAL),
1241    };
1242    lookup_parent_at(locked, current_task, dir_fd, user_path, |locked, _, parent, basename| {
1243        parent.create_node(locked, current_task, basename, mode.with_type(file_type), dev)
1244    })?;
1245    Ok(())
1246}
1247
1248pub fn sys_linkat(
1249    locked: &mut Locked<Unlocked>,
1250    current_task: &CurrentTask,
1251    old_dir_fd: FdNumber,
1252    old_user_path: UserCString,
1253    new_dir_fd: FdNumber,
1254    new_user_path: UserCString,
1255    flags: u32,
1256) -> Result<(), Errno> {
1257    if flags & !(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH) != 0 {
1258        track_stub!(TODO("https://fxbug.dev/322875706"), "linkat unknown flags", flags);
1259        return error!(EINVAL);
1260    }
1261
1262    if flags & AT_EMPTY_PATH != 0 {
1263        security::check_task_capable(current_task, CAP_DAC_READ_SEARCH)
1264            .map_err(|_| errno!(ENOENT))?;
1265    }
1266
1267    let flags = LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_FOLLOW)?;
1268    let target = lookup_at(locked, current_task, old_dir_fd, old_user_path, flags)?;
1269    lookup_parent_at(
1270        locked,
1271        current_task,
1272        new_dir_fd,
1273        new_user_path,
1274        |locked, context, parent, basename| {
1275            // The path to a new link cannot end in `/`. That would imply that we are dereferencing
1276            // the link to a directory.
1277            if context.must_be_directory {
1278                return error!(ENOENT);
1279            }
1280            if target.mount != parent.mount {
1281                return error!(EXDEV);
1282            }
1283            parent.link(locked, current_task, basename, &target.entry.node)
1284        },
1285    )?;
1286
1287    Ok(())
1288}
1289
1290pub fn sys_unlinkat(
1291    locked: &mut Locked<Unlocked>,
1292    current_task: &CurrentTask,
1293    dir_fd: FdNumber,
1294    user_path: UserCString,
1295    flags: u32,
1296) -> Result<(), Errno> {
1297    if flags & !AT_REMOVEDIR != 0 {
1298        return error!(EINVAL);
1299    }
1300    let kind =
1301        if flags & AT_REMOVEDIR != 0 { UnlinkKind::Directory } else { UnlinkKind::NonDirectory };
1302    lookup_parent_at(
1303        locked,
1304        current_task,
1305        dir_fd,
1306        user_path,
1307        |locked, context, parent, basename| {
1308            parent.unlink(locked, current_task, basename, kind, context.must_be_directory)
1309        },
1310    )?;
1311    Ok(())
1312}
1313
1314pub fn sys_renameat2(
1315    locked: &mut Locked<Unlocked>,
1316    current_task: &CurrentTask,
1317    old_dir_fd: FdNumber,
1318    old_user_path: UserCString,
1319    new_dir_fd: FdNumber,
1320    new_user_path: UserCString,
1321    flags: u32,
1322) -> Result<(), Errno> {
1323    let flags = RenameFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
1324    if flags.intersects(RenameFlags::INTERNAL) {
1325        return error!(EINVAL);
1326    };
1327
1328    // RENAME_EXCHANGE cannot be combined with the other flags.
1329    if flags.contains(RenameFlags::EXCHANGE)
1330        && flags.intersects(RenameFlags::NOREPLACE | RenameFlags::WHITEOUT)
1331    {
1332        return error!(EINVAL);
1333    }
1334
1335    // RENAME_WHITEOUT is not supported.
1336    if flags.contains(RenameFlags::WHITEOUT) {
1337        track_stub!(TODO("https://fxbug.dev/322875416"), "RENAME_WHITEOUT");
1338        return error!(ENOSYS);
1339    };
1340
1341    let mut lookup = |dir_fd, user_path| {
1342        lookup_parent_at(locked, current_task, dir_fd, user_path, |_, _, parent, basename| {
1343            Ok((parent, basename.to_owned()))
1344        })
1345    };
1346
1347    let (old_parent, old_basename) = lookup(old_dir_fd, old_user_path)?;
1348    let (new_parent, new_basename) = lookup(new_dir_fd, new_user_path)?;
1349
1350    if new_basename.len() > NAME_MAX as usize {
1351        return error!(ENAMETOOLONG);
1352    }
1353
1354    NamespaceNode::rename(
1355        locked,
1356        current_task,
1357        &old_parent,
1358        old_basename.as_ref(),
1359        &new_parent,
1360        new_basename.as_ref(),
1361        flags,
1362    )
1363}
1364
1365pub fn sys_fchmod(
1366    locked: &mut Locked<Unlocked>,
1367    current_task: &CurrentTask,
1368    fd: FdNumber,
1369    mode: FileMode,
1370) -> Result<(), Errno> {
1371    // Remove the filetype from the mode.
1372    let mode = mode & FileMode::PERMISSIONS;
1373    let file = current_task.get_file(fd)?;
1374    file.name.entry.node.chmod(locked, current_task, &file.name.mount, mode)?;
1375    file.name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1376    Ok(())
1377}
1378
1379pub fn sys_fchmodat(
1380    locked: &mut Locked<Unlocked>,
1381    current_task: &CurrentTask,
1382    dir_fd: FdNumber,
1383    user_path: UserCString,
1384    mode: FileMode,
1385) -> Result<(), Errno> {
1386    // Remove the filetype from the mode.
1387    let mode = mode & FileMode::PERMISSIONS;
1388    let name = lookup_at(locked, current_task, dir_fd, user_path, LookupFlags::default())?;
1389    name.entry.node.chmod(locked, current_task, &name.mount, mode)?;
1390    name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1391    Ok(())
1392}
1393
1394fn maybe_uid(id: u32) -> Option<uid_t> {
1395    if id == u32::MAX { None } else { Some(id) }
1396}
1397
1398pub fn sys_fchown(
1399    locked: &mut Locked<Unlocked>,
1400    current_task: &CurrentTask,
1401    fd: FdNumber,
1402    owner: u32,
1403    group: u32,
1404) -> Result<(), Errno> {
1405    let file = current_task.get_file(fd)?;
1406    file.name.entry.node.chown(
1407        locked,
1408        current_task,
1409        &file.name.mount,
1410        maybe_uid(owner),
1411        maybe_uid(group),
1412    )?;
1413    file.name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1414    Ok(())
1415}
1416
1417pub fn sys_fchownat(
1418    locked: &mut Locked<Unlocked>,
1419    current_task: &CurrentTask,
1420    dir_fd: FdNumber,
1421    user_path: UserCString,
1422    owner: u32,
1423    group: u32,
1424    flags: u32,
1425) -> Result<(), Errno> {
1426    let flags = LookupFlags::from_bits(flags, AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW)?;
1427    let name = lookup_at(locked, current_task, dir_fd, user_path, flags)?;
1428    name.entry.node.chown(locked, current_task, &name.mount, maybe_uid(owner), maybe_uid(group))?;
1429    name.entry.notify_ignoring_excl_unlink(InotifyMask::ATTRIB);
1430    Ok(())
1431}
1432
1433fn read_xattr_name(current_task: &CurrentTask, name_addr: UserCString) -> Result<FsString, Errno> {
1434    let name = current_task
1435        .read_c_string_to_vec(name_addr, XATTR_NAME_MAX as usize + 1)
1436        .map_err(|e| if e == ENAMETOOLONG { errno!(ERANGE) } else { e })?;
1437    if name.is_empty() {
1438        return error!(ERANGE);
1439    }
1440    let dot_index = memchr::memchr(b'.', &name).ok_or_else(|| errno!(ENOTSUP))?;
1441    if name[dot_index + 1..].is_empty() {
1442        return error!(EINVAL);
1443    }
1444    match &name[..dot_index] {
1445        b"user" | b"security" | b"trusted" | b"system" => {}
1446        _ => return error!(ENOTSUP),
1447    }
1448    Ok(name)
1449}
1450
1451fn do_getxattr(
1452    locked: &mut Locked<Unlocked>,
1453    current_task: &CurrentTask,
1454    node: &NamespaceNode,
1455    name_addr: UserCString,
1456    value_addr: UserAddress,
1457    size: usize,
1458) -> Result<usize, Errno> {
1459    let name = read_xattr_name(current_task, name_addr)?;
1460    let value =
1461        match node.entry.node.get_xattr(locked, current_task, &node.mount, name.as_ref(), size)? {
1462            ValueOrSize::Size(s) => return Ok(s),
1463            ValueOrSize::Value(v) => v,
1464        };
1465    if size == 0 {
1466        return Ok(value.len());
1467    }
1468    if size < value.len() {
1469        return error!(ERANGE);
1470    }
1471    current_task.write_memory(value_addr, &value)
1472}
1473
1474pub fn sys_getxattr(
1475    locked: &mut Locked<Unlocked>,
1476    current_task: &CurrentTask,
1477    path_addr: UserCString,
1478    name_addr: UserCString,
1479    value_addr: UserAddress,
1480    size: usize,
1481) -> Result<usize, Errno> {
1482    let node =
1483        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1484    do_getxattr(locked, current_task, &node, name_addr, value_addr, size)
1485}
1486
1487pub fn sys_fgetxattr(
1488    locked: &mut Locked<Unlocked>,
1489    current_task: &CurrentTask,
1490    fd: FdNumber,
1491    name_addr: UserCString,
1492    value_addr: UserAddress,
1493    size: usize,
1494) -> Result<usize, Errno> {
1495    let file = current_task.get_file(fd)?;
1496    do_getxattr(locked, current_task, &file.name, name_addr, value_addr, size)
1497}
1498
1499pub fn sys_lgetxattr(
1500    locked: &mut Locked<Unlocked>,
1501    current_task: &CurrentTask,
1502    path_addr: UserCString,
1503    name_addr: UserCString,
1504    value_addr: UserAddress,
1505    size: usize,
1506) -> Result<usize, Errno> {
1507    let node =
1508        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1509    do_getxattr(locked, current_task, &node, name_addr, value_addr, size)
1510}
1511
1512fn do_setxattr(
1513    locked: &mut Locked<Unlocked>,
1514    current_task: &CurrentTask,
1515    node: &NamespaceNode,
1516    name_addr: UserCString,
1517    value_addr: UserAddress,
1518    size: usize,
1519    flags: u32,
1520) -> Result<(), Errno> {
1521    if size > XATTR_NAME_MAX as usize {
1522        return error!(E2BIG);
1523    }
1524
1525    let op = match flags {
1526        0 => XattrOp::Set,
1527        XATTR_CREATE => XattrOp::Create,
1528        XATTR_REPLACE => XattrOp::Replace,
1529        _ => return error!(EINVAL),
1530    };
1531    let name = read_xattr_name(current_task, name_addr)?;
1532    let value = FsString::from(current_task.read_memory_to_vec(value_addr, size)?);
1533    node.entry.node.set_xattr(locked, current_task, &node.mount, name.as_ref(), value.as_ref(), op)
1534}
1535
1536pub fn sys_fsetxattr(
1537    locked: &mut Locked<Unlocked>,
1538    current_task: &CurrentTask,
1539    fd: FdNumber,
1540    name_addr: UserCString,
1541    value_addr: UserAddress,
1542    size: usize,
1543    flags: u32,
1544) -> Result<(), Errno> {
1545    let file = current_task.get_file(fd)?;
1546    do_setxattr(locked, current_task, &file.name, name_addr, value_addr, size, flags)
1547}
1548
1549pub fn sys_lsetxattr(
1550    locked: &mut Locked<Unlocked>,
1551    current_task: &CurrentTask,
1552    path_addr: UserCString,
1553    name_addr: UserCString,
1554    value_addr: UserAddress,
1555    size: usize,
1556    flags: u32,
1557) -> Result<(), Errno> {
1558    let node =
1559        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1560    do_setxattr(locked, current_task, &node, name_addr, value_addr, size, flags)
1561}
1562
1563pub fn sys_setxattr(
1564    locked: &mut Locked<Unlocked>,
1565    current_task: &CurrentTask,
1566    path_addr: UserCString,
1567    name_addr: UserCString,
1568    value_addr: UserAddress,
1569    size: usize,
1570    flags: u32,
1571) -> Result<(), Errno> {
1572    let node =
1573        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1574    do_setxattr(locked, current_task, &node, name_addr, value_addr, size, flags)
1575}
1576
1577fn do_removexattr(
1578    locked: &mut Locked<Unlocked>,
1579    current_task: &CurrentTask,
1580    node: &NamespaceNode,
1581    name_addr: UserCString,
1582) -> Result<(), Errno> {
1583    let mode = node.entry.node.info().mode;
1584    if mode.is_chr() || mode.is_fifo() {
1585        return error!(EPERM);
1586    }
1587    let name = read_xattr_name(current_task, name_addr)?;
1588    node.entry.node.remove_xattr(locked, current_task, &node.mount, name.as_ref())
1589}
1590
1591pub fn sys_removexattr(
1592    locked: &mut Locked<Unlocked>,
1593    current_task: &CurrentTask,
1594    path_addr: UserCString,
1595    name_addr: UserCString,
1596) -> Result<(), Errno> {
1597    let node =
1598        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1599    do_removexattr(locked, current_task, &node, name_addr)
1600}
1601
1602pub fn sys_lremovexattr(
1603    locked: &mut Locked<Unlocked>,
1604    current_task: &CurrentTask,
1605    path_addr: UserCString,
1606    name_addr: UserCString,
1607) -> Result<(), Errno> {
1608    let node =
1609        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1610    do_removexattr(locked, current_task, &node, name_addr)
1611}
1612
1613pub fn sys_fremovexattr(
1614    locked: &mut Locked<Unlocked>,
1615    current_task: &CurrentTask,
1616    fd: FdNumber,
1617    name_addr: UserCString,
1618) -> Result<(), Errno> {
1619    let file = current_task.get_file(fd)?;
1620    do_removexattr(locked, current_task, &file.name, name_addr)
1621}
1622
1623fn do_listxattr(
1624    locked: &mut Locked<Unlocked>,
1625    current_task: &CurrentTask,
1626    node: &NamespaceNode,
1627    list_addr: UserAddress,
1628    size: usize,
1629) -> Result<usize, Errno> {
1630    let security_xattr = security::fs_node_listsecurity(current_task, &node.entry.node);
1631    let xattrs = match node.entry.node.list_xattrs(locked, current_task, size) {
1632        Ok(ValueOrSize::Size(s)) => return Ok(s + security_xattr.map_or(0, |s| s.len() + 1)),
1633        Ok(ValueOrSize::Value(mut v)) => {
1634            if let Some(security_value) = security_xattr {
1635                if !v.contains(&security_value) {
1636                    v.push(security_value);
1637                }
1638            }
1639            v
1640        }
1641        Err(e) => {
1642            if e.code != ENOTSUP || security_xattr.is_none() {
1643                return Err(e);
1644            }
1645            vec![security_xattr.unwrap()]
1646        }
1647    };
1648
1649    let mut list = vec![];
1650    for name in xattrs.iter() {
1651        list.extend_from_slice(name);
1652        list.push(b'\0');
1653    }
1654    if size == 0 {
1655        return Ok(list.len());
1656    }
1657    if size < list.len() {
1658        return error!(ERANGE);
1659    }
1660    current_task.write_memory(list_addr, &list)
1661}
1662
1663pub fn sys_listxattr(
1664    locked: &mut Locked<Unlocked>,
1665    current_task: &CurrentTask,
1666    path_addr: UserCString,
1667    list_addr: UserAddress,
1668    size: usize,
1669) -> Result<usize, Errno> {
1670    let node =
1671        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::default())?;
1672    do_listxattr(locked, current_task, &node, list_addr, size)
1673}
1674
1675pub fn sys_llistxattr(
1676    locked: &mut Locked<Unlocked>,
1677    current_task: &CurrentTask,
1678    path_addr: UserCString,
1679    list_addr: UserAddress,
1680    size: usize,
1681) -> Result<usize, Errno> {
1682    let node =
1683        lookup_at(locked, current_task, FdNumber::AT_FDCWD, path_addr, LookupFlags::no_follow())?;
1684    do_listxattr(locked, current_task, &node, list_addr, size)
1685}
1686
1687pub fn sys_flistxattr(
1688    locked: &mut Locked<Unlocked>,
1689    current_task: &CurrentTask,
1690    fd: FdNumber,
1691    list_addr: UserAddress,
1692    size: usize,
1693) -> Result<usize, Errno> {
1694    let file = current_task.get_file(fd)?;
1695    do_listxattr(locked, current_task, &file.name, list_addr, size)
1696}
1697
1698pub fn sys_getcwd(
1699    _locked: &mut Locked<Unlocked>,
1700    current_task: &CurrentTask,
1701    buf: UserAddress,
1702    size: usize,
1703) -> Result<usize, Errno> {
1704    let root = current_task.fs().root();
1705    let cwd = current_task.fs().cwd();
1706    let mut user_cwd = match cwd.path_from_root(Some(&root)) {
1707        PathWithReachability::Reachable(path) => path,
1708        PathWithReachability::Unreachable(mut path) => {
1709            let mut combined = vec![];
1710            combined.extend_from_slice(b"(unreachable)");
1711            combined.append(&mut path);
1712            combined.into()
1713        }
1714    };
1715    user_cwd.push(b'\0');
1716    if user_cwd.len() > size {
1717        return error!(ERANGE);
1718    }
1719    current_task.write_memory(buf, &user_cwd)?;
1720    Ok(user_cwd.len())
1721}
1722
1723pub fn sys_umask(
1724    _locked: &mut Locked<Unlocked>,
1725    current_task: &CurrentTask,
1726    umask: FileMode,
1727) -> Result<FileMode, Errno> {
1728    Ok(current_task.fs().set_umask(umask))
1729}
1730
1731fn get_fd_flags(flags: u32) -> FdFlags {
1732    if flags & O_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() }
1733}
1734
1735pub fn sys_pipe2(
1736    locked: &mut Locked<Unlocked>,
1737    current_task: &CurrentTask,
1738    user_pipe: UserRef<FdNumber>,
1739    flags: u32,
1740) -> Result<(), Errno> {
1741    let supported_file_flags = OpenFlags::NONBLOCK | OpenFlags::DIRECT;
1742    if flags & !(O_CLOEXEC | supported_file_flags.bits()) != 0 {
1743        return error!(EINVAL);
1744    }
1745    let (read, write) = new_pipe(locked, current_task)?;
1746
1747    let file_flags = OpenFlags::from_bits_truncate(flags & supported_file_flags.bits());
1748    read.update_file_flags(file_flags, supported_file_flags);
1749    write.update_file_flags(file_flags, supported_file_flags);
1750
1751    let fd_flags = get_fd_flags(flags);
1752    let fd_read = current_task.add_file(locked, read, fd_flags)?;
1753    let fd_write = current_task.add_file(locked, write, fd_flags)?;
1754    log_trace!("pipe2 -> [{:#x}, {:#x}]", fd_read.raw(), fd_write.raw());
1755
1756    current_task.write_object(user_pipe, &fd_read)?;
1757    let user_pipe = user_pipe.next()?;
1758    current_task.write_object(user_pipe, &fd_write)?;
1759
1760    Ok(())
1761}
1762
1763pub fn sys_ioctl(
1764    locked: &mut Locked<Unlocked>,
1765    current_task: &CurrentTask,
1766    fd: FdNumber,
1767    request: u32,
1768    arg: SyscallArg,
1769) -> Result<SyscallResult, Errno> {
1770    match request {
1771        FIOCLEX | FIONCLEX => {
1772            current_task.live().files.ioctl_fd_flags(current_task, fd, request)?;
1773            Ok(SUCCESS)
1774        }
1775        _ => {
1776            let file = current_task.get_file(fd)?;
1777            file.ioctl(locked, current_task, request, arg)
1778        }
1779    }
1780}
1781
1782pub fn sys_symlinkat(
1783    locked: &mut Locked<Unlocked>,
1784    current_task: &CurrentTask,
1785    user_target: UserCString,
1786    new_dir_fd: FdNumber,
1787    user_path: UserCString,
1788) -> Result<(), Errno> {
1789    let target = current_task.read_path(user_target)?;
1790    if target.is_empty() {
1791        return error!(ENOENT);
1792    }
1793
1794    let path = current_task.read_path(user_path)?;
1795    // TODO: This check could probably be moved into parent.symlink(..).
1796    if path.is_empty() {
1797        return error!(ENOENT);
1798    }
1799
1800    let res = lookup_parent_at(
1801        locked,
1802        current_task,
1803        new_dir_fd,
1804        user_path,
1805        |locked, context, parent, basename| {
1806            // The path to a new symlink cannot end in `/`. That would imply that we are dereferencing
1807            // the symlink to a directory.
1808            //
1809            // See https://pubs.opengroup.org/onlinepubs/9699919799/xrat/V4_xbd_chap03.html#tag_21_03_00_75
1810            if context.must_be_directory {
1811                return error!(ENOENT);
1812            }
1813            parent.create_symlink(locked, current_task, basename, target.as_ref())
1814        },
1815    );
1816    res?;
1817    Ok(())
1818}
1819
1820pub fn sys_dup(
1821    locked: &mut Locked<Unlocked>,
1822    current_task: &CurrentTask,
1823    oldfd: FdNumber,
1824) -> Result<FdNumber, Errno> {
1825    current_task.live().files.duplicate(
1826        locked,
1827        current_task,
1828        oldfd,
1829        TargetFdNumber::Default,
1830        FdFlags::empty(),
1831    )
1832}
1833
1834pub fn sys_dup3(
1835    locked: &mut Locked<Unlocked>,
1836    current_task: &CurrentTask,
1837    oldfd: FdNumber,
1838    newfd: FdNumber,
1839    flags: u32,
1840) -> Result<FdNumber, Errno> {
1841    if oldfd == newfd {
1842        return error!(EINVAL);
1843    }
1844    if flags & !O_CLOEXEC != 0 {
1845        return error!(EINVAL);
1846    }
1847    let fd_flags = get_fd_flags(flags);
1848    current_task.live().files.duplicate(
1849        locked,
1850        current_task,
1851        oldfd,
1852        TargetFdNumber::Specific(newfd),
1853        fd_flags,
1854    )?;
1855    Ok(newfd)
1856}
1857
1858/// A memfd file descriptor cannot have a name longer than 250 bytes, including
1859/// the null terminator.
1860///
1861/// See Errors section of https://man7.org/linux/man-pages/man2/memfd_create.2.html
1862const MEMFD_NAME_MAX_LEN: usize = 250;
1863
1864pub fn sys_memfd_create(
1865    locked: &mut Locked<Unlocked>,
1866    current_task: &CurrentTask,
1867    user_name: UserCString,
1868    flags: u32,
1869) -> Result<FdNumber, Errno> {
1870    const HUGE_SHIFTED_MASK: u32 = MFD_HUGE_MASK << MFD_HUGE_SHIFT;
1871
1872    if flags
1873        & !(MFD_CLOEXEC
1874            | MFD_ALLOW_SEALING
1875            | MFD_HUGETLB
1876            | HUGE_SHIFTED_MASK
1877            | MFD_NOEXEC_SEAL
1878            | MFD_EXEC)
1879        != 0
1880    {
1881        track_stub!(TODO("https://fxbug.dev/322875665"), "memfd_create unknown flags", flags);
1882        return error!(EINVAL);
1883    }
1884
1885    let _huge_page_size = if flags & MFD_HUGETLB != 0 {
1886        Some(flags & HUGE_SHIFTED_MASK)
1887    } else {
1888        if flags & HUGE_SHIFTED_MASK != 0 {
1889            return error!(EINVAL);
1890        }
1891        None
1892    };
1893
1894    let name = current_task
1895        .read_c_string_to_vec(user_name, MEMFD_NAME_MAX_LEN)
1896        .map_err(|e| if e == ENAMETOOLONG { errno!(EINVAL) } else { e })?;
1897
1898    // This behavior matches MEMFD_NOEXEC_SCOPE_EXEC, which states:
1899    //   > memfd_create() without MFD_EXEC nor MFD_NOEXEC_SEAL acts like MFD_EXEC was set.
1900    //
1901    // This behavior can be changed on Linux via sysctl vm.memfd_noexec, which is pid namespaced.
1902    // We do not currently support changing this behavior.
1903    let seals = if flags & MFD_NOEXEC_SEAL != 0 {
1904        SealFlags::NO_EXEC
1905    } else if flags & MFD_ALLOW_SEALING != 0 {
1906        SealFlags::empty()
1907    } else {
1908        // Forbid sealing, by sealing the seal operation.
1909        SealFlags::SEAL
1910    };
1911
1912    let file = new_memfd(locked, current_task, name, seals, OpenFlags::RDWR)?;
1913
1914    let mut fd_flags = FdFlags::empty();
1915    if flags & MFD_CLOEXEC != 0 {
1916        fd_flags |= FdFlags::CLOEXEC;
1917    }
1918    let fd = current_task.add_file(locked, file, fd_flags)?;
1919    Ok(fd)
1920}
1921
1922pub fn sys_mount(
1923    locked: &mut Locked<Unlocked>,
1924    current_task: &CurrentTask,
1925    source_addr: UserCString,
1926    target_addr: UserCString,
1927    filesystemtype_addr: UserCString,
1928    flags: u32,
1929    data_addr: UserCString,
1930) -> Result<(), Errno> {
1931    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1932
1933    let flags = MountFlags::from_bits(flags).ok_or_else(|| {
1934        track_stub!(
1935            TODO("https://fxbug.dev/322875327"),
1936            "mount unknown flags",
1937            flags & !MountFlags::from_bits_truncate(flags).bits()
1938        );
1939        errno!(EINVAL)
1940    })?;
1941
1942    let target =
1943        lookup_at(locked, current_task, FdNumber::AT_FDCWD, target_addr, LookupFlags::default())?;
1944
1945    security::sb_mount(current_task, &target, flags)?;
1946
1947    if flags.contains(MountFlags::REMOUNT) {
1948        do_mount_remount(current_task, target, flags, data_addr)
1949    } else if flags.contains(MountFlags::BIND) {
1950        do_mount_bind(locked, current_task, source_addr, target, flags)
1951    } else if flags.intersects(MountFlags::SHARED | MountFlags::PRIVATE | MountFlags::DOWNSTREAM) {
1952        do_mount_change_propagation_type(current_task, target, flags)
1953    } else {
1954        do_mount_create(
1955            locked,
1956            current_task,
1957            source_addr,
1958            target,
1959            filesystemtype_addr,
1960            data_addr,
1961            flags,
1962        )
1963    }
1964}
1965
1966fn do_mount_remount(
1967    current_task: &CurrentTask,
1968    target: NamespaceNode,
1969    flags: MountFlags,
1970    data_addr: UserCString,
1971) -> Result<(), Errno> {
1972    if !data_addr.is_null() {
1973        track_stub!(TODO("https://fxbug.dev/322875506"), "MS_REMOUNT: Updating data");
1974    }
1975    let mount = target.mount_if_root()?;
1976
1977    let data = current_task.read_path_if_non_null(data_addr)?;
1978    let mount_options =
1979        security::sb_eat_lsm_opts(current_task.kernel(), &mut MountParams::parse(data.as_ref())?)?;
1980
1981    if !flags.contains(MountFlags::BIND) {
1982        security::sb_remount(current_task, &mount, mount_options)?;
1983
1984        // From <https://man7.org/linux/man-pages/man2/mount.2.html>
1985        //
1986        //   Since Linux 2.6.26, the MS_REMOUNT flag can be used with MS_BIND
1987        //   to modify only the per-mount-point flags.  This is particularly
1988        //   useful for setting or clearing the "read-only" flag on a mount
1989        //   without changing the underlying filesystem.
1990        track_stub!(TODO("https://fxbug.dev/322875215"), "MS_REMOUNT: Updating superblock flags");
1991    }
1992
1993    let mut updated_flags = flags & MountFlags::CHANGEABLE_WITH_REMOUNT;
1994    // TODO: https://fxbug.dev/322875215 - Support non-bind remount and remove this.
1995    if target.entry.node.fs().options.flags.contains(FileSystemFlags::RDONLY) {
1996        updated_flags |= MountFlags::RDONLY;
1997    }
1998    mount.update_flags(updated_flags.mountpoint_flags());
1999
2000    Ok(())
2001}
2002
2003fn do_mount_bind(
2004    locked: &mut Locked<Unlocked>,
2005    current_task: &CurrentTask,
2006    source_addr: UserCString,
2007    target: NamespaceNode,
2008    flags: MountFlags,
2009) -> Result<(), Errno> {
2010    let source =
2011        lookup_at(locked, current_task, FdNumber::AT_FDCWD, source_addr, LookupFlags::default())?;
2012    log_trace!(
2013        source:% = source.path(&current_task.fs()),
2014        target:% = target.path(&current_task.fs()),
2015        flags:?;
2016        "do_mount_bind",
2017    );
2018    target.mount(WhatToMount::Bind(source), flags.mountpoint_flags())
2019}
2020
2021fn do_mount_change_propagation_type(
2022    current_task: &CurrentTask,
2023    target: NamespaceNode,
2024    flags: MountFlags,
2025) -> Result<(), Errno> {
2026    log_trace!(
2027        target:% = target.path(&current_task.fs()),
2028        flags:?;
2029        "do_mount_change_propagation_type",
2030    );
2031
2032    // Flag validation. Of the three propagation type flags, exactly one must be passed. The only
2033    // valid flags other than propagation type are MS_SILENT and MS_REC.
2034    //
2035    // Use if statements to find the first propagation type flag, then check for valid flags using
2036    // only the first propagation flag and MS_REC / MS_SILENT as valid flags.
2037    let propagation_flag = if flags.contains(MountFlags::SHARED) {
2038        MountFlags::SHARED
2039    } else if flags.contains(MountFlags::PRIVATE) {
2040        MountFlags::PRIVATE
2041    } else if flags.contains(MountFlags::DOWNSTREAM) {
2042        MountFlags::DOWNSTREAM
2043    } else {
2044        return error!(EINVAL);
2045    };
2046    if flags.intersects(!(propagation_flag | MountFlags::REC | MountFlags::SILENT)) {
2047        return error!(EINVAL);
2048    }
2049
2050    let mount = target.mount_if_root()?;
2051    mount.change_propagation(propagation_flag, flags.contains(MountFlags::REC));
2052    Ok(())
2053}
2054
2055fn do_mount_create(
2056    locked: &mut Locked<Unlocked>,
2057    current_task: &CurrentTask,
2058    source_addr: UserCString,
2059    target: NamespaceNode,
2060    filesystemtype_addr: UserCString,
2061    data_addr: UserCString,
2062    flags: MountFlags,
2063) -> Result<(), Errno> {
2064    let source = current_task.read_path_if_non_null(source_addr)?;
2065    let fs_type = current_task.read_path(filesystemtype_addr)?;
2066    let data = current_task.read_path_if_non_null(data_addr)?;
2067    log_trace!(
2068        source:%,
2069        target:% = target.path(&current_task.fs()),
2070        fs_type:%,
2071        data:%;
2072        "do_mount_create",
2073    );
2074
2075    let options = FileSystemOptions {
2076        source: source.into(),
2077        flags: flags.file_system_flags(),
2078        params: MountParams::parse(data.as_ref())?,
2079    };
2080
2081    let fs = current_task.create_filesystem(locked, fs_type.as_ref(), options)?;
2082
2083    security::sb_kern_mount(current_task, &fs)?;
2084    target.mount(WhatToMount::Fs(fs), flags.mountpoint_flags())
2085}
2086
2087pub fn sys_umount2(
2088    locked: &mut Locked<Unlocked>,
2089    current_task: &CurrentTask,
2090    target_addr: UserCString,
2091    flags: u32,
2092) -> Result<(), Errno> {
2093    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
2094
2095    let unmount_flags = UnmountFlags::from_bits(flags).ok_or_else(|| {
2096        track_stub!(
2097            TODO("https://fxbug.dev/322875327"),
2098            "unmount unknown flags",
2099            flags & !UnmountFlags::from_bits_truncate(flags).bits()
2100        );
2101        errno!(EINVAL)
2102    })?;
2103
2104    if unmount_flags.contains(UnmountFlags::EXPIRE)
2105        && (unmount_flags.contains(UnmountFlags::FORCE)
2106            || unmount_flags.contains(UnmountFlags::DETACH))
2107    {
2108        return error!(EINVAL);
2109    }
2110
2111    let lookup_flags = if unmount_flags.contains(UnmountFlags::NOFOLLOW) {
2112        LookupFlags::no_follow()
2113    } else {
2114        LookupFlags::default()
2115    };
2116    let target = lookup_at(locked, current_task, FdNumber::AT_FDCWD, target_addr, lookup_flags)?;
2117
2118    security::sb_umount(current_task, &target, unmount_flags)?;
2119
2120    target.unmount(unmount_flags)
2121}
2122
2123pub fn sys_eventfd2(
2124    locked: &mut Locked<Unlocked>,
2125    current_task: &CurrentTask,
2126    value: u32,
2127    flags: u32,
2128) -> Result<FdNumber, Errno> {
2129    if flags & !(EFD_CLOEXEC | EFD_NONBLOCK | EFD_SEMAPHORE) != 0 {
2130        return error!(EINVAL);
2131    }
2132    let blocking = (flags & EFD_NONBLOCK) == 0;
2133    let eventfd_type =
2134        if (flags & EFD_SEMAPHORE) == 0 { EventFdType::Counter } else { EventFdType::Semaphore };
2135    let file = new_eventfd(locked, current_task, value, eventfd_type, blocking);
2136    let fd_flags = if flags & EFD_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() };
2137    let fd = current_task.add_file(locked, file, fd_flags)?;
2138    Ok(fd)
2139}
2140
2141pub fn sys_pidfd_open(
2142    locked: &mut Locked<Unlocked>,
2143    current_task: &CurrentTask,
2144    pid: pid_t,
2145    flags: u32,
2146) -> Result<FdNumber, Errno> {
2147    if flags & !PIDFD_NONBLOCK != 0 {
2148        return error!(EINVAL);
2149    }
2150    if pid <= 0 {
2151        return error!(EINVAL);
2152    }
2153
2154    let file = {
2155        let pid_table = current_task.kernel().pids.read();
2156
2157        let blocking = (flags & PIDFD_NONBLOCK) == 0;
2158        let open_flags = if blocking { OpenFlags::empty() } else { OpenFlags::NONBLOCK };
2159
2160        // Validate that a process (and not just a task) entry exists for the PID.
2161        let task = pid_table.get_task(pid);
2162        let file = match (pid_table.get_process(pid), task.upgrade()) {
2163            (Some(ProcessEntryRef::Process(proc)), Some(task)) => {
2164                new_pidfd(locked, current_task, &proc, &*task.mm()?, open_flags)
2165            }
2166            (Some(ProcessEntryRef::Zombie(_)), _) => {
2167                new_zombie_pidfd(locked, current_task, open_flags)
2168            }
2169            (None, Some(_)) => return error!(EINVAL),
2170            _ => return error!(ESRCH),
2171        };
2172        file
2173    };
2174
2175    current_task.add_file(locked, file, FdFlags::CLOEXEC)
2176}
2177
2178pub fn sys_pidfd_getfd(
2179    locked: &mut Locked<Unlocked>,
2180    current_task: &CurrentTask,
2181    pidfd: FdNumber,
2182    targetfd: FdNumber,
2183    flags: u32,
2184) -> Result<FdNumber, Errno> {
2185    if flags != 0 {
2186        return error!(EINVAL);
2187    }
2188
2189    let file = current_task.get_file(pidfd)?;
2190    let tg = file.as_thread_group_key()?;
2191    let tg = tg.upgrade().ok_or_else(|| errno!(ESRCH))?;
2192    let task = TempRef::into_static(tg.read().tasks().next().ok_or_else(|| errno!(ESRCH))?);
2193
2194    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_ATTACH_REALCREDS, &task)?;
2195
2196    let target_file = task.live()?.files.get(targetfd)?;
2197    current_task.add_file(locked, target_file, FdFlags::CLOEXEC)
2198}
2199
2200pub fn sys_timerfd_create(
2201    locked: &mut Locked<Unlocked>,
2202    current_task: &CurrentTask,
2203    clock_id: u32,
2204    flags: u32,
2205) -> Result<FdNumber, Errno> {
2206    let timeline = match clock_id {
2207        CLOCK_MONOTONIC => Timeline::Monotonic,
2208        CLOCK_BOOTTIME | CLOCK_BOOTTIME_ALARM => Timeline::BootInstant,
2209        CLOCK_REALTIME | CLOCK_REALTIME_ALARM => Timeline::RealTime,
2210        _ => return error!(EINVAL),
2211    };
2212    let timer_type = match clock_id {
2213        CLOCK_MONOTONIC | CLOCK_BOOTTIME | CLOCK_REALTIME => TimerWakeup::Regular,
2214        CLOCK_BOOTTIME_ALARM | CLOCK_REALTIME_ALARM => {
2215            security::check_task_capable(current_task, CAP_WAKE_ALARM)?;
2216            TimerWakeup::Alarm
2217        }
2218        _ => return error!(EINVAL),
2219    };
2220    if flags & !(TFD_NONBLOCK | TFD_CLOEXEC) != 0 {
2221        track_stub!(TODO("https://fxbug.dev/322875488"), "timerfd_create unknown flags", flags);
2222        return error!(EINVAL);
2223    }
2224    log_trace!("timerfd_create(clock_id={:?}, flags={:#x})", clock_id, flags);
2225
2226    let mut open_flags = OpenFlags::RDWR;
2227    if flags & TFD_NONBLOCK != 0 {
2228        open_flags |= OpenFlags::NONBLOCK;
2229    }
2230
2231    let mut fd_flags = FdFlags::empty();
2232    if flags & TFD_CLOEXEC != 0 {
2233        fd_flags |= FdFlags::CLOEXEC;
2234    };
2235
2236    let timer = TimerFile::new_file(locked, current_task, timer_type, timeline, open_flags)?;
2237    let fd = current_task.add_file(locked, timer, fd_flags)?;
2238    Ok(fd)
2239}
2240
2241pub fn sys_timerfd_gettime(
2242    _locked: &mut Locked<Unlocked>,
2243    current_task: &CurrentTask,
2244    fd: FdNumber,
2245    user_current_value: ITimerSpecPtr,
2246) -> Result<(), Errno> {
2247    let file = current_task.get_file(fd)?;
2248    let timer_file = file.downcast_file::<TimerFile>().ok_or_else(|| errno!(EINVAL))?;
2249    let timer_info = timer_file.current_timer_spec();
2250    log_trace!("timerfd_gettime(fd={:?}, current_value={:?})", fd, timer_info);
2251    current_task.write_multi_arch_object(user_current_value, timer_info)?;
2252    Ok(())
2253}
2254
2255pub fn sys_timerfd_settime(
2256    _locked: &mut Locked<Unlocked>,
2257    current_task: &CurrentTask,
2258    fd: FdNumber,
2259    flags: u32,
2260    user_new_value: ITimerSpecPtr,
2261    user_old_value: ITimerSpecPtr,
2262) -> Result<(), Errno> {
2263    if flags & !(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET) != 0 {
2264        track_stub!(TODO("https://fxbug.dev/322874722"), "timerfd_settime unknown flags", flags);
2265        return error!(EINVAL);
2266    }
2267
2268    let file = current_task.get_file(fd)?;
2269    let timer_file = file.downcast_file::<TimerFile>().ok_or_else(|| errno!(EINVAL))?;
2270
2271    let new_timer_spec = current_task.read_multi_arch_object(user_new_value)?;
2272    let old_timer_spec = timer_file.set_timer_spec(current_task, &file, new_timer_spec, flags)?;
2273    log_trace!(
2274        "timerfd_settime(fd={:?}, flags={:#x}, new_value={:?}, current_value={:?})",
2275        fd,
2276        flags,
2277        new_timer_spec,
2278        old_timer_spec
2279    );
2280    if !user_old_value.is_null() {
2281        current_task.write_multi_arch_object(user_old_value, old_timer_spec)?;
2282    }
2283    Ok(())
2284}
2285
2286fn deadline_after_timespec(
2287    current_task: &CurrentTask,
2288    user_timespec: TimeSpecPtr,
2289) -> Result<zx::MonotonicInstant, Errno> {
2290    if user_timespec.is_null() {
2291        Ok(zx::MonotonicInstant::INFINITE)
2292    } else {
2293        let timespec = current_task.read_multi_arch_object(user_timespec)?;
2294        Ok(zx::MonotonicInstant::after(duration_from_timespec(timespec)?))
2295    }
2296}
2297
2298static_assertions::assert_eq_size!(uapi::__kernel_fd_set, uapi::arch32::__kernel_fd_set);
2299
2300fn select(
2301    locked: &mut Locked<Unlocked>,
2302    current_task: &mut CurrentTask,
2303    nfds: u32,
2304    readfds_addr: UserRef<__kernel_fd_set>,
2305    writefds_addr: UserRef<__kernel_fd_set>,
2306    exceptfds_addr: UserRef<__kernel_fd_set>,
2307    deadline: zx::MonotonicInstant,
2308    sigmask_addr: UserRef<pselect6_sigmask>,
2309) -> Result<i32, Errno> {
2310    const BITS_PER_BYTE: usize = 8;
2311
2312    fn sizeof<T>(_: &T) -> usize {
2313        BITS_PER_BYTE * std::mem::size_of::<T>()
2314    }
2315    fn is_fd_set(set: &__kernel_fd_set, fd: usize) -> bool {
2316        let index = fd / sizeof(&set.fds_bits[0]);
2317        let remainder = fd % sizeof(&set.fds_bits[0]);
2318        set.fds_bits[index] & (1 << remainder) > 0
2319    }
2320    fn add_fd_to_set(set: &mut __kernel_fd_set, fd: usize) {
2321        let index = fd / sizeof(&set.fds_bits[0]);
2322        let remainder = fd % sizeof(&set.fds_bits[0]);
2323
2324        set.fds_bits[index] |= 1 << remainder;
2325    }
2326    let read_fd_set = |addr: UserRef<__kernel_fd_set>| {
2327        if addr.is_null() { Ok(Default::default()) } else { current_task.read_object(addr) }
2328    };
2329
2330    if nfds as usize > BITS_PER_BYTE * std::mem::size_of::<__kernel_fd_set>() {
2331        return error!(EINVAL);
2332    }
2333
2334    let read_events =
2335        FdEvents::from_bits_truncate(POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR);
2336    let write_events = FdEvents::from_bits_truncate(POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR);
2337    let except_events = FdEvents::from_bits_truncate(POLLPRI);
2338
2339    let readfds = read_fd_set(readfds_addr)?;
2340    let writefds = read_fd_set(writefds_addr)?;
2341    let exceptfds = read_fd_set(exceptfds_addr)?;
2342
2343    let sets = &[(read_events, &readfds), (write_events, &writefds), (except_events, &exceptfds)];
2344    let waiter = FileWaiter::<FdNumber>::default();
2345
2346    for fd in 0..nfds {
2347        let mut aggregated_events = FdEvents::empty();
2348        for (events, fds) in sets.iter() {
2349            if is_fd_set(fds, fd as usize) {
2350                aggregated_events |= *events;
2351            }
2352        }
2353        if !aggregated_events.is_empty() {
2354            let fd = FdNumber::from_raw(fd as i32);
2355            let file = current_task.get_file(fd)?;
2356            waiter.add(locked, current_task, fd, Some(&file), aggregated_events)?;
2357        }
2358    }
2359
2360    let mask = if !sigmask_addr.is_null() {
2361        let sigmask = current_task.read_object(sigmask_addr)?;
2362        let mask = if sigmask.ss.is_null() {
2363            current_task.read().signal_mask()
2364        } else {
2365            if sigmask.ss_len < std::mem::size_of::<sigset_t>() {
2366                return error!(EINVAL);
2367            }
2368            current_task.read_object(sigmask.ss.into())?
2369        };
2370        Some(mask)
2371    } else {
2372        None
2373    };
2374
2375    waiter.wait(locked, current_task, mask, deadline)?;
2376
2377    let mut num_fds = 0;
2378    let mut readfds_out: __kernel_fd_set = Default::default();
2379    let mut writefds_out: __kernel_fd_set = Default::default();
2380    let mut exceptfds_out: __kernel_fd_set = Default::default();
2381    let mut sets = [
2382        (read_events, &readfds, &mut readfds_out),
2383        (write_events, &writefds, &mut writefds_out),
2384        (except_events, &exceptfds, &mut exceptfds_out),
2385    ];
2386    let mut ready_items = waiter.ready_items.lock();
2387    for ReadyItem { key: ready_key, events: ready_events } in ready_items.drain(..) {
2388        let ready_key = assert_matches::assert_matches!(
2389            ready_key,
2390            ReadyItemKey::FdNumber(v) => v
2391        );
2392
2393        sets.iter_mut().for_each(|(events, fds, fds_out)| {
2394            let fd = ready_key.raw() as usize;
2395            if events.intersects(ready_events) && is_fd_set(fds, fd) {
2396                add_fd_to_set(fds_out, fd);
2397                num_fds += 1;
2398            }
2399        });
2400    }
2401
2402    let write_fd_set =
2403        |addr: UserRef<__kernel_fd_set>, value: __kernel_fd_set| -> Result<(), Errno> {
2404            if !addr.is_null() {
2405                current_task.write_object(addr, &value)?;
2406            }
2407            Ok(())
2408        };
2409    write_fd_set(readfds_addr, readfds_out)?;
2410    write_fd_set(writefds_addr, writefds_out)?;
2411    write_fd_set(exceptfds_addr, exceptfds_out)?;
2412    Ok(num_fds)
2413}
2414
2415pub fn sys_pselect6(
2416    locked: &mut Locked<Unlocked>,
2417    current_task: &mut CurrentTask,
2418    nfds: u32,
2419    readfds_addr: UserRef<__kernel_fd_set>,
2420    writefds_addr: UserRef<__kernel_fd_set>,
2421    exceptfds_addr: UserRef<__kernel_fd_set>,
2422    timeout_addr: TimeSpecPtr,
2423    sigmask_addr: UserRef<pselect6_sigmask>,
2424) -> Result<i32, Errno> {
2425    let deadline = deadline_after_timespec(current_task, timeout_addr)?;
2426
2427    let num_fds = select(
2428        locked,
2429        current_task,
2430        nfds,
2431        readfds_addr,
2432        writefds_addr,
2433        exceptfds_addr,
2434        deadline,
2435        sigmask_addr,
2436    )?;
2437
2438    if !timeout_addr.is_null()
2439        && !current_task
2440            .thread_group()
2441            .read()
2442            .personality
2443            .contains(PersonalityFlags::STICKY_TIMEOUTS)
2444    {
2445        let now = zx::MonotonicInstant::get();
2446        let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0));
2447        current_task.write_multi_arch_object(timeout_addr, timespec_from_duration(remaining))?;
2448    }
2449
2450    Ok(num_fds)
2451}
2452
2453pub fn sys_select(
2454    locked: &mut Locked<Unlocked>,
2455    current_task: &mut CurrentTask,
2456    nfds: u32,
2457    readfds_addr: UserRef<__kernel_fd_set>,
2458    writefds_addr: UserRef<__kernel_fd_set>,
2459    exceptfds_addr: UserRef<__kernel_fd_set>,
2460    timeout_addr: TimeValPtr,
2461) -> Result<i32, Errno> {
2462    let start_time = zx::MonotonicInstant::get();
2463
2464    let deadline = if timeout_addr.is_null() {
2465        zx::MonotonicInstant::INFINITE
2466    } else {
2467        let timeval = current_task.read_multi_arch_object(timeout_addr)?;
2468        start_time + starnix_types::time::duration_from_timeval(timeval)?
2469    };
2470
2471    let num_fds = select(
2472        locked,
2473        current_task,
2474        nfds,
2475        readfds_addr,
2476        writefds_addr,
2477        exceptfds_addr,
2478        deadline,
2479        UserRef::<pselect6_sigmask>::default(),
2480    )?;
2481
2482    if !timeout_addr.is_null()
2483        && !current_task
2484            .thread_group()
2485            .read()
2486            .personality
2487            .contains(PersonalityFlags::STICKY_TIMEOUTS)
2488    {
2489        let now = zx::MonotonicInstant::get();
2490        let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0));
2491        current_task.write_multi_arch_object(
2492            timeout_addr,
2493            starnix_types::time::timeval_from_duration(remaining),
2494        )?;
2495    }
2496
2497    Ok(num_fds)
2498}
2499
2500pub fn sys_epoll_create1(
2501    locked: &mut Locked<Unlocked>,
2502    current_task: &CurrentTask,
2503    flags: u32,
2504) -> Result<FdNumber, Errno> {
2505    if flags & !EPOLL_CLOEXEC != 0 {
2506        return error!(EINVAL);
2507    }
2508    let ep_file = EpollFileObject::new_file(locked, current_task);
2509    let fd_flags = if flags & EPOLL_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() };
2510    let fd = current_task.add_file(locked, ep_file, fd_flags)?;
2511    Ok(fd)
2512}
2513
2514pub fn sys_epoll_ctl(
2515    locked: &mut Locked<Unlocked>,
2516    current_task: &CurrentTask,
2517    epfd: FdNumber,
2518    op: u32,
2519    fd: FdNumber,
2520    event: UserRef<EpollEvent>,
2521) -> Result<(), Errno> {
2522    let file = current_task.get_file(epfd)?;
2523    let epoll_file = file.downcast_file::<EpollFileObject>().ok_or_else(|| errno!(EINVAL))?;
2524    let operand_file = current_task.get_file(fd)?;
2525
2526    if Arc::ptr_eq(&file, &operand_file) {
2527        return error!(EINVAL);
2528    }
2529
2530    let epoll_event = match current_task.read_object(event) {
2531        Ok(mut epoll_event) => {
2532            // If EPOLLWAKEUP is specified in flags, but the caller does not have the CAP_BLOCK_SUSPEND
2533            // capability, then the EPOLLWAKEUP flag is silently ignored.
2534            // See https://man7.org/linux/man-pages/man2/epoll_ctl.2.html
2535            if epoll_event.events().contains(FdEvents::EPOLLWAKEUP) {
2536                if !security::is_task_capable_noaudit(current_task, CAP_BLOCK_SUSPEND) {
2537                    epoll_event.ignore(FdEvents::EPOLLWAKEUP);
2538                }
2539            }
2540            Ok(epoll_event)
2541        }
2542        result => result,
2543    };
2544
2545    match op {
2546        EPOLL_CTL_ADD => {
2547            epoll_file.add(locked, current_task, &operand_file, &file, epoll_event?)?;
2548            operand_file.register_epfd(&file);
2549        }
2550        EPOLL_CTL_MOD => {
2551            epoll_file.modify(locked, current_task, &operand_file, epoll_event?)?;
2552        }
2553        EPOLL_CTL_DEL => {
2554            epoll_file.delete(current_task, &operand_file)?;
2555            operand_file.unregister_epfd(&file);
2556        }
2557        _ => return error!(EINVAL),
2558    }
2559    Ok(())
2560}
2561
2562// Backend for sys_epoll_pwait and sys_epoll_pwait2 that takes an already-decoded deadline.
2563fn do_epoll_pwait(
2564    locked: &mut Locked<Unlocked>,
2565    current_task: &mut CurrentTask,
2566    epfd: FdNumber,
2567    events: UserRef<EpollEvent>,
2568    unvalidated_max_events: i32,
2569    deadline: zx::MonotonicInstant,
2570    user_sigmask: UserRef<SigSet>,
2571) -> Result<usize, Errno> {
2572    let file = current_task.get_file(epfd)?;
2573    let epoll_file = file.downcast_file::<EpollFileObject>().ok_or_else(|| errno!(EINVAL))?;
2574
2575    // Max_events must be greater than 0.
2576    let max_events: usize = unvalidated_max_events.try_into().map_err(|_| errno!(EINVAL))?;
2577    if max_events == 0 {
2578        return error!(EINVAL);
2579    }
2580
2581    // Return early if the user passes an obviously invalid pointer. This avoids dropping events
2582    // for common pointer errors. When we catch bad pointers after the wait is complete when the
2583    // memory is actually written, the events will be lost. This check is not a guarantee.
2584    current_task
2585        .mm()?
2586        .check_plausible(events.addr(), max_events * std::mem::size_of::<EpollEvent>())?;
2587
2588    let active_events = if !user_sigmask.is_null() {
2589        let signal_mask = current_task.read_object(user_sigmask)?;
2590        current_task.wait_with_temporary_mask(locked, signal_mask, |locked, current_task| {
2591            epoll_file.wait(locked, current_task, max_events, deadline)
2592        })?
2593    } else {
2594        epoll_file.wait(locked, current_task, max_events, deadline)?
2595    };
2596
2597    current_task.write_objects(events, &active_events)?;
2598    Ok(active_events.len())
2599}
2600
2601pub fn sys_epoll_pwait(
2602    locked: &mut Locked<Unlocked>,
2603    current_task: &mut CurrentTask,
2604    epfd: FdNumber,
2605    events: UserRef<EpollEvent>,
2606    max_events: i32,
2607    timeout: i32,
2608    user_sigmask: UserRef<SigSet>,
2609) -> Result<usize, Errno> {
2610    let deadline = zx::MonotonicInstant::after(duration_from_poll_timeout(timeout)?);
2611    do_epoll_pwait(locked, current_task, epfd, events, max_events, deadline, user_sigmask)
2612}
2613
2614pub fn sys_epoll_pwait2(
2615    locked: &mut Locked<Unlocked>,
2616    current_task: &mut CurrentTask,
2617    epfd: FdNumber,
2618    events: UserRef<EpollEvent>,
2619    max_events: i32,
2620    user_timespec: TimeSpecPtr,
2621    user_sigmask: UserRef<SigSet>,
2622) -> Result<usize, Errno> {
2623    let deadline = deadline_after_timespec(current_task, user_timespec)?;
2624    do_epoll_pwait(locked, current_task, epfd, events, max_events, deadline, user_sigmask)
2625}
2626
2627struct FileWaiter<Key: Into<ReadyItemKey>> {
2628    waiter: Waiter,
2629    ready_items: Arc<Mutex<VecDeque<ReadyItem>>>,
2630    _marker: PhantomData<Key>,
2631}
2632
2633impl<Key: Into<ReadyItemKey>> Default for FileWaiter<Key> {
2634    fn default() -> Self {
2635        Self { waiter: Waiter::new(), ready_items: Default::default(), _marker: PhantomData }
2636    }
2637}
2638
2639impl<Key: Into<ReadyItemKey>> FileWaiter<Key> {
2640    fn add<L>(
2641        &self,
2642        locked: &mut Locked<L>,
2643        current_task: &CurrentTask,
2644        key: Key,
2645        file: Option<&FileHandle>,
2646        requested_events: FdEvents,
2647    ) -> Result<(), Errno>
2648    where
2649        L: LockEqualOrBefore<FileOpsCore>,
2650    {
2651        let key = key.into();
2652
2653        if let Some(file) = file {
2654            let sought_events = requested_events | FdEvents::POLLERR | FdEvents::POLLHUP;
2655
2656            let handler =
2657                EventHandler::Enqueue { key, queue: self.ready_items.clone(), sought_events };
2658            file.wait_async(locked, current_task, &self.waiter, sought_events, handler);
2659            let current_events = file.query_events(locked, current_task)? & sought_events;
2660            if !current_events.is_empty() {
2661                self.ready_items.lock().push_back(ReadyItem { key, events: current_events });
2662            }
2663        } else {
2664            self.ready_items.lock().push_back(ReadyItem { key, events: FdEvents::POLLNVAL });
2665        }
2666        Ok(())
2667    }
2668
2669    fn wait<L>(
2670        &self,
2671        locked: &mut Locked<L>,
2672        current_task: &mut CurrentTask,
2673        signal_mask: Option<SigSet>,
2674        deadline: zx::MonotonicInstant,
2675    ) -> Result<(), Errno>
2676    where
2677        L: LockEqualOrBefore<FileOpsCore>,
2678    {
2679        if self.ready_items.lock().is_empty() {
2680            // When wait_until() returns Ok() it means there was a wake up; however there may not
2681            // be a ready item, for example if waiting on a sync file with multiple sync points.
2682            // Keep waiting until there's at least one ready item.
2683            let signal_mask = signal_mask.unwrap_or_else(|| current_task.read().signal_mask());
2684            let mut result = current_task.wait_with_temporary_mask(
2685                locked,
2686                signal_mask,
2687                |locked, current_task| self.waiter.wait_until(locked, current_task, deadline),
2688            );
2689            loop {
2690                match result {
2691                    Err(err) if err == ETIMEDOUT => return Ok(()),
2692                    Ok(()) => {
2693                        if !self.ready_items.lock().is_empty() {
2694                            break;
2695                        }
2696                    }
2697                    result => result?,
2698                };
2699                result = self.waiter.wait_until(locked, current_task, deadline);
2700            }
2701        }
2702        Ok(())
2703    }
2704}
2705
2706pub fn poll(
2707    locked: &mut Locked<Unlocked>,
2708    current_task: &mut CurrentTask,
2709    user_pollfds: UserRef<pollfd>,
2710    num_fds: i32,
2711    mask: Option<SigSet>,
2712    deadline: zx::MonotonicInstant,
2713) -> Result<usize, Errno> {
2714    if num_fds < 0
2715        || num_fds as u64 > current_task.thread_group().get_rlimit(locked, Resource::NOFILE)
2716    {
2717        return error!(EINVAL);
2718    }
2719
2720    let mut pollfds = vec![pollfd::default(); num_fds as usize];
2721    let waiter = FileWaiter::<usize>::default();
2722
2723    for (index, poll_descriptor) in pollfds.iter_mut().enumerate() {
2724        *poll_descriptor = current_task.read_object(user_pollfds.at(index)?)?;
2725        poll_descriptor.revents = 0;
2726        if poll_descriptor.fd < 0 {
2727            continue;
2728        }
2729        let file = current_task.get_file(FdNumber::from_raw(poll_descriptor.fd)).ok();
2730        waiter.add(
2731            locked,
2732            current_task,
2733            index,
2734            file.as_ref(),
2735            FdEvents::from_bits_truncate(poll_descriptor.events as u32),
2736        )?;
2737    }
2738
2739    waiter.wait(locked, current_task, mask, deadline)?;
2740
2741    let mut ready_items = waiter.ready_items.lock();
2742    let mut unique_ready_items =
2743        bit_vec::BitVec::from_elem(usize::try_from(num_fds).unwrap(), false);
2744    for ReadyItem { key: ready_key, events: ready_events } in ready_items.drain(..) {
2745        let ready_key = assert_matches::assert_matches!(
2746            ready_key,
2747            ReadyItemKey::Usize(v) => v
2748        );
2749        let interested_events = FdEvents::from_bits_truncate(pollfds[ready_key].events as u32)
2750            | FdEvents::POLLERR
2751            | FdEvents::POLLHUP
2752            | FdEvents::POLLNVAL;
2753        let return_events = (interested_events & ready_events).bits();
2754        pollfds[ready_key].revents = return_events as i16;
2755        unique_ready_items.set(ready_key, true);
2756    }
2757
2758    for (index, poll_descriptor) in pollfds.iter().enumerate() {
2759        current_task.write_object(user_pollfds.at(index)?, poll_descriptor)?;
2760    }
2761
2762    Ok(unique_ready_items.into_iter().filter(Clone::clone).count())
2763}
2764
2765pub fn sys_ppoll(
2766    locked: &mut Locked<Unlocked>,
2767    current_task: &mut CurrentTask,
2768    user_fds: UserRef<pollfd>,
2769    num_fds: i32,
2770    user_timespec: TimeSpecPtr,
2771    user_mask: UserRef<SigSet>,
2772    sigset_size: usize,
2773) -> Result<usize, Errno> {
2774    let start_time = zx::MonotonicInstant::get();
2775
2776    let timeout = if user_timespec.is_null() {
2777        // Passing -1 to poll is equivalent to an infinite timeout.
2778        -1
2779    } else {
2780        let ts = current_task.read_multi_arch_object(user_timespec)?;
2781        duration_from_timespec::<zx::MonotonicTimeline>(ts)?.into_millis() as i32
2782    };
2783
2784    let deadline = start_time + duration_from_poll_timeout(timeout)?;
2785
2786    let mask = if !user_mask.is_null() {
2787        if sigset_size != std::mem::size_of::<SigSet>() {
2788            return error!(EINVAL);
2789        }
2790        let mask = current_task.read_object(user_mask)?;
2791        Some(mask)
2792    } else {
2793        None
2794    };
2795
2796    let poll_result = poll(locked, current_task, user_fds, num_fds, mask, deadline);
2797
2798    if user_timespec.is_null() {
2799        return poll_result;
2800    }
2801
2802    let now = zx::MonotonicInstant::get();
2803    let remaining = std::cmp::max(deadline - now, zx::MonotonicDuration::from_seconds(0));
2804    let remaining_timespec = timespec_from_duration(remaining);
2805
2806    // From gVisor: "ppoll is normally restartable if interrupted by something other than a signal
2807    // handled by the application (i.e. returns ERESTARTNOHAND). However, if
2808    // [copy out] failed, then the restarted ppoll would use the wrong timeout, so the
2809    // error should be left as EINTR."
2810    match (current_task.write_multi_arch_object(user_timespec, remaining_timespec), poll_result) {
2811        // If write was ok, and poll was ok, return poll result.
2812        (Ok(_), Ok(num_events)) => Ok(num_events),
2813        (Ok(_), Err(e)) if e == EINTR => {
2814            error!(ERESTARTNOHAND)
2815        }
2816        (Ok(_), poll_result) => poll_result,
2817        // If write was a failure, return the poll result unchanged.
2818        (Err(_), poll_result) => poll_result,
2819    }
2820}
2821
2822pub fn sys_flock(
2823    locked: &mut Locked<Unlocked>,
2824    current_task: &CurrentTask,
2825    fd: FdNumber,
2826    operation: u32,
2827) -> Result<(), Errno> {
2828    let file = current_task.get_file(fd)?;
2829    let operation = FlockOperation::from_flags(operation)?;
2830    security::check_file_lock_access(current_task, &file)?;
2831    file.flock(locked, current_task, operation)
2832}
2833
2834pub fn sys_sync(locked: &mut Locked<Unlocked>, current_task: &CurrentTask) -> Result<(), Errno> {
2835    current_task.kernel().mounts.sync_all(locked, current_task)
2836}
2837
2838pub fn sys_syncfs(
2839    locked: &mut Locked<Unlocked>,
2840    current_task: &CurrentTask,
2841    fd: FdNumber,
2842) -> Result<(), Errno> {
2843    let file = current_task.get_file(fd)?;
2844    file.fs.sync(locked, current_task)
2845}
2846
2847pub fn sys_fsync(
2848    _locked: &mut Locked<Unlocked>,
2849    current_task: &CurrentTask,
2850    fd: FdNumber,
2851) -> Result<(), Errno> {
2852    let file = current_task.get_file(fd)?;
2853    file.sync(current_task)
2854}
2855
2856pub fn sys_fdatasync(
2857    _locked: &mut Locked<Unlocked>,
2858    current_task: &CurrentTask,
2859    fd: FdNumber,
2860) -> Result<(), Errno> {
2861    let file = current_task.get_file(fd)?;
2862    file.data_sync(current_task)
2863}
2864
2865pub fn sys_sync_file_range(
2866    _locked: &mut Locked<Unlocked>,
2867    current_task: &CurrentTask,
2868    fd: FdNumber,
2869    offset: off_t,
2870    length: off_t,
2871    flags: u32,
2872) -> Result<(), Errno> {
2873    const KNOWN_FLAGS: u32 = uapi::SYNC_FILE_RANGE_WAIT_BEFORE
2874        | uapi::SYNC_FILE_RANGE_WRITE
2875        | uapi::SYNC_FILE_RANGE_WAIT_AFTER;
2876    if flags & !KNOWN_FLAGS != 0 {
2877        return error!(EINVAL);
2878    }
2879
2880    let file = current_task.get_file(fd)?;
2881
2882    if offset < 0 || length < 0 {
2883        return error!(EINVAL);
2884    }
2885
2886    checked_add_offset_and_length(offset as usize, length as usize)?;
2887
2888    // From <https://linux.die.net/man/2/sync_file_range>:
2889    //
2890    //   fd refers to something other than a regular file, a block device, a directory, or a symbolic link.
2891    let mode = file.node().info().mode;
2892    if !mode.is_reg() && !mode.is_blk() && !mode.is_dir() && !mode.is_lnk() {
2893        return error!(ESPIPE);
2894    }
2895
2896    if flags == 0 {
2897        return Ok(());
2898    }
2899
2900    // Syncing the whole file is much more than we need for sync_file_range, which only needs to
2901    // sync the specified data range.
2902    file.data_sync(current_task)
2903}
2904
2905pub fn sys_fadvise64(
2906    _locked: &mut Locked<Unlocked>,
2907    current_task: &CurrentTask,
2908    fd: FdNumber,
2909    offset: off_t,
2910    len: off_t,
2911    advice: u32,
2912) -> Result<(), Errno> {
2913    match advice {
2914        POSIX_FADV_NORMAL => track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_NORMAL"),
2915        POSIX_FADV_RANDOM => track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_RANDOM"),
2916        POSIX_FADV_SEQUENTIAL => {
2917            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_SEQUENTIAL")
2918        }
2919        POSIX_FADV_WILLNEED => {
2920            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_WILLNEED")
2921        }
2922        POSIX_FADV_DONTNEED => {
2923            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_DONTNEED")
2924        }
2925        POSIX_FADV_NOREUSE => {
2926            track_stub!(TODO("https://fxbug.dev/297434181"), "POSIX_FADV_NOREUSE")
2927        }
2928        _ => {
2929            track_stub!(TODO("https://fxbug.dev/322875684"), "fadvise64 unknown advice", advice);
2930            return error!(EINVAL);
2931        }
2932    }
2933
2934    if offset < 0 || len < 0 {
2935        return error!(EINVAL);
2936    }
2937
2938    let file = current_task.get_file(fd)?;
2939    // fadvise does not work on pipes.
2940    if file.downcast_file::<PipeFileObject>().is_some() {
2941        return error!(ESPIPE);
2942    }
2943
2944    // fadvise does not work on paths.
2945    if file.flags().contains(OpenFlags::PATH) {
2946        return error!(EBADF);
2947    }
2948
2949    Ok(())
2950}
2951
2952pub fn sys_fallocate(
2953    locked: &mut Locked<Unlocked>,
2954    current_task: &CurrentTask,
2955    fd: FdNumber,
2956    mode: u32,
2957    offset: off_t,
2958    len: off_t,
2959) -> Result<(), Errno> {
2960    let file = current_task.get_file(fd)?;
2961
2962    // Offset must not be less than 0.
2963    // Length must not be less than or equal to 0.
2964    // See https://man7.org/linux/man-pages/man2/fallocate.2.html#ERRORS
2965    if offset < 0 || len <= 0 {
2966        return error!(EINVAL);
2967    }
2968
2969    let mode = FallocMode::from_bits(mode).ok_or_else(|| errno!(EINVAL))?;
2970    file.fallocate(locked, current_task, mode, offset as u64, len as u64)?;
2971
2972    Ok(())
2973}
2974
2975pub fn sys_inotify_init1(
2976    locked: &mut Locked<Unlocked>,
2977    current_task: &CurrentTask,
2978    flags: u32,
2979) -> Result<FdNumber, Errno> {
2980    if flags & !(IN_NONBLOCK | IN_CLOEXEC) != 0 {
2981        return error!(EINVAL);
2982    }
2983    let non_blocking = flags & IN_NONBLOCK != 0;
2984    let close_on_exec = flags & IN_CLOEXEC != 0;
2985    let inotify_file = InotifyFileObject::new_file(locked, current_task, non_blocking);
2986    let fd_flags = if close_on_exec { FdFlags::CLOEXEC } else { FdFlags::empty() };
2987    current_task.add_file(locked, inotify_file, fd_flags)
2988}
2989
2990pub fn sys_inotify_add_watch(
2991    locked: &mut Locked<Unlocked>,
2992    current_task: &CurrentTask,
2993    fd: FdNumber,
2994    user_path: UserCString,
2995    mask: u32,
2996) -> Result<WdNumber, Errno> {
2997    let mask = InotifyMask::from_bits(mask).ok_or_else(|| errno!(EINVAL))?;
2998    if !mask.intersects(InotifyMask::ALL_EVENTS) {
2999        // Mask must include at least 1 event.
3000        return error!(EINVAL);
3001    }
3002    let file = current_task.get_file(fd)?;
3003    let inotify_file = file.downcast_file::<InotifyFileObject>().ok_or_else(|| errno!(EINVAL))?;
3004    let options = if mask.contains(InotifyMask::DONT_FOLLOW) {
3005        LookupFlags::no_follow()
3006    } else {
3007        LookupFlags::default()
3008    };
3009    let watched_node = lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, options)?;
3010    if mask.contains(InotifyMask::ONLYDIR) && !watched_node.entry.node.is_dir() {
3011        return error!(ENOTDIR);
3012    }
3013    inotify_file.add_watch(watched_node.entry, mask, &file)
3014}
3015
3016pub fn sys_inotify_rm_watch(
3017    _locked: &mut Locked<Unlocked>,
3018    current_task: &CurrentTask,
3019    fd: FdNumber,
3020    watch_id: WdNumber,
3021) -> Result<(), Errno> {
3022    let file = current_task.get_file(fd)?;
3023    let inotify_file = file.downcast_file::<InotifyFileObject>().ok_or_else(|| errno!(EINVAL))?;
3024    inotify_file.remove_watch(watch_id, &file)
3025}
3026
3027pub fn sys_utimensat(
3028    locked: &mut Locked<Unlocked>,
3029    current_task: &CurrentTask,
3030    dir_fd: FdNumber,
3031    user_path: UserCString,
3032    user_times: TimeSpecPtr,
3033    flags: u32,
3034) -> Result<(), Errno> {
3035    let (atime, mtime) = if user_times.addr().is_null() {
3036        // If user_times is null, the timestamps are updated to the current time.
3037        (TimeUpdateType::Now, TimeUpdateType::Now)
3038    } else {
3039        let ts = current_task.read_multi_arch_objects_to_vec(user_times, 2)?;
3040        let atime = ts[0];
3041        let mtime = ts[1];
3042        let parse_timespec = |spec: timespec| match spec.tv_nsec {
3043            UTIME_NOW => Ok(TimeUpdateType::Now),
3044            UTIME_OMIT => Ok(TimeUpdateType::Omit),
3045            _ => time_from_timespec(spec).map(TimeUpdateType::Time),
3046        };
3047        (parse_timespec(atime)?, parse_timespec(mtime)?)
3048    };
3049
3050    if let (TimeUpdateType::Omit, TimeUpdateType::Omit) = (atime, mtime) {
3051        return Ok(());
3052    };
3053
3054    // Non-standard feature: if user_path is null, the timestamps are updated on the file referred
3055    // to by dir_fd.
3056    // See https://man7.org/linux/man-pages/man2/utimensat.2.html
3057    let name = if user_path.addr().is_null() {
3058        if dir_fd == FdNumber::AT_FDCWD {
3059            return error!(EFAULT);
3060        }
3061        let (node, _) = current_task.resolve_dir_fd(
3062            locked,
3063            dir_fd,
3064            Default::default(),
3065            ResolveFlags::empty(),
3066        )?;
3067        node
3068    } else {
3069        let lookup_flags = LookupFlags::from_bits(flags, AT_SYMLINK_NOFOLLOW)?;
3070        lookup_at(locked, current_task, dir_fd, user_path, lookup_flags)?
3071    };
3072    name.entry.node.update_atime_mtime(locked, current_task, &name.mount, atime, mtime)?;
3073    let event_mask = match (atime, mtime) {
3074        (_, TimeUpdateType::Omit) => InotifyMask::ACCESS,
3075        (TimeUpdateType::Omit, _) => InotifyMask::MODIFY,
3076        (_, _) => InotifyMask::ATTRIB,
3077    };
3078    name.entry.notify_ignoring_excl_unlink(event_mask);
3079    Ok(())
3080}
3081
/// Handles the `splice` syscall by forwarding all arguments unchanged to [`splice::splice`].
pub fn sys_splice(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd_in: FdNumber,
    off_in: OffsetPtr,
    fd_out: FdNumber,
    off_out: OffsetPtr,
    len: usize,
    flags: u32,
) -> Result<usize, Errno> {
    splice::splice(locked, current_task, fd_in, off_in, fd_out, off_out, len, flags)
}
3094
/// Handles the `vmsplice` syscall by forwarding all arguments unchanged to [`splice::vmsplice`].
pub fn sys_vmsplice(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd: FdNumber,
    iovec_addr: IOVecPtr,
    iovec_count: UserValue<i32>,
    flags: u32,
) -> Result<usize, Errno> {
    splice::vmsplice(locked, current_task, fd, iovec_addr, iovec_count, flags)
}
3105
/// Handles the `copy_file_range` syscall by forwarding all arguments unchanged to
/// [`splice::copy_file_range`].
pub fn sys_copy_file_range(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd_in: FdNumber,
    off_in: OffsetPtr,
    fd_out: FdNumber,
    off_out: OffsetPtr,
    len: usize,
    flags: u32,
) -> Result<usize, Errno> {
    splice::copy_file_range(locked, current_task, fd_in, off_in, fd_out, off_out, len, flags)
}
3118
/// Handles the `tee` syscall by forwarding all arguments unchanged to [`splice::tee`].
pub fn sys_tee(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd_in: FdNumber,
    fd_out: FdNumber,
    len: usize,
    flags: u32,
) -> Result<usize, Errno> {
    splice::tee(locked, current_task, fd_in, fd_out, len, flags)
}
3129
3130pub fn sys_readahead(
3131    _locked: &mut Locked<Unlocked>,
3132    current_task: &CurrentTask,
3133    fd: FdNumber,
3134    offset: off_t,
3135    length: usize,
3136) -> Result<(), Errno> {
3137    let file = current_task.get_file(fd)?;
3138    // Allow only non-negative values of `offset`. Some versions of Linux allow it to be negative,
3139    // but GVisor tests require `readahead()` to fail in this case.
3140    let offset: usize = offset.try_into().map_err(|_| errno!(EINVAL))?;
3141    file.readahead(current_task, offset, length)
3142}
3143
3144pub fn sys_io_setup(
3145    _locked: &mut Locked<Unlocked>,
3146    current_task: &CurrentTask,
3147    user_nr_events: UserValue<u32>,
3148    user_ctx_idp: MultiArchUserRef<uapi::aio_context_t, uapi::arch32::aio_context_t>,
3149) -> Result<(), Errno> {
3150    // From https://man7.org/linux/man-pages/man2/io_setup.2.html:
3151    //
3152    //   EINVAL ctx_idp is not initialized, or the specified nr_events
3153    //   exceeds internal limits.  nr_events should be greater than
3154    //   0.
3155    //
3156    // TODO: Determine what "internal limits" means.
3157    let max_operations =
3158        user_nr_events.validate(0..(i32::MAX as u32)).ok_or_else(|| errno!(EINVAL))? as usize;
3159    if current_task.read_multi_arch_object(user_ctx_idp)? != 0 {
3160        return error!(EINVAL);
3161    }
3162    let ctx_id = AioContext::create(current_task, max_operations)?;
3163    current_task.write_multi_arch_object(user_ctx_idp, ctx_id).map_err(|e| {
3164        let _ = current_task
3165            .mm()
3166            .expect("previous sys_io_setup code verified mm exists")
3167            .destroy_aio_context(ctx_id.into());
3168        e
3169    })?;
3170    Ok(())
3171}
3172
3173pub fn sys_io_submit(
3174    _locked: &mut Locked<Unlocked>,
3175    current_task: &CurrentTask,
3176    ctx_id: aio_context_t,
3177    user_nr: UserValue<i32>,
3178    mut iocb_addrs: IocbPtrPtr,
3179) -> Result<i32, Errno> {
3180    let nr = user_nr.validate(0..i32::MAX).ok_or_else(|| errno!(EINVAL))?;
3181    if nr == 0 {
3182        return Ok(0);
3183    }
3184    let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?;
3185
3186    // `iocbpp` is an array of addresses to iocb's.
3187    let mut num_submitted: i32 = 0;
3188    loop {
3189        let iocb_ref = current_task.read_multi_arch_ptr(iocb_addrs)?;
3190        let control_block = current_task.read_multi_arch_object(iocb_ref)?;
3191
3192        match (num_submitted, ctx.submit(current_task, control_block, iocb_ref)) {
3193            (0, Err(e)) => return Err(e),
3194            (_, Err(_)) => break,
3195            (_, Ok(())) => {
3196                num_submitted += 1;
3197                if num_submitted == nr {
3198                    break;
3199                }
3200            }
3201        };
3202
3203        iocb_addrs = iocb_addrs.next()?;
3204    }
3205
3206    Ok(num_submitted)
3207}
3208
3209pub fn sys_io_getevents(
3210    _locked: &mut Locked<Unlocked>,
3211    current_task: &CurrentTask,
3212    ctx_id: aio_context_t,
3213    min_nr: i64,
3214    nr: i64,
3215    events_ref: UserRef<io_event>,
3216    user_timeout: TimeSpecPtr,
3217) -> Result<i32, Errno> {
3218    if min_nr < 0 || min_nr > nr || nr < 0 {
3219        return error!(EINVAL);
3220    }
3221    let min_results = min_nr as usize;
3222    let max_results = nr as usize;
3223    let deadline = deadline_after_timespec(current_task, user_timeout)?;
3224
3225    let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?;
3226    let events = ctx.get_events(current_task, min_results, max_results, deadline)?;
3227    current_task.write_objects(events_ref, &events)?;
3228
3229    Ok(events.len() as i32)
3230}
3231
3232pub fn sys_io_cancel(
3233    _locked: &mut Locked<Unlocked>,
3234    current_task: &CurrentTask,
3235    ctx_id: aio_context_t,
3236    user_iocb: IocbPtr,
3237    _result: UserRef<io_event>,
3238) -> Result<(), Errno> {
3239    let iocb = current_task.read_multi_arch_object(user_iocb)?;
3240    let ctx = current_task.mm()?.get_aio_context(ctx_id.into()).ok_or_else(|| errno!(EINVAL))?;
3241
3242    ctx.cancel(current_task, iocb, user_iocb)?;
3243    // TODO: Correctly handle return. If the operation is successfully canceled, the event should be copied into the memory pointed to by result without being placed into the completion queue.
3244    track_stub!(TODO("https://fxbug.dev/297433877"), "io_cancel");
3245    Ok(())
3246}
3247
3248pub fn sys_io_destroy(
3249    _locked: &mut Locked<Unlocked>,
3250    current_task: &CurrentTask,
3251    ctx_id: aio_context_t,
3252) -> Result<(), Errno> {
3253    let aio_context = current_task.mm()?.destroy_aio_context(ctx_id.into())?;
3254    std::mem::drop(aio_context);
3255    Ok(())
3256}
3257
3258pub fn sys_io_uring_setup(
3259    locked: &mut Locked<Unlocked>,
3260    current_task: &CurrentTask,
3261    user_entries: UserValue<u32>,
3262    user_params: UserRef<io_uring_params>,
3263) -> Result<FdNumber, Errno> {
3264    // TODO: https://fxbug.dev/397186254 - we will want to do a no-audit CAP_IPC_LOCK capability
3265    // check; see "If not granted CAP_IPC_LOCK io_uring operations are accounted against the user's
3266    // RLIMIT_MEMLOCK limit" at
3267    // https://github.com/SELinuxProject/selinux-notebook/blob/main/src/auditing.md#capability-audit-exemptions
3268
3269    if !current_task.kernel().features.io_uring {
3270        return error!(ENOSYS);
3271    }
3272
3273    // Apply policy from /proc/sys/kernel/io_uring_disabled
3274    let limits = &current_task.kernel().system_limits;
3275    match limits.io_uring_disabled.load(atomic::Ordering::Relaxed) {
3276        0 => (),
3277        1 => {
3278            let io_uring_group = limits.io_uring_group.load(atomic::Ordering::Relaxed).try_into();
3279            if io_uring_group.is_err()
3280                || !current_task.current_creds().is_in_group(io_uring_group.unwrap())
3281            {
3282                security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
3283            }
3284        }
3285        _ => {
3286            return error!(EPERM);
3287        }
3288    }
3289
3290    let entries = user_entries.validate(1..IORING_MAX_ENTRIES).ok_or_else(|| errno!(EINVAL))?;
3291
3292    let mut params = current_task.read_object(user_params)?;
3293    for byte in params.resv {
3294        if byte != 0 {
3295            return error!(EINVAL);
3296        }
3297    }
3298
3299    let file = IoUringFileObject::new_file(locked, current_task, entries, &mut params)?;
3300
3301    // io_uring file descriptors are always created with CLOEXEC.
3302    let fd = current_task.add_file(locked, file, FdFlags::CLOEXEC)?;
3303    current_task.write_object(user_params, &params)?;
3304    Ok(fd)
3305}
3306
3307pub fn sys_io_uring_enter(
3308    locked: &mut Locked<Unlocked>,
3309    current_task: &CurrentTask,
3310    fd: FdNumber,
3311    to_submit: u32,
3312    min_complete: u32,
3313    flags: u32,
3314    _sig: UserRef<SigSet>,
3315    sigset_size: usize,
3316) -> Result<u32, Errno> {
3317    if !current_task.kernel().features.io_uring {
3318        return error!(ENOSYS);
3319    }
3320    if !_sig.is_null() {
3321        if sigset_size != std::mem::size_of::<SigSet>() {
3322            return error!(EINVAL);
3323        }
3324    }
3325    let file = current_task.get_file(fd)?;
3326    let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?;
3327    // TODO(https://fxbug.dev/297431387): Use `_sig` to change the signal mask for `current_task`.
3328    io_uring.enter(locked, current_task, to_submit, min_complete, flags)
3329}
3330
3331pub fn sys_io_uring_register(
3332    locked: &mut Locked<Unlocked>,
3333    current_task: &CurrentTask,
3334    fd: FdNumber,
3335    opcode: u32,
3336    arg: UserAddress,
3337    nr_args: UserValue<u32>,
3338) -> Result<SyscallResult, Errno> {
3339    if !current_task.kernel().features.io_uring {
3340        return error!(ENOSYS);
3341    }
3342    let file = current_task.get_file(fd)?;
3343    let io_uring = file.downcast_file::<IoUringFileObject>().ok_or_else(|| errno!(EOPNOTSUPP))?;
3344    match opcode {
3345        IORING_REGISTER_BUFFERS => {
3346            // TODO(https://fxbug.dev/297431387): Check nr_args for zero and return EINVAL here.
3347            let iovec = IOVecPtr::new(current_task, arg);
3348            let buffers = current_task.read_iovec(iovec, nr_args)?;
3349            io_uring.register_buffers(locked, buffers);
3350            return Ok(SUCCESS);
3351        }
3352        IORING_UNREGISTER_BUFFERS => {
3353            if !arg.is_null() {
3354                return error!(EINVAL);
3355            }
3356            io_uring.unregister_buffers(locked);
3357            return Ok(SUCCESS);
3358        }
3359        IORING_REGISTER_IOWQ_MAX_WORKERS => {
3360            track_stub!(
3361                TODO("https://fxbug.dev/297431387"),
3362                "io_uring_register IORING_REGISTER_IOWQ_MAX_WORKERS",
3363                opcode
3364            );
3365            // The current implementation only ever use 1 worker for read and 1 for write.
3366            return Ok(SUCCESS);
3367        }
3368        IORING_REGISTER_RING_FDS => {
3369            track_stub!(
3370                TODO("https://fxbug.dev/297431387"),
3371                "io_uring_register IORING_REGISTER_RING_FDS",
3372                opcode
3373            );
3374            // The current implementation doesn't use any thread local specific identifier for
3375            // performance. Instead, when registering a fd, just return the passed fd as the value
3376            // to use.
3377            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3378            if nr_args > 16 {
3379                return error!(EINVAL);
3380            }
3381            let updates_addr = UserRef::<uapi::io_uring_rsrc_update>::from(arg);
3382            let mut updates = current_task
3383                .read_objects_to_smallvec::<uapi::io_uring_rsrc_update, 1>(updates_addr, nr_args)?;
3384            let mut result = 0;
3385            for update in updates.iter_mut() {
3386                if update.offset == u32::MAX {
3387                    update.offset = update.data.try_into().map_err(|_| errno!(EINVAL))?;
3388                    result += 1;
3389                }
3390            }
3391            current_task.write_objects(updates_addr, &updates)?;
3392            return Ok(result.into());
3393        }
3394        IORING_UNREGISTER_RING_FDS => {
3395            track_stub!(
3396                TODO("https://fxbug.dev/297431387"),
3397                "io_uring_register IORING_UNREGISTER_RING_FDS",
3398                opcode
3399            );
3400            // Because registering a fd doesn't use any resource currently, unregistering is free.
3401            return Ok(SUCCESS);
3402        }
3403        IORING_REGISTER_PBUF_RING => {
3404            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3405            if nr_args != 1 {
3406                return error!(EINVAL);
3407            }
3408            let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?;
3409            io_uring.register_ring_buffers(locked, buffer_definition)?;
3410            return Ok(SUCCESS);
3411        }
3412
3413        IORING_UNREGISTER_PBUF_RING => {
3414            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3415            if nr_args != 1 {
3416                return error!(EINVAL);
3417            }
3418            let buffer_definition: uapi::io_uring_buf_reg = current_task.read_object(arg.into())?;
3419            io_uring.unregister_ring_buffers(locked, buffer_definition)?;
3420            return Ok(SUCCESS);
3421        }
3422
3423        IORING_REGISTER_PBUF_STATUS => {
3424            let nr_args: usize = nr_args.raw().try_into().map_err(|_| errno!(EINVAL))?;
3425            if nr_args != 1 {
3426                return error!(EINVAL);
3427            }
3428            let buffer_status_addr = UserRef::<uapi::io_uring_buf_status>::from(arg);
3429            let mut buffer_status: uapi::io_uring_buf_status =
3430                current_task.read_object(buffer_status_addr)?;
3431            io_uring.ring_buffer_status(locked, &mut buffer_status)?;
3432            current_task.write_object(buffer_status_addr, &buffer_status)?;
3433            return Ok(SUCCESS);
3434        }
3435
3436        _ => {
3437            track_stub!(
3438                TODO("https://fxbug.dev/297431387"),
3439                "io_uring_register unknown op",
3440                opcode
3441            );
3442            return error!(EINVAL);
3443        }
3444    }
3445}
3446
3447// Syscalls for arch32 usage
3448#[cfg(target_arch = "aarch64")]
3449mod arch32 {
3450    use crate::mm::MemoryAccessorExt;
3451    use crate::task::CurrentTask;
3452    use crate::vfs::syscalls::{
3453        LookupFlags, OpenFlags, lookup_at, sys_dup3, sys_faccessat, sys_fallocate, sys_lseek,
3454        sys_mkdirat, sys_openat, sys_readlinkat, sys_unlinkat,
3455    };
3456    use crate::vfs::{FdNumber, FsNode};
3457    use linux_uapi::off_t;
3458    use starnix_sync::{Locked, Unlocked};
3459    use starnix_syscalls::SyscallArg;
3460    use starnix_types::time::duration_from_poll_timeout;
3461    use starnix_uapi::errors::Errno;
3462    use starnix_uapi::file_mode::FileMode;
3463    use starnix_uapi::signals::SigSet;
3464    use starnix_uapi::user_address::{MultiArchUserRef, UserAddress, UserCString, UserRef};
3465    use starnix_uapi::vfs::EpollEvent;
3466    use starnix_uapi::{AT_REMOVEDIR, errno, error, uapi};
3467
    /// User pointer to a `statfs` structure: `uapi::statfs` for the native layout and
    /// `uapi::arch32::statfs64` for the arch32 layout.
    type StatFs64Ptr = MultiArchUserRef<uapi::statfs, uapi::arch32::statfs64>;
3469
3470    fn merge_low_and_high(low: u32, high: u32) -> off_t {
3471        ((high as off_t) << 32) | (low as off_t)
3472    }
3473
    /// arch32 `open`: forwards to `sys_openat` with `FdNumber::AT_FDCWD` as the directory fd.
    pub fn sys_arch32_open(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_path: UserCString,
        flags: u32,
        mode: FileMode,
    ) -> Result<FdNumber, Errno> {
        sys_openat(locked, current_task, FdNumber::AT_FDCWD, user_path, flags, mode)
    }
3483
    /// arch32 `access`: forwards to `sys_faccessat` with `FdNumber::AT_FDCWD` as the directory
    /// fd.
    pub fn sys_arch32_access(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        user_path: UserCString,
        mode: u32,
    ) -> Result<(), Errno> {
        sys_faccessat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
    }
3492    pub fn stat64(
3493        locked: &mut Locked<Unlocked>,
3494        current_task: &CurrentTask,
3495        node: &FsNode,
3496        arch32_stat_buf: UserRef<uapi::arch32::stat64>,
3497    ) -> Result<(), Errno> {
3498        let stat_buffer = node.stat(locked, current_task)?;
3499        let result: uapi::arch32::stat64 = stat_buffer.try_into().map_err(|_| errno!(EINVAL))?;
3500        // Now we copy to the arch32 version and write.
3501        current_task.write_object(arch32_stat_buf, &result)?;
3502        Ok(())
3503    }
3504
3505    pub fn sys_arch32_fstat64(
3506        locked: &mut Locked<Unlocked>,
3507        current_task: &CurrentTask,
3508        fd: FdNumber,
3509        arch32_stat_buf: UserRef<uapi::arch32::stat64>,
3510    ) -> Result<(), Errno> {
3511        let file = current_task.get_file_allowing_opath(fd)?;
3512        stat64(locked, current_task, file.node(), arch32_stat_buf)
3513    }
3514
3515    pub fn sys_arch32_fallocate(
3516        locked: &mut Locked<Unlocked>,
3517        current_task: &CurrentTask,
3518        fd: FdNumber,
3519        mode: u32,
3520        offset_low: u32,
3521        offset_high: u32,
3522        len_low: u32,
3523        len_high: u32,
3524    ) -> Result<(), Errno> {
3525        let offset = merge_low_and_high(offset_low, offset_high);
3526        let len = merge_low_and_high(len_low, len_high);
3527        sys_fallocate(locked, current_task, fd, mode, offset, len)
3528    }
3529
3530    pub fn sys_arch32_stat64(
3531        locked: &mut Locked<Unlocked>,
3532        current_task: &CurrentTask,
3533        user_path: UserCString,
3534        arch32_stat_buf: UserRef<uapi::arch32::stat64>,
3535    ) -> Result<(), Errno> {
3536        let name =
3537            lookup_at(locked, current_task, FdNumber::AT_FDCWD, user_path, LookupFlags::default())?;
3538        stat64(locked, current_task, &name.entry.node, arch32_stat_buf)
3539    }
3540
3541    pub fn sys_arch32_readlink(
3542        locked: &mut Locked<Unlocked>,
3543        current_task: &CurrentTask,
3544        user_path: UserCString,
3545        buffer: UserAddress,
3546        buffer_size: usize,
3547    ) -> Result<usize, Errno> {
3548        sys_readlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, buffer, buffer_size)
3549    }
3550
3551    pub fn sys_arch32_mkdir(
3552        locked: &mut Locked<Unlocked>,
3553        current_task: &CurrentTask,
3554        user_path: UserCString,
3555        mode: FileMode,
3556    ) -> Result<(), Errno> {
3557        sys_mkdirat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
3558    }
3559
3560    pub fn sys_arch32_rmdir(
3561        locked: &mut Locked<Unlocked>,
3562        current_task: &CurrentTask,
3563        user_path: UserCString,
3564    ) -> Result<(), Errno> {
3565        sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, AT_REMOVEDIR)
3566    }
3567
3568    #[allow(non_snake_case)]
3569    pub fn sys_arch32__llseek(
3570        locked: &mut Locked<Unlocked>,
3571        current_task: &CurrentTask,
3572        fd: FdNumber,
3573        offset_high: u32,
3574        offset_low: u32,
3575        result: UserRef<off_t>,
3576        whence: u32,
3577    ) -> Result<(), Errno> {
3578        let offset = merge_low_and_high(offset_low, offset_high);
3579        let result_value = sys_lseek(locked, current_task, fd, offset, whence)?;
3580        current_task.write_object(result, &result_value).map(|_| ())
3581    }
3582
3583    pub fn sys_arch32_dup2(
3584        locked: &mut Locked<Unlocked>,
3585        current_task: &CurrentTask,
3586        oldfd: FdNumber,
3587        newfd: FdNumber,
3588    ) -> Result<FdNumber, Errno> {
3589        if oldfd == newfd {
3590            // O_PATH allowed for:
3591            //
3592            //  Duplicating the file descriptor (dup(2), fcntl(2)
3593            //  F_DUPFD, etc.).
3594            //
3595            // See https://man7.org/linux/man-pages/man2/open.2.html
3596            current_task.get_file_allowing_opath(oldfd)?;
3597            return Ok(newfd);
3598        }
3599        sys_dup3(locked, current_task, oldfd, newfd, 0)
3600    }
3601
3602    pub fn sys_arch32_unlink(
3603        locked: &mut Locked<Unlocked>,
3604        current_task: &CurrentTask,
3605        user_path: UserCString,
3606    ) -> Result<(), Errno> {
3607        sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, user_path, 0)
3608    }
3609
3610    pub fn sys_arch32_pread64(
3611        locked: &mut Locked<Unlocked>,
3612        current_task: &CurrentTask,
3613        fd: FdNumber,
3614        address: UserAddress,
3615        length: usize,
3616        _: SyscallArg,
3617        offset_low: u32,
3618        offset_high: u32,
3619    ) -> Result<usize, Errno> {
3620        super::sys_pread64(
3621            locked,
3622            current_task,
3623            fd,
3624            address,
3625            length,
3626            merge_low_and_high(offset_low, offset_high),
3627        )
3628    }
3629
3630    pub fn sys_arch32_pwrite64(
3631        locked: &mut Locked<Unlocked>,
3632        current_task: &CurrentTask,
3633        fd: FdNumber,
3634        address: UserAddress,
3635        length: usize,
3636        _: SyscallArg,
3637        offset_low: u32,
3638        offset_high: u32,
3639    ) -> Result<usize, Errno> {
3640        super::sys_pwrite64(
3641            locked,
3642            current_task,
3643            fd,
3644            address,
3645            length,
3646            merge_low_and_high(offset_low, offset_high),
3647        )
3648    }
3649
3650    pub fn sys_arch32_truncate64(
3651        locked: &mut Locked<Unlocked>,
3652        current_task: &CurrentTask,
3653        user_path: UserCString,
3654        _unused: SyscallArg,
3655        length_low: u32,
3656        length_high: u32,
3657    ) -> Result<(), Errno> {
3658        super::sys_truncate(
3659            locked,
3660            current_task,
3661            user_path,
3662            merge_low_and_high(length_low, length_high),
3663        )
3664    }
3665
3666    pub fn sys_arch32_ftruncate64(
3667        locked: &mut Locked<Unlocked>,
3668        current_task: &CurrentTask,
3669        fd: FdNumber,
3670        _: SyscallArg,
3671        length_low: u32,
3672        length_high: u32,
3673    ) -> Result<(), Errno> {
3674        super::sys_ftruncate(locked, current_task, fd, merge_low_and_high(length_low, length_high))
3675    }
3676
3677    pub fn sys_arch32_chmod(
3678        locked: &mut Locked<Unlocked>,
3679        current_task: &CurrentTask,
3680        user_path: UserCString,
3681        mode: FileMode,
3682    ) -> Result<(), Errno> {
3683        super::sys_fchmodat(locked, current_task, FdNumber::AT_FDCWD, user_path, mode)
3684    }
3685
3686    pub fn sys_arch32_chown32(
3687        locked: &mut Locked<Unlocked>,
3688        current_task: &CurrentTask,
3689        user_path: UserCString,
3690        owner: uapi::arch32::__kernel_uid32_t,
3691        group: uapi::arch32::__kernel_uid32_t,
3692    ) -> Result<(), Errno> {
3693        super::sys_fchownat(locked, current_task, FdNumber::AT_FDCWD, user_path, owner, group, 0)
3694    }
3695
3696    pub fn sys_arch32_poll(
3697        locked: &mut Locked<Unlocked>,
3698        current_task: &mut CurrentTask,
3699        user_fds: UserRef<uapi::pollfd>,
3700        num_fds: i32,
3701        timeout: i32,
3702    ) -> Result<usize, Errno> {
3703        let deadline = zx::MonotonicInstant::after(duration_from_poll_timeout(timeout)?);
3704        super::poll(locked, current_task, user_fds, num_fds, None, deadline)
3705    }
3706
3707    pub fn sys_arch32_epoll_create(
3708        locked: &mut Locked<Unlocked>,
3709        current_task: &CurrentTask,
3710        size: i32,
3711    ) -> Result<FdNumber, Errno> {
3712        if size < 1 {
3713            // The man page for epoll_create says the size was used in a previous implementation as
3714            // a hint but no longer does anything. But it's still required to be >= 1 to ensure
3715            // programs are backwards-compatible.
3716            return error!(EINVAL);
3717        }
3718        super::sys_epoll_create1(locked, current_task, 0)
3719    }
3720
3721    pub fn sys_arch32_epoll_wait(
3722        locked: &mut Locked<Unlocked>,
3723        current_task: &mut CurrentTask,
3724        epfd: FdNumber,
3725        events: UserRef<EpollEvent>,
3726        max_events: i32,
3727        timeout: i32,
3728    ) -> Result<usize, Errno> {
3729        super::sys_epoll_pwait(
3730            locked,
3731            current_task,
3732            epfd,
3733            events,
3734            max_events,
3735            timeout,
3736            UserRef::<SigSet>::default(),
3737        )
3738    }
3739
3740    pub fn sys_arch32_rename(
3741        locked: &mut Locked<Unlocked>,
3742        current_task: &CurrentTask,
3743        old_user_path: UserCString,
3744        new_user_path: UserCString,
3745    ) -> Result<(), Errno> {
3746        super::sys_renameat2(
3747            locked,
3748            current_task,
3749            FdNumber::AT_FDCWD,
3750            old_user_path,
3751            FdNumber::AT_FDCWD,
3752            new_user_path,
3753            0,
3754        )
3755    }
3756
3757    pub fn sys_arch32_creat(
3758        locked: &mut Locked<Unlocked>,
3759        current_task: &CurrentTask,
3760        user_path: UserCString,
3761        mode: FileMode,
3762    ) -> Result<FdNumber, Errno> {
3763        super::sys_openat(
3764            locked,
3765            current_task,
3766            FdNumber::AT_FDCWD,
3767            user_path,
3768            (OpenFlags::WRONLY | OpenFlags::CREAT | OpenFlags::TRUNC).bits(),
3769            mode,
3770        )
3771    }
3772
3773    pub fn sys_arch32_symlink(
3774        locked: &mut Locked<Unlocked>,
3775        current_task: &CurrentTask,
3776        user_target: UserCString,
3777        user_path: UserCString,
3778    ) -> Result<(), Errno> {
3779        super::sys_symlinkat(locked, current_task, user_target, FdNumber::AT_FDCWD, user_path)
3780    }
3781
3782    pub fn sys_arch32_eventfd(
3783        locked: &mut Locked<Unlocked>,
3784        current_task: &CurrentTask,
3785        value: u32,
3786    ) -> Result<FdNumber, Errno> {
3787        super::sys_eventfd2(locked, current_task, value, 0)
3788    }
3789
3790    pub fn sys_arch32_inotify_init(
3791        locked: &mut Locked<Unlocked>,
3792        current_task: &CurrentTask,
3793    ) -> Result<FdNumber, Errno> {
3794        super::sys_inotify_init1(locked, current_task, 0)
3795    }
3796
3797    pub fn sys_arch32_link(
3798        locked: &mut Locked<Unlocked>,
3799        current_task: &CurrentTask,
3800        old_user_path: UserCString,
3801        new_user_path: UserCString,
3802    ) -> Result<(), Errno> {
3803        super::sys_linkat(
3804            locked,
3805            current_task,
3806            FdNumber::AT_FDCWD,
3807            old_user_path,
3808            FdNumber::AT_FDCWD,
3809            new_user_path,
3810            0,
3811        )
3812    }
3813
3814    pub fn sys_arch32_fstatfs64(
3815        locked: &mut Locked<Unlocked>,
3816        current_task: &CurrentTask,
3817        fd: FdNumber,
3818        user_buf_len: u32,
3819        user_buf: StatFs64Ptr,
3820    ) -> Result<(), Errno> {
3821        if (user_buf_len as usize) < std::mem::size_of::<uapi::arch32::statfs64>() {
3822            return error!(EINVAL);
3823        }
3824        super::fstatfs(locked, current_task, fd, user_buf)
3825    }
3826
3827    pub fn sys_arch32_statfs64(
3828        locked: &mut Locked<Unlocked>,
3829        current_task: &CurrentTask,
3830        user_path: UserCString,
3831        user_buf_len: u32,
3832        user_buf: StatFs64Ptr,
3833    ) -> Result<(), Errno> {
3834        if (user_buf_len as usize) < std::mem::size_of::<uapi::arch32::statfs64>() {
3835            return error!(EINVAL);
3836        }
3837        super::statfs(locked, current_task, user_path, user_buf)
3838    }
3839
3840    pub fn sys_arch32_arm_fadvise64_64(
3841        locked: &mut Locked<Unlocked>,
3842        current_task: &CurrentTask,
3843        fd: FdNumber,
3844        advice: u32,
3845        offset_low: u32,
3846        offset_high: u32,
3847        len_low: u32,
3848        len_high: u32,
3849    ) -> Result<(), Errno> {
3850        let offset = merge_low_and_high(offset_low, offset_high);
3851        let len = merge_low_and_high(len_low, len_high);
3852        super::sys_fadvise64(locked, current_task, fd, offset, len, advice)
3853    }
3854
3855    pub fn sys_arch32_sendfile64(
3856        locked: &mut Locked<Unlocked>,
3857        current_task: &CurrentTask,
3858        out_fd: FdNumber,
3859        in_fd: FdNumber,
3860        user_offset: UserRef<uapi::off_t>,
3861        count: i32,
3862    ) -> Result<usize, Errno> {
3863        super::sys_sendfile(locked, current_task, out_fd, in_fd, user_offset.into(), count)
3864    }
3865
3866    pub use super::{
3867        sys_chdir as sys_arch32_chdir, sys_chroot as sys_arch32_chroot,
3868        sys_copy_file_range as sys_arch32_copy_file_range, sys_dup3 as sys_arch32_dup3,
3869        sys_epoll_create1 as sys_arch32_epoll_create1, sys_epoll_ctl as sys_arch32_epoll_ctl,
3870        sys_epoll_pwait as sys_arch32_epoll_pwait, sys_epoll_pwait2 as sys_arch32_epoll_pwait2,
3871        sys_eventfd2 as sys_arch32_eventfd2, sys_fchmod as sys_arch32_fchmod,
3872        sys_fchmodat as sys_arch32_fchmodat, sys_fchown as sys_arch32_fchown32,
3873        sys_fchown as sys_arch32_fchown, sys_fchownat as sys_arch32_fchownat,
3874        sys_fdatasync as sys_arch32_fdatasync, sys_flock as sys_arch32_flock,
3875        sys_fsetxattr as sys_arch32_fsetxattr, sys_fstatat64 as sys_arch32_fstatat64,
3876        sys_fstatfs as sys_arch32_fstatfs, sys_fsync as sys_arch32_fsync,
3877        sys_ftruncate as sys_arch32_ftruncate,
3878        sys_inotify_add_watch as sys_arch32_inotify_add_watch,
3879        sys_inotify_init1 as sys_arch32_inotify_init1,
3880        sys_inotify_rm_watch as sys_arch32_inotify_rm_watch, sys_io_cancel as sys_arch32_io_cancel,
3881        sys_io_destroy as sys_arch32_io_destroy, sys_io_getevents as sys_arch32_io_getevents,
3882        sys_io_setup as sys_arch32_io_setup, sys_io_submit as sys_arch32_io_submit,
3883        sys_io_uring_enter as sys_arch32_io_uring_enter,
3884        sys_io_uring_register as sys_arch32_io_uring_register,
3885        sys_io_uring_setup as sys_arch32_io_uring_setup, sys_lgetxattr as sys_arch32_lgetxattr,
3886        sys_linkat as sys_arch32_linkat, sys_listxattr as sys_arch32_listxattr,
3887        sys_llistxattr as sys_arch32_llistxattr, sys_lsetxattr as sys_arch32_lsetxattr,
3888        sys_mkdirat as sys_arch32_mkdirat, sys_mknodat as sys_arch32_mknodat,
3889        sys_pidfd_getfd as sys_arch32_pidfd_getfd, sys_pidfd_open as sys_arch32_pidfd_open,
3890        sys_ppoll as sys_arch32_ppoll, sys_preadv as sys_arch32_preadv,
3891        sys_pselect6 as sys_arch32_pselect6, sys_readv as sys_arch32_readv,
3892        sys_removexattr as sys_arch32_removexattr, sys_renameat2 as sys_arch32_renameat2,
3893        sys_select as sys_arch32__newselect, sys_sendfile as sys_arch32_sendfile,
3894        sys_setxattr as sys_arch32_setxattr, sys_splice as sys_arch32_splice,
3895        sys_statfs as sys_arch32_statfs, sys_statx as sys_arch32_statx,
3896        sys_symlinkat as sys_arch32_symlinkat, sys_sync as sys_arch32_sync,
3897        sys_syncfs as sys_arch32_syncfs, sys_tee as sys_arch32_tee,
3898        sys_timerfd_create as sys_arch32_timerfd_create,
3899        sys_timerfd_gettime as sys_arch32_timerfd_gettime,
3900        sys_timerfd_settime as sys_arch32_timerfd_settime, sys_truncate as sys_arch32_truncate,
3901        sys_umask as sys_arch32_umask, sys_utimensat as sys_arch32_utimensat,
3902        sys_vmsplice as sys_arch32_vmsplice,
3903    };
3904}
3905
3906#[cfg(target_arch = "aarch64")]
3907pub use arch32::*;
3908
3909#[cfg(test)]
3910mod tests {
3911    use super::*;
3912    use crate::task::KernelFeatures;
3913    use crate::testing::*;
3914    use starnix_types::vfs::default_statfs;
3915    use starnix_uapi::{O_RDONLY, SEEK_CUR, SEEK_END, SEEK_SET};
3916    use zerocopy::IntoBytes;
3917
3918    #[::fuchsia::test]
3919    async fn test_sys_lseek() -> Result<(), Errno> {
3920        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
3921            let fd = FdNumber::from_raw(10);
3922            let file_handle =
3923                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3924            let file_size = file_handle.node().stat(locked, current_task).unwrap().st_size;
3925            current_task.live().files.insert(locked, current_task, fd, file_handle).unwrap();
3926
3927            assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_CUR)?, 0);
3928            assert_eq!(sys_lseek(locked, current_task, fd, 1, SEEK_CUR)?, 1);
3929            assert_eq!(sys_lseek(locked, current_task, fd, 3, SEEK_SET)?, 3);
3930            assert_eq!(sys_lseek(locked, current_task, fd, -3, SEEK_CUR)?, 0);
3931            assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_END)?, file_size);
3932            assert_eq!(sys_lseek(locked, current_task, fd, -5, SEEK_SET), error!(EINVAL));
3933
3934            // Make sure that the failed call above did not change the offset.
3935            assert_eq!(sys_lseek(locked, current_task, fd, 0, SEEK_CUR)?, file_size);
3936
3937            // Prepare for an overflow.
3938            assert_eq!(sys_lseek(locked, current_task, fd, 3, SEEK_SET)?, 3);
3939
3940            // Check for overflow.
3941            assert_eq!(sys_lseek(locked, current_task, fd, i64::MAX, SEEK_CUR), error!(EINVAL));
3942
3943            Ok(())
3944        })
3945        .await
3946    }
3947
3948    #[::fuchsia::test]
3949    async fn test_sys_dup() -> Result<(), Errno> {
3950        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
3951            let file_handle =
3952                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3953            let oldfd = current_task.add_file(locked, file_handle, FdFlags::empty())?;
3954            let newfd = sys_dup(locked, current_task, oldfd)?;
3955
3956            assert_ne!(oldfd, newfd);
3957            let files = &current_task.live().files;
3958            assert!(Arc::ptr_eq(&files.get(oldfd).unwrap(), &files.get(newfd).unwrap()));
3959
3960            assert_eq!(sys_dup(locked, current_task, FdNumber::from_raw(3)), error!(EBADF));
3961
3962            Ok(())
3963        })
3964        .await
3965    }
3966
3967    #[::fuchsia::test]
3968    async fn test_sys_dup3() -> Result<(), Errno> {
3969        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
3970            let file_handle =
3971                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3972            let oldfd = current_task.add_file(locked, file_handle, FdFlags::empty())?;
3973            let newfd = FdNumber::from_raw(2);
3974            sys_dup3(locked, current_task, oldfd, newfd, O_CLOEXEC)?;
3975
3976            assert_ne!(oldfd, newfd);
3977            let files = &current_task.live().files;
3978            assert!(Arc::ptr_eq(&files.get(oldfd).unwrap(), &files.get(newfd).unwrap()));
3979            assert_eq!(files.get_fd_flags_allowing_opath(oldfd).unwrap(), FdFlags::empty());
3980            assert_eq!(files.get_fd_flags_allowing_opath(newfd).unwrap(), FdFlags::CLOEXEC);
3981
3982            assert_eq!(sys_dup3(locked, current_task, oldfd, oldfd, O_CLOEXEC), error!(EINVAL));
3983
3984            // Pass invalid flags.
3985            let invalid_flags = 1234;
3986            assert_eq!(sys_dup3(locked, current_task, oldfd, newfd, invalid_flags), error!(EINVAL));
3987
3988            // Makes sure that dup closes the old file handle before the fd points
3989            // to the new file handle.
3990            let second_file_handle =
3991                current_task.open_file(locked, "data/testfile.txt".into(), OpenFlags::RDONLY)?;
3992            let different_file_fd =
3993                current_task.add_file(locked, second_file_handle, FdFlags::empty())?;
3994            assert!(!Arc::ptr_eq(
3995                &files.get(oldfd).unwrap(),
3996                &files.get(different_file_fd).unwrap()
3997            ));
3998            sys_dup3(locked, current_task, oldfd, different_file_fd, O_CLOEXEC)?;
3999            assert!(Arc::ptr_eq(
4000                &files.get(oldfd).unwrap(),
4001                &files.get(different_file_fd).unwrap()
4002            ));
4003
4004            Ok(())
4005        })
4006        .await
4007    }
4008
4009    #[::fuchsia::test]
4010    async fn test_sys_open_cloexec() -> Result<(), Errno> {
4011        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
4012            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4013            let path = b"data/testfile.txt\0";
4014            current_task.write_memory(path_addr, path)?;
4015            let fd = sys_openat(
4016                locked,
4017                &current_task,
4018                FdNumber::AT_FDCWD,
4019                UserCString::new(current_task, path_addr),
4020                O_RDONLY | O_CLOEXEC,
4021                FileMode::default(),
4022            )?;
4023            assert!(
4024                current_task
4025                    .live()
4026                    .files
4027                    .get_fd_flags_allowing_opath(fd)?
4028                    .contains(FdFlags::CLOEXEC)
4029            );
4030            Ok(())
4031        })
4032        .await
4033    }
4034
4035    #[::fuchsia::test]
4036    async fn test_sys_epoll() -> Result<(), Errno> {
4037        spawn_kernel_and_run_with_pkgfs(async |locked, current_task| {
4038            let epoll_fd =
4039                sys_epoll_create1(locked, current_task, 0).expect("sys_epoll_create1 failed");
4040            sys_close(locked, current_task, epoll_fd).expect("sys_close failed");
4041
4042            Ok(())
4043        })
4044        .await
4045    }
4046
4047    #[::fuchsia::test]
4048    async fn test_fstat_tmp_file() {
4049        spawn_kernel_and_run(async |locked, current_task| {
4050            // Create the file that will be used to stat.
4051            let file_path = "testfile.txt";
4052            let _file_handle = current_task
4053                .open_file_at(
4054                    locked,
4055                    FdNumber::AT_FDCWD,
4056                    file_path.into(),
4057                    OpenFlags::RDWR | OpenFlags::CREAT,
4058                    FileMode::ALLOW_ALL,
4059                    ResolveFlags::empty(),
4060                    AccessCheck::default(),
4061                )
4062                .unwrap();
4063
4064            // Write the path to user memory.
4065            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4066            current_task
4067                .write_memory(path_addr, file_path.as_bytes())
4068                .expect("failed to clear struct");
4069
4070            let memory_len = (path_addr + file_path.len()).expect("OOB memory allocation!");
4071            let user_stat = UserRef::new(memory_len);
4072            current_task
4073                .write_object(user_stat, &default_statfs(0))
4074                .expect("failed to clear struct");
4075
4076            let user_path = UserCString::new(current_task, path_addr);
4077
4078            assert_eq!(sys_statfs(locked, current_task, user_path, user_stat.into()), Ok(()));
4079
4080            let returned_stat = current_task.read_object(user_stat).expect("failed to read struct");
4081            let expected_stat = starnix_uapi::statfs {
4082                f_blocks: 0x100000000,
4083                f_bavail: 0x100000000,
4084                f_bfree: 0x100000000,
4085                f_flags: starnix_uapi::MS_RELATIME as i64,
4086                ..default_statfs(starnix_uapi::TMPFS_MAGIC)
4087            };
4088            assert!(
4089                returned_stat.as_bytes() == expected_stat.as_bytes(),
4090                "Expected {:?}, got {:?}",
4091                expected_stat,
4092                returned_stat
4093            );
4094        })
4095        .await;
4096    }
4097
4098    #[::fuchsia::test]
4099    async fn test_unlinkat_dir() {
4100        spawn_kernel_and_run(async |locked, current_task| {
4101            // Create the dir that we will attempt to unlink later.
4102            let no_slash_path = b"testdir";
4103            let no_slash_path_addr =
4104                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
4105            current_task
4106                .write_memory(no_slash_path_addr, no_slash_path)
4107                .expect("failed to write path");
4108            let no_slash_user_path = UserCString::new(current_task, no_slash_path_addr);
4109            sys_mkdirat(
4110                locked,
4111                &current_task,
4112                FdNumber::AT_FDCWD,
4113                no_slash_user_path,
4114                FileMode::ALLOW_ALL.with_type(FileMode::IFDIR),
4115            )
4116            .unwrap();
4117
4118            let slash_path = b"testdir/";
4119            let slash_path_addr =
4120                map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4121            current_task.write_memory(slash_path_addr, slash_path).expect("failed to write path");
4122            let slash_user_path = UserCString::new(current_task, slash_path_addr);
4123
4124            // Try to remove a directory without specifying AT_REMOVEDIR.
4125            // This should fail with EISDIR, irrespective of the terminating slash.
4126            let error = sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, slash_user_path, 0)
4127                .unwrap_err();
4128            assert_eq!(error, errno!(EISDIR));
4129            let error =
4130                sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, no_slash_user_path, 0)
4131                    .unwrap_err();
4132            assert_eq!(error, errno!(EISDIR));
4133
4134            // Success with AT_REMOVEDIR.
4135            sys_unlinkat(locked, current_task, FdNumber::AT_FDCWD, slash_user_path, AT_REMOVEDIR)
4136                .unwrap();
4137        })
4138        .await;
4139    }
4140
4141    #[::fuchsia::test]
4142    async fn test_rename_noreplace() {
4143        spawn_kernel_and_run(async |locked, current_task| {
4144            // Create the file that will be renamed.
4145            let old_user_path = "testfile.txt";
4146            let _old_file_handle = current_task
4147                .open_file_at(
4148                    locked,
4149                    FdNumber::AT_FDCWD,
4150                    old_user_path.into(),
4151                    OpenFlags::RDWR | OpenFlags::CREAT,
4152                    FileMode::ALLOW_ALL,
4153                    ResolveFlags::empty(),
4154                    AccessCheck::default(),
4155                )
4156                .unwrap();
4157
4158            // Write the path to user memory.
4159            let old_path_addr =
4160                map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4161            current_task
4162                .write_memory(old_path_addr, old_user_path.as_bytes())
4163                .expect("failed to clear struct");
4164
4165            // Create a second file that we will attempt to rename to.
4166            let new_user_path = "testfile2.txt";
4167            let _new_file_handle = current_task
4168                .open_file_at(
4169                    locked,
4170                    FdNumber::AT_FDCWD,
4171                    new_user_path.into(),
4172                    OpenFlags::RDWR | OpenFlags::CREAT,
4173                    FileMode::ALLOW_ALL,
4174                    ResolveFlags::empty(),
4175                    AccessCheck::default(),
4176                )
4177                .unwrap();
4178
4179            // Write the path to user memory.
4180            let new_path_addr =
4181                map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4182            current_task
4183                .write_memory(new_path_addr, new_user_path.as_bytes())
4184                .expect("failed to clear struct");
4185
4186            // Try to rename first file to second file's name with RENAME_NOREPLACE flag.
4187            // This should fail with EEXIST.
4188            let error = sys_renameat2(
4189                locked,
4190                &current_task,
4191                FdNumber::AT_FDCWD,
4192                UserCString::new(current_task, old_path_addr),
4193                FdNumber::AT_FDCWD,
4194                UserCString::new(current_task, new_path_addr),
4195                RenameFlags::NOREPLACE.bits(),
4196            )
4197            .unwrap_err();
4198            assert_eq!(error, errno!(EEXIST));
4199        })
4200        .await;
4201    }
4202
4203    #[::fuchsia::test]
4204    async fn test_sys_sync() -> Result<(), Errno> {
4205        spawn_kernel_and_run(async |locked, current_task| {
4206            sys_sync(locked, current_task)?;
4207            Ok(())
4208        })
4209        .await
4210    }
4211
4212    #[::fuchsia::test]
4213    async fn test_sys_syncfs() -> Result<(), Errno> {
4214        spawn_kernel_and_run(async |locked, current_task| {
4215            let file_handle = current_task.open_file(locked, ".".into(), OpenFlags::RDONLY)?;
4216            let fd = current_task.add_file(locked, file_handle, FdFlags::empty())?;
4217            sys_syncfs(locked, current_task, fd)?;
4218            Ok(())
4219        })
4220        .await
4221    }
4222
4223    // TODO(https://fxbug.dev/485370648) remove when unnecessary
4224    #[::fuchsia::test]
4225    async fn test_fake_ion_stat() {
4226        // Test with fake_ion disabled (default).
4227        spawn_kernel_and_run(async |locked, current_task| {
4228            let ion_path = b"/dev/ion\0";
4229            let path_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4230            current_task.write_memory(path_addr, ion_path).expect("failed to write path");
4231            let user_path = UserCString::new(current_task, path_addr);
4232
4233            let stat_addr = map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4234            let stat_ptr = StatPtr::new(current_task, stat_addr);
4235
4236            let error =
4237                sys_fstatat64(locked, current_task, FdNumber::AT_FDCWD, user_path, stat_ptr, 0)
4238                    .unwrap_err();
4239            assert_eq!(error, errno!(ENOENT));
4240        })
4241        .await;
4242
4243        // Test with fake_ion enabled.
4244        let mut features = KernelFeatures::default();
4245        features.fake_ion = true;
4246        spawn_kernel_with_features_and_run(
4247            async |locked, current_task| {
4248                let ion_path = b"/dev/ion\0";
4249                let path_addr =
4250                    map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4251                current_task.write_memory(path_addr, ion_path).expect("failed to write path");
4252                let user_path = UserCString::new(current_task, path_addr);
4253
4254                let stat_addr =
4255                    map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4256                let stat_ptr = StatPtr::new(current_task, stat_addr);
4257
4258                sys_fstatat64(locked, current_task, FdNumber::AT_FDCWD, user_path, stat_ptr, 0)
4259                    .expect("sys_fstatat64 should succeed with fake_ion");
4260
4261                let stat_result: uapi::stat =
4262                    current_task.read_object(stat_addr.into()).expect("failed to read stat");
4263                assert_eq!(stat_result.st_mode, uapi::S_IFCHR | 0o666);
4264                assert_eq!(stat_result.st_rdev, DeviceId::new(10, 59).bits());
4265
4266                // Test statx as well.
4267                let statx_addr =
4268                    map_memory(locked, current_task, UserAddress::default(), *PAGE_SIZE);
4269                let statx_ptr = UserRef::new(statx_addr);
4270                sys_statx(
4271                    locked,
4272                    current_task,
4273                    FdNumber::AT_FDCWD,
4274                    user_path,
4275                    0,
4276                    uapi::STATX_BASIC_STATS,
4277                    statx_ptr,
4278                )
4279                .expect("sys_statx should succeed with fake_ion");
4280
4281                let statx_result: statx =
4282                    current_task.read_object(statx_ptr).expect("failed to read statx");
4283                assert_eq!(statx_result.stx_mode, (uapi::S_IFCHR | 0o666) as u16);
4284                assert_eq!(statx_result.stx_rdev_major, 10);
4285                assert_eq!(statx_result.stx_rdev_minor, 59);
4286            },
4287            features,
4288        )
4289        .await;
4290    }
4291}