Skip to main content

starnix_core/task/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::execution::execute_task;
6use crate::mm::{DumpPolicy, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE};
7use crate::ptrace::{
8    PR_SET_PTRACER_ANY, PtraceAllowedPtracers, PtraceAttachType, PtraceOptions, ptrace_attach,
9    ptrace_dispatch, ptrace_traceme,
10};
11use crate::security;
12use crate::signals::syscalls::RUsagePtr;
13use crate::task::{
14    CurrentTask, ExitStatus, NormalPriority, SchedulingPolicy, SeccompAction, SeccompStateValue,
15    SyslogAccess, Task, ThreadGroup, max_priority_for_sched_policy, min_priority_for_sched_policy,
16};
17use crate::vfs::{
18    FdNumber, FileHandle, MountNamespaceFile, PidFdFileObject, UserBuffersOutputBuffer,
19    VecOutputBuffer,
20};
21use starnix_logging::{log_error, log_info, log_trace, track_stub};
22use starnix_sync::{LockDepRwLock, Locked, Unlocked};
23use starnix_syscalls::SyscallResult;
24use starnix_task_command::TaskCommand;
25use starnix_types::time::timeval_from_duration;
26use starnix_uapi::auth::{
27    CAP_SETGID, CAP_SETPCAP, CAP_SETUID, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SYS_RESOURCE,
28    CAP_SYS_TTY_CONFIG, Capabilities, Credentials, PTRACE_MODE_READ_REALCREDS, SecureBits,
29};
30use starnix_uapi::errors::{ENAMETOOLONG, Errno};
31use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
32use starnix_uapi::kcmp::KcmpResource;
33use starnix_uapi::open_flags::OpenFlags;
34use starnix_uapi::resource_limits::Resource;
35use starnix_uapi::signals::{Signal, UncheckedSignal};
36use starnix_uapi::syslog::SyslogAction;
37use starnix_uapi::user_address::{
38    ArchSpecific, MappingMultiArchUserRef, MultiArchUserRef, UserAddress, UserCString,
39    UserCStringPtr, UserRef,
40};
41use starnix_uapi::vfs::ResolveFlags;
42use starnix_uapi::{
43    __user_cap_data_struct, __user_cap_header_struct, _LINUX_CAPABILITY_VERSION_1,
44    _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3, AT_EMPTY_PATH, AT_SYMLINK_NOFOLLOW,
45    BPF_MAXINSNS, CLONE_ARGS_SIZE_VER0, CLONE_ARGS_SIZE_VER1, CLONE_ARGS_SIZE_VER2, CLONE_FILES,
46    CLONE_FS, CLONE_NEWNS, CLONE_NEWUTS, CLONE_SETTLS, CLONE_VFORK, NGROUPS_MAX, PR_CAP_AMBIENT,
47    PR_CAP_AMBIENT_CLEAR_ALL, PR_CAP_AMBIENT_IS_SET, PR_CAP_AMBIENT_LOWER, PR_CAP_AMBIENT_RAISE,
48    PR_CAPBSET_DROP, PR_CAPBSET_READ, PR_GET_CHILD_SUBREAPER, PR_GET_DUMPABLE, PR_GET_KEEPCAPS,
49    PR_GET_NAME, PR_GET_NO_NEW_PRIVS, PR_GET_SECCOMP, PR_GET_SECUREBITS, PR_SET_CHILD_SUBREAPER,
50    PR_SET_DUMPABLE, PR_SET_KEEPCAPS, PR_SET_NAME, PR_SET_NO_NEW_PRIVS, PR_SET_PDEATHSIG,
51    PR_SET_PTRACER, PR_SET_SECCOMP, PR_SET_SECUREBITS, PR_SET_TIMERSLACK, PR_SET_VMA,
52    PR_SET_VMA_ANON_NAME, PRIO_PROCESS, PTRACE_ATTACH, PTRACE_SEIZE, PTRACE_TRACEME,
53    RUSAGE_CHILDREN, SCHED_RESET_ON_FORK, SECCOMP_FILTER_FLAG_LOG,
54    SECCOMP_FILTER_FLAG_NEW_LISTENER, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_TSYNC,
55    SECCOMP_FILTER_FLAG_TSYNC_ESRCH, SECCOMP_GET_ACTION_AVAIL, SECCOMP_GET_NOTIF_SIZES,
56    SECCOMP_MODE_FILTER, SECCOMP_MODE_STRICT, SECCOMP_SET_MODE_FILTER, SECCOMP_SET_MODE_STRICT,
57    c_char, c_int, clone_args, errno, error, gid_t, pid_t, rlimit, rusage, sched_param,
58    sock_filter, uapi, uid_t,
59};
60use static_assertions::const_assert;
61use std::cmp;
62use std::ffi::CString;
63use std::sync::{Arc, LazyLock};
64use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
65
66#[cfg(target_arch = "aarch64")]
67use starnix_uapi::{PR_GET_TAGGED_ADDR_CTRL, PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE};
68
/// User-space reference to a `sock_fprog`, accepting either the native (`uapi::sock_fprog`) or
/// the 32-bit (`uapi::arch32::sock_fprog`) representation and mapping it to [`SockFProg`].
pub type SockFProgPtr =
    MappingMultiArchUserRef<SockFProg, uapi::sock_fprog, uapi::arch32::sock_fprog>;
/// User-space reference to a `sock_filter` in either the native or the 32-bit representation.
pub type SockFilterPtr = MultiArchUserRef<uapi::sock_filter, uapi::arch32::sock_filter>;
72
/// Architecture-neutral form of `sock_fprog`, the type that [`SockFProgPtr`] maps to.
pub struct SockFProg {
    /// Length field of the program.
    // NOTE(review): presumably the number of `sock_filter` entries behind `filter` — confirm
    // against the code that consumes this struct.
    pub len: u32,
    /// User-space reference to the filter entries.
    pub filter: SockFilterPtr,
}
77
// Field-by-field mapping between `SockFProg` and the per-architecture `sock_fprog` structs.
// NOTE(review): presumably this generates the conversions that `SockFProgPtr` relies on —
// confirm against the `arch_map_data!` definition.
uapi::arch_map_data! {
    BidiTryFrom<SockFProg, sock_fprog> {
        len = len;
        filter = filter;
    }
}
84
// NOTE(review): judging by the macro name, this checks that `sched_param::sched_priority` is
// laid out identically on every supported architecture, which would justify reading it through
// a plain `UserRef<sched_param>` — confirm against the macro definition.
uapi::check_arch_independent_layout! {
    sched_param {
        sched_priority,
    }
}
90
91pub fn do_clone(
92    locked: &mut Locked<Unlocked>,
93    current_task: &mut CurrentTask,
94    args: &clone_args,
95) -> Result<pid_t, Errno> {
96    security::check_task_create_access(current_task)?;
97
98    let child_exit_signal = if args.exit_signal == 0 {
99        None
100    } else {
101        Some(Signal::try_from(UncheckedSignal::new(args.exit_signal))?)
102    };
103
104    let mut new_task = current_task.clone_task(
105        locked,
106        args.flags,
107        child_exit_signal,
108        UserRef::<pid_t>::new(UserAddress::from(args.parent_tid)),
109        UserRef::<pid_t>::new(UserAddress::from(args.child_tid)),
110        UserRef::<FdNumber>::new(UserAddress::from(args.pidfd)),
111    )?;
112
113    // Set the result register to 0 for the return value from clone in the
114    // cloned process.
115    new_task.thread_state.registers.set_return_register(0);
116    let (trace_kind, ptrace_state) = current_task.get_ptrace_core_state_for_clone(args);
117
118    if args.stack != 0 {
119        // In clone() the `stack` argument points to the top of the stack, while in clone3()
120        // `stack` points to the bottom of the stack. Therefore, in clone3() we need to add
121        // `stack_size` to calculate the stack pointer. Note that in clone() `stack_size` is 0.
122        new_task
123            .thread_state
124            .registers
125            .set_stack_pointer_register(args.stack.wrapping_add(args.stack_size));
126    }
127
128    if args.flags & (CLONE_SETTLS as u64) != 0 {
129        new_task.thread_state.registers.set_thread_pointer_register(args.tls);
130    }
131
132    let tid = new_task.task.tid;
133    let task_ref = Arc::downgrade(&new_task.task);
134    execute_task(locked, new_task, |_, _| Ok(()), |_| {}, ptrace_state)?;
135
136    current_task.ptrace_event(locked, trace_kind, tid as u64);
137
138    if args.flags & (CLONE_VFORK as u64) != 0 {
139        current_task.wait_for_execve(task_ref)?;
140        current_task.ptrace_event(locked, PtraceOptions::TRACEVFORKDONE, tid as u64);
141    }
142
143    Ok(tid)
144}
145
146pub fn sys_clone3(
147    locked: &mut Locked<Unlocked>,
148    current_task: &mut CurrentTask,
149    user_clone_args: UserRef<clone_args>,
150    user_clone_args_size: usize,
151) -> Result<pid_t, Errno> {
152    // Only these specific sized versions are supported.
153    if !(user_clone_args_size == CLONE_ARGS_SIZE_VER0 as usize
154        || user_clone_args_size == CLONE_ARGS_SIZE_VER1 as usize
155        || user_clone_args_size == CLONE_ARGS_SIZE_VER2 as usize)
156    {
157        return error!(EINVAL);
158    }
159
160    // The most recent version of the struct size should match our definition.
161    const_assert!(std::mem::size_of::<clone_args>() == CLONE_ARGS_SIZE_VER2 as usize);
162
163    let clone_args = current_task.read_object_partial(user_clone_args, user_clone_args_size)?;
164    do_clone(locked, current_task, &clone_args)
165}
166
167fn read_c_string_vector(
168    mm: &CurrentTask,
169    user_vector: UserCStringPtr,
170    elem_limit: usize,
171    vec_limit: usize,
172) -> Result<(Vec<CString>, usize), Errno> {
173    let mut user_current = user_vector;
174    let mut vector: Vec<CString> = vec![];
175    let mut vec_size: usize = 0;
176    loop {
177        let user_string = mm.read_multi_arch_ptr(user_current)?;
178        if user_string.is_null() {
179            break;
180        }
181        let string = mm
182            .read_c_string_to_vec(user_string, elem_limit)
183            .map_err(|e| if e.code == ENAMETOOLONG { errno!(E2BIG) } else { e })?;
184        let cstring = CString::new(string).map_err(|_| errno!(EINVAL))?;
185        vec_size =
186            vec_size.checked_add(cstring.as_bytes_with_nul().len()).ok_or_else(|| errno!(E2BIG))?;
187        if vec_size > vec_limit {
188            return error!(E2BIG);
189        }
190        vector.push(cstring);
191        user_current = user_current.next()?;
192    }
193    Ok((vector, vec_size))
194}
195
196pub fn sys_execve(
197    locked: &mut Locked<Unlocked>,
198    current_task: &mut CurrentTask,
199    user_path: UserCString,
200    user_argv: UserCStringPtr,
201    user_environ: UserCStringPtr,
202) -> Result<(), Errno> {
203    sys_execveat(locked, current_task, FdNumber::AT_FDCWD, user_path, user_argv, user_environ, 0)
204}
205
206pub fn sys_execveat(
207    locked: &mut Locked<Unlocked>,
208    current_task: &mut CurrentTask,
209    dir_fd: FdNumber,
210    user_path: UserCString,
211    user_argv: UserCStringPtr,
212    user_environ: UserCStringPtr,
213    flags: u32,
214) -> Result<(), Errno> {
215    if flags & !(AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) != 0 {
216        return error!(EINVAL);
217    }
218
219    // Calculate the limit for argv and environ size as 1/4 of the stack size, floored at 32 pages.
220    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
221    const PAGE_LIMIT: usize = 32;
222    let page_limit_size: usize = PAGE_LIMIT * *PAGE_SIZE as usize;
223    let rlimit = current_task.thread_group().get_rlimit(locked, Resource::STACK);
224    let stack_limit = rlimit / 4;
225    let argv_env_limit = cmp::max(page_limit_size, stack_limit as usize);
226
227    // The limit per argument or environment variable is 32 pages.
228    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
229    let (argv, argv_size) = if user_argv.is_null() {
230        (Vec::new(), 0)
231    } else {
232        read_c_string_vector(current_task, user_argv, page_limit_size, argv_env_limit)?
233    };
234
235    let (environ, _) = if user_environ.is_null() {
236        (Vec::new(), 0)
237    } else {
238        read_c_string_vector(
239            current_task,
240            user_environ,
241            page_limit_size,
242            argv_env_limit - argv_size,
243        )?
244    };
245
246    let path = &current_task.read_path(user_path)?;
247
248    log_trace!(argv:?, environ:?, flags:?; "execveat({dir_fd}, {path})");
249
250    let mut open_flags = OpenFlags::RDONLY;
251
252    if flags & AT_SYMLINK_NOFOLLOW != 0 {
253        open_flags |= OpenFlags::NOFOLLOW;
254    }
255
256    let executable = if path.is_empty() {
257        if flags & AT_EMPTY_PATH == 0 {
258            // If AT_EMPTY_PATH is not set, this is an error.
259            return error!(ENOENT);
260        }
261
262        // O_PATH allowed for:
263        //
264        //   Passing the file descriptor as the dirfd argument of
265        //   openat() and the other "*at()" system calls.  This
266        //   includes linkat(2) with AT_EMPTY_PATH (or via procfs
267        //   using AT_SYMLINK_FOLLOW) even if the file is not a
268        //   directory.
269        //
270        // See https://man7.org/linux/man-pages/man2/open.2.html
271        let file = current_task.get_file_allowing_opath(dir_fd)?;
272
273        // We are forced to reopen the file with O_RDONLY to get access to the underlying VMO.
274        // Note that skip the access check in the arguments in case the file mode does
275        // not actually have the read permission bit.
276        //
277        // This can happen because a file could have --x--x--x mode permissions and then
278        // be opened with O_PATH. Internally, the file operations would all be stubbed out
279        // for that file, which is undesirable here.
280        //
281        // See https://man7.org/linux/man-pages/man3/fexecve.3.html#DESCRIPTION
282        file.name.open(
283            locked,
284            current_task,
285            OpenFlags::RDONLY,
286            AccessCheck::check_for(Access::EXEC),
287        )?
288    } else {
289        current_task.open_file_at(
290            locked,
291            dir_fd,
292            path.as_ref(),
293            open_flags,
294            FileMode::default(),
295            ResolveFlags::empty(),
296            AccessCheck::check_for(Access::EXEC),
297        )?
298    };
299
300    // This path can affect script resolution (the path is appended to the script args)
301    // and the auxiliary value `AT_EXECFN` from the syscall `getauxval()`
302    let path = if dir_fd == FdNumber::AT_FDCWD {
303        // The file descriptor is CWD, so the path is exactly
304        // what the user specified.
305        path.to_vec()
306    } else {
307        // The path is `/dev/fd/N/P` where N is the file descriptor
308        // number and P is the user-provided path (if relative and non-empty).
309        //
310        // See https://man7.org/linux/man-pages/man2/execveat.2.html#NOTES
311        match path.first() {
312            Some(b'/') => {
313                // The user-provided path is absolute, so dir_fd is ignored.
314                path.to_vec()
315            }
316            Some(_) => {
317                // User-provided path is relative, append it.
318                let mut new_path = format!("/dev/fd/{}/", dir_fd.raw()).into_bytes();
319                new_path.append(&mut path.to_vec());
320                new_path
321            }
322            // User-provided path is empty
323            None => format!("/dev/fd/{}", dir_fd.raw()).into_bytes(),
324        }
325    };
326
327    let path = CString::new(path).map_err(|_| errno!(EINVAL))?;
328
329    current_task.exec(locked, executable, path, argv, environ)?;
330    Ok(())
331}
332
333pub fn sys_getcpu(
334    _locked: &mut Locked<Unlocked>,
335    current_task: &CurrentTask,
336    cpu_out: UserRef<u32>,
337    node_out: UserRef<u32>,
338) -> Result<(), Errno> {
339    // "When either cpu or node is NULL nothing is written to the respective pointer."
340    // from https://man7.org/linux/man-pages/man2/getcpu.2.html
341    if !cpu_out.is_null() {
342        let thread_stats = current_task
343            .running_state()
344            .thread
345            .get()
346            .expect("current thread is never None when executing")
347            .thread
348            .stats()
349            .map_err(|e| errno!(EINVAL, format!("getting thread stats failed {e:?}")))?;
350        current_task.write_object(cpu_out, &thread_stats.last_scheduled_cpu)?;
351    }
352    if !node_out.is_null() {
353        // Zircon does not yet have a concept of NUMA task scheduling, always tell userspace that
354        // it's on the "first" node which should be true for non-NUMA systems.
355        track_stub!(TODO("https://fxbug.dev/325643815"), "getcpu() numa node");
356        current_task.write_object(node_out, &0)?;
357    }
358    Ok(())
359}
360
361pub fn sys_getpid(
362    _locked: &mut Locked<Unlocked>,
363    current_task: &CurrentTask,
364) -> Result<pid_t, Errno> {
365    Ok(current_task.get_pid())
366}
367
368pub fn sys_gettid(
369    _locked: &mut Locked<Unlocked>,
370    current_task: &CurrentTask,
371) -> Result<pid_t, Errno> {
372    Ok(current_task.get_tid())
373}
374
375pub fn sys_getppid(
376    _locked: &mut Locked<Unlocked>,
377    current_task: &CurrentTask,
378) -> Result<pid_t, Errno> {
379    Ok(current_task.thread_group().read().get_ppid())
380}
381
382fn get_task_or_current(current_task: &CurrentTask, pid: pid_t) -> Result<Arc<Task>, Errno> {
383    if pid == 0 { Ok(current_task.task.clone()) } else { current_task.get_task(pid) }
384}
385
386pub fn sys_getsid(
387    _locked: &mut Locked<Unlocked>,
388    current_task: &CurrentTask,
389    pid: pid_t,
390) -> Result<pid_t, Errno> {
391    let target_task = get_task_or_current(current_task, pid)?;
392    security::check_task_getsid(current_task, &target_task)?;
393    let sid = target_task.thread_group().read().process_group.session.leader;
394    Ok(sid)
395}
396
397pub fn sys_getpgid(
398    _locked: &mut Locked<Unlocked>,
399    current_task: &CurrentTask,
400    pid: pid_t,
401) -> Result<pid_t, Errno> {
402    let task = get_task_or_current(current_task, pid)?;
403
404    security::check_getpgid_access(current_task, &task)?;
405    let pgid = task.thread_group().read().process_group.leader;
406    Ok(pgid)
407}
408
409pub fn sys_setpgid(
410    locked: &mut Locked<Unlocked>,
411    current_task: &CurrentTask,
412    pid: pid_t,
413    pgid: pid_t,
414) -> Result<(), Errno> {
415    let task = get_task_or_current(current_task, pid)?;
416
417    current_task.thread_group().setpgid(locked, current_task, &task, pgid)?;
418    Ok(())
419}
420
421impl CurrentTask {
422    /// Returns true if the `current_task`'s effective user ID (EUID) is the same as the
423    /// EUID or UID of the `target_task`. We describe this as the current task being
424    /// "EUID-friendly" to the target and it enables actions to be performed that would
425    /// otherwise require additional privileges.
426    ///
427    /// See "The caller needs an effective user ID equal to the real user ID or effective
428    /// user ID of the [target]" at sched_setaffinity(2), comparable language at
429    /// setpriority(2), more ambiguous language at sched_setscheduler(2), and no
430    /// particular specification at sched_setparam(2).
431    fn is_euid_friendly_with(&self, target_task: &Task) -> bool {
432        let self_creds = self.current_creds();
433        let target_creds = target_task.real_creds();
434        self_creds.euid == target_creds.uid || self_creds.euid == target_creds.euid
435    }
436}
437
438// A non-root process is allowed to set any of its three uids to the value of any other. The
439// CAP_SETUID capability bypasses these checks and allows setting any uid to any integer. Likewise
440// for gids.
441fn new_uid_allowed(current_task: &CurrentTask, uid: uid_t) -> bool {
442    let current_creds = current_task.current_creds();
443    uid == current_creds.uid
444        || uid == current_creds.euid
445        || uid == current_creds.saved_uid
446        || security::is_task_capable_noaudit(current_task, CAP_SETUID)
447}
448
449fn new_gid_allowed(current_task: &CurrentTask, gid: gid_t) -> bool {
450    let current_creds = current_task.current_creds();
451    gid == current_creds.gid
452        || gid == current_creds.egid
453        || gid == current_creds.saved_gid
454        || security::is_task_capable_noaudit(current_task, CAP_SETGID)
455}
456
457pub fn sys_getuid(
458    _locked: &mut Locked<Unlocked>,
459    current_task: &CurrentTask,
460) -> Result<uid_t, Errno> {
461    Ok(current_task.current_creds().uid)
462}
463
464pub fn sys_getgid(
465    _locked: &mut Locked<Unlocked>,
466    current_task: &CurrentTask,
467) -> Result<gid_t, Errno> {
468    Ok(current_task.current_creds().gid)
469}
470
471pub fn sys_setuid(
472    _locked: &mut Locked<Unlocked>,
473    current_task: &CurrentTask,
474    uid: uid_t,
475) -> Result<(), Errno> {
476    if uid == uid_t::MAX {
477        return error!(EINVAL);
478    }
479    if !new_uid_allowed(&current_task, uid) {
480        return error!(EPERM);
481    }
482
483    let prev = current_task.current_creds();
484    let mut creds = Credentials::clone(&prev);
485    creds.euid = uid;
486    creds.fsuid = uid;
487    if security::is_task_capable_noaudit(current_task, CAP_SETUID) {
488        creds.uid = uid;
489        creds.saved_uid = uid;
490    }
491
492    creds.update_capabilities(&prev);
493    std::mem::drop(prev);
494    current_task.set_creds(creds);
495    Ok(())
496}
497
498pub fn sys_setgid(
499    _locked: &mut Locked<Unlocked>,
500    current_task: &CurrentTask,
501    gid: gid_t,
502) -> Result<(), Errno> {
503    if gid == gid_t::MAX {
504        return error!(EINVAL);
505    }
506    if !new_gid_allowed(&current_task, gid) {
507        return error!(EPERM);
508    }
509
510    let mut creds = Credentials::clone(&current_task.current_creds());
511    creds.egid = gid;
512    creds.fsgid = gid;
513    if security::is_task_capable_noaudit(current_task, CAP_SETGID) {
514        creds.gid = gid;
515        creds.saved_gid = gid;
516    }
517    current_task.set_creds(creds);
518    Ok(())
519}
520
521pub fn sys_geteuid(
522    _locked: &mut Locked<Unlocked>,
523    current_task: &CurrentTask,
524) -> Result<uid_t, Errno> {
525    Ok(current_task.current_creds().euid)
526}
527
528pub fn sys_getegid(
529    _locked: &mut Locked<Unlocked>,
530    current_task: &CurrentTask,
531) -> Result<gid_t, Errno> {
532    Ok(current_task.current_creds().egid)
533}
534
535pub fn sys_setfsuid(
536    _locked: &mut Locked<Unlocked>,
537    current_task: &CurrentTask,
538    fsuid: uid_t,
539) -> Result<uid_t, Errno> {
540    let prev = current_task.current_creds();
541    let prev_fsuid = prev.fsuid;
542    if fsuid != u32::MAX && new_uid_allowed(&current_task, fsuid) {
543        let mut creds = Credentials::clone(&prev);
544        creds.fsuid = fsuid;
545        creds.update_capabilities(&prev);
546        std::mem::drop(prev);
547        current_task.set_creds(creds);
548    }
549
550    Ok(prev_fsuid)
551}
552
553pub fn sys_setfsgid(
554    _locked: &mut Locked<Unlocked>,
555    current_task: &CurrentTask,
556    fsgid: gid_t,
557) -> Result<gid_t, Errno> {
558    let prev = current_task.current_creds();
559    let prev_fsgid = prev.fsgid;
560
561    if fsgid != u32::MAX && new_gid_allowed(&current_task, fsgid) {
562        let mut creds = Credentials::clone(&prev);
563        creds.fsgid = fsgid;
564        creds.update_capabilities(&prev);
565        std::mem::drop(prev);
566        current_task.set_creds(creds);
567    }
568
569    Ok(prev_fsgid)
570}
571
572pub fn sys_getresuid(
573    _locked: &mut Locked<Unlocked>,
574    current_task: &CurrentTask,
575    ruid_addr: UserRef<uid_t>,
576    euid_addr: UserRef<uid_t>,
577    suid_addr: UserRef<uid_t>,
578) -> Result<(), Errno> {
579    let creds = current_task.current_creds();
580    current_task.write_object(ruid_addr, &creds.uid)?;
581    current_task.write_object(euid_addr, &creds.euid)?;
582    current_task.write_object(suid_addr, &creds.saved_uid)?;
583    Ok(())
584}
585
586pub fn sys_getresgid(
587    _locked: &mut Locked<Unlocked>,
588    current_task: &CurrentTask,
589    rgid_addr: UserRef<gid_t>,
590    egid_addr: UserRef<gid_t>,
591    sgid_addr: UserRef<gid_t>,
592) -> Result<(), Errno> {
593    let creds = current_task.current_creds();
594    current_task.write_object(rgid_addr, &creds.gid)?;
595    current_task.write_object(egid_addr, &creds.egid)?;
596    current_task.write_object(sgid_addr, &creds.saved_gid)?;
597    Ok(())
598}
599
600pub fn sys_setreuid(
601    _locked: &mut Locked<Unlocked>,
602    current_task: &CurrentTask,
603    ruid: uid_t,
604    euid: uid_t,
605) -> Result<(), Errno> {
606    // Linux __sys_setreuid() uses asymmetric checks: ruid cannot be set
607    // to saved_uid, while euid can. This prevents regaining root via
608    // setreuid after a privilege drop when setresuid would be required.
609    let validate_ruid = |uid: uid_t| {
610        let creds = current_task.current_creds();
611        uid == u32::MAX
612            || uid == creds.uid
613            || uid == creds.euid
614            || security::is_task_capable_noaudit(current_task, CAP_SETUID)
615    };
616    let validate_euid = |uid: uid_t| {
617        let creds = current_task.current_creds();
618        uid == u32::MAX
619            || uid == creds.uid
620            || uid == creds.euid
621            || uid == creds.saved_uid
622            || security::is_task_capable_noaudit(current_task, CAP_SETUID)
623    };
624    if !validate_ruid(ruid) || !validate_euid(euid) {
625        return error!(EPERM);
626    }
627
628    let prev = current_task.current_creds();
629    let mut creds = Credentials::clone(&prev);
630    let is_ruid_set = ruid != u32::MAX;
631    if is_ruid_set {
632        creds.uid = ruid;
633    }
634    let is_euid_set = euid != u32::MAX;
635    if is_euid_set {
636        creds.euid = euid;
637        creds.fsuid = euid;
638    }
639
640    // If the real user ID is set (i.e., ruid is not -1) or the effective
641    // user ID is set to a value not equal to the previous real user ID,
642    // the saved set-user-ID will be set to the new effective user ID.
643    if is_ruid_set || (is_euid_set && euid != prev.uid) {
644        creds.saved_uid = creds.euid;
645    }
646
647    creds.update_capabilities(&prev);
648    std::mem::drop(prev);
649    current_task.set_creds(creds);
650    Ok(())
651}
652
653pub fn sys_setregid(
654    _locked: &mut Locked<Unlocked>,
655    current_task: &CurrentTask,
656    rgid: gid_t,
657    egid: gid_t,
658) -> Result<(), Errno> {
659    // Same asymmetric permission model as setreuid — see above.
660    let validate_rgid = |gid: gid_t| {
661        let creds = current_task.current_creds();
662        gid == u32::MAX
663            || gid == creds.gid
664            || gid == creds.egid
665            || security::is_task_capable_noaudit(current_task, CAP_SETGID)
666    };
667    let validate_egid = |gid: gid_t| {
668        let creds = current_task.current_creds();
669        gid == u32::MAX
670            || gid == creds.gid
671            || gid == creds.egid
672            || gid == creds.saved_gid
673            || security::is_task_capable_noaudit(current_task, CAP_SETGID)
674    };
675    if !validate_rgid(rgid) || !validate_egid(egid) {
676        return error!(EPERM);
677    }
678
679    let mut creds = Credentials::clone(&current_task.current_creds());
680    let previous_rgid = creds.gid;
681    let is_rgid_set = rgid != u32::MAX;
682    if is_rgid_set {
683        creds.gid = rgid;
684    }
685    let is_egid_set = egid != u32::MAX;
686    if is_egid_set {
687        creds.egid = egid;
688        creds.fsgid = egid;
689    }
690
691    // If the real group ID is set (i.e., rgid is not -1) or the effective
692    // group ID is set to a value not equal to the previous real group ID,
693    // the saved set-group-ID will be set to the new effective group ID.
694    if is_rgid_set || (is_egid_set && egid != previous_rgid) {
695        creds.saved_gid = creds.egid;
696    }
697
698    current_task.set_creds(creds);
699    Ok(())
700}
701
702pub fn sys_setresuid(
703    _locked: &mut Locked<Unlocked>,
704    current_task: &CurrentTask,
705    ruid: uid_t,
706    euid: uid_t,
707    suid: uid_t,
708) -> Result<(), Errno> {
709    let allowed = |uid| uid == u32::MAX || new_uid_allowed(&current_task, uid);
710    if !allowed(ruid) || !allowed(euid) || !allowed(suid) {
711        return error!(EPERM);
712    }
713
714    let prev = current_task.current_creds();
715    let mut creds = Credentials::clone(&prev);
716    if ruid != u32::MAX {
717        creds.uid = ruid;
718    }
719    if euid != u32::MAX {
720        creds.euid = euid;
721        creds.fsuid = euid;
722    }
723    if suid != u32::MAX {
724        creds.saved_uid = suid;
725    }
726    creds.update_capabilities(&prev);
727    std::mem::drop(prev);
728    current_task.set_creds(creds);
729    Ok(())
730}
731
732pub fn sys_setresgid(
733    _locked: &mut Locked<Unlocked>,
734    current_task: &CurrentTask,
735    rgid: gid_t,
736    egid: gid_t,
737    sgid: gid_t,
738) -> Result<(), Errno> {
739    let allowed = |gid| gid == u32::MAX || new_gid_allowed(&current_task, gid);
740    if !allowed(rgid) || !allowed(egid) || !allowed(sgid) {
741        return error!(EPERM);
742    }
743
744    let mut creds = Credentials::clone(&current_task.current_creds());
745    if rgid != u32::MAX {
746        creds.gid = rgid;
747    }
748    if egid != u32::MAX {
749        creds.egid = egid;
750        creds.fsgid = egid;
751    }
752    if sgid != u32::MAX {
753        creds.saved_gid = sgid;
754    }
755    current_task.set_creds(creds);
756    Ok(())
757}
758
759pub fn sys_exit(
760    _locked: &mut Locked<Unlocked>,
761    current_task: &CurrentTask,
762    code: i32,
763) -> Result<(), Errno> {
764    // Only change the current exit status if this has not been already set by exit_group, as
765    // otherwise it has priority.
766    current_task.write().set_exit_status_if_not_already(ExitStatus::Exit(code as u8));
767    Ok(())
768}
769
770pub fn sys_exit_group(
771    locked: &mut Locked<Unlocked>,
772    current_task: &mut CurrentTask,
773    code: i32,
774) -> Result<(), Errno> {
775    current_task.kill_thread_group(locked, ExitStatus::Exit(code as u8));
776    Ok(())
777}
778
779pub fn sys_sched_getscheduler(
780    _locked: &mut Locked<Unlocked>,
781    current_task: &CurrentTask,
782    pid: pid_t,
783) -> Result<u32, Errno> {
784    if pid < 0 {
785        return error!(EINVAL);
786    }
787
788    let target_task = get_task_or_current(current_task, pid)?;
789    security::check_task_getscheduler_access(current_task, target_task.as_ref())?;
790    let current_scheduler_state = target_task.read().scheduler_state;
791    Ok(current_scheduler_state.policy_for_sched_getscheduler())
792}
793
794pub fn sys_sched_setscheduler(
795    locked: &mut Locked<Unlocked>,
796    current_task: &CurrentTask,
797    pid: pid_t,
798    policy: u32,
799    param: UserRef<sched_param>,
800) -> Result<(), Errno> {
801    // Parse & validate the arguments.
802    if pid < 0 || param.is_null() {
803        return error!(EINVAL);
804    }
805
806    let target_task = get_task_or_current(current_task, pid)?;
807
808    let reset_on_fork = policy & SCHED_RESET_ON_FORK != 0;
809
810    let policy = SchedulingPolicy::try_from(policy & !SCHED_RESET_ON_FORK)?;
811    let realtime_priority =
812        policy.realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
813
814    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
815    let current_state = target_task.read().scheduler_state;
816
817    // Check capabilities and permissions, if required, for the operation.
818    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
819    let strengthening = current_state.realtime_priority < realtime_priority;
820    let rlimited = strengthening
821        && realtime_priority
822            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
823    let clearing_reset_on_fork = current_state.reset_on_fork && !reset_on_fork;
824    let caught_in_idle_trap = current_state.policy == SchedulingPolicy::Idle
825        && policy != SchedulingPolicy::Idle
826        && current_state
827            .normal_priority
828            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
829    if !euid_friendly || rlimited || clearing_reset_on_fork || caught_in_idle_trap {
830        security::check_task_capable(current_task, CAP_SYS_NICE)?;
831    }
832
833    security::check_task_setscheduler_access(current_task, &target_task)?;
834
835    // Apply the new scheduler configuration to the task.
836    target_task.set_scheduler_policy_priority_and_reset_on_fork(
837        policy,
838        realtime_priority,
839        reset_on_fork,
840    )?;
841
842    Ok(())
843}
844
/// Size in bytes of a [`CpuSet`] mask; with one bit per CPU it can describe up to
/// `CPU_SET_SIZE * 8` CPUs.
const CPU_SET_SIZE: usize = 128;
846
/// Fixed-size CPU affinity bitmask exchanged with user space by the affinity syscalls.
#[repr(C)]
#[derive(Debug, Copy, Clone, IntoBytes, FromBytes, KnownLayout, Immutable)]
pub struct CpuSet {
    /// Raw mask bytes. As filled in by `get_default_cpu_set`, CPU `n` is bit `n % 8` of byte
    /// `n / 8`.
    bits: [u8; CPU_SET_SIZE],
}
852
853impl Default for CpuSet {
854    fn default() -> Self {
855        Self { bits: [0; CPU_SET_SIZE] }
856    }
857}
858
859fn check_cpu_set_alignment(current_task: &CurrentTask, cpusetsize: u32) -> Result<(), Errno> {
860    let alignment = if current_task.is_arch32() { 4 } else { 8 };
861    if cpusetsize < alignment || cpusetsize % alignment != 0 {
862        return error!(EINVAL);
863    }
864    Ok(())
865}
866
867fn get_default_cpu_set() -> CpuSet {
868    let mut result = CpuSet::default();
869    let mut cpus_count = zx::system_get_num_cpus();
870    let cpus_count_max = (CPU_SET_SIZE * 8) as u32;
871    if cpus_count > cpus_count_max {
872        log_error!("cpus_count={cpus_count}, greater than the {cpus_count_max} max supported.");
873        cpus_count = cpus_count_max;
874    }
875    let mut index = 0;
876    while cpus_count > 0 {
877        let count = std::cmp::min(cpus_count, 8);
878        let (shl, overflow) = 1_u8.overflowing_shl(count);
879        let mask = if overflow { u8::max_value() } else { shl - 1 };
880        result.bits[index] = mask;
881        index += 1;
882        cpus_count -= count;
883    }
884    result
885}
886
887pub fn sys_sched_getaffinity(
888    _locked: &mut Locked<Unlocked>,
889    current_task: &CurrentTask,
890    pid: pid_t,
891    cpusetsize: u32,
892    user_mask: UserAddress,
893) -> Result<usize, Errno> {
894    if pid < 0 {
895        return error!(EINVAL);
896    }
897
898    check_cpu_set_alignment(current_task, cpusetsize)?;
899
900    let target_task = get_task_or_current(current_task, pid)?;
901    security::check_task_getscheduler_access(current_task, &target_task)?;
902
903    // sched_setaffinity() is not implemented. Fake affinity mask based on the number of CPUs.
904    let mask = get_default_cpu_set();
905    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
906    current_task.write_memory(user_mask, &mask.bits[..mask_size])?;
907    track_stub!(TODO("https://fxbug.dev/322874659"), "sched_getaffinity");
908    Ok(mask_size)
909}
910
911pub fn sys_sched_setaffinity(
912    _locked: &mut Locked<Unlocked>,
913    current_task: &CurrentTask,
914    pid: pid_t,
915    cpusetsize: u32,
916    user_mask: UserAddress,
917) -> Result<(), Errno> {
918    if pid < 0 {
919        return error!(EINVAL);
920    }
921    let target_task = get_task_or_current(current_task, pid)?;
922
923    check_cpu_set_alignment(current_task, cpusetsize)?;
924
925    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
926    let mut mask = CpuSet::default();
927    current_task.read_memory_to_slice(user_mask, &mut mask.bits[..mask_size])?;
928
929    // Specified mask must include at least one valid CPU.
930    let max_mask = get_default_cpu_set();
931    let mut has_valid_cpu_in_mask = false;
932    for (l1, l2) in std::iter::zip(max_mask.bits, mask.bits) {
933        has_valid_cpu_in_mask = has_valid_cpu_in_mask || (l1 & l2 > 0);
934    }
935    if !has_valid_cpu_in_mask {
936        return error!(EINVAL);
937    }
938
939    if !current_task.is_euid_friendly_with(&target_task) {
940        security::check_task_capable(current_task, CAP_SYS_NICE)?;
941    }
942
943    security::check_task_setscheduler_access(current_task, &target_task)?;
944
945    // Currently, we ignore the mask and act as if the system reset the mask
946    // immediately to allowing all CPUs.
947    track_stub!(TODO("https://fxbug.dev/322874889"), "sched_setaffinity");
948    Ok(())
949}
950
951pub fn sys_sched_getparam(
952    _locked: &mut Locked<Unlocked>,
953    current_task: &CurrentTask,
954    pid: pid_t,
955    param: UserRef<sched_param>,
956) -> Result<(), Errno> {
957    if pid < 0 || param.is_null() {
958        return error!(EINVAL);
959    }
960
961    let target_task = get_task_or_current(current_task, pid)?;
962    let param_value = target_task.read().scheduler_state.get_sched_param();
963    current_task.write_object(param, &param_value)?;
964    Ok(())
965}
966
967pub fn sys_sched_setparam(
968    locked: &mut Locked<Unlocked>,
969    current_task: &CurrentTask,
970    pid: pid_t,
971    param: UserRef<sched_param>,
972) -> Result<(), Errno> {
973    // Parse & validate the arguments.
974    if pid < 0 || param.is_null() {
975        return error!(EINVAL);
976    }
977    let target_task = get_task_or_current(current_task, pid)?;
978
979    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
980    let current_state = target_task.read().scheduler_state;
981
982    let realtime_priority = current_state
983        .policy
984        .realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
985
986    // Check capabilities and permissions, if required, for the operation.
987    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
988    let strengthening = current_state.realtime_priority < realtime_priority;
989    let rlimited = strengthening
990        && realtime_priority
991            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
992    if !euid_friendly || rlimited {
993        security::check_task_capable(current_task, CAP_SYS_NICE)?;
994    }
995
996    security::check_task_setscheduler_access(current_task, &target_task)?;
997
998    // Apply the new scheduler configuration to the task.
999    target_task.set_scheduler_priority(realtime_priority)?;
1000
1001    Ok(())
1002}
1003
/// Implements `sched_get_priority_min`.
///
/// Forwards the raw `policy` value to `min_priority_for_sched_policy` and returns its result,
/// including any error it reports. Neither the lock context nor the calling task is consulted.
pub fn sys_sched_get_priority_min(
    _locked: &mut Locked<Unlocked>,
    _ctx: &CurrentTask,
    policy: u32,
) -> Result<u8, Errno> {
    min_priority_for_sched_policy(policy)
}
1011
/// Implements `sched_get_priority_max`.
///
/// Forwards the raw `policy` value to `max_priority_for_sched_policy` and returns its result,
/// including any error it reports. Neither the lock context nor the calling task is consulted.
pub fn sys_sched_get_priority_max(
    _locked: &mut Locked<Unlocked>,
    _ctx: &CurrentTask,
    policy: u32,
) -> Result<u8, Errno> {
    max_priority_for_sched_policy(policy)
}
1019
/// Stub for `ioprio_set`.
///
/// All arguments are ignored: the call is recorded through `track_stub!` and always fails with
/// `ENOSYS`.
pub fn sys_ioprio_set(
    _locked: &mut Locked<Unlocked>,
    _current_task: &mut CurrentTask,
    _which: i32,
    _who: i32,
    _ioprio: i32,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297591758"), "ioprio_set()");
    error!(ENOSYS)
}
1030
1031pub fn sys_prctl(
1032    locked: &mut Locked<Unlocked>,
1033    current_task: &mut CurrentTask,
1034    option: u32,
1035    arg2: u64,
1036    arg3: u64,
1037    arg4: u64,
1038    arg5: u64,
1039) -> Result<SyscallResult, Errno> {
1040    match option {
1041        PR_SET_VMA => {
1042            if arg2 != PR_SET_VMA_ANON_NAME as u64 {
1043                track_stub!(TODO("https://fxbug.dev/322874826"), "prctl PR_SET_VMA", arg2);
1044                return error!(ENOSYS);
1045            }
1046            let addr = UserAddress::from(arg3);
1047            let length = arg4 as usize;
1048            let name_addr = UserAddress::from(arg5);
1049            let name = if name_addr.is_null() {
1050                None
1051            } else {
1052                let name = UserCString::new(current_task, UserAddress::from(arg5));
1053                let name = current_task.read_c_string_to_vec(name, 256).map_err(|e| {
1054                    // An overly long name produces EINVAL and not ENAMETOOLONG in Linux 5.15.
1055                    if e.code == ENAMETOOLONG { errno!(EINVAL) } else { e }
1056                })?;
1057                // Some characters are forbidden in VMA names.
1058                if name.iter().any(|b| {
1059                    matches!(b,
1060                        0..=0x1f |
1061                        0x7f..=0xff |
1062                        b'\\' | b'`' | b'$' | b'[' | b']'
1063                    )
1064                }) {
1065                    return error!(EINVAL);
1066                }
1067                Some(name)
1068            };
1069            current_task.mm()?.set_mapping_name(addr, length, name)?;
1070            Ok(().into())
1071        }
1072        PR_SET_DUMPABLE => {
1073            let mm = current_task.mm()?;
1074            let mut dumpable = mm.dumpable.lock(locked);
1075            *dumpable = if arg2 == 1 { DumpPolicy::User } else { DumpPolicy::Disable };
1076            Ok(().into())
1077        }
1078        PR_GET_DUMPABLE => {
1079            let mm = current_task.mm()?;
1080            let dumpable = mm.dumpable.lock(locked);
1081            Ok(match *dumpable {
1082                DumpPolicy::Disable => 0.into(),
1083                DumpPolicy::User => 1.into(),
1084            })
1085        }
1086        PR_SET_PDEATHSIG => {
1087            track_stub!(TODO("https://fxbug.dev/322874397"), "PR_SET_PDEATHSIG");
1088            Ok(().into())
1089        }
1090        PR_SET_NAME => {
1091            let addr = UserAddress::from(arg2);
1092            let name = TaskCommand::new(&current_task.read_memory_to_array::<16>(addr)?);
1093            current_task.set_command_name(name);
1094            if current_task.tid == current_task.thread_group.leader {
1095                current_task.thread_group.sync_syscall_log_level();
1096            }
1097            Ok(0.into())
1098        }
1099        PR_GET_NAME => {
1100            let addr = UserAddress::from(arg2);
1101            let name = current_task.command().prctl_name();
1102            current_task.write_memory(addr, &name[..])?;
1103            Ok(().into())
1104        }
1105        PR_SET_PTRACER => {
1106            let allowed_ptracers = if arg2 == PR_SET_PTRACER_ANY as u64 {
1107                PtraceAllowedPtracers::Any
1108            } else if arg2 == 0 {
1109                PtraceAllowedPtracers::None
1110            } else {
1111                if current_task.kernel().pids.read().get_task(arg2 as i32).is_err() {
1112                    return error!(EINVAL);
1113                }
1114                PtraceAllowedPtracers::Some(arg2 as pid_t)
1115            };
1116            current_task.thread_group().write().allowed_ptracers = allowed_ptracers;
1117            Ok(().into())
1118        }
1119        PR_GET_KEEPCAPS => {
1120            Ok(current_task.current_creds().securebits.contains(SecureBits::KEEP_CAPS).into())
1121        }
1122        PR_SET_KEEPCAPS => {
1123            if arg2 != 0 && arg2 != 1 {
1124                return error!(EINVAL);
1125            }
1126            let mut creds = Credentials::clone(&current_task.current_creds());
1127            let mut securebits = creds.securebits;
1128            securebits.set(SecureBits::KEEP_CAPS, arg2 != 0);
1129            creds.set_securebits(securebits)?;
1130            current_task.set_creds(creds);
1131            Ok(().into())
1132        }
1133        PR_SET_NO_NEW_PRIVS => {
1134            // If any args are set other than arg2 to 1, this should return einval
1135            if arg2 != 1 || arg3 != 0 || arg4 != 0 || arg5 != 0 {
1136                return error!(EINVAL);
1137            }
1138            current_task.write().enable_no_new_privs();
1139            Ok(().into())
1140        }
1141        PR_GET_NO_NEW_PRIVS => {
1142            // If any args are set, this should return einval
1143            if arg2 != 0 || arg3 != 0 || arg4 != 0 {
1144                return error!(EINVAL);
1145            }
1146            Ok(current_task.read().no_new_privs().into())
1147        }
1148        PR_GET_SECCOMP => {
1149            if current_task.seccomp_filter_state.get() == SeccompStateValue::None {
1150                Ok(0.into())
1151            } else {
1152                Ok(2.into())
1153            }
1154        }
1155        PR_SET_SECCOMP => {
1156            if arg2 == SECCOMP_MODE_STRICT as u64 {
1157                return sys_seccomp(
1158                    locked,
1159                    current_task,
1160                    SECCOMP_SET_MODE_STRICT,
1161                    0,
1162                    UserAddress::NULL,
1163                );
1164            } else if arg2 == SECCOMP_MODE_FILTER as u64 {
1165                return sys_seccomp(locked, current_task, SECCOMP_SET_MODE_FILTER, 0, arg3.into());
1166            }
1167            Ok(().into())
1168        }
1169        PR_GET_CHILD_SUBREAPER => {
1170            let addr = UserAddress::from(arg2);
1171            #[allow(clippy::bool_to_int_with_if)]
1172            let value: i32 =
1173                if current_task.thread_group().read().is_child_subreaper { 1 } else { 0 };
1174            current_task.write_object(addr.into(), &value)?;
1175            Ok(().into())
1176        }
1177        PR_SET_CHILD_SUBREAPER => {
1178            current_task.thread_group().write().is_child_subreaper = arg2 != 0;
1179            Ok(().into())
1180        }
1181        PR_GET_SECUREBITS => Ok(current_task.current_creds().securebits.bits().into()),
1182        PR_SET_SECUREBITS => {
1183            security::check_task_capable(current_task, CAP_SETPCAP)?;
1184
1185            let securebits = SecureBits::from_bits(arg2 as u32).ok_or_else(|| {
1186                track_stub!(TODO("https://fxbug.dev/322875244"), "PR_SET_SECUREBITS", arg2);
1187                errno!(ENOSYS)
1188            })?;
1189
1190            let mut creds = Credentials::clone(&current_task.current_creds());
1191            creds.set_securebits(securebits)?;
1192            current_task.set_creds(creds);
1193            Ok(().into())
1194        }
1195        PR_CAPBSET_READ => {
1196            let cap = Capabilities::try_from(arg2)?;
1197            Ok(current_task.current_creds().cap_bounding.contains(cap).into())
1198        }
1199        PR_CAPBSET_DROP => {
1200            let mut creds = Credentials::clone(&current_task.current_creds());
1201            security::check_task_capable(current_task, CAP_SETPCAP)?;
1202
1203            creds.cap_bounding.remove(Capabilities::try_from(arg2)?);
1204            current_task.set_creds(creds);
1205            Ok(().into())
1206        }
1207        PR_CAP_AMBIENT => {
1208            let operation = arg2 as u32;
1209            let capability_arg = Capabilities::try_from(arg3)?;
1210            if arg4 != 0 || arg5 != 0 {
1211                return error!(EINVAL);
1212            }
1213
1214            // TODO(security): We don't currently validate capabilities, but this should return an
1215            // error if the capability_arg is invalid.
1216            match operation {
1217                PR_CAP_AMBIENT_RAISE => {
1218                    let mut creds = Credentials::clone(&current_task.current_creds());
1219                    if !(creds.cap_permitted.contains(capability_arg)
1220                        && creds.cap_inheritable.contains(capability_arg))
1221                    {
1222                        return error!(EPERM);
1223                    }
1224                    if creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE) {
1225                        return error!(EPERM);
1226                    }
1227
1228                    creds.cap_ambient.insert(capability_arg);
1229                    current_task.set_creds(creds);
1230                    Ok(().into())
1231                }
1232                PR_CAP_AMBIENT_LOWER => {
1233                    let mut creds = Credentials::clone(&current_task.current_creds());
1234                    creds.cap_ambient.remove(capability_arg);
1235                    current_task.set_creds(creds);
1236                    Ok(().into())
1237                }
1238                PR_CAP_AMBIENT_IS_SET => {
1239                    Ok(current_task.current_creds().cap_ambient.contains(capability_arg).into())
1240                }
1241                PR_CAP_AMBIENT_CLEAR_ALL => {
1242                    if arg3 != 0 {
1243                        return error!(EINVAL);
1244                    }
1245
1246                    let mut creds = Credentials::clone(&current_task.current_creds());
1247                    creds.cap_ambient = Capabilities::empty();
1248                    current_task.set_creds(creds);
1249                    Ok(().into())
1250                }
1251                _ => error!(EINVAL),
1252            }
1253        }
1254        PR_SET_TIMERSLACK => {
1255            current_task.write().set_timerslack_ns(arg2);
1256            Ok(().into())
1257        }
1258        #[cfg(target_arch = "aarch64")]
1259        PR_GET_TAGGED_ADDR_CTRL => {
1260            track_stub!(TODO("https://fxbug.dev/408554469"), "PR_GET_TAGGED_ADDR_CTRL");
1261            Ok(0.into())
1262        }
1263        #[cfg(target_arch = "aarch64")]
1264        PR_SET_TAGGED_ADDR_CTRL => match u32::try_from(arg2).map_err(|_| errno!(EINVAL))? {
1265            // Only untagged pointers are allowed, the default.
1266            0 => Ok(().into()),
1267            PR_TAGGED_ADDR_ENABLE => {
1268                track_stub!(TODO("https://fxbug.dev/408554469"), "PR_TAGGED_ADDR_ENABLE");
1269                error!(EINVAL)
1270            }
1271            unknown_mode => {
1272                track_stub!(
1273                    TODO("https://fxbug.dev/408554469"),
1274                    "PR_SET_TAGGED_ADDR_CTRL unknown mode",
1275                    unknown_mode,
1276                );
1277                error!(EINVAL)
1278            }
1279        },
1280        _ => {
1281            track_stub!(TODO("https://fxbug.dev/322874733"), "prctl fallthrough", option);
1282            error!(ENOSYS)
1283        }
1284    }
1285}
1286
1287pub fn sys_ptrace(
1288    locked: &mut Locked<Unlocked>,
1289    current_task: &mut CurrentTask,
1290    request: u32,
1291    pid: pid_t,
1292    addr: UserAddress,
1293    data: UserAddress,
1294) -> Result<SyscallResult, Errno> {
1295    match request {
1296        PTRACE_TRACEME => ptrace_traceme(current_task),
1297        PTRACE_ATTACH => ptrace_attach(locked, current_task, pid, PtraceAttachType::Attach, data),
1298        PTRACE_SEIZE => ptrace_attach(locked, current_task, pid, PtraceAttachType::Seize, data),
1299        _ => ptrace_dispatch(locked, current_task, request, pid, addr, data),
1300    }
1301}
1302
1303pub fn sys_set_tid_address(
1304    _locked: &mut Locked<Unlocked>,
1305    current_task: &CurrentTask,
1306    user_tid: UserRef<pid_t>,
1307) -> Result<pid_t, Errno> {
1308    current_task.write().clear_child_tid = user_tid;
1309    Ok(current_task.get_tid())
1310}
1311
1312pub fn sys_getrusage(
1313    _locked: &mut Locked<Unlocked>,
1314    current_task: &CurrentTask,
1315    who: i32,
1316    user_usage: RUsagePtr,
1317) -> Result<(), Errno> {
1318    const RUSAGE_SELF: i32 = starnix_uapi::uapi::RUSAGE_SELF as i32;
1319    const RUSAGE_THREAD: i32 = starnix_uapi::uapi::RUSAGE_THREAD as i32;
1320    track_stub!(TODO("https://fxbug.dev/297370242"), "real rusage");
1321    let time_stats = match who {
1322        RUSAGE_CHILDREN => current_task.task.thread_group().read().children_time_stats,
1323        RUSAGE_SELF => current_task.task.thread_group().time_stats(),
1324        RUSAGE_THREAD => current_task.task.time_stats(),
1325        _ => return error!(EINVAL),
1326    };
1327
1328    let usage = rusage {
1329        ru_utime: timeval_from_duration(time_stats.user_time),
1330        ru_stime: timeval_from_duration(time_stats.system_time),
1331        ..rusage::default()
1332    };
1333    current_task.write_multi_arch_object(user_usage, usage)?;
1334
1335    Ok(())
1336}
1337
/// User-space reference to an `rlimit`, parameterized over both the native `uapi::rlimit` and
/// the `uapi::arch32::rlimit` representation.
type PrLimitRef = MultiArchUserRef<uapi::rlimit, uapi::arch32::rlimit>;
1339
1340pub fn sys_getrlimit(
1341    locked: &mut Locked<Unlocked>,
1342    current_task: &CurrentTask,
1343    resource: u32,
1344    user_rlimit: PrLimitRef,
1345) -> Result<(), Errno> {
1346    do_prlimit64(locked, current_task, 0, resource, PrLimitRef::null(current_task), user_rlimit)
1347}
1348
1349pub fn sys_setrlimit(
1350    locked: &mut Locked<Unlocked>,
1351    current_task: &CurrentTask,
1352    resource: u32,
1353    user_rlimit: PrLimitRef,
1354) -> Result<(), Errno> {
1355    do_prlimit64(locked, current_task, 0, resource, user_rlimit, PrLimitRef::null(current_task))
1356}
1357
1358pub fn sys_prlimit64(
1359    locked: &mut Locked<Unlocked>,
1360    current_task: &CurrentTask,
1361    pid: pid_t,
1362    user_resource: u32,
1363    new_limit_ref: UserRef<uapi::rlimit>,
1364    old_limit_ref: UserRef<uapi::rlimit>,
1365) -> Result<(), Errno> {
1366    do_prlimit64::<uapi::rlimit>(
1367        locked,
1368        current_task,
1369        pid,
1370        user_resource,
1371        new_limit_ref.into(),
1372        old_limit_ref.into(),
1373    )
1374}
1375
1376pub fn do_prlimit64<T>(
1377    locked: &mut Locked<Unlocked>,
1378    current_task: &CurrentTask,
1379    pid: pid_t,
1380    user_resource: u32,
1381    new_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1382    old_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1383) -> Result<(), Errno>
1384where
1385    T: FromBytes + IntoBytes + Immutable + From<uapi::rlimit> + Into<uapi::rlimit>,
1386{
1387    let target_task = get_task_or_current(current_task, pid)?;
1388
1389    // To get or set the resource of a process other than itself, the caller must have either:
1390    // * the same `uid`, `euid`, `saved_uid`, `gid`, `egid`, `saved_gid` as the target.
1391    // * the CAP_SYS_RESOURCE
1392    if current_task.get_pid() != target_task.get_pid() {
1393        let self_creds = current_task.current_creds();
1394        let target_creds = target_task.real_creds();
1395        if self_creds.uid != target_creds.uid
1396            || self_creds.euid != target_creds.euid
1397            || self_creds.saved_uid != target_creds.saved_uid
1398            || self_creds.gid != target_creds.gid
1399            || self_creds.egid != target_creds.egid
1400            || self_creds.saved_gid != target_creds.saved_gid
1401        {
1402            security::check_task_capable(current_task, CAP_SYS_RESOURCE)?;
1403        }
1404        security::task_prlimit(
1405            current_task,
1406            &target_task,
1407            !old_limit_ref.is_null(),
1408            !new_limit_ref.is_null(),
1409        )?;
1410    }
1411
1412    let resource = Resource::from_raw(user_resource)?;
1413
1414    let old_limit = match resource {
1415        // TODO: Integrate Resource::STACK with generic ResourceLimits machinery.
1416        Resource::STACK => {
1417            if !new_limit_ref.is_null() {
1418                track_stub!(
1419                    TODO("https://fxbug.dev/322874791"),
1420                    "prlimit64 cannot set RLIMIT_STACK"
1421                );
1422            }
1423            // The stack size is fixed at the moment, but
1424            // if MAP_GROWSDOWN is implemented this should
1425            // report the limit that it can be grown.
1426            let mm = target_task.mm()?;
1427            let mm_state = mm.state.read();
1428            let stack_size = mm_state.stack_size as u64;
1429            rlimit { rlim_cur: stack_size, rlim_max: stack_size }
1430        }
1431        _ => {
1432            let new_limit = if new_limit_ref.is_null() {
1433                None
1434            } else {
1435                let new_limit = current_task.read_multi_arch_object(new_limit_ref)?;
1436                if new_limit.rlim_cur > new_limit.rlim_max {
1437                    return error!(EINVAL);
1438                }
1439                Some(new_limit)
1440            };
1441            ThreadGroup::adjust_rlimits(locked, current_task, &target_task, resource, new_limit)?
1442        }
1443    };
1444    if !old_limit_ref.is_null() {
1445        current_task.write_multi_arch_object(old_limit_ref, old_limit)?;
1446    }
1447    Ok(())
1448}
1449
/// Stub for `quotactl`.
///
/// All arguments are ignored: the call is recorded through `track_stub!` and always fails with
/// `ENOSYS`.
pub fn sys_quotactl(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _cmd: i32,
    _special: UserRef<c_char>,
    _id: i32,
    _addr: UserRef<c_char>,
) -> Result<SyscallResult, Errno> {
    track_stub!(TODO("https://fxbug.dev/297302197"), "quotacl()");
    error!(ENOSYS)
}
1461
1462pub fn sys_capget(
1463    _locked: &mut Locked<Unlocked>,
1464    current_task: &CurrentTask,
1465    user_header: UserRef<__user_cap_header_struct>,
1466    user_data: UserRef<__user_cap_data_struct>,
1467) -> Result<(), Errno> {
1468    let mut header = current_task.read_object(user_header)?;
1469    let is_version_valid =
1470        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
1471            .contains(&header.version);
1472    if !is_version_valid {
1473        header.version = _LINUX_CAPABILITY_VERSION_3;
1474        current_task.write_object(user_header, &header)?;
1475    }
1476    if user_data.is_null() {
1477        return Ok(());
1478    }
1479    if !is_version_valid || header.pid < 0 {
1480        return error!(EINVAL);
1481    }
1482
1483    let target_task = get_task_or_current(current_task, header.pid)?;
1484
1485    security::check_getcap_access(current_task, &target_task)?;
1486
1487    let (permitted, effective, inheritable) = {
1488        let creds = &target_task.real_creds();
1489        (creds.cap_permitted, creds.cap_effective, creds.cap_inheritable)
1490    };
1491
1492    match header.version {
1493        _LINUX_CAPABILITY_VERSION_1 => {
1494            let data: [__user_cap_data_struct; 1] = [__user_cap_data_struct {
1495                effective: effective.as_abi_v1(),
1496                inheritable: inheritable.as_abi_v1(),
1497                permitted: permitted.as_abi_v1(),
1498            }];
1499            current_task.write_objects(user_data, &data)?;
1500        }
1501        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
1502            // Return 64 bit capabilities as two sets of 32 bit capabilities, little endian
1503            let (permitted, effective, inheritable) =
1504                (permitted.as_abi_v3(), effective.as_abi_v3(), inheritable.as_abi_v3());
1505            let data: [__user_cap_data_struct; 2] = [
1506                __user_cap_data_struct {
1507                    effective: effective.0,
1508                    inheritable: inheritable.0,
1509                    permitted: permitted.0,
1510                },
1511                __user_cap_data_struct {
1512                    effective: effective.1,
1513                    inheritable: inheritable.1,
1514                    permitted: permitted.1,
1515                },
1516            ];
1517            current_task.write_objects(user_data, &data)?;
1518        }
1519        _ => {
1520            unreachable!("already returned if Linux capability version is not valid")
1521        }
1522    }
1523    Ok(())
1524}
1525
1526pub fn sys_capset(
1527    _locked: &mut Locked<Unlocked>,
1528    current_task: &CurrentTask,
1529    user_header: UserRef<__user_cap_header_struct>,
1530    user_data: UserRef<__user_cap_data_struct>,
1531) -> Result<(), Errno> {
1532    let mut header = current_task.read_object(user_header)?;
1533    let is_version_valid =
1534        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
1535            .contains(&header.version);
1536    if !is_version_valid {
1537        header.version = _LINUX_CAPABILITY_VERSION_3;
1538        current_task.write_object(user_header, &header)?;
1539        return error!(EINVAL);
1540    }
1541    if header.pid != 0 && header.pid != current_task.tid {
1542        return error!(EPERM);
1543    }
1544
1545    let (new_permitted, new_effective, new_inheritable) = match header.version {
1546        _LINUX_CAPABILITY_VERSION_1 => {
1547            let data = current_task.read_object(user_data)?;
1548            (
1549                Capabilities::from_abi_v1(data.permitted),
1550                Capabilities::from_abi_v1(data.effective),
1551                Capabilities::from_abi_v1(data.inheritable),
1552            )
1553        }
1554        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
1555            let data =
1556                current_task.read_objects_to_array::<__user_cap_data_struct, 2>(user_data)?;
1557            (
1558                Capabilities::from_abi_v3((data[0].permitted, data[1].permitted)),
1559                Capabilities::from_abi_v3((data[0].effective, data[1].effective)),
1560                Capabilities::from_abi_v3((data[0].inheritable, data[1].inheritable)),
1561            )
1562        }
1563        _ => {
1564            unreachable!("already returned if Linux capability version is not valid")
1565        }
1566    };
1567
1568    // Permission checks. Copied out of TLPI section 39.7.
1569    let mut creds = Credentials::clone(&current_task.current_creds());
1570    {
1571        log_trace!(
1572            "Capabilities({{permitted={:?} from {:?}, effective={:?} from {:?}, inheritable={:?} from {:?}}}, bounding={:?})",
1573            new_permitted,
1574            creds.cap_permitted,
1575            new_effective,
1576            creds.cap_effective,
1577            new_inheritable,
1578            creds.cap_inheritable,
1579            creds.cap_bounding
1580        );
1581        if !creds.cap_inheritable.union(creds.cap_permitted).contains(new_inheritable) {
1582            security::check_task_capable(current_task, CAP_SETPCAP)?;
1583        }
1584
1585        if !creds.cap_inheritable.union(creds.cap_bounding).contains(new_inheritable) {
1586            return error!(EPERM);
1587        }
1588        if !creds.cap_permitted.contains(new_permitted) {
1589            return error!(EPERM);
1590        }
1591        if !new_permitted.contains(new_effective) {
1592            return error!(EPERM);
1593        }
1594    }
1595    let target_task = get_task_or_current(current_task, header.pid)?;
1596
1597    security::check_setcap_access(current_task, &target_task)?;
1598
1599    creds.cap_permitted = new_permitted;
1600    creds.cap_effective = new_effective;
1601    creds.cap_inheritable = new_inheritable;
1602    creds.cap_ambient = new_permitted & new_inheritable & creds.cap_ambient;
1603    current_task.set_creds(creds);
1604    Ok(())
1605}
1606
1607pub fn sys_seccomp(
1608    locked: &mut Locked<Unlocked>,
1609    current_task: &mut CurrentTask,
1610    operation: u32,
1611    flags: u32,
1612    args: UserAddress,
1613) -> Result<SyscallResult, Errno> {
1614    match operation {
1615        SECCOMP_SET_MODE_STRICT => {
1616            if flags != 0 || args != UserAddress::NULL {
1617                return error!(EINVAL);
1618            }
1619            current_task.set_seccomp_state(SeccompStateValue::Strict)?;
1620            Ok(().into())
1621        }
1622        SECCOMP_SET_MODE_FILTER => {
1623            if flags
1624                & (SECCOMP_FILTER_FLAG_LOG
1625                    | SECCOMP_FILTER_FLAG_NEW_LISTENER
1626                    | SECCOMP_FILTER_FLAG_SPEC_ALLOW
1627                    | SECCOMP_FILTER_FLAG_TSYNC
1628                    | SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
1629                != flags
1630            {
1631                return error!(EINVAL);
1632            }
1633            if (flags & SECCOMP_FILTER_FLAG_TSYNC == 0)
1634                && (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH != 0)
1635            {
1636                return error!(EINVAL);
1637            }
1638            if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER != 0)
1639                && (flags & SECCOMP_FILTER_FLAG_TSYNC != 0)
1640                && (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH == 0)
1641            {
1642                return error!(EINVAL);
1643            }
1644            let fprog =
1645                current_task.read_multi_arch_object(SockFProgPtr::new(current_task, args))?;
1646            if fprog.len > BPF_MAXINSNS || fprog.len == 0 {
1647                return error!(EINVAL);
1648            }
1649            let code: Vec<sock_filter> =
1650                current_task.read_multi_arch_objects_to_vec(fprog.filter, fprog.len as usize)?;
1651
1652            if !current_task.read().no_new_privs() {
1653                security::check_task_capable(current_task, CAP_SYS_ADMIN)
1654                    .map_err(|_| errno!(EACCES))?;
1655            }
1656            current_task.add_seccomp_filter(locked, code, flags)
1657        }
1658        SECCOMP_GET_ACTION_AVAIL => {
1659            if flags != 0 || args.is_null() {
1660                return error!(EINVAL);
1661            }
1662            let action: u32 = current_task.read_object(UserRef::new(args))?;
1663            SeccompAction::is_action_available(action)
1664        }
1665        SECCOMP_GET_NOTIF_SIZES => {
1666            if flags != 0 {
1667                return error!(EINVAL);
1668            }
1669            track_stub!(TODO("https://fxbug.dev/322874791"), "SECCOMP_GET_NOTIF_SIZES");
1670            error!(ENOSYS)
1671        }
1672        _ => {
1673            track_stub!(TODO("https://fxbug.dev/322874916"), "seccomp fallthrough", operation);
1674            error!(EINVAL)
1675        }
1676    }
1677}
1678
1679pub fn sys_setgroups(
1680    _locked: &mut Locked<Unlocked>,
1681    current_task: &CurrentTask,
1682    size: usize,
1683    groups_addr: UserAddress,
1684) -> Result<(), Errno> {
1685    if size > NGROUPS_MAX as usize {
1686        return error!(EINVAL);
1687    }
1688    let groups = current_task.read_objects_to_vec::<gid_t>(groups_addr.into(), size)?;
1689    security::check_task_capable(current_task, CAP_SETGID)?;
1690    let mut creds = Credentials::clone(&current_task.current_creds());
1691    creds.groups = groups;
1692    current_task.set_creds(creds);
1693    Ok(())
1694}
1695
1696pub fn sys_getgroups(
1697    _locked: &mut Locked<Unlocked>,
1698    current_task: &CurrentTask,
1699    size: usize,
1700    groups_addr: UserAddress,
1701) -> Result<usize, Errno> {
1702    if size > NGROUPS_MAX as usize {
1703        return error!(EINVAL);
1704    }
1705    let creds = current_task.current_creds();
1706    if size != 0 {
1707        if size < creds.groups.len() {
1708            return error!(EINVAL);
1709        }
1710        current_task.write_memory(groups_addr, creds.groups.as_slice().as_bytes())?;
1711    }
1712    Ok(creds.groups.len())
1713}
1714
1715pub fn sys_setsid(
1716    locked: &mut Locked<Unlocked>,
1717    current_task: &CurrentTask,
1718) -> Result<pid_t, Errno> {
1719    current_task.thread_group().setsid(locked)?;
1720    Ok(current_task.get_pid())
1721}
1722
1723// Note the asymmetry with sys_setpriority: this returns "kernel nice" which ranges
1724// from 1 (weakest) to 40 (strongest). (It is part of Linux history that this syscall
1725// deals with niceness but has "priority" in its name.)
1726pub fn sys_getpriority(
1727    _locked: &mut Locked<Unlocked>,
1728    current_task: &CurrentTask,
1729    which: u32,
1730    who: i32,
1731) -> Result<u8, Errno> {
1732    match which {
1733        PRIO_PROCESS => {}
1734        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1735        _ => return error!(EINVAL),
1736    }
1737    track_stub!(TODO("https://fxbug.dev/322893809"), "getpriority permissions");
1738    let target_task = get_task_or_current(current_task, who)?;
1739    let state = target_task.read();
1740    Ok(state.scheduler_state.normal_priority.raw_priority())
1741}
1742
1743// Note the asymmetry with sys_getpriority: this call's `priority` parameter is a
1744// "user nice" which ranges from -20 (strongest) to 19 (weakest) (other values can be
1745// passed and are clamped to that range and interpretation). (It is part of Linux
1746// history that this syscall deals with niceness but has "priority" in its name.)
1747pub fn sys_setpriority(
1748    locked: &mut Locked<Unlocked>,
1749    current_task: &CurrentTask,
1750    which: u32,
1751    who: i32,
1752    priority: i32,
1753) -> Result<(), Errno> {
1754    // Parse & validate the arguments.
1755    match which {
1756        PRIO_PROCESS => {}
1757        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1758        _ => return error!(EINVAL),
1759    }
1760
1761    let target_task = get_task_or_current(current_task, who)?;
1762
1763    let normal_priority = NormalPriority::from_setpriority_syscall(priority);
1764
1765    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
1766    let current_state = target_task.read().scheduler_state;
1767
1768    // Check capabilities and permissions, if required, for the operation.
1769    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
1770    let strengthening = current_state.normal_priority < normal_priority;
1771    let rlimited = strengthening
1772        && normal_priority.exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
1773    if !euid_friendly {
1774        security::check_task_capable(current_task, CAP_SYS_NICE)?;
1775    } else if rlimited {
1776        security::check_task_capable(current_task, CAP_SYS_NICE).map_err(|_| errno!(EACCES))?;
1777    }
1778
1779    security::check_task_setnice_access(current_task, &target_task)?;
1780
1781    // Apply the new scheduler configuration to the task.
1782    target_task.set_scheduler_nice(normal_priority)?;
1783
1784    Ok(())
1785}
1786
1787pub fn sys_setns(
1788    _locked: &mut Locked<Unlocked>,
1789    current_task: &CurrentTask,
1790    ns_fd: FdNumber,
1791    ns_type: c_int,
1792) -> Result<(), Errno> {
1793    let file_handle = current_task.get_file(ns_fd)?;
1794
1795    // From man pages this is not quite right because some namespace types require more capabilities
1796    // or require this capability in multiple namespaces, but it should cover our current test
1797    // cases and we can make this more nuanced once more namespace types are supported.
1798    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1799
1800    if let Some(mount_ns) = file_handle.downcast_file::<MountNamespaceFile>() {
1801        if !(ns_type == 0 || ns_type == CLONE_NEWNS as i32) {
1802            log_trace!("invalid type");
1803            return error!(EINVAL);
1804        }
1805
1806        track_stub!(TODO("https://fxbug.dev/297312091"), "setns CLONE_FS limitations");
1807        current_task.fs().set_namespace(mount_ns.0.clone())?;
1808        return Ok(());
1809    }
1810
1811    if let Some(_pidfd) = file_handle.downcast_file::<PidFdFileObject>() {
1812        track_stub!(TODO("https://fxbug.dev/297312844"), "setns w/ pidfd");
1813        return error!(ENOSYS);
1814    }
1815
1816    track_stub!(TODO("https://fxbug.dev/322893829"), "unknown ns file for setns, see logs");
1817    log_info!("ns_fd was not a supported namespace file: {}", file_handle.ops_type_name());
1818    error!(EINVAL)
1819}
1820
1821pub fn sys_unshare(
1822    _locked: &mut Locked<Unlocked>,
1823    current_task: &CurrentTask,
1824    flags: u32,
1825) -> Result<(), Errno> {
1826    const IMPLEMENTED_FLAGS: u32 = CLONE_FILES | CLONE_FS | CLONE_NEWNS | CLONE_NEWUTS;
1827    if flags & !IMPLEMENTED_FLAGS != 0 {
1828        track_stub!(TODO("https://fxbug.dev/322893372"), "unshare", flags & !IMPLEMENTED_FLAGS);
1829        return error!(EINVAL);
1830    }
1831
1832    if (flags & CLONE_FILES) != 0 {
1833        current_task.running_state().files.unshare();
1834    }
1835
1836    if (flags & CLONE_FS) != 0 {
1837        current_task.unshare_fs();
1838    }
1839
1840    if (flags & CLONE_NEWNS) != 0 {
1841        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1842        current_task.fs().unshare_namespace();
1843    }
1844
1845    if (flags & CLONE_NEWUTS) != 0 {
1846        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1847        // Fork the UTS namespace.
1848        let mut task_state = current_task.write();
1849        let new_uts_ns = task_state.uts_ns.read().clone();
1850        task_state.uts_ns = Arc::new(LockDepRwLock::new(new_uts_ns));
1851    }
1852
1853    Ok(())
1854}
1855
1856pub fn sys_swapon(
1857    locked: &mut Locked<Unlocked>,
1858    current_task: &CurrentTask,
1859    user_path: UserCString,
1860    _flags: i32,
1861) -> Result<(), Errno> {
1862    const MAX_SWAPFILES: usize = 32; // See https://man7.org/linux/man-pages/man2/swapon.2.html
1863
1864    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1865
1866    track_stub!(TODO("https://fxbug.dev/322893905"), "swapon validate flags");
1867
1868    let path = current_task.read_path(user_path)?;
1869    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1870
1871    let node = file.node();
1872    let mode = node.info().mode;
1873    if !mode.is_reg() && !mode.is_blk() {
1874        return error!(EINVAL);
1875    }
1876
1877    // We determined this magic number by using the mkswap tool and the file tool. The mkswap tool
1878    // populates a few bytes in the file, including a UUID, which can be replaced with zeros while
1879    // still being recognized by the file tool. This string appears at a fixed offset
1880    // (MAGIC_OFFSET) in the file, which looks quite like a magic number.
1881    const MAGIC_OFFSET: usize = 0xff6;
1882    let swap_magic = b"SWAPSPACE2";
1883    let mut buffer = VecOutputBuffer::new(swap_magic.len());
1884    if file.read_at(locked, current_task, MAGIC_OFFSET, &mut buffer)? != swap_magic.len()
1885        || buffer.data() != swap_magic
1886    {
1887        return error!(EINVAL);
1888    }
1889
1890    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1891    for swap_node in swap_files.iter() {
1892        if Arc::ptr_eq(swap_node, node) {
1893            return error!(EBUSY);
1894        }
1895    }
1896    if swap_files.len() >= MAX_SWAPFILES {
1897        return error!(EPERM);
1898    }
1899    swap_files.push(node.clone());
1900    Ok(())
1901}
1902
1903pub fn sys_swapoff(
1904    locked: &mut Locked<Unlocked>,
1905    current_task: &CurrentTask,
1906    user_path: UserCString,
1907) -> Result<(), Errno> {
1908    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1909
1910    let path = current_task.read_path(user_path)?;
1911    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1912    let node = file.node();
1913
1914    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1915    let original_length = swap_files.len();
1916    swap_files.retain(|swap_node| !Arc::ptr_eq(swap_node, node));
1917    if swap_files.len() == original_length {
1918        return error!(EINVAL);
1919    }
1920    Ok(())
1921}
1922
/// Parameters used to obfuscate values before `kcmp` compares them.
///
/// A value is obfuscated by XOR-ing it with `mask` and then multiplying (with wrapping)
/// by `shuffle`; see `obfuscate_value`.
#[derive(Default, Debug, IntoBytes, KnownLayout, FromBytes, Immutable)]
#[repr(C)]
struct KcmpParams {
    /// XOR-ed into the value before the multiplication.
    mask: usize,
    /// Multiplier applied after the XOR.
    shuffle: usize,
}
1929
1930static KCMP_PARAMS: LazyLock<KcmpParams> = LazyLock::new(|| {
1931    let mut params = KcmpParams::default();
1932    starnix_crypto::cprng_draw(params.as_mut_bytes());
1933    // Ensure the shuffle is odd so that multiplying a usize by this value is a permutation.
1934    params.shuffle |= 1;
1935    params
1936});
1937
1938fn obfuscate_value(value: usize) -> usize {
1939    let KcmpParams { mask, shuffle } = *KCMP_PARAMS;
1940    (value ^ mask).wrapping_mul(shuffle)
1941}
1942
1943fn obfuscate_ptr<T>(ptr: *const T) -> usize {
1944    obfuscate_value(ptr as usize)
1945}
1946
1947fn obfuscate_arc<T>(arc: &Arc<T>) -> usize {
1948    obfuscate_ptr(Arc::as_ptr(arc))
1949}
1950
1951pub fn sys_kcmp(
1952    locked: &mut Locked<Unlocked>,
1953    current_task: &CurrentTask,
1954    pid1: pid_t,
1955    pid2: pid_t,
1956    resource_type: u32,
1957    index1: u64,
1958    index2: u64,
1959) -> Result<u32, Errno> {
1960    let task1 = current_task.get_task(pid1)?;
1961    let task2 = current_task.get_task(pid2)?;
1962
1963    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task1)?;
1964    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task2)?;
1965
1966    let resource_type = KcmpResource::from_raw(resource_type)?;
1967
1968    // Output encoding (see <https://man7.org/linux/man-pages/man2/kcmp.2.html>):
1969    //
1970    //   0  v1 is equal to v2; in other words, the two processes share the resource.
1971    //   1  v1 is less than v2.
1972    //   2  v1 is greater than v2.
1973    //   3  v1 is not equal to v2, but ordering information is unavailable.
1974    //
1975    fn encode_ordering(value: cmp::Ordering) -> u32 {
1976        match value {
1977            cmp::Ordering::Equal => 0,
1978            cmp::Ordering::Less => 1,
1979            cmp::Ordering::Greater => 2,
1980        }
1981    }
1982
1983    match resource_type {
1984        KcmpResource::FILE => {
1985            fn get_file(task: &Task, index: u64) -> Result<FileHandle, Errno> {
1986                // TODO: Test whether O_PATH is allowed here. Conceptually, seems like
1987                //       O_PATH should be allowed, but we haven't tested it yet.
1988                task.running_state()?.files.get_allowing_opath(FdNumber::from_raw(
1989                    index.try_into().map_err(|_| errno!(EBADF))?,
1990                ))
1991            }
1992            let file1 = get_file(&task1, index1)?;
1993            let file2 = get_file(&task2, index2)?;
1994            Ok(encode_ordering(obfuscate_arc(&file1).cmp(&obfuscate_arc(&file2))))
1995        }
1996        KcmpResource::FILES => {
1997            let files1 = task1.running_state()?.files.id();
1998            let files2 = task2.running_state()?.files.id();
1999            Ok(encode_ordering(obfuscate_value(files1.raw()).cmp(&obfuscate_value(files2.raw()))))
2000        }
2001        KcmpResource::FS => {
2002            let fs1 = task1.running_state()?.fs();
2003            let fs2 = task2.running_state()?.fs();
2004            Ok(encode_ordering(obfuscate_arc(&fs1).cmp(&obfuscate_arc(&fs2))))
2005        }
2006        KcmpResource::SIGHAND => Ok(encode_ordering(
2007            obfuscate_arc(&task1.thread_group().signal_actions)
2008                .cmp(&obfuscate_arc(&task2.thread_group().signal_actions)),
2009        )),
2010        KcmpResource::VM => {
2011            Ok(encode_ordering(obfuscate_arc(&task1.mm()?).cmp(&obfuscate_arc(&task2.mm()?))))
2012        }
2013        _ => error!(EINVAL),
2014    }
2015}
2016
2017pub fn sys_syslog(
2018    locked: &mut Locked<Unlocked>,
2019    current_task: &CurrentTask,
2020    action_type: i32,
2021    address: UserAddress,
2022    length: i32,
2023) -> Result<i32, Errno> {
2024    let action = SyslogAction::try_from(action_type)?;
2025    let syslog =
2026        current_task.kernel().syslog.access(&current_task, SyslogAccess::Syscall(action))?;
2027    match action {
2028        SyslogAction::Read => {
2029            if address.is_null() || length < 0 {
2030                return error!(EINVAL);
2031            }
2032            let mut output_buffer =
2033                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
2034            syslog.blocking_read(locked, current_task, &mut output_buffer)
2035        }
2036        SyslogAction::ReadAll => {
2037            if address.is_null() || length < 0 {
2038                return error!(EINVAL);
2039            }
2040            let mut output_buffer =
2041                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
2042            syslog.read_all(current_task, &mut output_buffer)
2043        }
2044        SyslogAction::SizeUnread => syslog.size_unread(),
2045        SyslogAction::SizeBuffer => syslog.size_buffer(),
2046        SyslogAction::Close | SyslogAction::Open => Ok(0),
2047        SyslogAction::ReadClear => {
2048            track_stub!(TODO("https://fxbug.dev/322894145"), "syslog: read clear");
2049            Ok(0)
2050        }
2051        SyslogAction::Clear => {
2052            track_stub!(TODO("https://fxbug.dev/322893673"), "syslog: clear");
2053            Ok(0)
2054        }
2055        SyslogAction::ConsoleOff => {
2056            track_stub!(TODO("https://fxbug.dev/322894399"), "syslog: console off");
2057            Ok(0)
2058        }
2059        SyslogAction::ConsoleOn => {
2060            track_stub!(TODO("https://fxbug.dev/322894106"), "syslog: console on");
2061            Ok(0)
2062        }
2063        SyslogAction::ConsoleLevel => {
2064            if length <= 0 || length >= 8 {
2065                return error!(EINVAL);
2066            }
2067            track_stub!(TODO("https://fxbug.dev/322894199"), "syslog: console level");
2068            Ok(0)
2069        }
2070    }
2071}
2072
2073pub fn sys_vhangup(
2074    _locked: &mut Locked<Unlocked>,
2075    current_task: &CurrentTask,
2076) -> Result<(), Errno> {
2077    security::check_task_capable(current_task, CAP_SYS_TTY_CONFIG)?;
2078    track_stub!(TODO("https://fxbug.dev/324079257"), "vhangup");
2079    Ok(())
2080}
2081
// Syscalls for arch32 usage
//
// This module is only compiled for aarch64 targets. It contains no new code: every item
// re-exports a syscall implementation from the parent module under a `sys_arch32_*`
// name. Some implementations are exported under two names (e.g. `sys_setfsgid` as both
// `sys_arch32_setfsgid` and `sys_arch32_setfsgid32`).
// NOTE(review): presumably the arch32 syscall dispatch table resolves handlers by these
// names - confirm against the dispatcher.
#[cfg(target_arch = "aarch64")]
mod arch32 {
    pub use super::{
        sys_execve as sys_arch32_execve, sys_getegid as sys_arch32_getegid32,
        sys_geteuid as sys_arch32_geteuid32, sys_getgid as sys_arch32_getgid32,
        sys_getgroups as sys_arch32_getgroups32, sys_getpgid as sys_arch32_getpgid,
        sys_getppid as sys_arch32_getppid, sys_getpriority as sys_arch32_getpriority,
        sys_getresgid as sys_arch32_getresgid32, sys_getresuid as sys_arch32_getresuid32,
        sys_getrlimit as sys_arch32_ugetrlimit, sys_getrusage as sys_arch32_getrusage,
        sys_getuid as sys_arch32_getuid32, sys_ioprio_set as sys_arch32_ioprio_set,
        sys_ptrace as sys_arch32_ptrace, sys_quotactl as sys_arch32_quotactl,
        sys_sched_get_priority_max as sys_arch32_sched_get_priority_max,
        sys_sched_get_priority_min as sys_arch32_sched_get_priority_min,
        sys_sched_getaffinity as sys_arch32_sched_getaffinity,
        sys_sched_getparam as sys_arch32_sched_getparam,
        sys_sched_setaffinity as sys_arch32_sched_setaffinity,
        sys_sched_setparam as sys_arch32_sched_setparam,
        sys_sched_setscheduler as sys_arch32_sched_setscheduler, sys_seccomp as sys_arch32_seccomp,
        sys_setfsgid as sys_arch32_setfsgid, sys_setfsgid as sys_arch32_setfsgid32,
        sys_setfsuid as sys_arch32_setfsuid, sys_setfsuid as sys_arch32_setfsuid32,
        sys_setgid as sys_arch32_setgid32, sys_setgroups as sys_arch32_setgroups32,
        sys_setns as sys_arch32_setns, sys_setpgid as sys_arch32_setpgid,
        sys_setpriority as sys_arch32_setpriority, sys_setregid as sys_arch32_setregid32,
        sys_setresgid as sys_arch32_setresgid32, sys_setresuid as sys_arch32_setresuid32,
        sys_setreuid as sys_arch32_setreuid32, sys_setreuid as sys_arch32_setreuid,
        sys_setrlimit as sys_arch32_setrlimit, sys_setsid as sys_arch32_setsid,
        sys_syslog as sys_arch32_syslog, sys_unshare as sys_arch32_unshare,
    };
}
2112
2113#[cfg(target_arch = "aarch64")]
2114pub use arch32::*;
2115
2116#[cfg(test)]
2117mod tests {
2118    use super::*;
2119    use crate::mm::syscalls::sys_munmap;
2120    use crate::testing::{AutoReleasableTask, map_memory, spawn_kernel_and_run};
2121    use starnix_syscalls::SUCCESS;
2122    use starnix_task_command::TaskCommand;
2123    use starnix_uapi::auth::Credentials;
2124    use starnix_uapi::{SCHED_FIFO, SCHED_NORMAL};
2125    use std::ffi::CString;
2126
2127    #[::fuchsia::test]
2128    async fn test_prctl_set_vma_anon_name() {
2129        spawn_kernel_and_run(async |locked, current_task| {
2130            let mapped_address =
2131                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2132            let name_addr = (mapped_address + 128u64).unwrap();
2133            let name = "test-name\0";
2134            current_task.write_memory(name_addr, name.as_bytes()).expect("failed to write name");
2135            sys_prctl(
2136                locked,
2137                current_task,
2138                PR_SET_VMA,
2139                PR_SET_VMA_ANON_NAME as u64,
2140                mapped_address.ptr() as u64,
2141                32,
2142                name_addr.ptr() as u64,
2143            )
2144            .expect("failed to set name");
2145            assert_eq!(
2146                "test-name",
2147                current_task
2148                    .mm()
2149                    .unwrap()
2150                    .get_mapping_name((mapped_address + 24u64).unwrap())
2151                    .expect("failed to get address")
2152                    .unwrap()
2153                    .to_string(),
2154            );
2155
2156            sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize)
2157                .expect("failed to unmap memory");
2158            assert_eq!(
2159                error!(EFAULT),
2160                current_task.mm().unwrap().get_mapping_name((mapped_address + 24u64).unwrap())
2161            );
2162        })
2163        .await;
2164    }
2165
2166    #[::fuchsia::test]
2167    async fn test_set_vma_name_special_chars() {
2168        spawn_kernel_and_run(async |locked, current_task| {
2169            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2170
2171            let mapping_addr =
2172                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2173
2174            for c in 1..255 {
2175                let vma_name = CString::new([c]).unwrap();
2176                current_task.write_memory(name_addr, vma_name.as_bytes_with_nul()).unwrap();
2177
2178                let result = sys_prctl(
2179                    locked,
2180                    current_task,
2181                    PR_SET_VMA,
2182                    PR_SET_VMA_ANON_NAME as u64,
2183                    mapping_addr.ptr() as u64,
2184                    *PAGE_SIZE,
2185                    name_addr.ptr() as u64,
2186                );
2187
2188                if c > 0x1f
2189                    && c < 0x7f
2190                    && c != b'\\'
2191                    && c != b'`'
2192                    && c != b'$'
2193                    && c != b'['
2194                    && c != b']'
2195                {
2196                    assert_eq!(result, Ok(SUCCESS));
2197                } else {
2198                    assert_eq!(result, error!(EINVAL));
2199                }
2200            }
2201        })
2202        .await;
2203    }
2204
2205    #[::fuchsia::test]
2206    async fn test_set_vma_name_long() {
2207        spawn_kernel_and_run(async |locked, current_task| {
2208            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2209
2210            let mapping_addr =
2211                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2212
2213            let name_too_long = CString::new(vec![b'a'; 256]).unwrap();
2214
2215            current_task.write_memory(name_addr, name_too_long.as_bytes_with_nul()).unwrap();
2216
2217            assert_eq!(
2218                sys_prctl(
2219                    locked,
2220                    current_task,
2221                    PR_SET_VMA,
2222                    PR_SET_VMA_ANON_NAME as u64,
2223                    mapping_addr.ptr() as u64,
2224                    *PAGE_SIZE,
2225                    name_addr.ptr() as u64,
2226                ),
2227                error!(EINVAL)
2228            );
2229
2230            let name_just_long_enough = CString::new(vec![b'a'; 255]).unwrap();
2231
2232            current_task
2233                .write_memory(name_addr, name_just_long_enough.as_bytes_with_nul())
2234                .unwrap();
2235
2236            assert_eq!(
2237                sys_prctl(
2238                    locked,
2239                    current_task,
2240                    PR_SET_VMA,
2241                    PR_SET_VMA_ANON_NAME as u64,
2242                    mapping_addr.ptr() as u64,
2243                    *PAGE_SIZE,
2244                    name_addr.ptr() as u64,
2245                ),
2246                Ok(SUCCESS)
2247            );
2248        })
2249        .await;
2250    }
2251
2252    #[::fuchsia::test]
2253    async fn test_set_vma_name_misaligned() {
2254        spawn_kernel_and_run(async |locked, current_task| {
2255            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2256
2257            let mapping_addr =
2258                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2259
2260            let name = CString::new("name").unwrap();
2261            current_task.write_memory(name_addr, name.as_bytes_with_nul()).unwrap();
2262
2263            // Passing a misaligned pointer to the start of the named region fails.
2264            assert_eq!(
2265                sys_prctl(
2266                    locked,
2267                    current_task,
2268                    PR_SET_VMA,
2269                    PR_SET_VMA_ANON_NAME as u64,
2270                    1 + mapping_addr.ptr() as u64,
2271                    *PAGE_SIZE - 1,
2272                    name_addr.ptr() as u64,
2273                ),
2274                error!(EINVAL)
2275            );
2276
2277            // Passing an unaligned length does work, however.
2278            assert_eq!(
2279                sys_prctl(
2280                    locked,
2281                    current_task,
2282                    PR_SET_VMA,
2283                    PR_SET_VMA_ANON_NAME as u64,
2284                    mapping_addr.ptr() as u64,
2285                    *PAGE_SIZE - 1,
2286                    name_addr.ptr() as u64,
2287                ),
2288                Ok(SUCCESS)
2289            );
2290        })
2291        .await;
2292    }
2293
2294    #[::fuchsia::test]
2295    async fn test_prctl_get_set_dumpable() {
2296        spawn_kernel_and_run(async |locked, current_task| {
2297            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2298                .expect("failed to get dumpable");
2299
2300            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 1, 0, 0, 0)
2301                .expect("failed to set dumpable");
2302            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2303                .expect("failed to get dumpable");
2304
2305            // SUID_DUMP_ROOT not supported.
2306            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 2, 0, 0, 0)
2307                .expect("failed to set dumpable");
2308            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2309                .expect("failed to get dumpable");
2310        })
2311        .await;
2312    }
2313
2314    #[::fuchsia::test]
2315    async fn test_sys_getsid() {
2316        spawn_kernel_and_run(async |locked, current_task| {
2317            let kernel = current_task.kernel();
2318            assert_eq!(
2319                current_task.get_tid(),
2320                sys_getsid(locked, &current_task, 0).expect("failed to get sid")
2321            );
2322
2323            let second_task = crate::execution::create_init_child_process(
2324                locked,
2325                &kernel.weak_self.upgrade().unwrap(),
2326                TaskCommand::new(b"second task"),
2327                Credentials::with_ids(0, 0),
2328                None,
2329            )
2330            .expect("failed to create second task");
2331            let second_current = AutoReleasableTask::from(second_task);
2332
2333            assert_eq!(
2334                second_current.get_tid(),
2335                sys_getsid(locked, &current_task, second_current.get_tid())
2336                    .expect("failed to get sid")
2337            );
2338        })
2339        .await;
2340    }
2341
2342    #[::fuchsia::test]
2343    async fn test_get_affinity_size() {
2344        spawn_kernel_and_run(async |locked, current_task| {
2345            let mapped_address =
2346                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2347            let pid = current_task.get_pid();
2348            assert_eq!(
2349                sys_sched_getaffinity(locked, &current_task, pid, 16, mapped_address),
2350                Ok(16)
2351            );
2352            assert_eq!(
2353                sys_sched_getaffinity(locked, &current_task, pid, 1024, mapped_address),
2354                Ok(std::mem::size_of::<CpuSet>())
2355            );
2356            assert_eq!(
2357                sys_sched_getaffinity(locked, &current_task, pid, 1, mapped_address),
2358                error!(EINVAL)
2359            );
2360            assert_eq!(
2361                sys_sched_getaffinity(locked, &current_task, pid, 9, mapped_address),
2362                error!(EINVAL)
2363            );
2364        })
2365        .await;
2366    }
2367
2368    #[::fuchsia::test]
2369    async fn test_set_affinity_size() {
2370        spawn_kernel_and_run(async |locked, current_task| {
2371            let mapped_address =
2372                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2373            current_task.write_memory(mapped_address, &[0xffu8]).expect("failed to cpumask");
2374            let pid = current_task.get_pid();
2375            assert_eq!(
2376                sys_sched_setaffinity(
2377                    locked,
2378                    &current_task,
2379                    pid,
2380                    *PAGE_SIZE as u32,
2381                    mapped_address
2382                ),
2383                Ok(())
2384            );
2385            assert_eq!(
2386                sys_sched_setaffinity(locked, &current_task, pid, 1, mapped_address),
2387                error!(EINVAL)
2388            );
2389        })
2390        .await;
2391    }
2392
2393    #[::fuchsia::test]
2394    async fn test_task_name() {
2395        spawn_kernel_and_run(async |locked, current_task| {
2396            let mapped_address =
2397                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2398            let name = "my-task-name\0";
2399            current_task
2400                .write_memory(mapped_address, name.as_bytes())
2401                .expect("failed to write name");
2402
2403            let result =
2404                sys_prctl(locked, current_task, PR_SET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2405                    .unwrap();
2406            assert_eq!(SUCCESS, result);
2407
2408            let mapped_address =
2409                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2410            let result =
2411                sys_prctl(locked, current_task, PR_GET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2412                    .unwrap();
2413            assert_eq!(SUCCESS, result);
2414
2415            let name_length = name.len();
2416
2417            let out_name = current_task.read_memory_to_vec(mapped_address, name_length).unwrap();
2418            assert_eq!(name.as_bytes(), &out_name);
2419        })
2420        .await;
2421    }
2422
2423    #[::fuchsia::test]
2424    async fn test_sched_get_priority_min_max() {
2425        spawn_kernel_and_run(async |locked, current_task| {
2426            let non_rt_min =
2427                sys_sched_get_priority_min(locked, &current_task, SCHED_NORMAL).unwrap();
2428            assert_eq!(non_rt_min, 0);
2429            let non_rt_max =
2430                sys_sched_get_priority_max(locked, &current_task, SCHED_NORMAL).unwrap();
2431            assert_eq!(non_rt_max, 0);
2432
2433            let rt_min = sys_sched_get_priority_min(locked, &current_task, SCHED_FIFO).unwrap();
2434            assert_eq!(rt_min, 1);
2435            let rt_max = sys_sched_get_priority_max(locked, &current_task, SCHED_FIFO).unwrap();
2436            assert_eq!(rt_max, 99);
2437
2438            let min_bad_policy_error =
2439                sys_sched_get_priority_min(locked, &current_task, std::u32::MAX).unwrap_err();
2440            assert_eq!(min_bad_policy_error, errno!(EINVAL));
2441
2442            let max_bad_policy_error =
2443                sys_sched_get_priority_max(locked, &current_task, std::u32::MAX).unwrap_err();
2444            assert_eq!(max_bad_policy_error, errno!(EINVAL));
2445        })
2446        .await;
2447    }
2448
2449    #[::fuchsia::test]
2450    async fn test_sched_setscheduler() {
2451        spawn_kernel_and_run(async |locked, current_task| {
2452            current_task
2453                .thread_group()
2454                .limits
2455                .lock(locked)
2456                .set(Resource::RTPRIO, rlimit { rlim_cur: 255, rlim_max: 255 });
2457
2458            let scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2459            assert_eq!(scheduler, SCHED_NORMAL, "tasks should have normal scheduler by default");
2460
2461            let mapped_address =
2462                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2463            let requested_params = sched_param { sched_priority: 15 };
2464            current_task.write_object(mapped_address.into(), &requested_params).unwrap();
2465
2466            sys_sched_setscheduler(locked, &current_task, 0, SCHED_FIFO, mapped_address.into())
2467                .unwrap();
2468
2469            let new_scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2470            assert_eq!(new_scheduler, SCHED_FIFO, "task should have been assigned fifo scheduler");
2471
2472            let mapped_address =
2473                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2474            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2475                .expect("sched_getparam");
2476            let param_value: sched_param =
2477                current_task.read_object(mapped_address.into()).expect("read_object");
2478            assert_eq!(param_value.sched_priority, 15);
2479        })
2480        .await;
2481    }
2482
2483    #[::fuchsia::test]
2484    async fn test_sched_getparam() {
2485        spawn_kernel_and_run(async |locked, current_task| {
2486            let mapped_address =
2487                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2488            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2489                .expect("sched_getparam");
2490            let param_value: sched_param =
2491                current_task.read_object(mapped_address.into()).expect("read_object");
2492            assert_eq!(param_value.sched_priority, 0);
2493        })
2494        .await;
2495    }
2496
2497    #[::fuchsia::test]
2498    async fn test_setuid() {
2499        spawn_kernel_and_run(async |locked, current_task| {
2500            // Test for root.
2501            current_task.set_creds(Credentials::with_ids(0, 0));
2502            sys_setuid(locked, &current_task, 42).expect("setuid");
2503            let mut creds = Credentials::clone(&current_task.current_creds());
2504            assert_eq!(creds.euid, 42);
2505            assert_eq!(creds.uid, 42);
2506            assert_eq!(creds.saved_uid, 42);
2507
2508            // Remove the CAP_SETUID capability to avoid overwriting permission checks.
2509            creds.cap_effective.remove(CAP_SETUID);
2510            current_task.set_creds(creds);
2511
2512            // Test for non root, which task now is.
2513            assert_eq!(sys_setuid(locked, &current_task, 0), error!(EPERM));
2514            assert_eq!(sys_setuid(locked, &current_task, 43), error!(EPERM));
2515
2516            sys_setuid(locked, &current_task, 42).expect("setuid");
2517            assert_eq!(current_task.current_creds().euid, 42);
2518            assert_eq!(current_task.current_creds().uid, 42);
2519            assert_eq!(current_task.current_creds().saved_uid, 42);
2520
2521            // Change uid and saved_uid, and check that one can set the euid to these.
2522            let mut creds = Credentials::clone(&current_task.current_creds());
2523            creds.uid = 41;
2524            creds.euid = 42;
2525            creds.saved_uid = 43;
2526            current_task.set_creds(creds);
2527
2528            sys_setuid(locked, &current_task, 41).expect("setuid");
2529            assert_eq!(current_task.current_creds().euid, 41);
2530            assert_eq!(current_task.current_creds().uid, 41);
2531            assert_eq!(current_task.current_creds().saved_uid, 43);
2532
2533            let mut creds = Credentials::clone(&current_task.current_creds());
2534            creds.uid = 41;
2535            creds.euid = 42;
2536            creds.saved_uid = 43;
2537            current_task.set_creds(creds);
2538
2539            sys_setuid(locked, &current_task, 43).expect("setuid");
2540            assert_eq!(current_task.current_creds().euid, 43);
2541            assert_eq!(current_task.current_creds().uid, 41);
2542            assert_eq!(current_task.current_creds().saved_uid, 43);
2543        })
2544        .await;
2545    }
2546
2547    #[::fuchsia::test]
2548    async fn test_read_c_string_vector() {
2549        spawn_kernel_and_run(async |locked, current_task| {
2550            let arg_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2551            let arg = b"test-arg\0";
2552            current_task.write_memory(arg_addr, arg).expect("failed to write test arg");
2553            let arg_usercstr = UserCString::new(current_task, arg_addr);
2554            let null_usercstr = UserCString::null(current_task);
2555
2556            let argv_addr = UserCStringPtr::new(
2557                current_task,
2558                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE),
2559            );
2560            current_task
2561                .write_multi_arch_ptr(argv_addr.addr(), arg_usercstr)
2562                .expect("failed to write UserCString");
2563            current_task
2564                .write_multi_arch_ptr(argv_addr.next().unwrap().addr(), null_usercstr)
2565                .expect("failed to write UserCString");
2566
2567            // The arguments size limit should include the null terminator.
2568            assert!(read_c_string_vector(&current_task, argv_addr, 100, arg.len()).is_ok());
2569            assert_eq!(
2570                read_c_string_vector(
2571                    &current_task,
2572                    argv_addr,
2573                    100,
2574                    std::str::from_utf8(arg).unwrap().trim_matches('\0').len()
2575                ),
2576                error!(E2BIG)
2577            );
2578        })
2579        .await;
2580    }
2581}