Skip to main content

starnix_core/task/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::execution::execute_task;
6use crate::mm::{DumpPolicy, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE};
7use crate::ptrace::{
8    PR_SET_PTRACER_ANY, PtraceAllowedPtracers, PtraceAttachType, PtraceOptions, ptrace_attach,
9    ptrace_dispatch, ptrace_traceme,
10};
11use crate::security;
12use crate::signals::syscalls::RUsagePtr;
13use crate::task::{
14    CurrentTask, ExitStatus, NormalPriority, SchedulingPolicy, SeccompAction, SeccompStateValue,
15    SyslogAccess, Task, ThreadGroup, max_priority_for_sched_policy, min_priority_for_sched_policy,
16};
17use crate::vfs::{
18    FdNumber, FileHandle, MountNamespaceFile, PidFdFileObject, UserBuffersOutputBuffer,
19    VecOutputBuffer,
20};
21use starnix_logging::{log_error, log_info, log_trace, track_stub};
22use starnix_sync::{Locked, RwLock, Unlocked};
23use starnix_syscalls::SyscallResult;
24use starnix_task_command::TaskCommand;
25use starnix_types::time::timeval_from_duration;
26use starnix_uapi::auth::{
27    CAP_SETGID, CAP_SETPCAP, CAP_SETUID, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SYS_RESOURCE,
28    CAP_SYS_TTY_CONFIG, Capabilities, Credentials, PTRACE_MODE_READ_REALCREDS, SecureBits,
29};
30use starnix_uapi::errors::{ENAMETOOLONG, Errno};
31use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
32use starnix_uapi::kcmp::KcmpResource;
33use starnix_uapi::open_flags::OpenFlags;
34use starnix_uapi::resource_limits::Resource;
35use starnix_uapi::signals::{Signal, UncheckedSignal};
36use starnix_uapi::syslog::SyslogAction;
37use starnix_uapi::user_address::{
38    ArchSpecific, MappingMultiArchUserRef, MultiArchUserRef, UserAddress, UserCString,
39    UserCStringPtr, UserRef,
40};
41use starnix_uapi::vfs::ResolveFlags;
42use starnix_uapi::{
43    __user_cap_data_struct, __user_cap_header_struct, _LINUX_CAPABILITY_VERSION_1,
44    _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3, AT_EMPTY_PATH, AT_SYMLINK_NOFOLLOW,
45    BPF_MAXINSNS, CLONE_ARGS_SIZE_VER0, CLONE_ARGS_SIZE_VER1, CLONE_ARGS_SIZE_VER2, CLONE_FILES,
46    CLONE_FS, CLONE_NEWNS, CLONE_NEWUTS, CLONE_SETTLS, CLONE_VFORK, NGROUPS_MAX, PR_CAP_AMBIENT,
47    PR_CAP_AMBIENT_CLEAR_ALL, PR_CAP_AMBIENT_IS_SET, PR_CAP_AMBIENT_LOWER, PR_CAP_AMBIENT_RAISE,
48    PR_CAPBSET_DROP, PR_CAPBSET_READ, PR_GET_CHILD_SUBREAPER, PR_GET_DUMPABLE, PR_GET_KEEPCAPS,
49    PR_GET_NAME, PR_GET_NO_NEW_PRIVS, PR_GET_SECCOMP, PR_GET_SECUREBITS, PR_SET_CHILD_SUBREAPER,
50    PR_SET_DUMPABLE, PR_SET_KEEPCAPS, PR_SET_NAME, PR_SET_NO_NEW_PRIVS, PR_SET_PDEATHSIG,
51    PR_SET_PTRACER, PR_SET_SECCOMP, PR_SET_SECUREBITS, PR_SET_TIMERSLACK, PR_SET_VMA,
52    PR_SET_VMA_ANON_NAME, PRIO_PROCESS, PTRACE_ATTACH, PTRACE_SEIZE, PTRACE_TRACEME,
53    RUSAGE_CHILDREN, SCHED_RESET_ON_FORK, SECCOMP_FILTER_FLAG_LOG,
54    SECCOMP_FILTER_FLAG_NEW_LISTENER, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_TSYNC,
55    SECCOMP_FILTER_FLAG_TSYNC_ESRCH, SECCOMP_GET_ACTION_AVAIL, SECCOMP_GET_NOTIF_SIZES,
56    SECCOMP_MODE_FILTER, SECCOMP_MODE_STRICT, SECCOMP_SET_MODE_FILTER, SECCOMP_SET_MODE_STRICT,
57    c_char, c_int, clone_args, errno, error, gid_t, pid_t, rlimit, rusage, sched_param,
58    sock_filter, uapi, uid_t,
59};
60use static_assertions::const_assert;
61use std::cmp;
62use std::ffi::CString;
63use std::sync::{Arc, LazyLock};
64use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
65
66#[cfg(target_arch = "aarch64")]
67use starnix_uapi::{PR_GET_TAGGED_ADDR_CTRL, PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE};
68
/// Multi-arch user reference to a `sock_fprog`, parameterized over [`SockFProg`] and the native
/// (`uapi::sock_fprog`) and 32-bit (`uapi::arch32::sock_fprog`) layouts.
// NOTE(review): presumably reads through this pointer yield a `SockFProg` — confirm against
// `MappingMultiArchUserRef`.
pub type SockFProgPtr =
    MappingMultiArchUserRef<SockFProg, uapi::sock_fprog, uapi::arch32::sock_fprog>;
/// Multi-arch user reference to a `sock_filter` in either the native or the 32-bit layout.
pub type SockFilterPtr = MultiArchUserRef<uapi::sock_filter, uapi::arch32::sock_filter>;
72
/// Architecture-neutral counterpart of the uapi `sock_fprog` structure.
pub struct SockFProg {
    /// Counterpart of `sock_fprog::len`.
    // NOTE(review): presumably the number of `sock_filter` entries behind `filter` — confirm.
    pub len: u32,
    /// Counterpart of `sock_fprog::filter`: user pointer to the filter entries.
    pub filter: SockFilterPtr,
}
77
// Maps the `len` and `filter` fields of `SockFProg` onto the same-named fields of `sock_fprog`.
// NOTE(review): `BidiTryFrom` presumably generates fallible conversions in both directions for
// each supported architecture layout — confirm against the macro definition.
uapi::arch_map_data! {
    BidiTryFrom<SockFProg, sock_fprog> {
        len = len;
        filter = filter;
    }
}
84
// NOTE(review): judging by the macro name, this asserts that `sched_param` (field
// `sched_priority`) has the same layout on every supported architecture, which is what lets the
// syscalls below use a plain `UserRef<sched_param>` — confirm against the macro definition.
uapi::check_arch_independent_layout! {
    sched_param {
        sched_priority,
    }
}
90
91pub fn do_clone(
92    locked: &mut Locked<Unlocked>,
93    current_task: &mut CurrentTask,
94    args: &clone_args,
95) -> Result<pid_t, Errno> {
96    security::check_task_create_access(current_task)?;
97
98    let child_exit_signal = if args.exit_signal == 0 {
99        None
100    } else {
101        Some(Signal::try_from(UncheckedSignal::new(args.exit_signal))?)
102    };
103
104    let mut new_task = current_task.clone_task(
105        locked,
106        args.flags,
107        child_exit_signal,
108        UserRef::<pid_t>::new(UserAddress::from(args.parent_tid)),
109        UserRef::<pid_t>::new(UserAddress::from(args.child_tid)),
110        UserRef::<FdNumber>::new(UserAddress::from(args.pidfd)),
111    )?;
112
113    // Set the result register to 0 for the return value from clone in the
114    // cloned process.
115    new_task.thread_state.registers.set_return_register(0);
116    let (trace_kind, ptrace_state) = current_task.get_ptrace_core_state_for_clone(args);
117
118    if args.stack != 0 {
119        // In clone() the `stack` argument points to the top of the stack, while in clone3()
120        // `stack` points to the bottom of the stack. Therefore, in clone3() we need to add
121        // `stack_size` to calculate the stack pointer. Note that in clone() `stack_size` is 0.
122        new_task
123            .thread_state
124            .registers
125            .set_stack_pointer_register(args.stack.wrapping_add(args.stack_size));
126    }
127
128    if args.flags & (CLONE_SETTLS as u64) != 0 {
129        new_task.thread_state.registers.set_thread_pointer_register(args.tls);
130    }
131
132    let tid = new_task.task.tid;
133    let task_ref = Arc::downgrade(&new_task.task);
134    execute_task(locked, new_task, |_, _| Ok(()), |_| {}, ptrace_state)?;
135
136    current_task.ptrace_event(locked, trace_kind, tid as u64);
137
138    if args.flags & (CLONE_VFORK as u64) != 0 {
139        current_task.wait_for_execve(task_ref)?;
140        current_task.ptrace_event(locked, PtraceOptions::TRACEVFORKDONE, tid as u64);
141    }
142
143    Ok(tid)
144}
145
146pub fn sys_clone3(
147    locked: &mut Locked<Unlocked>,
148    current_task: &mut CurrentTask,
149    user_clone_args: UserRef<clone_args>,
150    user_clone_args_size: usize,
151) -> Result<pid_t, Errno> {
152    // Only these specific sized versions are supported.
153    if !(user_clone_args_size == CLONE_ARGS_SIZE_VER0 as usize
154        || user_clone_args_size == CLONE_ARGS_SIZE_VER1 as usize
155        || user_clone_args_size == CLONE_ARGS_SIZE_VER2 as usize)
156    {
157        return error!(EINVAL);
158    }
159
160    // The most recent version of the struct size should match our definition.
161    const_assert!(std::mem::size_of::<clone_args>() == CLONE_ARGS_SIZE_VER2 as usize);
162
163    let clone_args = current_task.read_object_partial(user_clone_args, user_clone_args_size)?;
164    do_clone(locked, current_task, &clone_args)
165}
166
167fn read_c_string_vector(
168    mm: &CurrentTask,
169    user_vector: UserCStringPtr,
170    elem_limit: usize,
171    vec_limit: usize,
172) -> Result<(Vec<CString>, usize), Errno> {
173    let mut user_current = user_vector;
174    let mut vector: Vec<CString> = vec![];
175    let mut vec_size: usize = 0;
176    loop {
177        let user_string = mm.read_multi_arch_ptr(user_current)?;
178        if user_string.is_null() {
179            break;
180        }
181        let string = mm
182            .read_c_string_to_vec(user_string, elem_limit)
183            .map_err(|e| if e.code == ENAMETOOLONG { errno!(E2BIG) } else { e })?;
184        let cstring = CString::new(string).map_err(|_| errno!(EINVAL))?;
185        vec_size =
186            vec_size.checked_add(cstring.as_bytes_with_nul().len()).ok_or_else(|| errno!(E2BIG))?;
187        if vec_size > vec_limit {
188            return error!(E2BIG);
189        }
190        vector.push(cstring);
191        user_current = user_current.next()?;
192    }
193    Ok((vector, vec_size))
194}
195
196pub fn sys_execve(
197    locked: &mut Locked<Unlocked>,
198    current_task: &mut CurrentTask,
199    user_path: UserCString,
200    user_argv: UserCStringPtr,
201    user_environ: UserCStringPtr,
202) -> Result<(), Errno> {
203    sys_execveat(locked, current_task, FdNumber::AT_FDCWD, user_path, user_argv, user_environ, 0)
204}
205
/// Implements `execveat(2)`: resolves the executable named by `dir_fd` / `user_path`, reads
/// `argv` and `environ` from user memory and hands everything to `CurrentTask::exec`.
///
/// * `flags` may only contain `AT_EMPTY_PATH` and `AT_SYMLINK_NOFOLLOW`; any other bit is
///   `EINVAL`.
/// * A null `user_argv` / `user_environ` is treated as an empty vector.
/// * An empty path is only accepted together with `AT_EMPTY_PATH` (otherwise `ENOENT`); the
///   file referred to by `dir_fd` is then executed directly.
///
/// Argument/environment size violations are reported as `E2BIG` by `read_c_string_vector`.
/// Errors from reading user memory, opening the file and `exec` are propagated.
pub fn sys_execveat(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    dir_fd: FdNumber,
    user_path: UserCString,
    user_argv: UserCStringPtr,
    user_environ: UserCStringPtr,
    flags: u32,
) -> Result<(), Errno> {
    // Reject any flag other than the two supported ones.
    if flags & !(AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) != 0 {
        return error!(EINVAL);
    }

    // Calculate the limit for argv and environ size as 1/4 of the stack size, floored at 32 pages.
    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
    const PAGE_LIMIT: usize = 32;
    let page_limit_size: usize = PAGE_LIMIT * *PAGE_SIZE as usize;
    let rlimit = current_task.thread_group().get_rlimit(locked, Resource::STACK);
    let stack_limit = rlimit / 4;
    let argv_env_limit = cmp::max(page_limit_size, stack_limit as usize);

    // The limit per argument or environment variable is 32 pages.
    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
    let (argv, argv_size) = if user_argv.is_null() {
        (Vec::new(), 0)
    } else {
        read_c_string_vector(current_task, user_argv, page_limit_size, argv_env_limit)?
    };

    // The environment shares the overall budget with argv, so it only gets what argv left over.
    // `argv_size` cannot exceed `argv_env_limit` here because `read_c_string_vector` fails with
    // E2BIG once the limit is exceeded, so the subtraction cannot underflow.
    let (environ, _) = if user_environ.is_null() {
        (Vec::new(), 0)
    } else {
        read_c_string_vector(
            current_task,
            user_environ,
            page_limit_size,
            argv_env_limit - argv_size,
        )?
    };

    let path = &current_task.read_path(user_path)?;

    log_trace!(argv:?, environ:?, flags:?; "execveat({dir_fd}, {path})");

    let mut open_flags = OpenFlags::RDONLY;

    if flags & AT_SYMLINK_NOFOLLOW != 0 {
        open_flags |= OpenFlags::NOFOLLOW;
    }

    let executable = if path.is_empty() {
        if flags & AT_EMPTY_PATH == 0 {
            // If AT_EMPTY_PATH is not set, this is an error.
            return error!(ENOENT);
        }

        // O_PATH allowed for:
        //
        //   Passing the file descriptor as the dirfd argument of
        //   openat() and the other "*at()" system calls.  This
        //   includes linkat(2) with AT_EMPTY_PATH (or via procfs
        //   using AT_SYMLINK_FOLLOW) even if the file is not a
        //   directory.
        //
        // See https://man7.org/linux/man-pages/man2/open.2.html
        let file = current_task.get_file_allowing_opath(dir_fd)?;

        // We are forced to reopen the file with O_RDONLY to get access to the underlying VMO.
        // Note that the access check passed below asks for EXEC rather than READ, in case the
        // file mode does not actually have the read permission bit.
        //
        // This can happen because a file could have --x--x--x mode permissions and then
        // be opened with O_PATH. Internally, the file operations would all be stubbed out
        // for that file, which is undesirable here.
        //
        // See https://man7.org/linux/man-pages/man3/fexecve.3.html#DESCRIPTION
        file.name.open(
            locked,
            current_task,
            OpenFlags::RDONLY,
            AccessCheck::check_for(Access::EXEC),
        )?
    } else {
        // Non-empty path: resolve it relative to `dir_fd`, checking for EXEC access.
        current_task.open_file_at(
            locked,
            dir_fd,
            path.as_ref(),
            open_flags,
            FileMode::default(),
            ResolveFlags::empty(),
            AccessCheck::check_for(Access::EXEC),
        )?
    };

    // This path can affect script resolution (the path is appended to the script args)
    // and the auxiliary value `AT_EXECFN` from the syscall `getauxval()`
    let path = if dir_fd == FdNumber::AT_FDCWD {
        // The file descriptor is CWD, so the path is exactly
        // what the user specified.
        path.to_vec()
    } else {
        // The path is `/dev/fd/N/P` where N is the file descriptor
        // number and P is the user-provided path (if relative and non-empty).
        //
        // See https://man7.org/linux/man-pages/man2/execveat.2.html#NOTES
        match path.first() {
            Some(b'/') => {
                // The user-provided path is absolute, so dir_fd is ignored.
                path.to_vec()
            }
            Some(_) => {
                // User-provided path is relative, append it.
                let mut new_path = format!("/dev/fd/{}/", dir_fd.raw()).into_bytes();
                new_path.append(&mut path.to_vec());
                new_path
            }
            // User-provided path is empty
            None => format!("/dev/fd/{}", dir_fd.raw()).into_bytes(),
        }
    };

    // An interior NUL byte in the path cannot be represented as a C string.
    let path = CString::new(path).map_err(|_| errno!(EINVAL))?;

    current_task.exec(locked, executable, path, argv, environ)?;
    Ok(())
}
332
333pub fn sys_getcpu(
334    _locked: &mut Locked<Unlocked>,
335    current_task: &CurrentTask,
336    cpu_out: UserRef<u32>,
337    node_out: UserRef<u32>,
338) -> Result<(), Errno> {
339    // "When either cpu or node is NULL nothing is written to the respective pointer."
340    // from https://man7.org/linux/man-pages/man2/getcpu.2.html
341    if !cpu_out.is_null() {
342        let thread_stats = current_task
343            .live()
344            .thread
345            .read()
346            .as_ref()
347            .expect("current thread is never None when executing")
348            .stats()
349            .map_err(|e| errno!(EINVAL, format!("getting thread stats failed {e:?}")))?;
350        current_task.write_object(cpu_out, &thread_stats.last_scheduled_cpu)?;
351    }
352    if !node_out.is_null() {
353        // Zircon does not yet have a concept of NUMA task scheduling, always tell userspace that
354        // it's on the "first" node which should be true for non-NUMA systems.
355        track_stub!(TODO("https://fxbug.dev/325643815"), "getcpu() numa node");
356        current_task.write_object(node_out, &0)?;
357    }
358    Ok(())
359}
360
361pub fn sys_getpid(
362    _locked: &mut Locked<Unlocked>,
363    current_task: &CurrentTask,
364) -> Result<pid_t, Errno> {
365    Ok(current_task.get_pid())
366}
367
368pub fn sys_gettid(
369    _locked: &mut Locked<Unlocked>,
370    current_task: &CurrentTask,
371) -> Result<pid_t, Errno> {
372    Ok(current_task.get_tid())
373}
374
375pub fn sys_getppid(
376    _locked: &mut Locked<Unlocked>,
377    current_task: &CurrentTask,
378) -> Result<pid_t, Errno> {
379    Ok(current_task.thread_group().read().get_ppid())
380}
381
382fn get_task_or_current(current_task: &CurrentTask, pid: pid_t) -> Result<Arc<Task>, Errno> {
383    if pid == 0 { Ok(current_task.task.clone()) } else { current_task.get_task(pid) }
384}
385
386pub fn sys_getsid(
387    _locked: &mut Locked<Unlocked>,
388    current_task: &CurrentTask,
389    pid: pid_t,
390) -> Result<pid_t, Errno> {
391    let target_task = get_task_or_current(current_task, pid)?;
392    security::check_task_getsid(current_task, &target_task)?;
393    let sid = target_task.thread_group().read().process_group.session.leader;
394    Ok(sid)
395}
396
397pub fn sys_getpgid(
398    _locked: &mut Locked<Unlocked>,
399    current_task: &CurrentTask,
400    pid: pid_t,
401) -> Result<pid_t, Errno> {
402    let task = get_task_or_current(current_task, pid)?;
403
404    security::check_getpgid_access(current_task, &task)?;
405    let pgid = task.thread_group().read().process_group.leader;
406    Ok(pgid)
407}
408
409pub fn sys_setpgid(
410    locked: &mut Locked<Unlocked>,
411    current_task: &CurrentTask,
412    pid: pid_t,
413    pgid: pid_t,
414) -> Result<(), Errno> {
415    let task = get_task_or_current(current_task, pid)?;
416
417    current_task.thread_group().setpgid(locked, current_task, &task, pgid)?;
418    Ok(())
419}
420
421impl CurrentTask {
422    /// Returns true if the `current_task`'s effective user ID (EUID) is the same as the
423    /// EUID or UID of the `target_task`. We describe this as the current task being
424    /// "EUID-friendly" to the target and it enables actions to be performed that would
425    /// otherwise require additional privileges.
426    ///
427    /// See "The caller needs an effective user ID equal to the real user ID or effective
428    /// user ID of the [target]" at sched_setaffinity(2), comparable language at
429    /// setpriority(2), more ambiguous language at sched_setscheduler(2), and no
430    /// particular specification at sched_setparam(2).
431    fn is_euid_friendly_with(&self, target_task: &Task) -> bool {
432        let self_creds = self.current_creds();
433        let target_creds = target_task.real_creds();
434        self_creds.euid == target_creds.uid || self_creds.euid == target_creds.euid
435    }
436}
437
438// A non-root process is allowed to set any of its three uids to the value of any other. The
439// CAP_SETUID capability bypasses these checks and allows setting any uid to any integer. Likewise
440// for gids.
441fn new_uid_allowed(current_task: &CurrentTask, uid: uid_t) -> bool {
442    let current_creds = current_task.current_creds();
443    uid == current_creds.uid
444        || uid == current_creds.euid
445        || uid == current_creds.saved_uid
446        || security::is_task_capable_noaudit(current_task, CAP_SETUID)
447}
448
449fn new_gid_allowed(current_task: &CurrentTask, gid: gid_t) -> bool {
450    let current_creds = current_task.current_creds();
451    gid == current_creds.gid
452        || gid == current_creds.egid
453        || gid == current_creds.saved_gid
454        || security::is_task_capable_noaudit(current_task, CAP_SETGID)
455}
456
457pub fn sys_getuid(
458    _locked: &mut Locked<Unlocked>,
459    current_task: &CurrentTask,
460) -> Result<uid_t, Errno> {
461    Ok(current_task.current_creds().uid)
462}
463
464pub fn sys_getgid(
465    _locked: &mut Locked<Unlocked>,
466    current_task: &CurrentTask,
467) -> Result<gid_t, Errno> {
468    Ok(current_task.current_creds().gid)
469}
470
471pub fn sys_setuid(
472    _locked: &mut Locked<Unlocked>,
473    current_task: &CurrentTask,
474    uid: uid_t,
475) -> Result<(), Errno> {
476    if uid == gid_t::MAX {
477        return error!(EINVAL);
478    }
479    if !new_uid_allowed(&current_task, uid) {
480        return error!(EPERM);
481    }
482
483    let mut creds = Credentials::clone(&current_task.current_creds());
484    let prev = creds.copy_user_credentials();
485    creds.euid = uid;
486    creds.fsuid = uid;
487    if security::is_task_capable_noaudit(current_task, CAP_SETUID) {
488        creds.uid = uid;
489        creds.saved_uid = uid;
490    }
491
492    creds.update_capabilities(prev);
493    current_task.set_creds(creds);
494    Ok(())
495}
496
497pub fn sys_setgid(
498    _locked: &mut Locked<Unlocked>,
499    current_task: &CurrentTask,
500    gid: gid_t,
501) -> Result<(), Errno> {
502    if gid == gid_t::MAX {
503        return error!(EINVAL);
504    }
505    if !new_gid_allowed(&current_task, gid) {
506        return error!(EPERM);
507    }
508
509    let mut creds = Credentials::clone(&current_task.current_creds());
510    creds.egid = gid;
511    creds.fsgid = gid;
512    if security::is_task_capable_noaudit(current_task, CAP_SETGID) {
513        creds.gid = gid;
514        creds.saved_gid = gid;
515    }
516    current_task.set_creds(creds);
517    Ok(())
518}
519
520pub fn sys_geteuid(
521    _locked: &mut Locked<Unlocked>,
522    current_task: &CurrentTask,
523) -> Result<uid_t, Errno> {
524    Ok(current_task.current_creds().euid)
525}
526
527pub fn sys_getegid(
528    _locked: &mut Locked<Unlocked>,
529    current_task: &CurrentTask,
530) -> Result<gid_t, Errno> {
531    Ok(current_task.current_creds().egid)
532}
533
534pub fn sys_setfsuid(
535    _locked: &mut Locked<Unlocked>,
536    current_task: &CurrentTask,
537    fsuid: uid_t,
538) -> Result<uid_t, Errno> {
539    let mut creds = Credentials::clone(&current_task.current_creds());
540    let prev = creds.copy_user_credentials();
541    if fsuid != u32::MAX && new_uid_allowed(&current_task, fsuid) {
542        creds.fsuid = fsuid;
543        creds.update_capabilities(prev);
544        current_task.set_creds(creds);
545    }
546
547    Ok(prev.fsuid)
548}
549
550pub fn sys_setfsgid(
551    _locked: &mut Locked<Unlocked>,
552    current_task: &CurrentTask,
553    fsgid: gid_t,
554) -> Result<gid_t, Errno> {
555    let mut creds = Credentials::clone(&current_task.current_creds());
556    let prev = creds.copy_user_credentials();
557    let prev_fsgid = creds.fsgid;
558
559    if fsgid != u32::MAX && new_gid_allowed(&current_task, fsgid) {
560        creds.fsgid = fsgid;
561        creds.update_capabilities(prev);
562        current_task.set_creds(creds);
563    }
564
565    Ok(prev_fsgid)
566}
567
568pub fn sys_getresuid(
569    _locked: &mut Locked<Unlocked>,
570    current_task: &CurrentTask,
571    ruid_addr: UserRef<uid_t>,
572    euid_addr: UserRef<uid_t>,
573    suid_addr: UserRef<uid_t>,
574) -> Result<(), Errno> {
575    let creds = current_task.current_creds();
576    current_task.write_object(ruid_addr, &creds.uid)?;
577    current_task.write_object(euid_addr, &creds.euid)?;
578    current_task.write_object(suid_addr, &creds.saved_uid)?;
579    Ok(())
580}
581
582pub fn sys_getresgid(
583    _locked: &mut Locked<Unlocked>,
584    current_task: &CurrentTask,
585    rgid_addr: UserRef<gid_t>,
586    egid_addr: UserRef<gid_t>,
587    sgid_addr: UserRef<gid_t>,
588) -> Result<(), Errno> {
589    let creds = current_task.current_creds();
590    current_task.write_object(rgid_addr, &creds.gid)?;
591    current_task.write_object(egid_addr, &creds.egid)?;
592    current_task.write_object(sgid_addr, &creds.saved_gid)?;
593    Ok(())
594}
595
596pub fn sys_setreuid(
597    _locked: &mut Locked<Unlocked>,
598    current_task: &CurrentTask,
599    ruid: uid_t,
600    euid: uid_t,
601) -> Result<(), Errno> {
602    // Linux __sys_setreuid() uses asymmetric checks: ruid cannot be set
603    // to saved_uid, while euid can. This prevents regaining root via
604    // setreuid after a privilege drop when setresuid would be required.
605    let validate_ruid = |uid: uid_t| {
606        let creds = current_task.current_creds();
607        uid == u32::MAX
608            || uid == creds.uid
609            || uid == creds.euid
610            || security::is_task_capable_noaudit(current_task, CAP_SETUID)
611    };
612    let validate_euid = |uid: uid_t| {
613        let creds = current_task.current_creds();
614        uid == u32::MAX
615            || uid == creds.uid
616            || uid == creds.euid
617            || uid == creds.saved_uid
618            || security::is_task_capable_noaudit(current_task, CAP_SETUID)
619    };
620    if !validate_ruid(ruid) || !validate_euid(euid) {
621        return error!(EPERM);
622    }
623
624    let mut creds = Credentials::clone(&current_task.current_creds());
625    let prev = creds.copy_user_credentials();
626    let is_ruid_set = ruid != u32::MAX;
627    if is_ruid_set {
628        creds.uid = ruid;
629    }
630    let is_euid_set = euid != u32::MAX;
631    if is_euid_set {
632        creds.euid = euid;
633        creds.fsuid = euid;
634    }
635
636    // If the real user ID is set (i.e., ruid is not -1) or the effective
637    // user ID is set to a value not equal to the previous real user ID,
638    // the saved set-user-ID will be set to the new effective user ID.
639    if is_ruid_set || (is_euid_set && euid != prev.uid) {
640        creds.saved_uid = creds.euid;
641    }
642
643    creds.update_capabilities(prev);
644    current_task.set_creds(creds);
645    Ok(())
646}
647
648pub fn sys_setregid(
649    _locked: &mut Locked<Unlocked>,
650    current_task: &CurrentTask,
651    rgid: gid_t,
652    egid: gid_t,
653) -> Result<(), Errno> {
654    // Same asymmetric permission model as setreuid — see above.
655    let validate_rgid = |gid: gid_t| {
656        let creds = current_task.current_creds();
657        gid == u32::MAX
658            || gid == creds.gid
659            || gid == creds.egid
660            || security::is_task_capable_noaudit(current_task, CAP_SETGID)
661    };
662    let validate_egid = |gid: gid_t| {
663        let creds = current_task.current_creds();
664        gid == u32::MAX
665            || gid == creds.gid
666            || gid == creds.egid
667            || gid == creds.saved_gid
668            || security::is_task_capable_noaudit(current_task, CAP_SETGID)
669    };
670    if !validate_rgid(rgid) || !validate_egid(egid) {
671        return error!(EPERM);
672    }
673
674    let mut creds = Credentials::clone(&current_task.current_creds());
675    let previous_rgid = creds.gid;
676    let is_rgid_set = rgid != u32::MAX;
677    if is_rgid_set {
678        creds.gid = rgid;
679    }
680    let is_egid_set = egid != u32::MAX;
681    if is_egid_set {
682        creds.egid = egid;
683        creds.fsgid = egid;
684    }
685
686    // If the real group ID is set (i.e., rgid is not -1) or the effective
687    // group ID is set to a value not equal to the previous real group ID,
688    // the saved set-group-ID will be set to the new effective group ID.
689    if is_rgid_set || (is_egid_set && egid != previous_rgid) {
690        creds.saved_gid = creds.egid;
691    }
692
693    current_task.set_creds(creds);
694    Ok(())
695}
696
697pub fn sys_setresuid(
698    _locked: &mut Locked<Unlocked>,
699    current_task: &CurrentTask,
700    ruid: uid_t,
701    euid: uid_t,
702    suid: uid_t,
703) -> Result<(), Errno> {
704    let allowed = |uid| uid == u32::MAX || new_uid_allowed(&current_task, uid);
705    if !allowed(ruid) || !allowed(euid) || !allowed(suid) {
706        return error!(EPERM);
707    }
708
709    let mut creds = Credentials::clone(&current_task.current_creds());
710    let prev = creds.copy_user_credentials();
711    if ruid != u32::MAX {
712        creds.uid = ruid;
713    }
714    if euid != u32::MAX {
715        creds.euid = euid;
716        creds.fsuid = euid;
717    }
718    if suid != u32::MAX {
719        creds.saved_uid = suid;
720    }
721    creds.update_capabilities(prev);
722    current_task.set_creds(creds);
723    Ok(())
724}
725
726pub fn sys_setresgid(
727    _locked: &mut Locked<Unlocked>,
728    current_task: &CurrentTask,
729    rgid: gid_t,
730    egid: gid_t,
731    sgid: gid_t,
732) -> Result<(), Errno> {
733    let allowed = |gid| gid == u32::MAX || new_gid_allowed(&current_task, gid);
734    if !allowed(rgid) || !allowed(egid) || !allowed(sgid) {
735        return error!(EPERM);
736    }
737
738    let mut creds = Credentials::clone(&current_task.current_creds());
739    if rgid != u32::MAX {
740        creds.gid = rgid;
741    }
742    if egid != u32::MAX {
743        creds.egid = egid;
744        creds.fsgid = egid;
745    }
746    if sgid != u32::MAX {
747        creds.saved_gid = sgid;
748    }
749    current_task.set_creds(creds);
750    Ok(())
751}
752
753pub fn sys_exit(
754    _locked: &mut Locked<Unlocked>,
755    current_task: &CurrentTask,
756    code: i32,
757) -> Result<(), Errno> {
758    // Only change the current exit status if this has not been already set by exit_group, as
759    // otherwise it has priority.
760    current_task.write().set_exit_status_if_not_already(ExitStatus::Exit(code as u8));
761    Ok(())
762}
763
764pub fn sys_exit_group(
765    locked: &mut Locked<Unlocked>,
766    current_task: &mut CurrentTask,
767    code: i32,
768) -> Result<(), Errno> {
769    current_task.thread_group_exit(locked, ExitStatus::Exit(code as u8));
770    Ok(())
771}
772
773pub fn sys_sched_getscheduler(
774    _locked: &mut Locked<Unlocked>,
775    current_task: &CurrentTask,
776    pid: pid_t,
777) -> Result<u32, Errno> {
778    if pid < 0 {
779        return error!(EINVAL);
780    }
781
782    let target_task = get_task_or_current(current_task, pid)?;
783    security::check_getsched_access(current_task, target_task.as_ref())?;
784    let current_scheduler_state = target_task.read().scheduler_state;
785    Ok(current_scheduler_state.policy_for_sched_getscheduler())
786}
787
/// Implements `sched_setscheduler(2)`.
///
/// * A negative `pid` or a null `param` is `EINVAL`; `pid == 0` selects the caller.
/// * `policy` may carry `SCHED_RESET_ON_FORK`; the remaining bits must convert to a
///   `SchedulingPolicy`.
/// * The priority read from `param` must be valid for the chosen policy.
///
/// CAP_SYS_NICE is required when any of the following holds: the caller is not EUID-friendly
/// with the target, the realtime priority is being raised beyond the target's RTPRIO rlimit,
/// the reset-on-fork flag is being cleared, or the target is leaving the idle policy while its
/// normal priority exceeds its NICE rlimit. The security check for setsched access must also
/// pass before the new configuration is applied.
pub fn sys_sched_setscheduler(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    pid: pid_t,
    policy: u32,
    param: UserRef<sched_param>,
) -> Result<(), Errno> {
    // Parse & validate the arguments.
    if pid < 0 || param.is_null() {
        return error!(EINVAL);
    }

    let target_task = get_task_or_current(current_task, pid)?;

    // The reset-on-fork flag is encoded in the policy word; split it off before converting.
    let reset_on_fork = policy & SCHED_RESET_ON_FORK != 0;

    let policy = SchedulingPolicy::try_from(policy & !SCHED_RESET_ON_FORK)?;
    let realtime_priority =
        policy.realtime_priority_from(current_task.read_object(param)?.sched_priority)?;

    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
    let current_state = target_task.read().scheduler_state;

    // Check capabilities and permissions, if required, for the operation.
    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
    // Raising the realtime priority is only restricted when it goes past the RTPRIO rlimit.
    let strengthening = current_state.realtime_priority < realtime_priority;
    let rlimited = strengthening
        && realtime_priority
            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
    let clearing_reset_on_fork = current_state.reset_on_fork && !reset_on_fork;
    // Leaving the idle policy is restricted when the current normal priority exceeds the NICE
    // rlimit.
    let caught_in_idle_trap = current_state.policy == SchedulingPolicy::Idle
        && policy != SchedulingPolicy::Idle
        && current_state
            .normal_priority
            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
    if !euid_friendly || rlimited || clearing_reset_on_fork || caught_in_idle_trap {
        security::check_task_capable(current_task, CAP_SYS_NICE)?;
    }

    security::check_setsched_access(current_task, &target_task)?;

    // Apply the new scheduler configuration to the task.
    target_task.set_scheduler_policy_priority_and_reset_on_fork(
        policy,
        realtime_priority,
        reset_on_fork,
    )?;

    Ok(())
}
838
/// Size in bytes of the bitmask stored in `CpuSet`. With one bit per CPU this caps the number
/// of representable CPUs at `CPU_SET_SIZE * 8`.
const CPU_SET_SIZE: usize = 128;
840
/// Fixed-size CPU affinity bitmask exchanged with userspace by the `sched_getaffinity` /
/// `sched_setaffinity` syscalls.
#[repr(C)]
#[derive(Debug, Copy, Clone, IntoBytes, FromBytes, KnownLayout, Immutable)]
pub struct CpuSet {
    /// Raw mask bytes. `get_default_cpu_set` fills them from the lowest bit of the first byte
    /// upwards, one bit per CPU.
    bits: [u8; CPU_SET_SIZE],
}
846
847impl Default for CpuSet {
848    fn default() -> Self {
849        Self { bits: [0; CPU_SET_SIZE] }
850    }
851}
852
853fn check_cpu_set_alignment(current_task: &CurrentTask, cpusetsize: u32) -> Result<(), Errno> {
854    let alignment = if current_task.is_arch32() { 4 } else { 8 };
855    if cpusetsize < alignment || cpusetsize % alignment != 0 {
856        return error!(EINVAL);
857    }
858    Ok(())
859}
860
861fn get_default_cpu_set() -> CpuSet {
862    let mut result = CpuSet::default();
863    let mut cpus_count = zx::system_get_num_cpus();
864    let cpus_count_max = (CPU_SET_SIZE * 8) as u32;
865    if cpus_count > cpus_count_max {
866        log_error!("cpus_count={cpus_count}, greater than the {cpus_count_max} max supported.");
867        cpus_count = cpus_count_max;
868    }
869    let mut index = 0;
870    while cpus_count > 0 {
871        let count = std::cmp::min(cpus_count, 8);
872        let (shl, overflow) = 1_u8.overflowing_shl(count);
873        let mask = if overflow { u8::max_value() } else { shl - 1 };
874        result.bits[index] = mask;
875        index += 1;
876        cpus_count -= count;
877    }
878    result
879}
880
881pub fn sys_sched_getaffinity(
882    _locked: &mut Locked<Unlocked>,
883    current_task: &CurrentTask,
884    pid: pid_t,
885    cpusetsize: u32,
886    user_mask: UserAddress,
887) -> Result<usize, Errno> {
888    if pid < 0 {
889        return error!(EINVAL);
890    }
891
892    check_cpu_set_alignment(current_task, cpusetsize)?;
893
894    let _task = get_task_or_current(current_task, pid)?;
895
896    // sched_setaffinity() is not implemented. Fake affinity mask based on the number of CPUs.
897    let mask = get_default_cpu_set();
898    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
899    current_task.write_memory(user_mask, &mask.bits[..mask_size])?;
900    track_stub!(TODO("https://fxbug.dev/322874659"), "sched_getaffinity");
901    Ok(mask_size)
902}
903
904pub fn sys_sched_setaffinity(
905    _locked: &mut Locked<Unlocked>,
906    current_task: &CurrentTask,
907    pid: pid_t,
908    cpusetsize: u32,
909    user_mask: UserAddress,
910) -> Result<(), Errno> {
911    if pid < 0 {
912        return error!(EINVAL);
913    }
914    let target_task = get_task_or_current(current_task, pid)?;
915
916    check_cpu_set_alignment(current_task, cpusetsize)?;
917
918    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
919    let mut mask = CpuSet::default();
920    current_task.read_memory_to_slice(user_mask, &mut mask.bits[..mask_size])?;
921
922    // Specified mask must include at least one valid CPU.
923    let max_mask = get_default_cpu_set();
924    let mut has_valid_cpu_in_mask = false;
925    for (l1, l2) in std::iter::zip(max_mask.bits, mask.bits) {
926        has_valid_cpu_in_mask = has_valid_cpu_in_mask || (l1 & l2 > 0);
927    }
928    if !has_valid_cpu_in_mask {
929        return error!(EINVAL);
930    }
931
932    if !current_task.is_euid_friendly_with(&target_task) {
933        security::check_task_capable(current_task, CAP_SYS_NICE)?;
934    }
935
936    // Currently, we ignore the mask and act as if the system reset the mask
937    // immediately to allowing all CPUs.
938    track_stub!(TODO("https://fxbug.dev/322874889"), "sched_setaffinity");
939    Ok(())
940}
941
942pub fn sys_sched_getparam(
943    _locked: &mut Locked<Unlocked>,
944    current_task: &CurrentTask,
945    pid: pid_t,
946    param: UserRef<sched_param>,
947) -> Result<(), Errno> {
948    if pid < 0 || param.is_null() {
949        return error!(EINVAL);
950    }
951
952    let target_task = get_task_or_current(current_task, pid)?;
953    let param_value = target_task.read().scheduler_state.get_sched_param();
954    current_task.write_object(param, &param_value)?;
955    Ok(())
956}
957
958pub fn sys_sched_setparam(
959    locked: &mut Locked<Unlocked>,
960    current_task: &CurrentTask,
961    pid: pid_t,
962    param: UserRef<sched_param>,
963) -> Result<(), Errno> {
964    // Parse & validate the arguments.
965    if pid < 0 || param.is_null() {
966        return error!(EINVAL);
967    }
968    let target_task = get_task_or_current(current_task, pid)?;
969
970    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
971    let current_state = target_task.read().scheduler_state;
972
973    let realtime_priority = current_state
974        .policy
975        .realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
976
977    // Check capabilities and permissions, if required, for the operation.
978    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
979    let strengthening = current_state.realtime_priority < realtime_priority;
980    let rlimited = strengthening
981        && realtime_priority
982            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
983    if !euid_friendly || rlimited {
984        security::check_task_capable(current_task, CAP_SYS_NICE)?;
985    }
986
987    security::check_setsched_access(current_task, &target_task)?;
988
989    // Apply the new scheduler configuration to the task.
990    target_task.set_scheduler_priority(realtime_priority)?;
991
992    Ok(())
993}
994
/// Handler for the `sched_get_priority_min` syscall.
///
/// Returns whatever `min_priority_for_sched_policy` reports for the raw `policy` value,
/// including its error for policies it does not accept. The lock context and task are unused.
pub fn sys_sched_get_priority_min(
    _locked: &mut Locked<Unlocked>,
    _ctx: &CurrentTask,
    policy: u32,
) -> Result<u8, Errno> {
    min_priority_for_sched_policy(policy)
}
1002
/// Handler for the `sched_get_priority_max` syscall.
///
/// Returns whatever `max_priority_for_sched_policy` reports for the raw `policy` value,
/// including its error for policies it does not accept. The lock context and task are unused.
pub fn sys_sched_get_priority_max(
    _locked: &mut Locked<Unlocked>,
    _ctx: &CurrentTask,
    policy: u32,
) -> Result<u8, Errno> {
    max_priority_for_sched_policy(policy)
}
1010
/// Handler for the `ioprio_set` syscall.
///
/// Not implemented: every call is recorded through `track_stub!` and fails with `ENOSYS`,
/// regardless of the arguments.
pub fn sys_ioprio_set(
    _locked: &mut Locked<Unlocked>,
    _current_task: &mut CurrentTask,
    _which: i32,
    _who: i32,
    _ioprio: i32,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297591758"), "ioprio_set()");
    error!(ENOSYS)
}
1021
1022pub fn sys_prctl(
1023    locked: &mut Locked<Unlocked>,
1024    current_task: &mut CurrentTask,
1025    option: u32,
1026    arg2: u64,
1027    arg3: u64,
1028    arg4: u64,
1029    arg5: u64,
1030) -> Result<SyscallResult, Errno> {
1031    match option {
1032        PR_SET_VMA => {
1033            if arg2 != PR_SET_VMA_ANON_NAME as u64 {
1034                track_stub!(TODO("https://fxbug.dev/322874826"), "prctl PR_SET_VMA", arg2);
1035                return error!(ENOSYS);
1036            }
1037            let addr = UserAddress::from(arg3);
1038            let length = arg4 as usize;
1039            let name_addr = UserAddress::from(arg5);
1040            let name = if name_addr.is_null() {
1041                None
1042            } else {
1043                let name = UserCString::new(current_task, UserAddress::from(arg5));
1044                let name = current_task.read_c_string_to_vec(name, 256).map_err(|e| {
1045                    // An overly long name produces EINVAL and not ENAMETOOLONG in Linux 5.15.
1046                    if e.code == ENAMETOOLONG { errno!(EINVAL) } else { e }
1047                })?;
1048                // Some characters are forbidden in VMA names.
1049                if name.iter().any(|b| {
1050                    matches!(b,
1051                        0..=0x1f |
1052                        0x7f..=0xff |
1053                        b'\\' | b'`' | b'$' | b'[' | b']'
1054                    )
1055                }) {
1056                    return error!(EINVAL);
1057                }
1058                Some(name)
1059            };
1060            current_task.mm()?.set_mapping_name(addr, length, name)?;
1061            Ok(().into())
1062        }
1063        PR_SET_DUMPABLE => {
1064            let mm = current_task.mm()?;
1065            let mut dumpable = mm.dumpable.lock(locked);
1066            *dumpable = if arg2 == 1 { DumpPolicy::User } else { DumpPolicy::Disable };
1067            Ok(().into())
1068        }
1069        PR_GET_DUMPABLE => {
1070            let mm = current_task.mm()?;
1071            let dumpable = mm.dumpable.lock(locked);
1072            Ok(match *dumpable {
1073                DumpPolicy::Disable => 0.into(),
1074                DumpPolicy::User => 1.into(),
1075            })
1076        }
1077        PR_SET_PDEATHSIG => {
1078            track_stub!(TODO("https://fxbug.dev/322874397"), "PR_SET_PDEATHSIG");
1079            Ok(().into())
1080        }
1081        PR_SET_NAME => {
1082            let addr = UserAddress::from(arg2);
1083            let name = TaskCommand::new(&current_task.read_memory_to_array::<16>(addr)?);
1084            current_task.set_command_name(name);
1085            if current_task.tid == current_task.thread_group.leader {
1086                current_task.thread_group.sync_syscall_log_level();
1087            }
1088            Ok(0.into())
1089        }
1090        PR_GET_NAME => {
1091            let addr = UserAddress::from(arg2);
1092            let name = current_task.command().prctl_name();
1093            current_task.write_memory(addr, &name[..])?;
1094            Ok(().into())
1095        }
1096        PR_SET_PTRACER => {
1097            let allowed_ptracers = if arg2 == PR_SET_PTRACER_ANY as u64 {
1098                PtraceAllowedPtracers::Any
1099            } else if arg2 == 0 {
1100                PtraceAllowedPtracers::None
1101            } else {
1102                if current_task.kernel().pids.read().get_task(arg2 as i32).is_err() {
1103                    return error!(EINVAL);
1104                }
1105                PtraceAllowedPtracers::Some(arg2 as pid_t)
1106            };
1107            current_task.thread_group().write().allowed_ptracers = allowed_ptracers;
1108            Ok(().into())
1109        }
1110        PR_GET_KEEPCAPS => {
1111            Ok(current_task.current_creds().securebits.contains(SecureBits::KEEP_CAPS).into())
1112        }
1113        PR_SET_KEEPCAPS => {
1114            if arg2 != 0 && arg2 != 1 {
1115                return error!(EINVAL);
1116            }
1117            let mut creds = Credentials::clone(&current_task.current_creds());
1118            let mut securebits = creds.securebits;
1119            securebits.set(SecureBits::KEEP_CAPS, arg2 != 0);
1120            creds.set_securebits(securebits)?;
1121            current_task.set_creds(creds);
1122            Ok(().into())
1123        }
1124        PR_SET_NO_NEW_PRIVS => {
1125            // If any args are set other than arg2 to 1, this should return einval
1126            if arg2 != 1 || arg3 != 0 || arg4 != 0 || arg5 != 0 {
1127                return error!(EINVAL);
1128            }
1129            current_task.write().enable_no_new_privs();
1130            Ok(().into())
1131        }
1132        PR_GET_NO_NEW_PRIVS => {
1133            // If any args are set, this should return einval
1134            if arg2 != 0 || arg3 != 0 || arg4 != 0 {
1135                return error!(EINVAL);
1136            }
1137            Ok(current_task.read().no_new_privs().into())
1138        }
1139        PR_GET_SECCOMP => {
1140            if current_task.seccomp_filter_state.get() == SeccompStateValue::None {
1141                Ok(0.into())
1142            } else {
1143                Ok(2.into())
1144            }
1145        }
1146        PR_SET_SECCOMP => {
1147            if arg2 == SECCOMP_MODE_STRICT as u64 {
1148                return sys_seccomp(
1149                    locked,
1150                    current_task,
1151                    SECCOMP_SET_MODE_STRICT,
1152                    0,
1153                    UserAddress::NULL,
1154                );
1155            } else if arg2 == SECCOMP_MODE_FILTER as u64 {
1156                return sys_seccomp(locked, current_task, SECCOMP_SET_MODE_FILTER, 0, arg3.into());
1157            }
1158            Ok(().into())
1159        }
1160        PR_GET_CHILD_SUBREAPER => {
1161            let addr = UserAddress::from(arg2);
1162            #[allow(clippy::bool_to_int_with_if)]
1163            let value: i32 =
1164                if current_task.thread_group().read().is_child_subreaper { 1 } else { 0 };
1165            current_task.write_object(addr.into(), &value)?;
1166            Ok(().into())
1167        }
1168        PR_SET_CHILD_SUBREAPER => {
1169            current_task.thread_group().write().is_child_subreaper = arg2 != 0;
1170            Ok(().into())
1171        }
1172        PR_GET_SECUREBITS => Ok(current_task.current_creds().securebits.bits().into()),
1173        PR_SET_SECUREBITS => {
1174            security::check_task_capable(current_task, CAP_SETPCAP)?;
1175
1176            let securebits = SecureBits::from_bits(arg2 as u32).ok_or_else(|| {
1177                track_stub!(TODO("https://fxbug.dev/322875244"), "PR_SET_SECUREBITS", arg2);
1178                errno!(ENOSYS)
1179            })?;
1180
1181            let mut creds = Credentials::clone(&current_task.current_creds());
1182            creds.set_securebits(securebits)?;
1183            current_task.set_creds(creds);
1184            Ok(().into())
1185        }
1186        PR_CAPBSET_READ => {
1187            let cap = Capabilities::try_from(arg2)?;
1188            Ok(current_task.current_creds().cap_bounding.contains(cap).into())
1189        }
1190        PR_CAPBSET_DROP => {
1191            let mut creds = Credentials::clone(&current_task.current_creds());
1192            security::check_task_capable(current_task, CAP_SETPCAP)?;
1193
1194            creds.cap_bounding.remove(Capabilities::try_from(arg2)?);
1195            current_task.set_creds(creds);
1196            Ok(().into())
1197        }
1198        PR_CAP_AMBIENT => {
1199            let operation = arg2 as u32;
1200            let capability_arg = Capabilities::try_from(arg3)?;
1201            if arg4 != 0 || arg5 != 0 {
1202                return error!(EINVAL);
1203            }
1204
1205            // TODO(security): We don't currently validate capabilities, but this should return an
1206            // error if the capability_arg is invalid.
1207            match operation {
1208                PR_CAP_AMBIENT_RAISE => {
1209                    let mut creds = Credentials::clone(&current_task.current_creds());
1210                    if !(creds.cap_permitted.contains(capability_arg)
1211                        && creds.cap_inheritable.contains(capability_arg))
1212                    {
1213                        return error!(EPERM);
1214                    }
1215                    if creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE) {
1216                        return error!(EPERM);
1217                    }
1218
1219                    creds.cap_ambient.insert(capability_arg);
1220                    current_task.set_creds(creds);
1221                    Ok(().into())
1222                }
1223                PR_CAP_AMBIENT_LOWER => {
1224                    let mut creds = Credentials::clone(&current_task.current_creds());
1225                    creds.cap_ambient.remove(capability_arg);
1226                    current_task.set_creds(creds);
1227                    Ok(().into())
1228                }
1229                PR_CAP_AMBIENT_IS_SET => {
1230                    Ok(current_task.current_creds().cap_ambient.contains(capability_arg).into())
1231                }
1232                PR_CAP_AMBIENT_CLEAR_ALL => {
1233                    if arg3 != 0 {
1234                        return error!(EINVAL);
1235                    }
1236
1237                    let mut creds = Credentials::clone(&current_task.current_creds());
1238                    creds.cap_ambient = Capabilities::empty();
1239                    current_task.set_creds(creds);
1240                    Ok(().into())
1241                }
1242                _ => error!(EINVAL),
1243            }
1244        }
1245        PR_SET_TIMERSLACK => {
1246            current_task.write().set_timerslack_ns(arg2);
1247            Ok(().into())
1248        }
1249        #[cfg(target_arch = "aarch64")]
1250        PR_GET_TAGGED_ADDR_CTRL => {
1251            track_stub!(TODO("https://fxbug.dev/408554469"), "PR_GET_TAGGED_ADDR_CTRL");
1252            Ok(0.into())
1253        }
1254        #[cfg(target_arch = "aarch64")]
1255        PR_SET_TAGGED_ADDR_CTRL => match u32::try_from(arg2).map_err(|_| errno!(EINVAL))? {
1256            // Only untagged pointers are allowed, the default.
1257            0 => Ok(().into()),
1258            PR_TAGGED_ADDR_ENABLE => {
1259                track_stub!(TODO("https://fxbug.dev/408554469"), "PR_TAGGED_ADDR_ENABLE");
1260                error!(EINVAL)
1261            }
1262            unknown_mode => {
1263                track_stub!(
1264                    TODO("https://fxbug.dev/408554469"),
1265                    "PR_SET_TAGGED_ADDR_CTRL unknown mode",
1266                    unknown_mode,
1267                );
1268                error!(EINVAL)
1269            }
1270        },
1271        _ => {
1272            track_stub!(TODO("https://fxbug.dev/322874733"), "prctl fallthrough", option);
1273            error!(ENOSYS)
1274        }
1275    }
1276}
1277
1278pub fn sys_ptrace(
1279    locked: &mut Locked<Unlocked>,
1280    current_task: &mut CurrentTask,
1281    request: u32,
1282    pid: pid_t,
1283    addr: UserAddress,
1284    data: UserAddress,
1285) -> Result<SyscallResult, Errno> {
1286    match request {
1287        PTRACE_TRACEME => ptrace_traceme(current_task),
1288        PTRACE_ATTACH => ptrace_attach(locked, current_task, pid, PtraceAttachType::Attach, data),
1289        PTRACE_SEIZE => ptrace_attach(locked, current_task, pid, PtraceAttachType::Seize, data),
1290        _ => ptrace_dispatch(locked, current_task, request, pid, addr, data),
1291    }
1292}
1293
/// Handler for the `set_tid_address` syscall.
///
/// Records `user_tid` as the calling task's `clear_child_tid` pointer and returns the caller's
/// thread id. The pointer is stored as given; it is not validated here.
pub fn sys_set_tid_address(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    user_tid: UserRef<pid_t>,
) -> Result<pid_t, Errno> {
    current_task.write().clear_child_tid = user_tid;
    Ok(current_task.get_tid())
}
1302
1303pub fn sys_getrusage(
1304    _locked: &mut Locked<Unlocked>,
1305    current_task: &CurrentTask,
1306    who: i32,
1307    user_usage: RUsagePtr,
1308) -> Result<(), Errno> {
1309    const RUSAGE_SELF: i32 = starnix_uapi::uapi::RUSAGE_SELF as i32;
1310    const RUSAGE_THREAD: i32 = starnix_uapi::uapi::RUSAGE_THREAD as i32;
1311    track_stub!(TODO("https://fxbug.dev/297370242"), "real rusage");
1312    let time_stats = match who {
1313        RUSAGE_CHILDREN => current_task.task.thread_group().read().children_time_stats,
1314        RUSAGE_SELF => current_task.task.thread_group().time_stats(),
1315        RUSAGE_THREAD => current_task.task.time_stats(),
1316        _ => return error!(EINVAL),
1317    };
1318
1319    let usage = rusage {
1320        ru_utime: timeval_from_duration(time_stats.user_time),
1321        ru_stime: timeval_from_duration(time_stats.system_time),
1322        ..rusage::default()
1323    };
1324    current_task.write_multi_arch_object(user_usage, usage)?;
1325
1326    Ok(())
1327}
1328
// User pointer to an `rlimit` that is either the native `uapi::rlimit` or the
// `uapi::arch32::rlimit` layout (presumably chosen by the calling task's architecture).
type PrLimitRef = MultiArchUserRef<uapi::rlimit, uapi::arch32::rlimit>;
1330
1331pub fn sys_getrlimit(
1332    locked: &mut Locked<Unlocked>,
1333    current_task: &CurrentTask,
1334    resource: u32,
1335    user_rlimit: PrLimitRef,
1336) -> Result<(), Errno> {
1337    do_prlimit64(locked, current_task, 0, resource, PrLimitRef::null(current_task), user_rlimit)
1338}
1339
1340pub fn sys_setrlimit(
1341    locked: &mut Locked<Unlocked>,
1342    current_task: &CurrentTask,
1343    resource: u32,
1344    user_rlimit: PrLimitRef,
1345) -> Result<(), Errno> {
1346    do_prlimit64(locked, current_task, 0, resource, user_rlimit, PrLimitRef::null(current_task))
1347}
1348
1349pub fn sys_prlimit64(
1350    locked: &mut Locked<Unlocked>,
1351    current_task: &CurrentTask,
1352    pid: pid_t,
1353    user_resource: u32,
1354    new_limit_ref: UserRef<uapi::rlimit>,
1355    old_limit_ref: UserRef<uapi::rlimit>,
1356) -> Result<(), Errno> {
1357    do_prlimit64::<uapi::rlimit>(
1358        locked,
1359        current_task,
1360        pid,
1361        user_resource,
1362        new_limit_ref.into(),
1363        old_limit_ref.into(),
1364    )
1365}
1366
1367pub fn do_prlimit64<T>(
1368    locked: &mut Locked<Unlocked>,
1369    current_task: &CurrentTask,
1370    pid: pid_t,
1371    user_resource: u32,
1372    new_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1373    old_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1374) -> Result<(), Errno>
1375where
1376    T: FromBytes + IntoBytes + Immutable + From<uapi::rlimit> + Into<uapi::rlimit>,
1377{
1378    let target_task = get_task_or_current(current_task, pid)?;
1379
1380    // To get or set the resource of a process other than itself, the caller must have either:
1381    // * the same `uid`, `euid`, `saved_uid`, `gid`, `egid`, `saved_gid` as the target.
1382    // * the CAP_SYS_RESOURCE
1383    if current_task.get_pid() != target_task.get_pid() {
1384        let self_creds = current_task.current_creds();
1385        let target_creds = target_task.real_creds();
1386        if self_creds.uid != target_creds.uid
1387            || self_creds.euid != target_creds.euid
1388            || self_creds.saved_uid != target_creds.saved_uid
1389            || self_creds.gid != target_creds.gid
1390            || self_creds.egid != target_creds.egid
1391            || self_creds.saved_gid != target_creds.saved_gid
1392        {
1393            security::check_task_capable(current_task, CAP_SYS_RESOURCE)?;
1394        }
1395        security::task_prlimit(
1396            current_task,
1397            &target_task,
1398            !old_limit_ref.is_null(),
1399            !new_limit_ref.is_null(),
1400        )?;
1401    }
1402
1403    let resource = Resource::from_raw(user_resource)?;
1404
1405    let old_limit = match resource {
1406        // TODO: Integrate Resource::STACK with generic ResourceLimits machinery.
1407        Resource::STACK => {
1408            if !new_limit_ref.is_null() {
1409                track_stub!(
1410                    TODO("https://fxbug.dev/322874791"),
1411                    "prlimit64 cannot set RLIMIT_STACK"
1412                );
1413            }
1414            // The stack size is fixed at the moment, but
1415            // if MAP_GROWSDOWN is implemented this should
1416            // report the limit that it can be grown.
1417            let mm = target_task.mm()?;
1418            let mm_state = mm.state.read();
1419            let stack_size = mm_state.stack_size as u64;
1420            rlimit { rlim_cur: stack_size, rlim_max: stack_size }
1421        }
1422        _ => {
1423            let new_limit = if new_limit_ref.is_null() {
1424                None
1425            } else {
1426                let new_limit = current_task.read_multi_arch_object(new_limit_ref)?;
1427                if new_limit.rlim_cur > new_limit.rlim_max {
1428                    return error!(EINVAL);
1429                }
1430                Some(new_limit)
1431            };
1432            ThreadGroup::adjust_rlimits(locked, current_task, &target_task, resource, new_limit)?
1433        }
1434    };
1435    if !old_limit_ref.is_null() {
1436        current_task.write_multi_arch_object(old_limit_ref, old_limit)?;
1437    }
1438    Ok(())
1439}
1440
1441pub fn sys_quotactl(
1442    _locked: &mut Locked<Unlocked>,
1443    _current_task: &CurrentTask,
1444    _cmd: i32,
1445    _special: UserRef<c_char>,
1446    _id: i32,
1447    _addr: UserRef<c_char>,
1448) -> Result<SyscallResult, Errno> {
1449    track_stub!(TODO("https://fxbug.dev/297302197"), "quotacl()");
1450    error!(ENOSYS)
1451}
1452
1453pub fn sys_capget(
1454    _locked: &mut Locked<Unlocked>,
1455    current_task: &CurrentTask,
1456    user_header: UserRef<__user_cap_header_struct>,
1457    user_data: UserRef<__user_cap_data_struct>,
1458) -> Result<(), Errno> {
1459    let mut header = current_task.read_object(user_header)?;
1460    let is_version_valid =
1461        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
1462            .contains(&header.version);
1463    if !is_version_valid {
1464        header.version = _LINUX_CAPABILITY_VERSION_3;
1465        current_task.write_object(user_header, &header)?;
1466    }
1467    if user_data.is_null() {
1468        return Ok(());
1469    }
1470    if !is_version_valid || header.pid < 0 {
1471        return error!(EINVAL);
1472    }
1473
1474    let target_task = get_task_or_current(current_task, header.pid)?;
1475
1476    security::check_getcap_access(current_task, &target_task)?;
1477
1478    let (permitted, effective, inheritable) = {
1479        let creds = &target_task.real_creds();
1480        (creds.cap_permitted, creds.cap_effective, creds.cap_inheritable)
1481    };
1482
1483    match header.version {
1484        _LINUX_CAPABILITY_VERSION_1 => {
1485            let data: [__user_cap_data_struct; 1] = [__user_cap_data_struct {
1486                effective: effective.as_abi_v1(),
1487                inheritable: inheritable.as_abi_v1(),
1488                permitted: permitted.as_abi_v1(),
1489            }];
1490            current_task.write_objects(user_data, &data)?;
1491        }
1492        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
1493            // Return 64 bit capabilities as two sets of 32 bit capabilities, little endian
1494            let (permitted, effective, inheritable) =
1495                (permitted.as_abi_v3(), effective.as_abi_v3(), inheritable.as_abi_v3());
1496            let data: [__user_cap_data_struct; 2] = [
1497                __user_cap_data_struct {
1498                    effective: effective.0,
1499                    inheritable: inheritable.0,
1500                    permitted: permitted.0,
1501                },
1502                __user_cap_data_struct {
1503                    effective: effective.1,
1504                    inheritable: inheritable.1,
1505                    permitted: permitted.1,
1506                },
1507            ];
1508            current_task.write_objects(user_data, &data)?;
1509        }
1510        _ => {
1511            unreachable!("already returned if Linux capability version is not valid")
1512        }
1513    }
1514    Ok(())
1515}
1516
1517pub fn sys_capset(
1518    _locked: &mut Locked<Unlocked>,
1519    current_task: &CurrentTask,
1520    user_header: UserRef<__user_cap_header_struct>,
1521    user_data: UserRef<__user_cap_data_struct>,
1522) -> Result<(), Errno> {
1523    let mut header = current_task.read_object(user_header)?;
1524    let is_version_valid =
1525        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
1526            .contains(&header.version);
1527    if !is_version_valid {
1528        header.version = _LINUX_CAPABILITY_VERSION_3;
1529        current_task.write_object(user_header, &header)?;
1530        return error!(EINVAL);
1531    }
1532    if header.pid != 0 && header.pid != current_task.tid {
1533        return error!(EPERM);
1534    }
1535
1536    let (new_permitted, new_effective, new_inheritable) = match header.version {
1537        _LINUX_CAPABILITY_VERSION_1 => {
1538            let data = current_task.read_object(user_data)?;
1539            (
1540                Capabilities::from_abi_v1(data.permitted),
1541                Capabilities::from_abi_v1(data.effective),
1542                Capabilities::from_abi_v1(data.inheritable),
1543            )
1544        }
1545        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
1546            let data =
1547                current_task.read_objects_to_array::<__user_cap_data_struct, 2>(user_data)?;
1548            (
1549                Capabilities::from_abi_v3((data[0].permitted, data[1].permitted)),
1550                Capabilities::from_abi_v3((data[0].effective, data[1].effective)),
1551                Capabilities::from_abi_v3((data[0].inheritable, data[1].inheritable)),
1552            )
1553        }
1554        _ => {
1555            unreachable!("already returned if Linux capability version is not valid")
1556        }
1557    };
1558
1559    // Permission checks. Copied out of TLPI section 39.7.
1560    let mut creds = Credentials::clone(&current_task.current_creds());
1561    {
1562        log_trace!(
1563            "Capabilities({{permitted={:?} from {:?}, effective={:?} from {:?}, inheritable={:?} from {:?}}}, bounding={:?})",
1564            new_permitted,
1565            creds.cap_permitted,
1566            new_effective,
1567            creds.cap_effective,
1568            new_inheritable,
1569            creds.cap_inheritable,
1570            creds.cap_bounding
1571        );
1572        if !creds.cap_inheritable.union(creds.cap_permitted).contains(new_inheritable) {
1573            security::check_task_capable(current_task, CAP_SETPCAP)?;
1574        }
1575
1576        if !creds.cap_inheritable.union(creds.cap_bounding).contains(new_inheritable) {
1577            return error!(EPERM);
1578        }
1579        if !creds.cap_permitted.contains(new_permitted) {
1580            return error!(EPERM);
1581        }
1582        if !new_permitted.contains(new_effective) {
1583            return error!(EPERM);
1584        }
1585    }
1586    let target_task = get_task_or_current(current_task, header.pid)?;
1587
1588    security::check_setcap_access(current_task, &target_task)?;
1589
1590    creds.cap_permitted = new_permitted;
1591    creds.cap_effective = new_effective;
1592    creds.cap_inheritable = new_inheritable;
1593    creds.cap_ambient = new_permitted & new_inheritable & creds.cap_ambient;
1594    current_task.set_creds(creds);
1595    Ok(())
1596}
1597
1598pub fn sys_seccomp(
1599    locked: &mut Locked<Unlocked>,
1600    current_task: &mut CurrentTask,
1601    operation: u32,
1602    flags: u32,
1603    args: UserAddress,
1604) -> Result<SyscallResult, Errno> {
1605    match operation {
1606        SECCOMP_SET_MODE_STRICT => {
1607            if flags != 0 || args != UserAddress::NULL {
1608                return error!(EINVAL);
1609            }
1610            current_task.set_seccomp_state(SeccompStateValue::Strict)?;
1611            Ok(().into())
1612        }
1613        SECCOMP_SET_MODE_FILTER => {
1614            if flags
1615                & (SECCOMP_FILTER_FLAG_LOG
1616                    | SECCOMP_FILTER_FLAG_NEW_LISTENER
1617                    | SECCOMP_FILTER_FLAG_SPEC_ALLOW
1618                    | SECCOMP_FILTER_FLAG_TSYNC
1619                    | SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
1620                != flags
1621            {
1622                return error!(EINVAL);
1623            }
1624            if (flags & SECCOMP_FILTER_FLAG_TSYNC == 0)
1625                && (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH != 0)
1626            {
1627                return error!(EINVAL);
1628            }
1629            if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER != 0)
1630                && (flags & SECCOMP_FILTER_FLAG_TSYNC != 0)
1631                && (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH == 0)
1632            {
1633                return error!(EINVAL);
1634            }
1635            let fprog =
1636                current_task.read_multi_arch_object(SockFProgPtr::new(current_task, args))?;
1637            if fprog.len > BPF_MAXINSNS || fprog.len == 0 {
1638                return error!(EINVAL);
1639            }
1640            let code: Vec<sock_filter> =
1641                current_task.read_multi_arch_objects_to_vec(fprog.filter, fprog.len as usize)?;
1642
1643            if !current_task.read().no_new_privs() {
1644                security::check_task_capable(current_task, CAP_SYS_ADMIN)
1645                    .map_err(|_| errno!(EACCES))?;
1646            }
1647            current_task.add_seccomp_filter(locked, code, flags)
1648        }
1649        SECCOMP_GET_ACTION_AVAIL => {
1650            if flags != 0 || args.is_null() {
1651                return error!(EINVAL);
1652            }
1653            let action: u32 = current_task.read_object(UserRef::new(args))?;
1654            SeccompAction::is_action_available(action)
1655        }
1656        SECCOMP_GET_NOTIF_SIZES => {
1657            if flags != 0 {
1658                return error!(EINVAL);
1659            }
1660            track_stub!(TODO("https://fxbug.dev/322874791"), "SECCOMP_GET_NOTIF_SIZES");
1661            error!(ENOSYS)
1662        }
1663        _ => {
1664            track_stub!(TODO("https://fxbug.dev/322874916"), "seccomp fallthrough", operation);
1665            error!(EINVAL)
1666        }
1667    }
1668}
1669
1670pub fn sys_setgroups(
1671    _locked: &mut Locked<Unlocked>,
1672    current_task: &CurrentTask,
1673    size: usize,
1674    groups_addr: UserAddress,
1675) -> Result<(), Errno> {
1676    if size > NGROUPS_MAX as usize {
1677        return error!(EINVAL);
1678    }
1679    let groups = current_task.read_objects_to_vec::<gid_t>(groups_addr.into(), size)?;
1680    security::check_task_capable(current_task, CAP_SETGID)?;
1681    let mut creds = Credentials::clone(&current_task.current_creds());
1682    creds.groups = groups;
1683    current_task.set_creds(creds);
1684    Ok(())
1685}
1686
1687pub fn sys_getgroups(
1688    _locked: &mut Locked<Unlocked>,
1689    current_task: &CurrentTask,
1690    size: usize,
1691    groups_addr: UserAddress,
1692) -> Result<usize, Errno> {
1693    if size > NGROUPS_MAX as usize {
1694        return error!(EINVAL);
1695    }
1696    let creds = current_task.current_creds();
1697    if size != 0 {
1698        if size < creds.groups.len() {
1699            return error!(EINVAL);
1700        }
1701        current_task.write_memory(groups_addr, creds.groups.as_slice().as_bytes())?;
1702    }
1703    Ok(creds.groups.len())
1704}
1705
1706pub fn sys_setsid(
1707    locked: &mut Locked<Unlocked>,
1708    current_task: &CurrentTask,
1709) -> Result<pid_t, Errno> {
1710    current_task.thread_group().setsid(locked)?;
1711    Ok(current_task.get_pid())
1712}
1713
1714// Note the asymmetry with sys_setpriority: this returns "kernel nice" which ranges
1715// from 1 (weakest) to 40 (strongest). (It is part of Linux history that this syscall
1716// deals with niceness but has "priority" in its name.)
1717pub fn sys_getpriority(
1718    _locked: &mut Locked<Unlocked>,
1719    current_task: &CurrentTask,
1720    which: u32,
1721    who: i32,
1722) -> Result<u8, Errno> {
1723    match which {
1724        PRIO_PROCESS => {}
1725        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1726        _ => return error!(EINVAL),
1727    }
1728    track_stub!(TODO("https://fxbug.dev/322893809"), "getpriority permissions");
1729    let target_task = get_task_or_current(current_task, who)?;
1730    let state = target_task.read();
1731    Ok(state.scheduler_state.normal_priority.raw_priority())
1732}
1733
1734// Note the asymmetry with sys_getpriority: this call's `priority` parameter is a
1735// "user nice" which ranges from -20 (strongest) to 19 (weakest) (other values can be
1736// passed and are clamped to that range and interpretation). (It is part of Linux
1737// history that this syscall deals with niceness but has "priority" in its name.)
1738pub fn sys_setpriority(
1739    locked: &mut Locked<Unlocked>,
1740    current_task: &CurrentTask,
1741    which: u32,
1742    who: i32,
1743    priority: i32,
1744) -> Result<(), Errno> {
1745    // Parse & validate the arguments.
1746    match which {
1747        PRIO_PROCESS => {}
1748        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1749        _ => return error!(EINVAL),
1750    }
1751
1752    let target_task = get_task_or_current(current_task, who)?;
1753
1754    let normal_priority = NormalPriority::from_setpriority_syscall(priority);
1755
1756    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
1757    let current_state = target_task.read().scheduler_state;
1758
1759    // Check capabilities and permissions, if required, for the operation.
1760    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
1761    let strengthening = current_state.normal_priority < normal_priority;
1762    let rlimited = strengthening
1763        && normal_priority.exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
1764    if !euid_friendly {
1765        security::check_task_capable(current_task, CAP_SYS_NICE)?;
1766    } else if rlimited {
1767        security::check_task_capable(current_task, CAP_SYS_NICE).map_err(|_| errno!(EACCES))?;
1768    }
1769
1770    security::check_setsched_access(current_task, &target_task)?;
1771
1772    // Apply the new scheduler configuration to the task.
1773    target_task.set_scheduler_nice(normal_priority)?;
1774
1775    Ok(())
1776}
1777
1778pub fn sys_setns(
1779    _locked: &mut Locked<Unlocked>,
1780    current_task: &CurrentTask,
1781    ns_fd: FdNumber,
1782    ns_type: c_int,
1783) -> Result<(), Errno> {
1784    let file_handle = current_task.get_file(ns_fd)?;
1785
1786    // From man pages this is not quite right because some namespace types require more capabilities
1787    // or require this capability in multiple namespaces, but it should cover our current test
1788    // cases and we can make this more nuanced once more namespace types are supported.
1789    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1790
1791    if let Some(mount_ns) = file_handle.downcast_file::<MountNamespaceFile>() {
1792        if !(ns_type == 0 || ns_type == CLONE_NEWNS as i32) {
1793            log_trace!("invalid type");
1794            return error!(EINVAL);
1795        }
1796
1797        track_stub!(TODO("https://fxbug.dev/297312091"), "setns CLONE_FS limitations");
1798        current_task.fs().set_namespace(mount_ns.0.clone())?;
1799        return Ok(());
1800    }
1801
1802    if let Some(_pidfd) = file_handle.downcast_file::<PidFdFileObject>() {
1803        track_stub!(TODO("https://fxbug.dev/297312844"), "setns w/ pidfd");
1804        return error!(ENOSYS);
1805    }
1806
1807    track_stub!(TODO("https://fxbug.dev/322893829"), "unknown ns file for setns, see logs");
1808    log_info!("ns_fd was not a supported namespace file: {}", file_handle.ops_type_name());
1809    error!(EINVAL)
1810}
1811
1812pub fn sys_unshare(
1813    _locked: &mut Locked<Unlocked>,
1814    current_task: &CurrentTask,
1815    flags: u32,
1816) -> Result<(), Errno> {
1817    const IMPLEMENTED_FLAGS: u32 = CLONE_FILES | CLONE_FS | CLONE_NEWNS | CLONE_NEWUTS;
1818    if flags & !IMPLEMENTED_FLAGS != 0 {
1819        track_stub!(TODO("https://fxbug.dev/322893372"), "unshare", flags & !IMPLEMENTED_FLAGS);
1820        return error!(EINVAL);
1821    }
1822
1823    if (flags & CLONE_FILES) != 0 {
1824        current_task.live().files.unshare();
1825    }
1826
1827    if (flags & CLONE_FS) != 0 {
1828        current_task.unshare_fs();
1829    }
1830
1831    if (flags & CLONE_NEWNS) != 0 {
1832        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1833        current_task.fs().unshare_namespace();
1834    }
1835
1836    if (flags & CLONE_NEWUTS) != 0 {
1837        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1838        // Fork the UTS namespace.
1839        let mut task_state = current_task.write();
1840        let new_uts_ns = task_state.uts_ns.read().clone();
1841        task_state.uts_ns = Arc::new(RwLock::new(new_uts_ns));
1842    }
1843
1844    Ok(())
1845}
1846
1847pub fn sys_swapon(
1848    locked: &mut Locked<Unlocked>,
1849    current_task: &CurrentTask,
1850    user_path: UserCString,
1851    _flags: i32,
1852) -> Result<(), Errno> {
1853    const MAX_SWAPFILES: usize = 32; // See https://man7.org/linux/man-pages/man2/swapon.2.html
1854
1855    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1856
1857    track_stub!(TODO("https://fxbug.dev/322893905"), "swapon validate flags");
1858
1859    let path = current_task.read_path(user_path)?;
1860    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1861
1862    let node = file.node();
1863    let mode = node.info().mode;
1864    if !mode.is_reg() && !mode.is_blk() {
1865        return error!(EINVAL);
1866    }
1867
1868    // We determined this magic number by using the mkswap tool and the file tool. The mkswap tool
1869    // populates a few bytes in the file, including a UUID, which can be replaced with zeros while
1870    // still being recognized by the file tool. This string appears at a fixed offset
1871    // (MAGIC_OFFSET) in the file, which looks quite like a magic number.
1872    const MAGIC_OFFSET: usize = 0xff6;
1873    let swap_magic = b"SWAPSPACE2";
1874    let mut buffer = VecOutputBuffer::new(swap_magic.len());
1875    if file.read_at(locked, current_task, MAGIC_OFFSET, &mut buffer)? != swap_magic.len()
1876        || buffer.data() != swap_magic
1877    {
1878        return error!(EINVAL);
1879    }
1880
1881    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1882    for swap_node in swap_files.iter() {
1883        if Arc::ptr_eq(swap_node, node) {
1884            return error!(EBUSY);
1885        }
1886    }
1887    if swap_files.len() >= MAX_SWAPFILES {
1888        return error!(EPERM);
1889    }
1890    swap_files.push(node.clone());
1891    Ok(())
1892}
1893
1894pub fn sys_swapoff(
1895    locked: &mut Locked<Unlocked>,
1896    current_task: &CurrentTask,
1897    user_path: UserCString,
1898) -> Result<(), Errno> {
1899    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1900
1901    let path = current_task.read_path(user_path)?;
1902    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1903    let node = file.node();
1904
1905    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1906    let original_length = swap_files.len();
1907    swap_files.retain(|swap_node| !Arc::ptr_eq(swap_node, node));
1908    if swap_files.len() == original_length {
1909        return error!(EINVAL);
1910    }
1911    Ok(())
1912}
1913
/// Parameters used by `obfuscate_value` to scramble values before `sys_kcmp` compares them.
///
/// The single shared instance lives in `KCMP_PARAMS` and is filled with random bytes there.
#[derive(Default, Debug, IntoBytes, KnownLayout, FromBytes, Immutable)]
#[repr(C)]
struct KcmpParams {
    // XOR-ed into the input value before the multiplication.
    mask: usize,
    // Wrapping multiplier applied after masking; forced to be odd when initialized.
    shuffle: usize,
}
1920
1921static KCMP_PARAMS: LazyLock<KcmpParams> = LazyLock::new(|| {
1922    let mut params = KcmpParams::default();
1923    starnix_crypto::cprng_draw(params.as_mut_bytes());
1924    // Ensure the shuffle is odd so that multiplying a usize by this value is a permutation.
1925    params.shuffle |= 1;
1926    params
1927});
1928
1929fn obfuscate_value(value: usize) -> usize {
1930    let KcmpParams { mask, shuffle } = *KCMP_PARAMS;
1931    (value ^ mask).wrapping_mul(shuffle)
1932}
1933
1934fn obfuscate_ptr<T>(ptr: *const T) -> usize {
1935    obfuscate_value(ptr as usize)
1936}
1937
1938fn obfuscate_arc<T>(arc: &Arc<T>) -> usize {
1939    obfuscate_ptr(Arc::as_ptr(arc))
1940}
1941
1942pub fn sys_kcmp(
1943    locked: &mut Locked<Unlocked>,
1944    current_task: &CurrentTask,
1945    pid1: pid_t,
1946    pid2: pid_t,
1947    resource_type: u32,
1948    index1: u64,
1949    index2: u64,
1950) -> Result<u32, Errno> {
1951    let task1 = current_task.get_task(pid1)?;
1952    let task2 = current_task.get_task(pid2)?;
1953
1954    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task1)?;
1955    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task2)?;
1956
1957    let resource_type = KcmpResource::from_raw(resource_type)?;
1958
1959    // Output encoding (see <https://man7.org/linux/man-pages/man2/kcmp.2.html>):
1960    //
1961    //   0  v1 is equal to v2; in other words, the two processes share the resource.
1962    //   1  v1 is less than v2.
1963    //   2  v1 is greater than v2.
1964    //   3  v1 is not equal to v2, but ordering information is unavailable.
1965    //
1966    fn encode_ordering(value: cmp::Ordering) -> u32 {
1967        match value {
1968            cmp::Ordering::Equal => 0,
1969            cmp::Ordering::Less => 1,
1970            cmp::Ordering::Greater => 2,
1971        }
1972    }
1973
1974    match resource_type {
1975        KcmpResource::FILE => {
1976            fn get_file(task: &Task, index: u64) -> Result<FileHandle, Errno> {
1977                // TODO: Test whether O_PATH is allowed here. Conceptually, seems like
1978                //       O_PATH should be allowed, but we haven't tested it yet.
1979                task.live()?.files.get_allowing_opath(FdNumber::from_raw(
1980                    index.try_into().map_err(|_| errno!(EBADF))?,
1981                ))
1982            }
1983            let file1 = get_file(&task1, index1)?;
1984            let file2 = get_file(&task2, index2)?;
1985            Ok(encode_ordering(obfuscate_arc(&file1).cmp(&obfuscate_arc(&file2))))
1986        }
1987        KcmpResource::FILES => {
1988            let files1 = task1.live()?.files.id();
1989            let files2 = task2.live()?.files.id();
1990            Ok(encode_ordering(obfuscate_value(files1.raw()).cmp(&obfuscate_value(files2.raw()))))
1991        }
1992        KcmpResource::FS => {
1993            let fs1 = task1.live()?.fs();
1994            let fs2 = task2.live()?.fs();
1995            Ok(encode_ordering(obfuscate_arc(&fs1).cmp(&obfuscate_arc(&fs2))))
1996        }
1997        KcmpResource::SIGHAND => Ok(encode_ordering(
1998            obfuscate_arc(&task1.thread_group().signal_actions)
1999                .cmp(&obfuscate_arc(&task2.thread_group().signal_actions)),
2000        )),
2001        KcmpResource::VM => {
2002            Ok(encode_ordering(obfuscate_arc(&task1.mm()?).cmp(&obfuscate_arc(&task2.mm()?))))
2003        }
2004        _ => error!(EINVAL),
2005    }
2006}
2007
2008pub fn sys_syslog(
2009    locked: &mut Locked<Unlocked>,
2010    current_task: &CurrentTask,
2011    action_type: i32,
2012    address: UserAddress,
2013    length: i32,
2014) -> Result<i32, Errno> {
2015    let action = SyslogAction::try_from(action_type)?;
2016    let syslog =
2017        current_task.kernel().syslog.access(&current_task, SyslogAccess::Syscall(action))?;
2018    match action {
2019        SyslogAction::Read => {
2020            if address.is_null() || length < 0 {
2021                return error!(EINVAL);
2022            }
2023            let mut output_buffer =
2024                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
2025            syslog.blocking_read(locked, current_task, &mut output_buffer)
2026        }
2027        SyslogAction::ReadAll => {
2028            if address.is_null() || length < 0 {
2029                return error!(EINVAL);
2030            }
2031            let mut output_buffer =
2032                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
2033            syslog.read_all(current_task, &mut output_buffer)
2034        }
2035        SyslogAction::SizeUnread => syslog.size_unread(),
2036        SyslogAction::SizeBuffer => syslog.size_buffer(),
2037        SyslogAction::Close | SyslogAction::Open => Ok(0),
2038        SyslogAction::ReadClear => {
2039            track_stub!(TODO("https://fxbug.dev/322894145"), "syslog: read clear");
2040            Ok(0)
2041        }
2042        SyslogAction::Clear => {
2043            track_stub!(TODO("https://fxbug.dev/322893673"), "syslog: clear");
2044            Ok(0)
2045        }
2046        SyslogAction::ConsoleOff => {
2047            track_stub!(TODO("https://fxbug.dev/322894399"), "syslog: console off");
2048            Ok(0)
2049        }
2050        SyslogAction::ConsoleOn => {
2051            track_stub!(TODO("https://fxbug.dev/322894106"), "syslog: console on");
2052            Ok(0)
2053        }
2054        SyslogAction::ConsoleLevel => {
2055            if length <= 0 || length >= 8 {
2056                return error!(EINVAL);
2057            }
2058            track_stub!(TODO("https://fxbug.dev/322894199"), "syslog: console level");
2059            Ok(0)
2060        }
2061    }
2062}
2063
2064pub fn sys_vhangup(
2065    _locked: &mut Locked<Unlocked>,
2066    current_task: &CurrentTask,
2067) -> Result<(), Errno> {
2068    security::check_task_capable(current_task, CAP_SYS_TTY_CONFIG)?;
2069    track_stub!(TODO("https://fxbug.dev/324079257"), "vhangup");
2070    Ok(())
2071}
2072
// Syscalls for arch32 usage
//
// Every entry below re-exports an existing syscall implementation from the parent module
// under a `sys_arch32_*` name; no arch32-specific logic is defined here. Some
// implementations are exported under two names (e.g. `sys_setfsuid` as both
// `sys_arch32_setfsuid` and `sys_arch32_setfsuid32`). The module is only compiled for
// aarch64 targets.
#[cfg(target_arch = "aarch64")]
mod arch32 {
    pub use super::{
        sys_execve as sys_arch32_execve, sys_getegid as sys_arch32_getegid32,
        sys_geteuid as sys_arch32_geteuid32, sys_getgid as sys_arch32_getgid32,
        sys_getgroups as sys_arch32_getgroups32, sys_getpgid as sys_arch32_getpgid,
        sys_getppid as sys_arch32_getppid, sys_getpriority as sys_arch32_getpriority,
        sys_getresgid as sys_arch32_getresgid32, sys_getresuid as sys_arch32_getresuid32,
        sys_getrlimit as sys_arch32_ugetrlimit, sys_getrusage as sys_arch32_getrusage,
        sys_getuid as sys_arch32_getuid32, sys_ioprio_set as sys_arch32_ioprio_set,
        sys_ptrace as sys_arch32_ptrace, sys_quotactl as sys_arch32_quotactl,
        sys_sched_get_priority_max as sys_arch32_sched_get_priority_max,
        sys_sched_get_priority_min as sys_arch32_sched_get_priority_min,
        sys_sched_getaffinity as sys_arch32_sched_getaffinity,
        sys_sched_getparam as sys_arch32_sched_getparam,
        sys_sched_setaffinity as sys_arch32_sched_setaffinity,
        sys_sched_setparam as sys_arch32_sched_setparam,
        sys_sched_setscheduler as sys_arch32_sched_setscheduler, sys_seccomp as sys_arch32_seccomp,
        sys_setfsuid as sys_arch32_setfsuid, sys_setfsuid as sys_arch32_setfsuid32,
        sys_setgid as sys_arch32_setgid32, sys_setgroups as sys_arch32_setgroups32,
        sys_setns as sys_arch32_setns, sys_setpgid as sys_arch32_setpgid,
        sys_setpriority as sys_arch32_setpriority, sys_setregid as sys_arch32_setregid32,
        sys_setresgid as sys_arch32_setresgid32, sys_setresuid as sys_arch32_setresuid32,
        sys_setreuid as sys_arch32_setreuid32, sys_setreuid as sys_arch32_setreuid,
        sys_setrlimit as sys_arch32_setrlimit, sys_setsid as sys_arch32_setsid,
        sys_syslog as sys_arch32_syslog, sys_unshare as sys_arch32_unshare,
    };
}

// Surface the arch32 aliases at this module's top level on aarch64 builds.
#[cfg(target_arch = "aarch64")]
pub use arch32::*;
2105
2106#[cfg(test)]
2107mod tests {
2108    use super::*;
2109    use crate::mm::syscalls::sys_munmap;
2110    use crate::testing::{AutoReleasableTask, map_memory, spawn_kernel_and_run};
2111    use starnix_syscalls::SUCCESS;
2112    use starnix_task_command::TaskCommand;
2113    use starnix_uapi::auth::Credentials;
2114    use starnix_uapi::{SCHED_FIFO, SCHED_NORMAL};
2115    use std::ffi::CString;
2116
2117    #[::fuchsia::test]
2118    async fn test_prctl_set_vma_anon_name() {
2119        spawn_kernel_and_run(async |locked, current_task| {
2120            let mapped_address =
2121                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2122            let name_addr = (mapped_address + 128u64).unwrap();
2123            let name = "test-name\0";
2124            current_task.write_memory(name_addr, name.as_bytes()).expect("failed to write name");
2125            sys_prctl(
2126                locked,
2127                current_task,
2128                PR_SET_VMA,
2129                PR_SET_VMA_ANON_NAME as u64,
2130                mapped_address.ptr() as u64,
2131                32,
2132                name_addr.ptr() as u64,
2133            )
2134            .expect("failed to set name");
2135            assert_eq!(
2136                "test-name",
2137                current_task
2138                    .mm()
2139                    .unwrap()
2140                    .get_mapping_name((mapped_address + 24u64).unwrap())
2141                    .expect("failed to get address")
2142                    .unwrap()
2143                    .to_string(),
2144            );
2145
2146            sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize)
2147                .expect("failed to unmap memory");
2148            assert_eq!(
2149                error!(EFAULT),
2150                current_task.mm().unwrap().get_mapping_name((mapped_address + 24u64).unwrap())
2151            );
2152        })
2153        .await;
2154    }
2155
2156    #[::fuchsia::test]
2157    async fn test_set_vma_name_special_chars() {
2158        spawn_kernel_and_run(async |locked, current_task| {
2159            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2160
2161            let mapping_addr =
2162                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2163
2164            for c in 1..255 {
2165                let vma_name = CString::new([c]).unwrap();
2166                current_task.write_memory(name_addr, vma_name.as_bytes_with_nul()).unwrap();
2167
2168                let result = sys_prctl(
2169                    locked,
2170                    current_task,
2171                    PR_SET_VMA,
2172                    PR_SET_VMA_ANON_NAME as u64,
2173                    mapping_addr.ptr() as u64,
2174                    *PAGE_SIZE,
2175                    name_addr.ptr() as u64,
2176                );
2177
2178                if c > 0x1f
2179                    && c < 0x7f
2180                    && c != b'\\'
2181                    && c != b'`'
2182                    && c != b'$'
2183                    && c != b'['
2184                    && c != b']'
2185                {
2186                    assert_eq!(result, Ok(SUCCESS));
2187                } else {
2188                    assert_eq!(result, error!(EINVAL));
2189                }
2190            }
2191        })
2192        .await;
2193    }
2194
2195    #[::fuchsia::test]
2196    async fn test_set_vma_name_long() {
2197        spawn_kernel_and_run(async |locked, current_task| {
2198            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2199
2200            let mapping_addr =
2201                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2202
2203            let name_too_long = CString::new(vec![b'a'; 256]).unwrap();
2204
2205            current_task.write_memory(name_addr, name_too_long.as_bytes_with_nul()).unwrap();
2206
2207            assert_eq!(
2208                sys_prctl(
2209                    locked,
2210                    current_task,
2211                    PR_SET_VMA,
2212                    PR_SET_VMA_ANON_NAME as u64,
2213                    mapping_addr.ptr() as u64,
2214                    *PAGE_SIZE,
2215                    name_addr.ptr() as u64,
2216                ),
2217                error!(EINVAL)
2218            );
2219
2220            let name_just_long_enough = CString::new(vec![b'a'; 255]).unwrap();
2221
2222            current_task
2223                .write_memory(name_addr, name_just_long_enough.as_bytes_with_nul())
2224                .unwrap();
2225
2226            assert_eq!(
2227                sys_prctl(
2228                    locked,
2229                    current_task,
2230                    PR_SET_VMA,
2231                    PR_SET_VMA_ANON_NAME as u64,
2232                    mapping_addr.ptr() as u64,
2233                    *PAGE_SIZE,
2234                    name_addr.ptr() as u64,
2235                ),
2236                Ok(SUCCESS)
2237            );
2238        })
2239        .await;
2240    }
2241
2242    #[::fuchsia::test]
2243    async fn test_set_vma_name_misaligned() {
2244        spawn_kernel_and_run(async |locked, current_task| {
2245            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2246
2247            let mapping_addr =
2248                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2249
2250            let name = CString::new("name").unwrap();
2251            current_task.write_memory(name_addr, name.as_bytes_with_nul()).unwrap();
2252
2253            // Passing a misaligned pointer to the start of the named region fails.
2254            assert_eq!(
2255                sys_prctl(
2256                    locked,
2257                    current_task,
2258                    PR_SET_VMA,
2259                    PR_SET_VMA_ANON_NAME as u64,
2260                    1 + mapping_addr.ptr() as u64,
2261                    *PAGE_SIZE - 1,
2262                    name_addr.ptr() as u64,
2263                ),
2264                error!(EINVAL)
2265            );
2266
2267            // Passing an unaligned length does work, however.
2268            assert_eq!(
2269                sys_prctl(
2270                    locked,
2271                    current_task,
2272                    PR_SET_VMA,
2273                    PR_SET_VMA_ANON_NAME as u64,
2274                    mapping_addr.ptr() as u64,
2275                    *PAGE_SIZE - 1,
2276                    name_addr.ptr() as u64,
2277                ),
2278                Ok(SUCCESS)
2279            );
2280        })
2281        .await;
2282    }
2283
2284    #[::fuchsia::test]
2285    async fn test_prctl_get_set_dumpable() {
2286        spawn_kernel_and_run(async |locked, current_task| {
2287            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2288                .expect("failed to get dumpable");
2289
2290            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 1, 0, 0, 0)
2291                .expect("failed to set dumpable");
2292            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2293                .expect("failed to get dumpable");
2294
2295            // SUID_DUMP_ROOT not supported.
2296            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 2, 0, 0, 0)
2297                .expect("failed to set dumpable");
2298            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2299                .expect("failed to get dumpable");
2300        })
2301        .await;
2302    }
2303
2304    #[::fuchsia::test]
2305    async fn test_sys_getsid() {
2306        spawn_kernel_and_run(async |locked, current_task| {
2307            let kernel = current_task.kernel();
2308            assert_eq!(
2309                current_task.get_tid(),
2310                sys_getsid(locked, &current_task, 0).expect("failed to get sid")
2311            );
2312
2313            let second_task = crate::execution::create_init_child_process(
2314                locked,
2315                &kernel.weak_self.upgrade().unwrap(),
2316                TaskCommand::new(b"second task"),
2317                Credentials::with_ids(0, 0),
2318                None,
2319            )
2320            .expect("failed to create second task");
2321            let second_current = AutoReleasableTask::from(second_task);
2322
2323            assert_eq!(
2324                second_current.get_tid(),
2325                sys_getsid(locked, &current_task, second_current.get_tid())
2326                    .expect("failed to get sid")
2327            );
2328        })
2329        .await;
2330    }
2331
2332    #[::fuchsia::test]
2333    async fn test_get_affinity_size() {
2334        spawn_kernel_and_run(async |locked, current_task| {
2335            let mapped_address =
2336                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2337            let pid = current_task.get_pid();
2338            assert_eq!(
2339                sys_sched_getaffinity(locked, &current_task, pid, 16, mapped_address),
2340                Ok(16)
2341            );
2342            assert_eq!(
2343                sys_sched_getaffinity(locked, &current_task, pid, 1024, mapped_address),
2344                Ok(std::mem::size_of::<CpuSet>())
2345            );
2346            assert_eq!(
2347                sys_sched_getaffinity(locked, &current_task, pid, 1, mapped_address),
2348                error!(EINVAL)
2349            );
2350            assert_eq!(
2351                sys_sched_getaffinity(locked, &current_task, pid, 9, mapped_address),
2352                error!(EINVAL)
2353            );
2354        })
2355        .await;
2356    }
2357
2358    #[::fuchsia::test]
2359    async fn test_set_affinity_size() {
2360        spawn_kernel_and_run(async |locked, current_task| {
2361            let mapped_address =
2362                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2363            current_task.write_memory(mapped_address, &[0xffu8]).expect("failed to cpumask");
2364            let pid = current_task.get_pid();
2365            assert_eq!(
2366                sys_sched_setaffinity(
2367                    locked,
2368                    &current_task,
2369                    pid,
2370                    *PAGE_SIZE as u32,
2371                    mapped_address
2372                ),
2373                Ok(())
2374            );
2375            assert_eq!(
2376                sys_sched_setaffinity(locked, &current_task, pid, 1, mapped_address),
2377                error!(EINVAL)
2378            );
2379        })
2380        .await;
2381    }
2382
2383    #[::fuchsia::test]
2384    async fn test_task_name() {
2385        spawn_kernel_and_run(async |locked, current_task| {
2386            let mapped_address =
2387                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2388            let name = "my-task-name\0";
2389            current_task
2390                .write_memory(mapped_address, name.as_bytes())
2391                .expect("failed to write name");
2392
2393            let result =
2394                sys_prctl(locked, current_task, PR_SET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2395                    .unwrap();
2396            assert_eq!(SUCCESS, result);
2397
2398            let mapped_address =
2399                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2400            let result =
2401                sys_prctl(locked, current_task, PR_GET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2402                    .unwrap();
2403            assert_eq!(SUCCESS, result);
2404
2405            let name_length = name.len();
2406
2407            let out_name = current_task.read_memory_to_vec(mapped_address, name_length).unwrap();
2408            assert_eq!(name.as_bytes(), &out_name);
2409        })
2410        .await;
2411    }
2412
2413    #[::fuchsia::test]
2414    async fn test_sched_get_priority_min_max() {
2415        spawn_kernel_and_run(async |locked, current_task| {
2416            let non_rt_min =
2417                sys_sched_get_priority_min(locked, &current_task, SCHED_NORMAL).unwrap();
2418            assert_eq!(non_rt_min, 0);
2419            let non_rt_max =
2420                sys_sched_get_priority_max(locked, &current_task, SCHED_NORMAL).unwrap();
2421            assert_eq!(non_rt_max, 0);
2422
2423            let rt_min = sys_sched_get_priority_min(locked, &current_task, SCHED_FIFO).unwrap();
2424            assert_eq!(rt_min, 1);
2425            let rt_max = sys_sched_get_priority_max(locked, &current_task, SCHED_FIFO).unwrap();
2426            assert_eq!(rt_max, 99);
2427
2428            let min_bad_policy_error =
2429                sys_sched_get_priority_min(locked, &current_task, std::u32::MAX).unwrap_err();
2430            assert_eq!(min_bad_policy_error, errno!(EINVAL));
2431
2432            let max_bad_policy_error =
2433                sys_sched_get_priority_max(locked, &current_task, std::u32::MAX).unwrap_err();
2434            assert_eq!(max_bad_policy_error, errno!(EINVAL));
2435        })
2436        .await;
2437    }
2438
2439    #[::fuchsia::test]
2440    async fn test_sched_setscheduler() {
2441        spawn_kernel_and_run(async |locked, current_task| {
2442            current_task
2443                .thread_group()
2444                .limits
2445                .lock(locked)
2446                .set(Resource::RTPRIO, rlimit { rlim_cur: 255, rlim_max: 255 });
2447
2448            let scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2449            assert_eq!(scheduler, SCHED_NORMAL, "tasks should have normal scheduler by default");
2450
2451            let mapped_address =
2452                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2453            let requested_params = sched_param { sched_priority: 15 };
2454            current_task.write_object(mapped_address.into(), &requested_params).unwrap();
2455
2456            sys_sched_setscheduler(locked, &current_task, 0, SCHED_FIFO, mapped_address.into())
2457                .unwrap();
2458
2459            let new_scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2460            assert_eq!(new_scheduler, SCHED_FIFO, "task should have been assigned fifo scheduler");
2461
2462            let mapped_address =
2463                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2464            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2465                .expect("sched_getparam");
2466            let param_value: sched_param =
2467                current_task.read_object(mapped_address.into()).expect("read_object");
2468            assert_eq!(param_value.sched_priority, 15);
2469        })
2470        .await;
2471    }
2472
2473    #[::fuchsia::test]
2474    async fn test_sched_getparam() {
2475        spawn_kernel_and_run(async |locked, current_task| {
2476            let mapped_address =
2477                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2478            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2479                .expect("sched_getparam");
2480            let param_value: sched_param =
2481                current_task.read_object(mapped_address.into()).expect("read_object");
2482            assert_eq!(param_value.sched_priority, 0);
2483        })
2484        .await;
2485    }
2486
2487    #[::fuchsia::test]
2488    async fn test_setuid() {
2489        spawn_kernel_and_run(async |locked, current_task| {
2490            // Test for root.
2491            current_task.set_creds(Credentials::with_ids(0, 0));
2492            sys_setuid(locked, &current_task, 42).expect("setuid");
2493            let mut creds = Credentials::clone(&current_task.current_creds());
2494            assert_eq!(creds.euid, 42);
2495            assert_eq!(creds.uid, 42);
2496            assert_eq!(creds.saved_uid, 42);
2497
2498            // Remove the CAP_SETUID capability to avoid overwriting permission checks.
2499            creds.cap_effective.remove(CAP_SETUID);
2500            current_task.set_creds(creds);
2501
2502            // Test for non root, which task now is.
2503            assert_eq!(sys_setuid(locked, &current_task, 0), error!(EPERM));
2504            assert_eq!(sys_setuid(locked, &current_task, 43), error!(EPERM));
2505
2506            sys_setuid(locked, &current_task, 42).expect("setuid");
2507            assert_eq!(current_task.current_creds().euid, 42);
2508            assert_eq!(current_task.current_creds().uid, 42);
2509            assert_eq!(current_task.current_creds().saved_uid, 42);
2510
2511            // Change uid and saved_uid, and check that one can set the euid to these.
2512            let mut creds = Credentials::clone(&current_task.current_creds());
2513            creds.uid = 41;
2514            creds.euid = 42;
2515            creds.saved_uid = 43;
2516            current_task.set_creds(creds);
2517
2518            sys_setuid(locked, &current_task, 41).expect("setuid");
2519            assert_eq!(current_task.current_creds().euid, 41);
2520            assert_eq!(current_task.current_creds().uid, 41);
2521            assert_eq!(current_task.current_creds().saved_uid, 43);
2522
2523            let mut creds = Credentials::clone(&current_task.current_creds());
2524            creds.uid = 41;
2525            creds.euid = 42;
2526            creds.saved_uid = 43;
2527            current_task.set_creds(creds);
2528
2529            sys_setuid(locked, &current_task, 43).expect("setuid");
2530            assert_eq!(current_task.current_creds().euid, 43);
2531            assert_eq!(current_task.current_creds().uid, 41);
2532            assert_eq!(current_task.current_creds().saved_uid, 43);
2533        })
2534        .await;
2535    }
2536
2537    #[::fuchsia::test]
2538    async fn test_read_c_string_vector() {
2539        spawn_kernel_and_run(async |locked, current_task| {
2540            let arg_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2541            let arg = b"test-arg\0";
2542            current_task.write_memory(arg_addr, arg).expect("failed to write test arg");
2543            let arg_usercstr = UserCString::new(current_task, arg_addr);
2544            let null_usercstr = UserCString::null(current_task);
2545
2546            let argv_addr = UserCStringPtr::new(
2547                current_task,
2548                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE),
2549            );
2550            current_task
2551                .write_multi_arch_ptr(argv_addr.addr(), arg_usercstr)
2552                .expect("failed to write UserCString");
2553            current_task
2554                .write_multi_arch_ptr(argv_addr.next().unwrap().addr(), null_usercstr)
2555                .expect("failed to write UserCString");
2556
2557            // The arguments size limit should include the null terminator.
2558            assert!(read_c_string_vector(&current_task, argv_addr, 100, arg.len()).is_ok());
2559            assert_eq!(
2560                read_c_string_vector(
2561                    &current_task,
2562                    argv_addr,
2563                    100,
2564                    std::str::from_utf8(arg).unwrap().trim_matches('\0').len()
2565                ),
2566                error!(E2BIG)
2567            );
2568        })
2569        .await;
2570    }
2571}