starnix_core/task/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::execution::execute_task;
6use crate::mm::{DumpPolicy, MemoryAccessor, MemoryAccessorExt, PAGE_SIZE};
7use crate::security;
8use crate::signals::syscalls::RUsagePtr;
9use crate::task::{
10    CurrentTask, ExitStatus, NormalPriority, PR_SET_PTRACER_ANY, PtraceAllowedPtracers,
11    PtraceAttachType, PtraceOptions, SchedulingPolicy, SeccompAction, SeccompStateValue,
12    SyslogAccess, Task, ThreadGroup, max_priority_for_sched_policy, min_priority_for_sched_policy,
13    ptrace_attach, ptrace_dispatch, ptrace_traceme,
14};
15use crate::vfs::{
16    FdNumber, FileHandle, MountNamespaceFile, PidFdFileObject, UserBuffersOutputBuffer,
17    VecOutputBuffer,
18};
19use starnix_logging::{log_error, log_info, log_trace, track_stub};
20use starnix_sync::{Locked, RwLock, Unlocked};
21use starnix_syscalls::SyscallResult;
22use starnix_task_command::TaskCommand;
23use starnix_types::ownership::WeakRef;
24use starnix_types::time::timeval_from_duration;
25use starnix_uapi::auth::{
26    CAP_SETGID, CAP_SETPCAP, CAP_SETUID, CAP_SYS_ADMIN, CAP_SYS_NICE, CAP_SYS_RESOURCE,
27    CAP_SYS_TTY_CONFIG, Capabilities, PTRACE_MODE_READ_REALCREDS, SecureBits,
28};
29use starnix_uapi::errors::{ENAMETOOLONG, Errno};
30use starnix_uapi::file_mode::{Access, AccessCheck, FileMode};
31use starnix_uapi::kcmp::KcmpResource;
32use starnix_uapi::open_flags::OpenFlags;
33use starnix_uapi::resource_limits::Resource;
34use starnix_uapi::signals::{Signal, UncheckedSignal};
35use starnix_uapi::syslog::SyslogAction;
36use starnix_uapi::user_address::{
37    ArchSpecific, MappingMultiArchUserRef, MultiArchUserRef, UserAddress, UserCString,
38    UserCStringPtr, UserRef,
39};
40use starnix_uapi::vfs::ResolveFlags;
41use starnix_uapi::{
42    __user_cap_data_struct, __user_cap_header_struct, _LINUX_CAPABILITY_VERSION_1,
43    _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3, AT_EMPTY_PATH, AT_SYMLINK_NOFOLLOW,
44    BPF_MAXINSNS, CLONE_ARGS_SIZE_VER0, CLONE_ARGS_SIZE_VER1, CLONE_ARGS_SIZE_VER2, CLONE_FILES,
45    CLONE_FS, CLONE_NEWNS, CLONE_NEWUTS, CLONE_SETTLS, CLONE_VFORK, NGROUPS_MAX, PR_CAP_AMBIENT,
46    PR_CAP_AMBIENT_CLEAR_ALL, PR_CAP_AMBIENT_IS_SET, PR_CAP_AMBIENT_LOWER, PR_CAP_AMBIENT_RAISE,
47    PR_CAPBSET_DROP, PR_CAPBSET_READ, PR_GET_CHILD_SUBREAPER, PR_GET_DUMPABLE, PR_GET_KEEPCAPS,
48    PR_GET_NAME, PR_GET_NO_NEW_PRIVS, PR_GET_SECCOMP, PR_GET_SECUREBITS, PR_SET_CHILD_SUBREAPER,
49    PR_SET_DUMPABLE, PR_SET_KEEPCAPS, PR_SET_NAME, PR_SET_NO_NEW_PRIVS, PR_SET_PDEATHSIG,
50    PR_SET_PTRACER, PR_SET_SECCOMP, PR_SET_SECUREBITS, PR_SET_TIMERSLACK, PR_SET_VMA,
51    PR_SET_VMA_ANON_NAME, PRIO_PROCESS, PTRACE_ATTACH, PTRACE_SEIZE, PTRACE_TRACEME,
52    RUSAGE_CHILDREN, SCHED_RESET_ON_FORK, SECCOMP_FILTER_FLAG_LOG,
53    SECCOMP_FILTER_FLAG_NEW_LISTENER, SECCOMP_FILTER_FLAG_SPEC_ALLOW, SECCOMP_FILTER_FLAG_TSYNC,
54    SECCOMP_FILTER_FLAG_TSYNC_ESRCH, SECCOMP_GET_ACTION_AVAIL, SECCOMP_GET_NOTIF_SIZES,
55    SECCOMP_MODE_FILTER, SECCOMP_MODE_STRICT, SECCOMP_SET_MODE_FILTER, SECCOMP_SET_MODE_STRICT,
56    c_char, c_int, clone_args, errno, error, gid_t, pid_t, rlimit, rusage, sched_param,
57    sock_filter, uapi, uid_t,
58};
59use static_assertions::const_assert;
60use std::cmp;
61use std::ffi::CString;
62use std::sync::{Arc, LazyLock};
63use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
64
65#[cfg(target_arch = "aarch64")]
66use starnix_uapi::{PR_GET_TAGGED_ADDR_CTRL, PR_SET_TAGGED_ADDR_CTRL, PR_TAGGED_ADDR_ENABLE};
67
/// User pointer to a `sock_fprog`, surfaced as a [`SockFProg`] regardless of whether the
/// memory holds the native (`uapi::sock_fprog`) or the 32-bit (`uapi::arch32::sock_fprog`)
/// layout.
pub type SockFProgPtr =
    MappingMultiArchUserRef<SockFProg, uapi::sock_fprog, uapi::arch32::sock_fprog>;
/// User pointer to a `sock_filter` in either the native or the 32-bit layout.
pub type SockFilterPtr = MultiArchUserRef<uapi::sock_filter, uapi::arch32::sock_filter>;

/// Architecture-neutral counterpart of the `sock_fprog` structures.
// NOTE(review): presumably describes a BPF filter program handed in by userspace (e.g. for
// seccomp) -- confirm against the callers.
pub struct SockFProg {
    /// Mirrors the `len` field; presumably the number of `sock_filter` entries -- TODO confirm.
    pub len: u32,
    /// Mirrors the `filter` field: user pointer to the filter entries.
    pub filter: SockFilterPtr,
}
76
// Maps the `len` and `filter` fields between `SockFProg` and `sock_fprog`.
// NOTE(review): presumably generates the two-way `TryFrom` conversions that `SockFProgPtr`
// relies on -- confirm against the macro definition.
uapi::arch_map_data! {
    BidiTryFrom<SockFProg, sock_fprog> {
        len = len;
        filter = filter;
    }
}
83
// The scheduling syscalls in this file access `sched_param` through a plain
// `UserRef<sched_param>` rather than a multi-arch reference.
// NOTE(review): presumably this asserts that `sched_param` (field `sched_priority`) has the
// same layout on every supported architecture -- confirm against the macro definition.
uapi::check_arch_independent_layout! {
    sched_param {
        sched_priority,
    }
}
89
90pub fn do_clone(
91    locked: &mut Locked<Unlocked>,
92    current_task: &mut CurrentTask,
93    args: &clone_args,
94) -> Result<pid_t, Errno> {
95    security::check_task_create_access(current_task)?;
96
97    let child_exit_signal = if args.exit_signal == 0 {
98        None
99    } else {
100        Some(Signal::try_from(UncheckedSignal::new(args.exit_signal))?)
101    };
102
103    let mut new_task = current_task.clone_task(
104        locked,
105        args.flags,
106        child_exit_signal,
107        UserRef::<pid_t>::new(UserAddress::from(args.parent_tid)),
108        UserRef::<pid_t>::new(UserAddress::from(args.child_tid)),
109        UserRef::<FdNumber>::new(UserAddress::from(args.pidfd)),
110    )?;
111
112    // Set the result register to 0 for the return value from clone in the
113    // cloned process.
114    new_task.thread_state.registers.set_return_register(0);
115    let (trace_kind, ptrace_state) = current_task.get_ptrace_core_state_for_clone(args);
116
117    if args.stack != 0 {
118        // In clone() the `stack` argument points to the top of the stack, while in clone3()
119        // `stack` points to the bottom of the stack. Therefore, in clone3() we need to add
120        // `stack_size` to calculate the stack pointer. Note that in clone() `stack_size` is 0.
121        new_task
122            .thread_state
123            .registers
124            .set_stack_pointer_register(args.stack.wrapping_add(args.stack_size));
125    }
126
127    if args.flags & (CLONE_SETTLS as u64) != 0 {
128        new_task.thread_state.registers.set_thread_pointer_register(args.tls);
129    }
130
131    let tid = new_task.task.tid;
132    let task_ref = WeakRef::from(&new_task.task);
133    execute_task(locked, new_task, |_, _| Ok(()), |_| {}, ptrace_state)?;
134
135    current_task.ptrace_event(locked, trace_kind, tid as u64);
136
137    if args.flags & (CLONE_VFORK as u64) != 0 {
138        current_task.wait_for_execve(task_ref)?;
139        current_task.ptrace_event(locked, PtraceOptions::TRACEVFORKDONE, tid as u64);
140    }
141
142    Ok(tid)
143}
144
145pub fn sys_clone3(
146    locked: &mut Locked<Unlocked>,
147    current_task: &mut CurrentTask,
148    user_clone_args: UserRef<clone_args>,
149    user_clone_args_size: usize,
150) -> Result<pid_t, Errno> {
151    // Only these specific sized versions are supported.
152    if !(user_clone_args_size == CLONE_ARGS_SIZE_VER0 as usize
153        || user_clone_args_size == CLONE_ARGS_SIZE_VER1 as usize
154        || user_clone_args_size == CLONE_ARGS_SIZE_VER2 as usize)
155    {
156        return error!(EINVAL);
157    }
158
159    // The most recent version of the struct size should match our definition.
160    const_assert!(std::mem::size_of::<clone_args>() == CLONE_ARGS_SIZE_VER2 as usize);
161
162    let clone_args = current_task.read_object_partial(user_clone_args, user_clone_args_size)?;
163    do_clone(locked, current_task, &clone_args)
164}
165
166fn read_c_string_vector(
167    mm: &CurrentTask,
168    user_vector: UserCStringPtr,
169    elem_limit: usize,
170    vec_limit: usize,
171) -> Result<(Vec<CString>, usize), Errno> {
172    let mut user_current = user_vector;
173    let mut vector: Vec<CString> = vec![];
174    let mut vec_size: usize = 0;
175    loop {
176        let user_string = mm.read_multi_arch_ptr(user_current)?;
177        if user_string.is_null() {
178            break;
179        }
180        let string = mm
181            .read_c_string_to_vec(user_string, elem_limit)
182            .map_err(|e| if e.code == ENAMETOOLONG { errno!(E2BIG) } else { e })?;
183        let cstring = CString::new(string).map_err(|_| errno!(EINVAL))?;
184        vec_size =
185            vec_size.checked_add(cstring.as_bytes_with_nul().len()).ok_or_else(|| errno!(E2BIG))?;
186        if vec_size > vec_limit {
187            return error!(E2BIG);
188        }
189        vector.push(cstring);
190        user_current = user_current.next()?;
191    }
192    Ok((vector, vec_size))
193}
194
195pub fn sys_execve(
196    locked: &mut Locked<Unlocked>,
197    current_task: &mut CurrentTask,
198    user_path: UserCString,
199    user_argv: UserCStringPtr,
200    user_environ: UserCStringPtr,
201) -> Result<(), Errno> {
202    sys_execveat(locked, current_task, FdNumber::AT_FDCWD, user_path, user_argv, user_environ, 0)
203}
204
205pub fn sys_execveat(
206    locked: &mut Locked<Unlocked>,
207    current_task: &mut CurrentTask,
208    dir_fd: FdNumber,
209    user_path: UserCString,
210    user_argv: UserCStringPtr,
211    user_environ: UserCStringPtr,
212    flags: u32,
213) -> Result<(), Errno> {
214    if flags & !(AT_EMPTY_PATH | AT_SYMLINK_NOFOLLOW) != 0 {
215        return error!(EINVAL);
216    }
217
218    // Calculate the limit for argv and environ size as 1/4 of the stack size, floored at 32 pages.
219    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
220    const PAGE_LIMIT: usize = 32;
221    let page_limit_size: usize = PAGE_LIMIT * *PAGE_SIZE as usize;
222    let rlimit = current_task.thread_group().get_rlimit(locked, Resource::STACK);
223    let stack_limit = rlimit / 4;
224    let argv_env_limit = cmp::max(page_limit_size, stack_limit as usize);
225
226    // The limit per argument or environment variable is 32 pages.
227    // See the Limits sections in https://man7.org/linux/man-pages/man2/execve.2.html
228    let (argv, argv_size) = if user_argv.is_null() {
229        (Vec::new(), 0)
230    } else {
231        read_c_string_vector(current_task, user_argv, page_limit_size, argv_env_limit)?
232    };
233
234    let (environ, _) = if user_environ.is_null() {
235        (Vec::new(), 0)
236    } else {
237        read_c_string_vector(
238            current_task,
239            user_environ,
240            page_limit_size,
241            argv_env_limit - argv_size,
242        )?
243    };
244
245    let path = &current_task.read_path(user_path)?;
246
247    log_trace!(argv:?, environ:?, flags:?; "execveat({dir_fd}, {path})");
248
249    let mut open_flags = OpenFlags::RDONLY;
250
251    if flags & AT_SYMLINK_NOFOLLOW != 0 {
252        open_flags |= OpenFlags::NOFOLLOW;
253    }
254
255    let executable = if path.is_empty() {
256        if flags & AT_EMPTY_PATH == 0 {
257            // If AT_EMPTY_PATH is not set, this is an error.
258            return error!(ENOENT);
259        }
260
261        // O_PATH allowed for:
262        //
263        //   Passing the file descriptor as the dirfd argument of
264        //   openat() and the other "*at()" system calls.  This
265        //   includes linkat(2) with AT_EMPTY_PATH (or via procfs
266        //   using AT_SYMLINK_FOLLOW) even if the file is not a
267        //   directory.
268        //
269        // See https://man7.org/linux/man-pages/man2/open.2.html
270        let file = current_task.files.get_allowing_opath(dir_fd)?;
271
272        // We are forced to reopen the file with O_RDONLY to get access to the underlying VMO.
273        // Note that skip the access check in the arguments in case the file mode does
274        // not actually have the read permission bit.
275        //
276        // This can happen because a file could have --x--x--x mode permissions and then
277        // be opened with O_PATH. Internally, the file operations would all be stubbed out
278        // for that file, which is undesirable here.
279        //
280        // See https://man7.org/linux/man-pages/man3/fexecve.3.html#DESCRIPTION
281        file.name.open(
282            locked,
283            current_task,
284            OpenFlags::RDONLY,
285            AccessCheck::check_for(Access::EXEC),
286        )?
287    } else {
288        current_task.open_file_at(
289            locked,
290            dir_fd,
291            path.as_ref(),
292            open_flags,
293            FileMode::default(),
294            ResolveFlags::empty(),
295            AccessCheck::check_for(Access::EXEC),
296        )?
297    };
298
299    // This path can affect script resolution (the path is appended to the script args)
300    // and the auxiliary value `AT_EXECFN` from the syscall `getauxval()`
301    let path = if dir_fd == FdNumber::AT_FDCWD {
302        // The file descriptor is CWD, so the path is exactly
303        // what the user specified.
304        path.to_vec()
305    } else {
306        // The path is `/dev/fd/N/P` where N is the file descriptor
307        // number and P is the user-provided path (if relative and non-empty).
308        //
309        // See https://man7.org/linux/man-pages/man2/execveat.2.html#NOTES
310        match path.first() {
311            Some(b'/') => {
312                // The user-provided path is absolute, so dir_fd is ignored.
313                path.to_vec()
314            }
315            Some(_) => {
316                // User-provided path is relative, append it.
317                let mut new_path = format!("/dev/fd/{}/", dir_fd.raw()).into_bytes();
318                new_path.append(&mut path.to_vec());
319                new_path
320            }
321            // User-provided path is empty
322            None => format!("/dev/fd/{}", dir_fd.raw()).into_bytes(),
323        }
324    };
325
326    let path = CString::new(path).map_err(|_| errno!(EINVAL))?;
327
328    current_task.exec(locked, executable, path, argv, environ)?;
329    Ok(())
330}
331
332pub fn sys_getcpu(
333    _locked: &mut Locked<Unlocked>,
334    current_task: &CurrentTask,
335    cpu_out: UserRef<u32>,
336    node_out: UserRef<u32>,
337) -> Result<(), Errno> {
338    // "When either cpu or node is NULL nothing is written to the respective pointer."
339    // from https://man7.org/linux/man-pages/man2/getcpu.2.html
340    if !cpu_out.is_null() {
341        let thread_stats = current_task
342            .thread
343            .read()
344            .as_ref()
345            .expect("current thread is never None when executing")
346            .stats()
347            .map_err(|e| errno!(EINVAL, format!("getting thread stats failed {e:?}")))?;
348        current_task.write_object(cpu_out, &thread_stats.last_scheduled_cpu)?;
349    }
350    if !node_out.is_null() {
351        // Zircon does not yet have a concept of NUMA task scheduling, always tell userspace that
352        // it's on the "first" node which should be true for non-NUMA systems.
353        track_stub!(TODO("https://fxbug.dev/325643815"), "getcpu() numa node");
354        current_task.write_object(node_out, &0)?;
355    }
356    Ok(())
357}
358
359pub fn sys_getpid(
360    _locked: &mut Locked<Unlocked>,
361    current_task: &CurrentTask,
362) -> Result<pid_t, Errno> {
363    Ok(current_task.get_pid())
364}
365
366pub fn sys_gettid(
367    _locked: &mut Locked<Unlocked>,
368    current_task: &CurrentTask,
369) -> Result<pid_t, Errno> {
370    Ok(current_task.get_tid())
371}
372
373pub fn sys_getppid(
374    _locked: &mut Locked<Unlocked>,
375    current_task: &CurrentTask,
376) -> Result<pid_t, Errno> {
377    Ok(current_task.thread_group().read().get_ppid())
378}
379
380fn get_task_or_current(current_task: &CurrentTask, pid: pid_t) -> WeakRef<Task> {
381    if pid == 0 { current_task.weak_task() } else { current_task.get_task(pid) }
382}
383
384pub fn sys_getsid(
385    _locked: &mut Locked<Unlocked>,
386    current_task: &CurrentTask,
387    pid: pid_t,
388) -> Result<pid_t, Errno> {
389    let weak = get_task_or_current(current_task, pid);
390    let target_task = Task::from_weak(&weak)?;
391    security::check_task_getsid(current_task, &target_task)?;
392    let sid = target_task.thread_group().read().process_group.session.leader;
393    Ok(sid)
394}
395
396pub fn sys_getpgid(
397    _locked: &mut Locked<Unlocked>,
398    current_task: &CurrentTask,
399    pid: pid_t,
400) -> Result<pid_t, Errno> {
401    let weak = get_task_or_current(current_task, pid);
402    let task = Task::from_weak(&weak)?;
403
404    security::check_getpgid_access(current_task, &task)?;
405    let pgid = task.thread_group().read().process_group.leader;
406    Ok(pgid)
407}
408
409pub fn sys_setpgid(
410    locked: &mut Locked<Unlocked>,
411    current_task: &CurrentTask,
412    pid: pid_t,
413    pgid: pid_t,
414) -> Result<(), Errno> {
415    let weak = get_task_or_current(current_task, pid);
416    let task = Task::from_weak(&weak)?;
417
418    current_task.thread_group().setpgid(locked, current_task, &task, pgid)?;
419    Ok(())
420}
421
422impl CurrentTask {
423    /// Returns true if the `current_task`'s effective user ID (EUID) is the same as the
424    /// EUID or UID of the `target_task`. We describe this as the current task being
425    /// "EUID-friendly" to the target and it enables actions to be performed that would
426    /// otherwise require additional privileges.
427    ///
428    /// See "The caller needs an effective user ID equal to the real user ID or effective
429    /// user ID of the [target]" at sched_setaffinity(2), comparable language at
430    /// setpriority(2), more ambiguous language at sched_setscheduler(2), and no
431    /// particular specification at sched_setparam(2).
432    fn is_euid_friendly_with(&self, target_task: &Task) -> bool {
433        let (target_uid, target_euid) =
434            target_task.with_real_creds(|creds| (creds.uid, creds.euid));
435        self.with_current_creds(|self_creds| {
436            self_creds.euid == target_uid || self_creds.euid == target_euid
437        })
438    }
439}
440
441// A non-root process is allowed to set any of its three uids to the value of any other. The
442// CAP_SETUID capability bypasses these checks and allows setting any uid to any integer. Likewise
443// for gids.
444fn new_uid_allowed(current_task: &CurrentTask, uid: uid_t) -> bool {
445    // is_task_capable_noaudit also acquires the credentials lock, so we need to copy the data we need
446    // and release it before calling is_task_capable_noaudit.
447    let (current_uid, current_euid, current_saved_uid) =
448        current_task.with_current_creds(|creds| (creds.uid, creds.euid, creds.saved_uid));
449    uid == current_uid
450        || uid == current_euid
451        || uid == current_saved_uid
452        || security::is_task_capable_noaudit(current_task, CAP_SETUID)
453}
454
455fn new_gid_allowed(current_task: &CurrentTask, gid: gid_t) -> bool {
456    // is_task_capable_noaudit also acquires the credentials lock, so we need to copy the data we need
457    // and release it before calling is_task_capable_noaudit.
458    let (current_gid, current_egid, current_saved_gid) =
459        current_task.with_current_creds(|creds| (creds.gid, creds.egid, creds.saved_gid));
460    gid == current_gid
461        || gid == current_egid
462        || gid == current_saved_gid
463        || security::is_task_capable_noaudit(current_task, CAP_SETGID)
464}
465
466pub fn sys_getuid(
467    _locked: &mut Locked<Unlocked>,
468    current_task: &CurrentTask,
469) -> Result<uid_t, Errno> {
470    Ok(current_task.with_current_creds(|creds| creds.uid))
471}
472
473pub fn sys_getgid(
474    _locked: &mut Locked<Unlocked>,
475    current_task: &CurrentTask,
476) -> Result<gid_t, Errno> {
477    Ok(current_task.with_current_creds(|creds| creds.gid))
478}
479
480pub fn sys_setuid(
481    _locked: &mut Locked<Unlocked>,
482    current_task: &CurrentTask,
483    uid: uid_t,
484) -> Result<(), Errno> {
485    let mut creds = current_task.current_creds();
486    if uid == gid_t::MAX {
487        return error!(EINVAL);
488    }
489    if !new_uid_allowed(&current_task, uid) {
490        return error!(EPERM);
491    }
492    let prev = creds.copy_user_credentials();
493    creds.euid = uid;
494    creds.fsuid = uid;
495    if security::is_task_capable_noaudit(current_task, CAP_SETUID) {
496        creds.uid = uid;
497        creds.saved_uid = uid;
498    }
499
500    creds.update_capabilities(prev);
501    current_task.set_creds(creds);
502    Ok(())
503}
504
505pub fn sys_setgid(
506    _locked: &mut Locked<Unlocked>,
507    current_task: &CurrentTask,
508    gid: gid_t,
509) -> Result<(), Errno> {
510    let mut creds = current_task.current_creds();
511    if gid == gid_t::MAX {
512        return error!(EINVAL);
513    }
514    if !new_gid_allowed(&current_task, gid) {
515        return error!(EPERM);
516    }
517    creds.egid = gid;
518    creds.fsgid = gid;
519    if security::is_task_capable_noaudit(current_task, CAP_SETGID) {
520        creds.gid = gid;
521        creds.saved_gid = gid;
522    }
523    current_task.set_creds(creds);
524    Ok(())
525}
526
527pub fn sys_geteuid(
528    _locked: &mut Locked<Unlocked>,
529    current_task: &CurrentTask,
530) -> Result<uid_t, Errno> {
531    Ok(current_task.with_current_creds(|creds| creds.euid))
532}
533
534pub fn sys_getegid(
535    _locked: &mut Locked<Unlocked>,
536    current_task: &CurrentTask,
537) -> Result<gid_t, Errno> {
538    Ok(current_task.with_current_creds(|creds| creds.egid))
539}
540
541pub fn sys_setfsuid(
542    _locked: &mut Locked<Unlocked>,
543    current_task: &CurrentTask,
544    fsuid: uid_t,
545) -> Result<uid_t, Errno> {
546    let mut creds = current_task.current_creds();
547    let prev = creds.copy_user_credentials();
548    if fsuid != u32::MAX && new_uid_allowed(&current_task, fsuid) {
549        creds.fsuid = fsuid;
550        creds.update_capabilities(prev);
551        current_task.set_creds(creds);
552    }
553
554    Ok(prev.fsuid)
555}
556
557pub fn sys_setfsgid(
558    _locked: &mut Locked<Unlocked>,
559    current_task: &CurrentTask,
560    fsgid: gid_t,
561) -> Result<gid_t, Errno> {
562    let mut creds = current_task.current_creds();
563    let prev = creds.copy_user_credentials();
564    let prev_fsgid = creds.fsgid;
565
566    if fsgid != u32::MAX && new_gid_allowed(&current_task, fsgid) {
567        creds.fsgid = fsgid;
568        creds.update_capabilities(prev);
569        current_task.set_creds(creds);
570    }
571
572    Ok(prev_fsgid)
573}
574
575pub fn sys_getresuid(
576    _locked: &mut Locked<Unlocked>,
577    current_task: &CurrentTask,
578    ruid_addr: UserRef<uid_t>,
579    euid_addr: UserRef<uid_t>,
580    suid_addr: UserRef<uid_t>,
581) -> Result<(), Errno> {
582    current_task.with_current_creds(|creds| {
583        current_task.write_object(ruid_addr, &creds.uid)?;
584        current_task.write_object(euid_addr, &creds.euid)?;
585        current_task.write_object(suid_addr, &creds.saved_uid)?;
586        Ok(())
587    })
588}
589
590pub fn sys_getresgid(
591    _locked: &mut Locked<Unlocked>,
592    current_task: &CurrentTask,
593    rgid_addr: UserRef<gid_t>,
594    egid_addr: UserRef<gid_t>,
595    sgid_addr: UserRef<gid_t>,
596) -> Result<(), Errno> {
597    current_task.with_current_creds(|creds| {
598        current_task.write_object(rgid_addr, &creds.gid)?;
599        current_task.write_object(egid_addr, &creds.egid)?;
600        current_task.write_object(sgid_addr, &creds.saved_gid)?;
601        Ok(())
602    })
603}
604
605pub fn sys_setreuid(
606    _locked: &mut Locked<Unlocked>,
607    current_task: &CurrentTask,
608    ruid: uid_t,
609    euid: uid_t,
610) -> Result<(), Errno> {
611    let mut creds = current_task.current_creds();
612    let allowed = |uid| uid == u32::MAX || new_uid_allowed(&current_task, uid);
613    if !allowed(ruid) || !allowed(euid) {
614        return error!(EPERM);
615    }
616
617    let prev = creds.copy_user_credentials();
618    let mut is_ruid_set = false;
619    if ruid != u32::MAX {
620        creds.uid = ruid;
621        is_ruid_set = true;
622    }
623    if euid != u32::MAX {
624        creds.euid = euid;
625        creds.fsuid = euid;
626    }
627
628    if is_ruid_set || prev.uid != euid {
629        creds.saved_uid = creds.euid;
630    }
631
632    creds.update_capabilities(prev);
633    current_task.set_creds(creds);
634    Ok(())
635}
636
637pub fn sys_setregid(
638    _locked: &mut Locked<Unlocked>,
639    current_task: &CurrentTask,
640    rgid: gid_t,
641    egid: gid_t,
642) -> Result<(), Errno> {
643    let mut creds = current_task.current_creds();
644    let allowed = |gid| gid == u32::MAX || new_gid_allowed(&current_task, gid);
645    if !allowed(rgid) || !allowed(egid) {
646        return error!(EPERM);
647    }
648    let previous_rgid = creds.gid;
649    let mut is_rgid_set = false;
650    if rgid != u32::MAX {
651        creds.gid = rgid;
652        is_rgid_set = true;
653    }
654    if egid != u32::MAX {
655        creds.egid = egid;
656        creds.fsgid = egid;
657    }
658
659    if is_rgid_set || previous_rgid != egid {
660        creds.saved_gid = creds.egid;
661    }
662
663    current_task.set_creds(creds);
664    Ok(())
665}
666
667pub fn sys_setresuid(
668    _locked: &mut Locked<Unlocked>,
669    current_task: &CurrentTask,
670    ruid: uid_t,
671    euid: uid_t,
672    suid: uid_t,
673) -> Result<(), Errno> {
674    let mut creds = current_task.current_creds();
675    let allowed = |uid| uid == u32::MAX || new_uid_allowed(&current_task, uid);
676    if !allowed(ruid) || !allowed(euid) || !allowed(suid) {
677        return error!(EPERM);
678    }
679
680    let prev = creds.copy_user_credentials();
681    if ruid != u32::MAX {
682        creds.uid = ruid;
683    }
684    if euid != u32::MAX {
685        creds.euid = euid;
686        creds.fsuid = euid;
687    }
688    if suid != u32::MAX {
689        creds.saved_uid = suid;
690    }
691    creds.update_capabilities(prev);
692    current_task.set_creds(creds);
693    Ok(())
694}
695
696pub fn sys_setresgid(
697    _locked: &mut Locked<Unlocked>,
698    current_task: &CurrentTask,
699    rgid: gid_t,
700    egid: gid_t,
701    sgid: gid_t,
702) -> Result<(), Errno> {
703    let mut creds = current_task.current_creds();
704    let allowed = |gid| gid == u32::MAX || new_gid_allowed(&current_task, gid);
705    if !allowed(rgid) || !allowed(egid) || !allowed(sgid) {
706        return error!(EPERM);
707    }
708    if rgid != u32::MAX {
709        creds.gid = rgid;
710    }
711    if egid != u32::MAX {
712        creds.egid = egid;
713        creds.fsgid = egid;
714    }
715    if sgid != u32::MAX {
716        creds.saved_gid = sgid;
717    }
718    current_task.set_creds(creds);
719    Ok(())
720}
721
722pub fn sys_exit(
723    _locked: &mut Locked<Unlocked>,
724    current_task: &CurrentTask,
725    code: i32,
726) -> Result<(), Errno> {
727    // Only change the current exit status if this has not been already set by exit_group, as
728    // otherwise it has priority.
729    current_task.write().set_exit_status_if_not_already(ExitStatus::Exit(code as u8));
730    Ok(())
731}
732
733pub fn sys_exit_group(
734    locked: &mut Locked<Unlocked>,
735    current_task: &mut CurrentTask,
736    code: i32,
737) -> Result<(), Errno> {
738    current_task.thread_group_exit(locked, ExitStatus::Exit(code as u8));
739    Ok(())
740}
741
742pub fn sys_sched_getscheduler(
743    _locked: &mut Locked<Unlocked>,
744    current_task: &CurrentTask,
745    pid: pid_t,
746) -> Result<u32, Errno> {
747    if pid < 0 {
748        return error!(EINVAL);
749    }
750
751    let weak = get_task_or_current(current_task, pid);
752    let target_task = Task::from_weak(&weak)?;
753    security::check_getsched_access(current_task, target_task.as_ref())?;
754    let current_scheduler_state = target_task.read().scheduler_state;
755    Ok(current_scheduler_state.policy_for_sched_getscheduler())
756}
757
758pub fn sys_sched_setscheduler(
759    locked: &mut Locked<Unlocked>,
760    current_task: &CurrentTask,
761    pid: pid_t,
762    policy: u32,
763    param: UserRef<sched_param>,
764) -> Result<(), Errno> {
765    // Parse & validate the arguments.
766    if pid < 0 || param.is_null() {
767        return error!(EINVAL);
768    }
769
770    let weak = get_task_or_current(current_task, pid);
771    let target_task = Task::from_weak(&weak)?;
772
773    let reset_on_fork = policy & SCHED_RESET_ON_FORK != 0;
774
775    let policy = SchedulingPolicy::try_from(policy & !SCHED_RESET_ON_FORK)?;
776    let realtime_priority =
777        policy.realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
778
779    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
780    let current_state = target_task.read().scheduler_state;
781
782    // Check capabilities and permissions, if required, for the operation.
783    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
784    let strengthening = current_state.realtime_priority < realtime_priority;
785    let rlimited = strengthening
786        && realtime_priority
787            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
788    let clearing_reset_on_fork = current_state.reset_on_fork && !reset_on_fork;
789    let caught_in_idle_trap = current_state.policy == SchedulingPolicy::Idle
790        && policy != SchedulingPolicy::Idle
791        && current_state
792            .normal_priority
793            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
794    if !euid_friendly || rlimited || clearing_reset_on_fork || caught_in_idle_trap {
795        security::check_task_capable(current_task, CAP_SYS_NICE)?;
796    }
797
798    security::check_setsched_access(current_task, &target_task)?;
799
800    // Apply the new scheduler configuration to the task.
801    target_task.set_scheduler_policy_priority_and_reset_on_fork(
802        policy,
803        realtime_priority,
804        reset_on_fork,
805    )?;
806
807    Ok(())
808}
809
810const CPU_SET_SIZE: usize = 128;
811
812#[repr(C)]
813#[derive(Debug, Copy, Clone, IntoBytes, FromBytes, KnownLayout, Immutable)]
814pub struct CpuSet {
815    bits: [u8; CPU_SET_SIZE],
816}
817
818impl Default for CpuSet {
819    fn default() -> Self {
820        Self { bits: [0; CPU_SET_SIZE] }
821    }
822}
823
824fn check_cpu_set_alignment(current_task: &CurrentTask, cpusetsize: u32) -> Result<(), Errno> {
825    let alignment = if current_task.is_arch32() { 4 } else { 8 };
826    if cpusetsize < alignment || cpusetsize % alignment != 0 {
827        return error!(EINVAL);
828    }
829    Ok(())
830}
831
832fn get_default_cpu_set() -> CpuSet {
833    let mut result = CpuSet::default();
834    let mut cpus_count = zx::system_get_num_cpus();
835    let cpus_count_max = (CPU_SET_SIZE * 8) as u32;
836    if cpus_count > cpus_count_max {
837        log_error!("cpus_count={cpus_count}, greater than the {cpus_count_max} max supported.");
838        cpus_count = cpus_count_max;
839    }
840    let mut index = 0;
841    while cpus_count > 0 {
842        let count = std::cmp::min(cpus_count, 8);
843        let (shl, overflow) = 1_u8.overflowing_shl(count);
844        let mask = if overflow { u8::max_value() } else { shl - 1 };
845        result.bits[index] = mask;
846        index += 1;
847        cpus_count -= count;
848    }
849    result
850}
851
852pub fn sys_sched_getaffinity(
853    _locked: &mut Locked<Unlocked>,
854    current_task: &CurrentTask,
855    pid: pid_t,
856    cpusetsize: u32,
857    user_mask: UserAddress,
858) -> Result<usize, Errno> {
859    if pid < 0 {
860        return error!(EINVAL);
861    }
862
863    check_cpu_set_alignment(current_task, cpusetsize)?;
864
865    let weak = get_task_or_current(current_task, pid);
866    let _task = Task::from_weak(&weak)?;
867
868    // sched_setaffinity() is not implemented. Fake affinity mask based on the number of CPUs.
869    let mask = get_default_cpu_set();
870    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
871    current_task.write_memory(user_mask, &mask.bits[..mask_size])?;
872    track_stub!(TODO("https://fxbug.dev/322874659"), "sched_getaffinity");
873    Ok(mask_size)
874}
875
876pub fn sys_sched_setaffinity(
877    _locked: &mut Locked<Unlocked>,
878    current_task: &CurrentTask,
879    pid: pid_t,
880    cpusetsize: u32,
881    user_mask: UserAddress,
882) -> Result<(), Errno> {
883    if pid < 0 {
884        return error!(EINVAL);
885    }
886    let weak = get_task_or_current(current_task, pid);
887    let target_task = Task::from_weak(&weak)?;
888
889    check_cpu_set_alignment(current_task, cpusetsize)?;
890
891    let mask_size = std::cmp::min(cpusetsize as usize, CPU_SET_SIZE);
892    let mut mask = CpuSet::default();
893    current_task.read_memory_to_slice(user_mask, &mut mask.bits[..mask_size])?;
894
895    // Specified mask must include at least one valid CPU.
896    let max_mask = get_default_cpu_set();
897    let mut has_valid_cpu_in_mask = false;
898    for (l1, l2) in std::iter::zip(max_mask.bits, mask.bits) {
899        has_valid_cpu_in_mask = has_valid_cpu_in_mask || (l1 & l2 > 0);
900    }
901    if !has_valid_cpu_in_mask {
902        return error!(EINVAL);
903    }
904
905    if !current_task.is_euid_friendly_with(&target_task) {
906        security::check_task_capable(current_task, CAP_SYS_NICE)?;
907    }
908
909    // Currently, we ignore the mask and act as if the system reset the mask
910    // immediately to allowing all CPUs.
911    track_stub!(TODO("https://fxbug.dev/322874889"), "sched_setaffinity");
912    Ok(())
913}
914
915pub fn sys_sched_getparam(
916    _locked: &mut Locked<Unlocked>,
917    current_task: &CurrentTask,
918    pid: pid_t,
919    param: UserRef<sched_param>,
920) -> Result<(), Errno> {
921    if pid < 0 || param.is_null() {
922        return error!(EINVAL);
923    }
924
925    let weak = get_task_or_current(current_task, pid);
926    let target_task = Task::from_weak(&weak)?;
927    let param_value = target_task.read().scheduler_state.get_sched_param();
928    current_task.write_object(param, &param_value)?;
929    Ok(())
930}
931
932pub fn sys_sched_setparam(
933    locked: &mut Locked<Unlocked>,
934    current_task: &CurrentTask,
935    pid: pid_t,
936    param: UserRef<sched_param>,
937) -> Result<(), Errno> {
938    // Parse & validate the arguments.
939    if pid < 0 || param.is_null() {
940        return error!(EINVAL);
941    }
942    let weak = get_task_or_current(current_task, pid);
943    let target_task = Task::from_weak(&weak)?;
944
945    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
946    let current_state = target_task.read().scheduler_state;
947
948    let realtime_priority = current_state
949        .policy
950        .realtime_priority_from(current_task.read_object(param)?.sched_priority)?;
951
952    // Check capabilities and permissions, if required, for the operation.
953    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
954    let strengthening = current_state.realtime_priority < realtime_priority;
955    let rlimited = strengthening
956        && realtime_priority
957            .exceeds(target_task.thread_group().get_rlimit(locked, Resource::RTPRIO));
958    if !euid_friendly || rlimited {
959        security::check_task_capable(current_task, CAP_SYS_NICE)?;
960    }
961
962    security::check_setsched_access(current_task, &target_task)?;
963
964    // Apply the new scheduler configuration to the task.
965    target_task.set_scheduler_priority(realtime_priority)?;
966
967    Ok(())
968}
969
/// Syscall entry point for `sched_get_priority_min`.
///
/// Forwards the raw `policy` value to `min_priority_for_sched_policy` and returns its result
/// unchanged. The lock context and the calling task are not used.
pub fn sys_sched_get_priority_min(
    _locked: &mut Locked<Unlocked>,
    _ctx: &CurrentTask,
    policy: u32,
) -> Result<u8, Errno> {
    min_priority_for_sched_policy(policy)
}
977
/// Syscall entry point for `sched_get_priority_max`.
///
/// Forwards the raw `policy` value to `max_priority_for_sched_policy` and returns its result
/// unchanged. The lock context and the calling task are not used.
pub fn sys_sched_get_priority_max(
    _locked: &mut Locked<Unlocked>,
    _ctx: &CurrentTask,
    policy: u32,
) -> Result<u8, Errno> {
    max_priority_for_sched_policy(policy)
}
985
/// Placeholder for the `ioprio_set` syscall.
///
/// All arguments are ignored: the call is recorded through `track_stub!` and always fails with
/// `ENOSYS`.
pub fn sys_ioprio_set(
    _locked: &mut Locked<Unlocked>,
    _current_task: &mut CurrentTask,
    _which: i32,
    _who: i32,
    _ioprio: i32,
) -> Result<(), Errno> {
    track_stub!(TODO("https://fxbug.dev/297591758"), "ioprio_set()");
    error!(ENOSYS)
}
996
997pub fn sys_prctl(
998    locked: &mut Locked<Unlocked>,
999    current_task: &mut CurrentTask,
1000    option: u32,
1001    arg2: u64,
1002    arg3: u64,
1003    arg4: u64,
1004    arg5: u64,
1005) -> Result<SyscallResult, Errno> {
1006    match option {
1007        PR_SET_VMA => {
1008            if arg2 != PR_SET_VMA_ANON_NAME as u64 {
1009                track_stub!(TODO("https://fxbug.dev/322874826"), "prctl PR_SET_VMA", arg2);
1010                return error!(ENOSYS);
1011            }
1012            let addr = UserAddress::from(arg3);
1013            let length = arg4 as usize;
1014            let name_addr = UserAddress::from(arg5);
1015            let name = if name_addr.is_null() {
1016                None
1017            } else {
1018                let name = UserCString::new(current_task, UserAddress::from(arg5));
1019                let name = current_task.read_c_string_to_vec(name, 256).map_err(|e| {
1020                    // An overly long name produces EINVAL and not ENAMETOOLONG in Linux 5.15.
1021                    if e.code == ENAMETOOLONG { errno!(EINVAL) } else { e }
1022                })?;
1023                // Some characters are forbidden in VMA names.
1024                if name.iter().any(|b| {
1025                    matches!(b,
1026                        0..=0x1f |
1027                        0x7f..=0xff |
1028                        b'\\' | b'`' | b'$' | b'[' | b']'
1029                    )
1030                }) {
1031                    return error!(EINVAL);
1032                }
1033                Some(name)
1034            };
1035            current_task.mm()?.set_mapping_name(addr, length, name)?;
1036            Ok(().into())
1037        }
1038        PR_SET_DUMPABLE => {
1039            let mm = current_task.mm()?;
1040            let mut dumpable = mm.dumpable.lock(locked);
1041            *dumpable = if arg2 == 1 { DumpPolicy::User } else { DumpPolicy::Disable };
1042            Ok(().into())
1043        }
1044        PR_GET_DUMPABLE => {
1045            let mm = current_task.mm()?;
1046            let dumpable = mm.dumpable.lock(locked);
1047            Ok(match *dumpable {
1048                DumpPolicy::Disable => 0.into(),
1049                DumpPolicy::User => 1.into(),
1050            })
1051        }
1052        PR_SET_PDEATHSIG => {
1053            track_stub!(TODO("https://fxbug.dev/322874397"), "PR_SET_PDEATHSIG");
1054            Ok(().into())
1055        }
1056        PR_SET_NAME => {
1057            let addr = UserAddress::from(arg2);
1058            let name = TaskCommand::new(&current_task.read_memory_to_array::<16>(addr)?);
1059            current_task.set_command_name(name);
1060            Ok(0.into())
1061        }
1062        PR_GET_NAME => {
1063            let addr = UserAddress::from(arg2);
1064            let name = current_task.command().prctl_name();
1065            current_task.write_memory(addr, &name[..])?;
1066            Ok(().into())
1067        }
1068        PR_SET_PTRACER => {
1069            let allowed_ptracers = if arg2 == PR_SET_PTRACER_ANY as u64 {
1070                PtraceAllowedPtracers::Any
1071            } else if arg2 == 0 {
1072                PtraceAllowedPtracers::None
1073            } else {
1074                if current_task.kernel().pids.read().get_task(arg2 as i32).upgrade().is_none() {
1075                    return error!(EINVAL);
1076                }
1077                PtraceAllowedPtracers::Some(arg2 as pid_t)
1078            };
1079            current_task.thread_group().write().allowed_ptracers = allowed_ptracers;
1080            Ok(().into())
1081        }
1082        PR_GET_KEEPCAPS => Ok(current_task
1083            .with_current_creds(|creds| creds.securebits.contains(SecureBits::KEEP_CAPS).into())),
1084        PR_SET_KEEPCAPS => {
1085            if arg2 != 0 && arg2 != 1 {
1086                return error!(EINVAL);
1087            }
1088            let mut creds = current_task.current_creds();
1089            creds.securebits.set(SecureBits::KEEP_CAPS, arg2 != 0);
1090            current_task.set_creds(creds);
1091            Ok(().into())
1092        }
1093        PR_SET_NO_NEW_PRIVS => {
1094            // If any args are set other than arg2 to 1, this should return einval
1095            if arg2 != 1 || arg3 != 0 || arg4 != 0 || arg5 != 0 {
1096                return error!(EINVAL);
1097            }
1098            current_task.write().enable_no_new_privs();
1099            Ok(().into())
1100        }
1101        PR_GET_NO_NEW_PRIVS => {
1102            // If any args are set, this should return einval
1103            if arg2 != 0 || arg3 != 0 || arg4 != 0 {
1104                return error!(EINVAL);
1105            }
1106            Ok(current_task.read().no_new_privs().into())
1107        }
1108        PR_GET_SECCOMP => {
1109            if current_task.seccomp_filter_state.get() == SeccompStateValue::None {
1110                Ok(0.into())
1111            } else {
1112                Ok(2.into())
1113            }
1114        }
1115        PR_SET_SECCOMP => {
1116            if arg2 == SECCOMP_MODE_STRICT as u64 {
1117                return sys_seccomp(
1118                    locked,
1119                    current_task,
1120                    SECCOMP_SET_MODE_STRICT,
1121                    0,
1122                    UserAddress::NULL,
1123                );
1124            } else if arg2 == SECCOMP_MODE_FILTER as u64 {
1125                return sys_seccomp(locked, current_task, SECCOMP_SET_MODE_FILTER, 0, arg3.into());
1126            }
1127            Ok(().into())
1128        }
1129        PR_GET_CHILD_SUBREAPER => {
1130            let addr = UserAddress::from(arg2);
1131            #[allow(clippy::bool_to_int_with_if)]
1132            let value: i32 =
1133                if current_task.thread_group().read().is_child_subreaper { 1 } else { 0 };
1134            current_task.write_object(addr.into(), &value)?;
1135            Ok(().into())
1136        }
1137        PR_SET_CHILD_SUBREAPER => {
1138            current_task.thread_group().write().is_child_subreaper = arg2 != 0;
1139            Ok(().into())
1140        }
1141        PR_GET_SECUREBITS => {
1142            let value = current_task.with_current_creds(|creds| creds.securebits.bits());
1143            Ok(value.into())
1144        }
1145        PR_SET_SECUREBITS => {
1146            // TODO(security): This does not yet respect locked flags.
1147            let mut creds = current_task.current_creds();
1148            security::check_task_capable(current_task, CAP_SETPCAP)?;
1149
1150            let securebits = SecureBits::from_bits(arg2 as u32).ok_or_else(|| {
1151                track_stub!(TODO("https://fxbug.dev/322875244"), "PR_SET_SECUREBITS", arg2);
1152                errno!(ENOSYS)
1153            })?;
1154            creds.securebits = securebits;
1155            current_task.set_creds(creds);
1156            Ok(().into())
1157        }
1158        PR_CAPBSET_READ => {
1159            let cap = Capabilities::try_from(arg2)?;
1160            let has_cap = current_task.with_current_creds(|creds| creds.cap_bounding.contains(cap));
1161            Ok(has_cap.into())
1162        }
1163        PR_CAPBSET_DROP => {
1164            let mut creds = current_task.current_creds();
1165            security::check_task_capable(current_task, CAP_SETPCAP)?;
1166
1167            creds.cap_bounding.remove(Capabilities::try_from(arg2)?);
1168            current_task.set_creds(creds);
1169            Ok(().into())
1170        }
1171        PR_CAP_AMBIENT => {
1172            let operation = arg2 as u32;
1173            let capability_arg = Capabilities::try_from(arg3)?;
1174            if arg4 != 0 || arg5 != 0 {
1175                return error!(EINVAL);
1176            }
1177
1178            // TODO(security): We don't currently validate capabilities, but this should return an
1179            // error if the capability_arg is invalid.
1180            match operation {
1181                PR_CAP_AMBIENT_RAISE => {
1182                    let mut creds = current_task.current_creds();
1183                    if !(creds.cap_permitted.contains(capability_arg)
1184                        && creds.cap_inheritable.contains(capability_arg))
1185                    {
1186                        return error!(EPERM);
1187                    }
1188                    if creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE)
1189                        || creds.securebits.contains(SecureBits::NO_CAP_AMBIENT_RAISE_LOCKED)
1190                    {
1191                        return error!(EPERM);
1192                    }
1193
1194                    creds.cap_ambient.insert(capability_arg);
1195                    current_task.set_creds(creds);
1196                    Ok(().into())
1197                }
1198                PR_CAP_AMBIENT_LOWER => {
1199                    let mut creds = current_task.current_creds();
1200                    creds.cap_ambient.remove(capability_arg);
1201                    current_task.set_creds(creds);
1202                    Ok(().into())
1203                }
1204                PR_CAP_AMBIENT_IS_SET => {
1205                    let has_cap = current_task
1206                        .with_current_creds(|creds| creds.cap_ambient.contains(capability_arg));
1207                    Ok(has_cap.into())
1208                }
1209                PR_CAP_AMBIENT_CLEAR_ALL => {
1210                    if arg3 != 0 {
1211                        return error!(EINVAL);
1212                    }
1213
1214                    let mut creds = current_task.current_creds();
1215                    creds.cap_ambient = Capabilities::empty();
1216                    current_task.set_creds(creds);
1217                    Ok(().into())
1218                }
1219                _ => error!(EINVAL),
1220            }
1221        }
1222        PR_SET_TIMERSLACK => {
1223            current_task.write().set_timerslack_ns(arg2);
1224            Ok(().into())
1225        }
1226        #[cfg(target_arch = "aarch64")]
1227        PR_GET_TAGGED_ADDR_CTRL => {
1228            track_stub!(TODO("https://fxbug.dev/408554469"), "PR_GET_TAGGED_ADDR_CTRL");
1229            Ok(0.into())
1230        }
1231        #[cfg(target_arch = "aarch64")]
1232        PR_SET_TAGGED_ADDR_CTRL => match u32::try_from(arg2).map_err(|_| errno!(EINVAL))? {
1233            // Only untagged pointers are allowed, the default.
1234            0 => Ok(().into()),
1235            PR_TAGGED_ADDR_ENABLE => {
1236                track_stub!(TODO("https://fxbug.dev/408554469"), "PR_TAGGED_ADDR_ENABLE");
1237                error!(EINVAL)
1238            }
1239            unknown_mode => {
1240                track_stub!(
1241                    TODO("https://fxbug.dev/408554469"),
1242                    "PR_SET_TAGGED_ADDR_CTRL unknown mode",
1243                    unknown_mode,
1244                );
1245                error!(EINVAL)
1246            }
1247        },
1248        _ => {
1249            track_stub!(TODO("https://fxbug.dev/322874733"), "prctl fallthrough", option);
1250            error!(ENOSYS)
1251        }
1252    }
1253}
1254
/// Syscall entry point for `ptrace`.
///
/// `PTRACE_TRACEME`, `PTRACE_ATTACH` and `PTRACE_SEIZE` are routed to `ptrace_traceme` and
/// `ptrace_attach` (with the matching `PtraceAttachType`). Every other request is forwarded,
/// with all of its arguments, to `ptrace_dispatch`.
pub fn sys_ptrace(
    locked: &mut Locked<Unlocked>,
    current_task: &mut CurrentTask,
    request: u32,
    pid: pid_t,
    addr: UserAddress,
    data: UserAddress,
) -> Result<SyscallResult, Errno> {
    match request {
        PTRACE_TRACEME => ptrace_traceme(current_task),
        PTRACE_ATTACH => ptrace_attach(locked, current_task, pid, PtraceAttachType::Attach, data),
        PTRACE_SEIZE => ptrace_attach(locked, current_task, pid, PtraceAttachType::Seize, data),
        _ => ptrace_dispatch(locked, current_task, request, pid, addr, data),
    }
}
1270
/// Syscall entry point for `set_tid_address`.
///
/// Stores `user_tid` as the calling task's `clear_child_tid` address and returns the caller's
/// thread id.
pub fn sys_set_tid_address(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    user_tid: UserRef<pid_t>,
) -> Result<pid_t, Errno> {
    current_task.write().clear_child_tid = user_tid;
    Ok(current_task.get_tid())
}
1279
1280pub fn sys_getrusage(
1281    _locked: &mut Locked<Unlocked>,
1282    current_task: &CurrentTask,
1283    who: i32,
1284    user_usage: RUsagePtr,
1285) -> Result<(), Errno> {
1286    const RUSAGE_SELF: i32 = starnix_uapi::uapi::RUSAGE_SELF as i32;
1287    const RUSAGE_THREAD: i32 = starnix_uapi::uapi::RUSAGE_THREAD as i32;
1288    track_stub!(TODO("https://fxbug.dev/297370242"), "real rusage");
1289    let time_stats = match who {
1290        RUSAGE_CHILDREN => current_task.task.thread_group().read().children_time_stats,
1291        RUSAGE_SELF => current_task.task.thread_group().time_stats(),
1292        RUSAGE_THREAD => current_task.task.time_stats(),
1293        _ => return error!(EINVAL),
1294    };
1295
1296    let usage = rusage {
1297        ru_utime: timeval_from_duration(time_stats.user_time),
1298        ru_stime: timeval_from_duration(time_stats.system_time),
1299        ..rusage::default()
1300    };
1301    current_task.write_multi_arch_object(user_usage, usage)?;
1302
1303    Ok(())
1304}
1305
/// User pointer to an `rlimit` that is read or written as `uapi::rlimit` or
/// `uapi::arch32::rlimit`; used by the `getrlimit`/`setrlimit` entry points.
type PrLimitRef = MultiArchUserRef<uapi::rlimit, uapi::arch32::rlimit>;
1307
/// Syscall entry point for `getrlimit`.
///
/// Implemented as `do_prlimit64` on the calling task (`pid` 0) with a null new-limit pointer,
/// so the current limit for `resource` is only reported through `user_rlimit`.
pub fn sys_getrlimit(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    resource: u32,
    user_rlimit: PrLimitRef,
) -> Result<(), Errno> {
    do_prlimit64(locked, current_task, 0, resource, PrLimitRef::null(current_task), user_rlimit)
}
1316
/// Syscall entry point for `setrlimit`.
///
/// Implemented as `do_prlimit64` on the calling task (`pid` 0) with `user_rlimit` as the new
/// limit and a null old-limit pointer, so nothing is written back to userspace.
pub fn sys_setrlimit(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    resource: u32,
    user_rlimit: PrLimitRef,
) -> Result<(), Errno> {
    do_prlimit64(locked, current_task, 0, resource, user_rlimit, PrLimitRef::null(current_task))
}
1325
/// Syscall entry point for `prlimit64`.
///
/// Converts both user references into the multi-arch form expected by `do_prlimit64` (using
/// `uapi::rlimit` for both layouts) and forwards all arguments to it.
pub fn sys_prlimit64(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    pid: pid_t,
    user_resource: u32,
    new_limit_ref: UserRef<uapi::rlimit>,
    old_limit_ref: UserRef<uapi::rlimit>,
) -> Result<(), Errno> {
    do_prlimit64::<uapi::rlimit>(
        locked,
        current_task,
        pid,
        user_resource,
        new_limit_ref.into(),
        old_limit_ref.into(),
    )
}
1343
1344pub fn do_prlimit64<T>(
1345    locked: &mut Locked<Unlocked>,
1346    current_task: &CurrentTask,
1347    pid: pid_t,
1348    user_resource: u32,
1349    new_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1350    old_limit_ref: MultiArchUserRef<uapi::rlimit, T>,
1351) -> Result<(), Errno>
1352where
1353    T: FromBytes + IntoBytes + Immutable + From<uapi::rlimit> + Into<uapi::rlimit>,
1354{
1355    let weak = get_task_or_current(current_task, pid);
1356    let target_task = Task::from_weak(&weak)?;
1357
1358    // To get or set the resource of a process other than itself, the caller must have either:
1359    // * the same `uid`, `euid`, `saved_uid`, `gid`, `egid`, `saved_gid` as the target.
1360    // * the CAP_SYS_RESOURCE
1361    if current_task.get_pid() != target_task.get_pid() {
1362        let (target_uid, target_euid, target_saved_uid, target_gid, target_egid, target_saved_gid) =
1363            target_task.with_real_creds(|creds| {
1364                (creds.uid, creds.euid, creds.saved_uid, creds.gid, creds.egid, creds.saved_gid)
1365            });
1366        let (
1367            current_uid,
1368            current_euid,
1369            current_saved_uid,
1370            current_gid,
1371            current_egid,
1372            current_saved_gid,
1373        ) = current_task.with_current_creds(|creds| {
1374            (creds.uid, creds.euid, creds.saved_uid, creds.gid, creds.egid, creds.saved_gid)
1375        });
1376        if current_uid != target_uid
1377            || current_euid != target_euid
1378            || current_saved_uid != target_saved_uid
1379            || current_gid != target_gid
1380            || current_egid != target_egid
1381            || current_saved_gid != target_saved_gid
1382        {
1383            security::check_task_capable(current_task, CAP_SYS_RESOURCE)?;
1384        }
1385        security::task_prlimit(
1386            current_task,
1387            &target_task,
1388            !old_limit_ref.is_null(),
1389            !new_limit_ref.is_null(),
1390        )?;
1391    }
1392
1393    let resource = Resource::from_raw(user_resource)?;
1394
1395    let old_limit = match resource {
1396        // TODO: Integrate Resource::STACK with generic ResourceLimits machinery.
1397        Resource::STACK => {
1398            if !new_limit_ref.is_null() {
1399                track_stub!(
1400                    TODO("https://fxbug.dev/322874791"),
1401                    "prlimit64 cannot set RLIMIT_STACK"
1402                );
1403            }
1404            // The stack size is fixed at the moment, but
1405            // if MAP_GROWSDOWN is implemented this should
1406            // report the limit that it can be grown.
1407            let mm = target_task.mm()?;
1408            let mm_state = mm.state.read();
1409            let stack_size = mm_state.stack_size as u64;
1410            rlimit { rlim_cur: stack_size, rlim_max: stack_size }
1411        }
1412        _ => {
1413            let new_limit = if new_limit_ref.is_null() {
1414                None
1415            } else {
1416                let new_limit = current_task.read_multi_arch_object(new_limit_ref)?;
1417                if new_limit.rlim_cur > new_limit.rlim_max {
1418                    return error!(EINVAL);
1419                }
1420                Some(new_limit)
1421            };
1422            ThreadGroup::adjust_rlimits(locked, current_task, &target_task, resource, new_limit)?
1423        }
1424    };
1425    if !old_limit_ref.is_null() {
1426        current_task.write_multi_arch_object(old_limit_ref, old_limit)?;
1427    }
1428    Ok(())
1429}
1430
/// Placeholder for the `quotactl` syscall.
///
/// All arguments are ignored: the call is recorded through `track_stub!` and always fails with
/// `ENOSYS`.
pub fn sys_quotactl(
    _locked: &mut Locked<Unlocked>,
    _current_task: &CurrentTask,
    _cmd: i32,
    _special: UserRef<c_char>,
    _id: i32,
    _addr: UserRef<c_char>,
) -> Result<SyscallResult, Errno> {
    track_stub!(TODO("https://fxbug.dev/297302197"), "quotacl()");
    error!(ENOSYS)
}
1442
1443pub fn sys_capget(
1444    _locked: &mut Locked<Unlocked>,
1445    current_task: &CurrentTask,
1446    user_header: UserRef<__user_cap_header_struct>,
1447    user_data: UserRef<__user_cap_data_struct>,
1448) -> Result<(), Errno> {
1449    let mut header = current_task.read_object(user_header)?;
1450    let is_version_valid =
1451        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
1452            .contains(&header.version);
1453    if !is_version_valid {
1454        header.version = _LINUX_CAPABILITY_VERSION_3;
1455        current_task.write_object(user_header, &header)?;
1456    }
1457    if user_data.is_null() {
1458        return Ok(());
1459    }
1460    if !is_version_valid || header.pid < 0 {
1461        return error!(EINVAL);
1462    }
1463
1464    let weak = get_task_or_current(current_task, header.pid);
1465    let target_task = Task::from_weak(&weak)?;
1466
1467    security::check_getcap_access(current_task, &target_task)?;
1468
1469    let (permitted, effective, inheritable) = {
1470        let creds = &target_task.real_creds();
1471        (creds.cap_permitted, creds.cap_effective, creds.cap_inheritable)
1472    };
1473
1474    match header.version {
1475        _LINUX_CAPABILITY_VERSION_1 => {
1476            let data: [__user_cap_data_struct; 1] = [__user_cap_data_struct {
1477                effective: effective.as_abi_v1(),
1478                inheritable: inheritable.as_abi_v1(),
1479                permitted: permitted.as_abi_v1(),
1480            }];
1481            current_task.write_objects(user_data, &data)?;
1482        }
1483        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
1484            // Return 64 bit capabilities as two sets of 32 bit capabilities, little endian
1485            let (permitted, effective, inheritable) =
1486                (permitted.as_abi_v3(), effective.as_abi_v3(), inheritable.as_abi_v3());
1487            let data: [__user_cap_data_struct; 2] = [
1488                __user_cap_data_struct {
1489                    effective: effective.0,
1490                    inheritable: inheritable.0,
1491                    permitted: permitted.0,
1492                },
1493                __user_cap_data_struct {
1494                    effective: effective.1,
1495                    inheritable: inheritable.1,
1496                    permitted: permitted.1,
1497                },
1498            ];
1499            current_task.write_objects(user_data, &data)?;
1500        }
1501        _ => {
1502            unreachable!("already returned if Linux capability version is not valid")
1503        }
1504    }
1505    Ok(())
1506}
1507
/// Handles the `capset` syscall: replaces the calling task's permitted, effective and
/// inheritable capability sets with the values supplied by userspace.
///
/// An unrecognized header version causes the header to be rewritten with
/// `_LINUX_CAPABILITY_VERSION_3` and the call to fail with `EINVAL`. A `pid` that is neither 0
/// nor the caller's own tid fails with `EPERM`.
///
/// The requested sets must pass the checks below (`CAP_SETPCAP` or `EPERM` on violation) and
/// `security::check_setcap_access` before the credentials are updated. The ambient set is
/// reduced to the capabilities that remain both permitted and inheritable.
pub fn sys_capset(
    _locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    user_header: UserRef<__user_cap_header_struct>,
    user_data: UserRef<__user_cap_data_struct>,
) -> Result<(), Errno> {
    let mut header = current_task.read_object(user_header)?;
    let is_version_valid =
        [_LINUX_CAPABILITY_VERSION_1, _LINUX_CAPABILITY_VERSION_2, _LINUX_CAPABILITY_VERSION_3]
            .contains(&header.version);
    if !is_version_valid {
        // Report the preferred version back to the caller before failing.
        header.version = _LINUX_CAPABILITY_VERSION_3;
        current_task.write_object(user_header, &header)?;
        return error!(EINVAL);
    }
    // Only the calling task's own capabilities may be changed.
    if header.pid != 0 && header.pid != current_task.tid {
        return error!(EPERM);
    }

    // Version 1 supplies one data struct; versions 2 and 3 supply two, combined by
    // `from_abi_v3`.
    let (new_permitted, new_effective, new_inheritable) = match header.version {
        _LINUX_CAPABILITY_VERSION_1 => {
            let data = current_task.read_object(user_data)?;
            (
                Capabilities::from_abi_v1(data.permitted),
                Capabilities::from_abi_v1(data.effective),
                Capabilities::from_abi_v1(data.inheritable),
            )
        }
        _LINUX_CAPABILITY_VERSION_2 | _LINUX_CAPABILITY_VERSION_3 => {
            let data =
                current_task.read_objects_to_array::<__user_cap_data_struct, 2>(user_data)?;
            (
                Capabilities::from_abi_v3((data[0].permitted, data[1].permitted)),
                Capabilities::from_abi_v3((data[0].effective, data[1].effective)),
                Capabilities::from_abi_v3((data[0].inheritable, data[1].inheritable)),
            )
        }
        _ => {
            unreachable!("already returned if Linux capability version is not valid")
        }
    };

    // Permission checks. Copied out of TLPI section 39.7.
    let mut creds = current_task.current_creds();
    {
        log_trace!(
            "Capabilities({{permitted={:?} from {:?}, effective={:?} from {:?}, inheritable={:?} from {:?}}}, bounding={:?})",
            new_permitted,
            creds.cap_permitted,
            new_effective,
            creds.cap_effective,
            new_inheritable,
            creds.cap_inheritable,
            creds.cap_bounding
        );
        // Adding inheritable capabilities beyond (inheritable | permitted) needs CAP_SETPCAP.
        if !creds.cap_inheritable.union(creds.cap_permitted).contains(new_inheritable) {
            security::check_task_capable(current_task, CAP_SETPCAP)?;
        }

        // New inheritable capabilities must stay within (inheritable | bounding).
        if !creds.cap_inheritable.union(creds.cap_bounding).contains(new_inheritable) {
            return error!(EPERM);
        }
        // The permitted set can only shrink.
        if !creds.cap_permitted.contains(new_permitted) {
            return error!(EPERM);
        }
        // The effective set must be a subset of the new permitted set.
        if !new_permitted.contains(new_effective) {
            return error!(EPERM);
        }
    }
    let weak = get_task_or_current(current_task, header.pid);
    let target_task = Task::from_weak(&weak)?;

    security::check_setcap_access(current_task, &target_task)?;

    creds.cap_permitted = new_permitted;
    creds.cap_effective = new_effective;
    creds.cap_inheritable = new_inheritable;
    // Ambient capabilities that are no longer both permitted and inheritable are dropped.
    creds.cap_ambient = new_permitted & new_inheritable & creds.cap_ambient;
    current_task.set_creds(creds);
    Ok(())
}
1589
1590pub fn sys_seccomp(
1591    locked: &mut Locked<Unlocked>,
1592    current_task: &mut CurrentTask,
1593    operation: u32,
1594    flags: u32,
1595    args: UserAddress,
1596) -> Result<SyscallResult, Errno> {
1597    match operation {
1598        SECCOMP_SET_MODE_STRICT => {
1599            if flags != 0 || args != UserAddress::NULL {
1600                return error!(EINVAL);
1601            }
1602            current_task.set_seccomp_state(SeccompStateValue::Strict)?;
1603            Ok(().into())
1604        }
1605        SECCOMP_SET_MODE_FILTER => {
1606            if flags
1607                & (SECCOMP_FILTER_FLAG_LOG
1608                    | SECCOMP_FILTER_FLAG_NEW_LISTENER
1609                    | SECCOMP_FILTER_FLAG_SPEC_ALLOW
1610                    | SECCOMP_FILTER_FLAG_TSYNC
1611                    | SECCOMP_FILTER_FLAG_TSYNC_ESRCH)
1612                != flags
1613            {
1614                return error!(EINVAL);
1615            }
1616            if (flags & SECCOMP_FILTER_FLAG_NEW_LISTENER != 0)
1617                && (flags & SECCOMP_FILTER_FLAG_TSYNC != 0)
1618                && (flags & SECCOMP_FILTER_FLAG_TSYNC_ESRCH == 0)
1619            {
1620                return error!(EINVAL);
1621            }
1622            let fprog =
1623                current_task.read_multi_arch_object(SockFProgPtr::new(current_task, args))?;
1624            if fprog.len > BPF_MAXINSNS || fprog.len == 0 {
1625                return error!(EINVAL);
1626            }
1627            let code: Vec<sock_filter> =
1628                current_task.read_multi_arch_objects_to_vec(fprog.filter, fprog.len as usize)?;
1629
1630            if !current_task.read().no_new_privs() {
1631                security::check_task_capable(current_task, CAP_SYS_ADMIN)
1632                    .map_err(|_| errno!(EACCES))?;
1633            }
1634            current_task.add_seccomp_filter(locked, code, flags)
1635        }
1636        SECCOMP_GET_ACTION_AVAIL => {
1637            if flags != 0 || args.is_null() {
1638                return error!(EINVAL);
1639            }
1640            let action: u32 = current_task.read_object(UserRef::new(args))?;
1641            SeccompAction::is_action_available(action)
1642        }
1643        SECCOMP_GET_NOTIF_SIZES => {
1644            if flags != 0 {
1645                return error!(EINVAL);
1646            }
1647            track_stub!(TODO("https://fxbug.dev/322874791"), "SECCOMP_GET_NOTIF_SIZES");
1648            error!(ENOSYS)
1649        }
1650        _ => {
1651            track_stub!(TODO("https://fxbug.dev/322874916"), "seccomp fallthrough", operation);
1652            error!(EINVAL)
1653        }
1654    }
1655}
1656
1657pub fn sys_setgroups(
1658    _locked: &mut Locked<Unlocked>,
1659    current_task: &CurrentTask,
1660    size: usize,
1661    groups_addr: UserAddress,
1662) -> Result<(), Errno> {
1663    if size > NGROUPS_MAX as usize {
1664        return error!(EINVAL);
1665    }
1666    let groups = current_task.read_objects_to_vec::<gid_t>(groups_addr.into(), size)?;
1667    let mut creds = current_task.current_creds();
1668    if !creds.is_superuser() {
1669        return error!(EPERM);
1670    }
1671    creds.groups = groups;
1672    current_task.set_creds(creds);
1673    Ok(())
1674}
1675
1676pub fn sys_getgroups(
1677    _locked: &mut Locked<Unlocked>,
1678    current_task: &CurrentTask,
1679    size: usize,
1680    groups_addr: UserAddress,
1681) -> Result<usize, Errno> {
1682    if size > NGROUPS_MAX as usize {
1683        return error!(EINVAL);
1684    }
1685    current_task.with_current_creds(|creds| {
1686        if size != 0 {
1687            if size < creds.groups.len() {
1688                return error!(EINVAL);
1689            }
1690            current_task.write_memory(groups_addr, creds.groups.as_slice().as_bytes())?;
1691        }
1692        Ok(creds.groups.len())
1693    })
1694}
1695
1696pub fn sys_setsid(
1697    locked: &mut Locked<Unlocked>,
1698    current_task: &CurrentTask,
1699) -> Result<pid_t, Errno> {
1700    current_task.thread_group().setsid(locked)?;
1701    Ok(current_task.get_pid())
1702}
1703
1704// Note the asymmetry with sys_setpriority: this returns "kernel nice" which ranges
1705// from 1 (weakest) to 40 (strongest). (It is part of Linux history that this syscall
1706// deals with niceness but has "priority" in its name.)
1707pub fn sys_getpriority(
1708    _locked: &mut Locked<Unlocked>,
1709    current_task: &CurrentTask,
1710    which: u32,
1711    who: i32,
1712) -> Result<u8, Errno> {
1713    match which {
1714        PRIO_PROCESS => {}
1715        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1716        _ => return error!(EINVAL),
1717    }
1718    track_stub!(TODO("https://fxbug.dev/322893809"), "getpriority permissions");
1719    let weak = get_task_or_current(current_task, who);
1720    let target_task = Task::from_weak(&weak)?;
1721    let state = target_task.read();
1722    Ok(state.scheduler_state.normal_priority.raw_priority())
1723}
1724
1725// Note the asymmetry with sys_getpriority: this call's `priority` parameter is a
1726// "user nice" which ranges from -20 (strongest) to 19 (weakest) (other values can be
1727// passed and are clamped to that range and interpretation). (It is part of Linux
1728// history that this syscall deals with niceness but has "priority" in its name.)
1729pub fn sys_setpriority(
1730    locked: &mut Locked<Unlocked>,
1731    current_task: &CurrentTask,
1732    which: u32,
1733    who: i32,
1734    priority: i32,
1735) -> Result<(), Errno> {
1736    // Parse & validate the arguments.
1737    match which {
1738        PRIO_PROCESS => {}
1739        // TODO: https://fxbug.dev/287121196 - support PRIO_PGRP and PRIO_USER?
1740        _ => return error!(EINVAL),
1741    }
1742
1743    let weak = get_task_or_current(current_task, who);
1744    let target_task = Task::from_weak(&weak)?;
1745
1746    let normal_priority = NormalPriority::from_setpriority_syscall(priority);
1747
1748    // TODO: https://fxbug.dev/425143440 - we probably want to improve the locking here.
1749    let current_state = target_task.read().scheduler_state;
1750
1751    // Check capabilities and permissions, if required, for the operation.
1752    let euid_friendly = current_task.is_euid_friendly_with(&target_task);
1753    let strengthening = current_state.normal_priority < normal_priority;
1754    let rlimited = strengthening
1755        && normal_priority.exceeds(target_task.thread_group().get_rlimit(locked, Resource::NICE));
1756    if !euid_friendly {
1757        security::check_task_capable(current_task, CAP_SYS_NICE)?;
1758    } else if rlimited {
1759        security::check_task_capable(current_task, CAP_SYS_NICE).map_err(|_| errno!(EACCES))?;
1760    }
1761
1762    security::check_setsched_access(current_task, &target_task)?;
1763
1764    // Apply the new scheduler configuration to the task.
1765    target_task.set_scheduler_nice(normal_priority)?;
1766
1767    Ok(())
1768}
1769
1770pub fn sys_setns(
1771    _locked: &mut Locked<Unlocked>,
1772    current_task: &CurrentTask,
1773    ns_fd: FdNumber,
1774    ns_type: c_int,
1775) -> Result<(), Errno> {
1776    let file_handle = current_task.task.files.get(ns_fd)?;
1777
1778    // From man pages this is not quite right because some namespace types require more capabilities
1779    // or require this capability in multiple namespaces, but it should cover our current test
1780    // cases and we can make this more nuanced once more namespace types are supported.
1781    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1782
1783    if let Some(mount_ns) = file_handle.downcast_file::<MountNamespaceFile>() {
1784        if !(ns_type == 0 || ns_type == CLONE_NEWNS as i32) {
1785            log_trace!("invalid type");
1786            return error!(EINVAL);
1787        }
1788
1789        track_stub!(TODO("https://fxbug.dev/297312091"), "setns CLONE_FS limitations");
1790        current_task.task.fs().set_namespace(mount_ns.0.clone())?;
1791        return Ok(());
1792    }
1793
1794    if let Some(_pidfd) = file_handle.downcast_file::<PidFdFileObject>() {
1795        track_stub!(TODO("https://fxbug.dev/297312844"), "setns w/ pidfd");
1796        return error!(ENOSYS);
1797    }
1798
1799    track_stub!(TODO("https://fxbug.dev/322893829"), "unknown ns file for setns, see logs");
1800    log_info!("ns_fd was not a supported namespace file: {}", file_handle.ops_type_name());
1801    error!(EINVAL)
1802}
1803
1804pub fn sys_unshare(
1805    _locked: &mut Locked<Unlocked>,
1806    current_task: &CurrentTask,
1807    flags: u32,
1808) -> Result<(), Errno> {
1809    const IMPLEMENTED_FLAGS: u32 = CLONE_FILES | CLONE_FS | CLONE_NEWNS | CLONE_NEWUTS;
1810    if flags & !IMPLEMENTED_FLAGS != 0 {
1811        track_stub!(TODO("https://fxbug.dev/322893372"), "unshare", flags & !IMPLEMENTED_FLAGS);
1812        return error!(EINVAL);
1813    }
1814
1815    if (flags & CLONE_FILES) != 0 {
1816        current_task.files.unshare();
1817    }
1818
1819    if (flags & CLONE_FS) != 0 {
1820        current_task.unshare_fs();
1821    }
1822
1823    if (flags & CLONE_NEWNS) != 0 {
1824        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1825        current_task.fs().unshare_namespace();
1826    }
1827
1828    if (flags & CLONE_NEWUTS) != 0 {
1829        security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1830        // Fork the UTS namespace.
1831        let mut task_state = current_task.write();
1832        let new_uts_ns = task_state.uts_ns.read().clone();
1833        task_state.uts_ns = Arc::new(RwLock::new(new_uts_ns));
1834    }
1835
1836    Ok(())
1837}
1838
1839pub fn sys_swapon(
1840    locked: &mut Locked<Unlocked>,
1841    current_task: &CurrentTask,
1842    user_path: UserCString,
1843    _flags: i32,
1844) -> Result<(), Errno> {
1845    const MAX_SWAPFILES: usize = 32; // See https://man7.org/linux/man-pages/man2/swapon.2.html
1846
1847    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1848
1849    track_stub!(TODO("https://fxbug.dev/322893905"), "swapon validate flags");
1850
1851    let path = current_task.read_path(user_path)?;
1852    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1853
1854    let node = file.node();
1855    let mode = node.info().mode;
1856    if !mode.is_reg() && !mode.is_blk() {
1857        return error!(EINVAL);
1858    }
1859
1860    // We determined this magic number by using the mkswap tool and the file tool. The mkswap tool
1861    // populates a few bytes in the file, including a UUID, which can be replaced with zeros while
1862    // still being recognized by the file tool. This string appears at a fixed offset
1863    // (MAGIC_OFFSET) in the file, which looks quite like a magic number.
1864    const MAGIC_OFFSET: usize = 0xff6;
1865    let swap_magic = b"SWAPSPACE2";
1866    let mut buffer = VecOutputBuffer::new(swap_magic.len());
1867    if file.read_at(locked, current_task, MAGIC_OFFSET, &mut buffer)? != swap_magic.len()
1868        || buffer.data() != swap_magic
1869    {
1870        return error!(EINVAL);
1871    }
1872
1873    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1874    for swap_node in swap_files.iter() {
1875        if Arc::ptr_eq(swap_node, node) {
1876            return error!(EBUSY);
1877        }
1878    }
1879    if swap_files.len() >= MAX_SWAPFILES {
1880        return error!(EPERM);
1881    }
1882    swap_files.push(node.clone());
1883    Ok(())
1884}
1885
1886pub fn sys_swapoff(
1887    locked: &mut Locked<Unlocked>,
1888    current_task: &CurrentTask,
1889    user_path: UserCString,
1890) -> Result<(), Errno> {
1891    security::check_task_capable(current_task, CAP_SYS_ADMIN)?;
1892
1893    let path = current_task.read_path(user_path)?;
1894    let file = current_task.open_file(locked, path.as_ref(), OpenFlags::RDWR)?;
1895    let node = file.node();
1896
1897    let mut swap_files = current_task.kernel().swap_files.lock(locked);
1898    let original_length = swap_files.len();
1899    swap_files.retain(|swap_node| !Arc::ptr_eq(swap_node, node));
1900    if swap_files.len() == original_length {
1901        return error!(EINVAL);
1902    }
1903    Ok(())
1904}
1905
1906#[derive(Default, Debug, IntoBytes, KnownLayout, FromBytes, Immutable)]
1907#[repr(C)]
1908struct KcmpParams {
1909    mask: usize,
1910    shuffle: usize,
1911}
1912
1913static KCMP_PARAMS: LazyLock<KcmpParams> = LazyLock::new(|| {
1914    let mut params = KcmpParams::default();
1915    zx::cprng_draw(params.as_mut_bytes());
1916    // Ensure the shuffle is odd so that multiplying a usize by this value is a permutation.
1917    params.shuffle |= 1;
1918    params
1919});
1920
1921fn obfuscate_value(value: usize) -> usize {
1922    let KcmpParams { mask, shuffle } = *KCMP_PARAMS;
1923    (value ^ mask).wrapping_mul(shuffle)
1924}
1925
1926fn obfuscate_ptr<T>(ptr: *const T) -> usize {
1927    obfuscate_value(ptr as usize)
1928}
1929
1930fn obfuscate_arc<T>(arc: &Arc<T>) -> usize {
1931    obfuscate_ptr(Arc::as_ptr(arc))
1932}
1933
1934pub fn sys_kcmp(
1935    locked: &mut Locked<Unlocked>,
1936    current_task: &CurrentTask,
1937    pid1: pid_t,
1938    pid2: pid_t,
1939    resource_type: u32,
1940    index1: u64,
1941    index2: u64,
1942) -> Result<u32, Errno> {
1943    let weak1 = current_task.get_task(pid1);
1944    let weak2 = current_task.get_task(pid2);
1945    let task1 = Task::from_weak(&weak1)?;
1946    let task2 = Task::from_weak(&weak2)?;
1947
1948    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task1)?;
1949    current_task.check_ptrace_access_mode(locked, PTRACE_MODE_READ_REALCREDS, &task2)?;
1950
1951    let resource_type = KcmpResource::from_raw(resource_type)?;
1952
1953    // Output encoding (see <https://man7.org/linux/man-pages/man2/kcmp.2.html>):
1954    //
1955    //   0  v1 is equal to v2; in other words, the two processes share the resource.
1956    //   1  v1 is less than v2.
1957    //   2  v1 is greater than v2.
1958    //   3  v1 is not equal to v2, but ordering information is unavailable.
1959    //
1960    fn encode_ordering(value: cmp::Ordering) -> u32 {
1961        match value {
1962            cmp::Ordering::Equal => 0,
1963            cmp::Ordering::Less => 1,
1964            cmp::Ordering::Greater => 2,
1965        }
1966    }
1967
1968    match resource_type {
1969        KcmpResource::FILE => {
1970            fn get_file(task: &Task, index: u64) -> Result<FileHandle, Errno> {
1971                // TODO: Test whether O_PATH is allowed here. Conceptually, seems like
1972                //       O_PATH should be allowed, but we haven't tested it yet.
1973                task.files.get_allowing_opath(FdNumber::from_raw(
1974                    index.try_into().map_err(|_| errno!(EBADF))?,
1975                ))
1976            }
1977            let file1 = get_file(&task1, index1)?;
1978            let file2 = get_file(&task2, index2)?;
1979            Ok(encode_ordering(obfuscate_arc(&file1).cmp(&obfuscate_arc(&file2))))
1980        }
1981        KcmpResource::FILES => Ok(encode_ordering(
1982            obfuscate_value(task1.files.id().raw()).cmp(&obfuscate_value(task2.files.id().raw())),
1983        )),
1984        KcmpResource::FS => {
1985            Ok(encode_ordering(obfuscate_arc(&task1.fs()).cmp(&obfuscate_arc(&task2.fs()))))
1986        }
1987        KcmpResource::SIGHAND => Ok(encode_ordering(
1988            obfuscate_arc(&task1.thread_group().signal_actions)
1989                .cmp(&obfuscate_arc(&task2.thread_group().signal_actions)),
1990        )),
1991        KcmpResource::VM => {
1992            Ok(encode_ordering(obfuscate_arc(&task1.mm()?).cmp(&obfuscate_arc(&task2.mm()?))))
1993        }
1994        _ => error!(EINVAL),
1995    }
1996}
1997
1998pub fn sys_syslog(
1999    locked: &mut Locked<Unlocked>,
2000    current_task: &CurrentTask,
2001    action_type: i32,
2002    address: UserAddress,
2003    length: i32,
2004) -> Result<i32, Errno> {
2005    let action = SyslogAction::try_from(action_type)?;
2006    let syslog =
2007        current_task.kernel().syslog.access(&current_task, SyslogAccess::Syscall(action))?;
2008    match action {
2009        SyslogAction::Read => {
2010            if address.is_null() || length < 0 {
2011                return error!(EINVAL);
2012            }
2013            let mut output_buffer =
2014                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
2015            syslog.blocking_read(locked, current_task, &mut output_buffer)
2016        }
2017        SyslogAction::ReadAll => {
2018            if address.is_null() || length < 0 {
2019                return error!(EINVAL);
2020            }
2021            let mut output_buffer =
2022                UserBuffersOutputBuffer::unified_new_at(current_task, address, length as usize)?;
2023            syslog.read_all(current_task, &mut output_buffer)
2024        }
2025        SyslogAction::SizeUnread => syslog.size_unread(),
2026        SyslogAction::SizeBuffer => syslog.size_buffer(),
2027        SyslogAction::Close | SyslogAction::Open => Ok(0),
2028        SyslogAction::ReadClear => {
2029            track_stub!(TODO("https://fxbug.dev/322894145"), "syslog: read clear");
2030            Ok(0)
2031        }
2032        SyslogAction::Clear => {
2033            track_stub!(TODO("https://fxbug.dev/322893673"), "syslog: clear");
2034            Ok(0)
2035        }
2036        SyslogAction::ConsoleOff => {
2037            track_stub!(TODO("https://fxbug.dev/322894399"), "syslog: console off");
2038            Ok(0)
2039        }
2040        SyslogAction::ConsoleOn => {
2041            track_stub!(TODO("https://fxbug.dev/322894106"), "syslog: console on");
2042            Ok(0)
2043        }
2044        SyslogAction::ConsoleLevel => {
2045            if length <= 0 || length >= 8 {
2046                return error!(EINVAL);
2047            }
2048            track_stub!(TODO("https://fxbug.dev/322894199"), "syslog: console level");
2049            Ok(0)
2050        }
2051    }
2052}
2053
2054pub fn sys_vhangup(
2055    _locked: &mut Locked<Unlocked>,
2056    current_task: &CurrentTask,
2057) -> Result<(), Errno> {
2058    security::check_task_capable(current_task, CAP_SYS_TTY_CONFIG)?;
2059    track_stub!(TODO("https://fxbug.dev/324079257"), "vhangup");
2060    Ok(())
2061}
2062
// Syscalls for arch32 usage.
//
// Each entry re-exports a syscall implementation from this file under the name used for its
// arch32 variant. The implementations themselves are shared; only the names differ.
#[cfg(target_arch = "aarch64")]
mod arch32 {
    // Credential queries.
    pub use super::{
        sys_getegid as sys_arch32_getegid32, sys_geteuid as sys_arch32_geteuid32,
        sys_getgid as sys_arch32_getgid32, sys_getgroups as sys_arch32_getgroups32,
        sys_getresgid as sys_arch32_getresgid32, sys_getresuid as sys_arch32_getresuid32,
        sys_getuid as sys_arch32_getuid32,
    };

    // Credential updates.
    pub use super::{
        sys_setfsuid as sys_arch32_setfsuid, sys_setfsuid as sys_arch32_setfsuid32,
        sys_setgid as sys_arch32_setgid32, sys_setgroups as sys_arch32_setgroups32,
        sys_setregid as sys_arch32_setregid32, sys_setresgid as sys_arch32_setresgid32,
        sys_setresuid as sys_arch32_setresuid32, sys_setreuid as sys_arch32_setreuid,
        sys_setreuid as sys_arch32_setreuid32,
    };

    // Process, session, namespace and tracing.
    pub use super::{
        sys_execve as sys_arch32_execve, sys_getpgid as sys_arch32_getpgid,
        sys_getppid as sys_arch32_getppid, sys_ptrace as sys_arch32_ptrace,
        sys_seccomp as sys_arch32_seccomp, sys_setns as sys_arch32_setns,
        sys_setpgid as sys_arch32_setpgid, sys_setsid as sys_arch32_setsid,
        sys_unshare as sys_arch32_unshare,
    };

    // Scheduling and priorities.
    pub use super::{
        sys_getpriority as sys_arch32_getpriority, sys_ioprio_set as sys_arch32_ioprio_set,
        sys_sched_get_priority_max as sys_arch32_sched_get_priority_max,
        sys_sched_get_priority_min as sys_arch32_sched_get_priority_min,
        sys_sched_getaffinity as sys_arch32_sched_getaffinity,
        sys_sched_getparam as sys_arch32_sched_getparam,
        sys_sched_setaffinity as sys_arch32_sched_setaffinity,
        sys_sched_setparam as sys_arch32_sched_setparam,
        sys_sched_setscheduler as sys_arch32_sched_setscheduler,
        sys_setpriority as sys_arch32_setpriority,
    };

    // Resource limits, accounting and miscellaneous.
    pub use super::{
        sys_getrlimit as sys_arch32_ugetrlimit, sys_getrusage as sys_arch32_getrusage,
        sys_quotactl as sys_arch32_quotactl, sys_setrlimit as sys_arch32_setrlimit,
        sys_syslog as sys_arch32_syslog,
    };
}
2092
2093#[cfg(target_arch = "aarch64")]
2094pub use arch32::*;
2095
2096#[cfg(test)]
2097mod tests {
2098    use super::*;
2099    use crate::mm::syscalls::sys_munmap;
2100    use crate::testing::{AutoReleasableTask, map_memory, spawn_kernel_and_run};
2101    use starnix_syscalls::SUCCESS;
2102    use starnix_task_command::TaskCommand;
2103    use starnix_uapi::auth::Credentials;
2104    use starnix_uapi::{SCHED_FIFO, SCHED_NORMAL};
2105    use std::ffi::CString;
2106
2107    #[::fuchsia::test]
2108    async fn test_prctl_set_vma_anon_name() {
2109        spawn_kernel_and_run(async |locked, current_task| {
2110            let mapped_address =
2111                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2112            let name_addr = (mapped_address + 128u64).unwrap();
2113            let name = "test-name\0";
2114            current_task.write_memory(name_addr, name.as_bytes()).expect("failed to write name");
2115            sys_prctl(
2116                locked,
2117                current_task,
2118                PR_SET_VMA,
2119                PR_SET_VMA_ANON_NAME as u64,
2120                mapped_address.ptr() as u64,
2121                32,
2122                name_addr.ptr() as u64,
2123            )
2124            .expect("failed to set name");
2125            assert_eq!(
2126                "test-name",
2127                current_task
2128                    .mm()
2129                    .unwrap()
2130                    .get_mapping_name((mapped_address + 24u64).unwrap())
2131                    .expect("failed to get address")
2132                    .unwrap()
2133                    .to_string(),
2134            );
2135
2136            sys_munmap(locked, &current_task, mapped_address, *PAGE_SIZE as usize)
2137                .expect("failed to unmap memory");
2138            assert_eq!(
2139                error!(EFAULT),
2140                current_task.mm().unwrap().get_mapping_name((mapped_address + 24u64).unwrap())
2141            );
2142        })
2143        .await;
2144    }
2145
2146    #[::fuchsia::test]
2147    async fn test_set_vma_name_special_chars() {
2148        spawn_kernel_and_run(async |locked, current_task| {
2149            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2150
2151            let mapping_addr =
2152                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2153
2154            for c in 1..255 {
2155                let vma_name = CString::new([c]).unwrap();
2156                current_task.write_memory(name_addr, vma_name.as_bytes_with_nul()).unwrap();
2157
2158                let result = sys_prctl(
2159                    locked,
2160                    current_task,
2161                    PR_SET_VMA,
2162                    PR_SET_VMA_ANON_NAME as u64,
2163                    mapping_addr.ptr() as u64,
2164                    *PAGE_SIZE,
2165                    name_addr.ptr() as u64,
2166                );
2167
2168                if c > 0x1f
2169                    && c < 0x7f
2170                    && c != b'\\'
2171                    && c != b'`'
2172                    && c != b'$'
2173                    && c != b'['
2174                    && c != b']'
2175                {
2176                    assert_eq!(result, Ok(SUCCESS));
2177                } else {
2178                    assert_eq!(result, error!(EINVAL));
2179                }
2180            }
2181        })
2182        .await;
2183    }
2184
2185    #[::fuchsia::test]
2186    async fn test_set_vma_name_long() {
2187        spawn_kernel_and_run(async |locked, current_task| {
2188            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2189
2190            let mapping_addr =
2191                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2192
2193            let name_too_long = CString::new(vec![b'a'; 256]).unwrap();
2194
2195            current_task.write_memory(name_addr, name_too_long.as_bytes_with_nul()).unwrap();
2196
2197            assert_eq!(
2198                sys_prctl(
2199                    locked,
2200                    current_task,
2201                    PR_SET_VMA,
2202                    PR_SET_VMA_ANON_NAME as u64,
2203                    mapping_addr.ptr() as u64,
2204                    *PAGE_SIZE,
2205                    name_addr.ptr() as u64,
2206                ),
2207                error!(EINVAL)
2208            );
2209
2210            let name_just_long_enough = CString::new(vec![b'a'; 255]).unwrap();
2211
2212            current_task
2213                .write_memory(name_addr, name_just_long_enough.as_bytes_with_nul())
2214                .unwrap();
2215
2216            assert_eq!(
2217                sys_prctl(
2218                    locked,
2219                    current_task,
2220                    PR_SET_VMA,
2221                    PR_SET_VMA_ANON_NAME as u64,
2222                    mapping_addr.ptr() as u64,
2223                    *PAGE_SIZE,
2224                    name_addr.ptr() as u64,
2225                ),
2226                Ok(SUCCESS)
2227            );
2228        })
2229        .await;
2230    }
2231
2232    #[::fuchsia::test]
2233    async fn test_set_vma_name_misaligned() {
2234        spawn_kernel_and_run(async |locked, current_task| {
2235            let name_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2236
2237            let mapping_addr =
2238                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2239
2240            let name = CString::new("name").unwrap();
2241            current_task.write_memory(name_addr, name.as_bytes_with_nul()).unwrap();
2242
2243            // Passing a misaligned pointer to the start of the named region fails.
2244            assert_eq!(
2245                sys_prctl(
2246                    locked,
2247                    current_task,
2248                    PR_SET_VMA,
2249                    PR_SET_VMA_ANON_NAME as u64,
2250                    1 + mapping_addr.ptr() as u64,
2251                    *PAGE_SIZE - 1,
2252                    name_addr.ptr() as u64,
2253                ),
2254                error!(EINVAL)
2255            );
2256
2257            // Passing an unaligned length does work, however.
2258            assert_eq!(
2259                sys_prctl(
2260                    locked,
2261                    current_task,
2262                    PR_SET_VMA,
2263                    PR_SET_VMA_ANON_NAME as u64,
2264                    mapping_addr.ptr() as u64,
2265                    *PAGE_SIZE - 1,
2266                    name_addr.ptr() as u64,
2267                ),
2268                Ok(SUCCESS)
2269            );
2270        })
2271        .await;
2272    }
2273
2274    #[::fuchsia::test]
2275    async fn test_prctl_get_set_dumpable() {
2276        spawn_kernel_and_run(async |locked, current_task| {
2277            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2278                .expect("failed to get dumpable");
2279
2280            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 1, 0, 0, 0)
2281                .expect("failed to set dumpable");
2282            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2283                .expect("failed to get dumpable");
2284
2285            // SUID_DUMP_ROOT not supported.
2286            sys_prctl(locked, current_task, PR_SET_DUMPABLE, 2, 0, 0, 0)
2287                .expect("failed to set dumpable");
2288            sys_prctl(locked, current_task, PR_GET_DUMPABLE, 0, 0, 0, 0)
2289                .expect("failed to get dumpable");
2290        })
2291        .await;
2292    }
2293
2294    #[::fuchsia::test]
2295    async fn test_sys_getsid() {
2296        spawn_kernel_and_run(async |locked, current_task| {
2297            let kernel = current_task.kernel();
2298            assert_eq!(
2299                current_task.get_tid(),
2300                sys_getsid(locked, &current_task, 0).expect("failed to get sid")
2301            );
2302
2303            let second_task = crate::execution::create_init_child_process(
2304                locked,
2305                &kernel.weak_self.upgrade().unwrap(),
2306                TaskCommand::new(b"second task"),
2307                Some(&CString::new("#kernel").unwrap()),
2308            )
2309            .expect("failed to create second task");
2310            second_task
2311                .mm()
2312                .unwrap()
2313                .initialize_mmap_layout_for_test(starnix_types::arch::ArchWidth::Arch64);
2314            let second_current = AutoReleasableTask::from(second_task);
2315
2316            assert_eq!(
2317                second_current.get_tid(),
2318                sys_getsid(locked, &current_task, second_current.get_tid())
2319                    .expect("failed to get sid")
2320            );
2321        })
2322        .await;
2323    }
2324
2325    #[::fuchsia::test]
2326    async fn test_get_affinity_size() {
2327        spawn_kernel_and_run(async |locked, current_task| {
2328            let mapped_address =
2329                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2330            let pid = current_task.get_pid();
2331            assert_eq!(
2332                sys_sched_getaffinity(locked, &current_task, pid, 16, mapped_address),
2333                Ok(16)
2334            );
2335            assert_eq!(
2336                sys_sched_getaffinity(locked, &current_task, pid, 1024, mapped_address),
2337                Ok(std::mem::size_of::<CpuSet>())
2338            );
2339            assert_eq!(
2340                sys_sched_getaffinity(locked, &current_task, pid, 1, mapped_address),
2341                error!(EINVAL)
2342            );
2343            assert_eq!(
2344                sys_sched_getaffinity(locked, &current_task, pid, 9, mapped_address),
2345                error!(EINVAL)
2346            );
2347        })
2348        .await;
2349    }
2350
2351    #[::fuchsia::test]
2352    async fn test_set_affinity_size() {
2353        spawn_kernel_and_run(async |locked, current_task| {
2354            let mapped_address =
2355                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2356            current_task.write_memory(mapped_address, &[0xffu8]).expect("failed to cpumask");
2357            let pid = current_task.get_pid();
2358            assert_eq!(
2359                sys_sched_setaffinity(
2360                    locked,
2361                    &current_task,
2362                    pid,
2363                    *PAGE_SIZE as u32,
2364                    mapped_address
2365                ),
2366                Ok(())
2367            );
2368            assert_eq!(
2369                sys_sched_setaffinity(locked, &current_task, pid, 1, mapped_address),
2370                error!(EINVAL)
2371            );
2372        })
2373        .await;
2374    }
2375
2376    #[::fuchsia::test]
2377    async fn test_task_name() {
2378        spawn_kernel_and_run(async |locked, current_task| {
2379            let mapped_address =
2380                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2381            let name = "my-task-name\0";
2382            current_task
2383                .write_memory(mapped_address, name.as_bytes())
2384                .expect("failed to write name");
2385
2386            let result =
2387                sys_prctl(locked, current_task, PR_SET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2388                    .unwrap();
2389            assert_eq!(SUCCESS, result);
2390
2391            let mapped_address =
2392                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2393            let result =
2394                sys_prctl(locked, current_task, PR_GET_NAME, mapped_address.ptr() as u64, 0, 0, 0)
2395                    .unwrap();
2396            assert_eq!(SUCCESS, result);
2397
2398            let name_length = name.len();
2399
2400            let out_name = current_task.read_memory_to_vec(mapped_address, name_length).unwrap();
2401            assert_eq!(name.as_bytes(), &out_name);
2402        })
2403        .await;
2404    }
2405
2406    #[::fuchsia::test]
2407    async fn test_sched_get_priority_min_max() {
2408        spawn_kernel_and_run(async |locked, current_task| {
2409            let non_rt_min =
2410                sys_sched_get_priority_min(locked, &current_task, SCHED_NORMAL).unwrap();
2411            assert_eq!(non_rt_min, 0);
2412            let non_rt_max =
2413                sys_sched_get_priority_max(locked, &current_task, SCHED_NORMAL).unwrap();
2414            assert_eq!(non_rt_max, 0);
2415
2416            let rt_min = sys_sched_get_priority_min(locked, &current_task, SCHED_FIFO).unwrap();
2417            assert_eq!(rt_min, 1);
2418            let rt_max = sys_sched_get_priority_max(locked, &current_task, SCHED_FIFO).unwrap();
2419            assert_eq!(rt_max, 99);
2420
2421            let min_bad_policy_error =
2422                sys_sched_get_priority_min(locked, &current_task, std::u32::MAX).unwrap_err();
2423            assert_eq!(min_bad_policy_error, errno!(EINVAL));
2424
2425            let max_bad_policy_error =
2426                sys_sched_get_priority_max(locked, &current_task, std::u32::MAX).unwrap_err();
2427            assert_eq!(max_bad_policy_error, errno!(EINVAL));
2428        })
2429        .await;
2430    }
2431
2432    #[::fuchsia::test]
2433    async fn test_sched_setscheduler() {
2434        spawn_kernel_and_run(async |locked, current_task| {
2435            current_task
2436                .thread_group()
2437                .limits
2438                .lock(locked)
2439                .set(Resource::RTPRIO, rlimit { rlim_cur: 255, rlim_max: 255 });
2440
2441            let scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2442            assert_eq!(scheduler, SCHED_NORMAL, "tasks should have normal scheduler by default");
2443
2444            let mapped_address =
2445                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2446            let requested_params = sched_param { sched_priority: 15 };
2447            current_task.write_object(mapped_address.into(), &requested_params).unwrap();
2448
2449            sys_sched_setscheduler(locked, &current_task, 0, SCHED_FIFO, mapped_address.into())
2450                .unwrap();
2451
2452            let new_scheduler = sys_sched_getscheduler(locked, &current_task, 0).unwrap();
2453            assert_eq!(new_scheduler, SCHED_FIFO, "task should have been assigned fifo scheduler");
2454
2455            let mapped_address =
2456                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2457            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2458                .expect("sched_getparam");
2459            let param_value: sched_param =
2460                current_task.read_object(mapped_address.into()).expect("read_object");
2461            assert_eq!(param_value.sched_priority, 15);
2462        })
2463        .await;
2464    }
2465
2466    #[::fuchsia::test]
2467    async fn test_sched_getparam() {
2468        spawn_kernel_and_run(async |locked, current_task| {
2469            let mapped_address =
2470                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2471            sys_sched_getparam(locked, &current_task, 0, mapped_address.into())
2472                .expect("sched_getparam");
2473            let param_value: sched_param =
2474                current_task.read_object(mapped_address.into()).expect("read_object");
2475            assert_eq!(param_value.sched_priority, 0);
2476        })
2477        .await;
2478    }
2479
2480    #[::fuchsia::test]
2481    async fn test_setuid() {
2482        spawn_kernel_and_run(async |locked, current_task| {
2483            // Test for root.
2484            current_task.set_creds(Credentials::root());
2485            sys_setuid(locked, &current_task, 42).expect("setuid");
2486            let mut creds = current_task.current_creds();
2487            assert_eq!(creds.euid, 42);
2488            assert_eq!(creds.uid, 42);
2489            assert_eq!(creds.saved_uid, 42);
2490
2491            // Remove the CAP_SETUID capability to avoid overwriting permission checks.
2492            creds.cap_effective.remove(CAP_SETUID);
2493            current_task.set_creds(creds);
2494
2495            // Test for non root, which task now is.
2496            assert_eq!(sys_setuid(locked, &current_task, 0), error!(EPERM));
2497            assert_eq!(sys_setuid(locked, &current_task, 43), error!(EPERM));
2498
2499            sys_setuid(locked, &current_task, 42).expect("setuid");
2500            let creds = current_task.current_creds();
2501            assert_eq!(creds.euid, 42);
2502            assert_eq!(creds.uid, 42);
2503            assert_eq!(creds.saved_uid, 42);
2504
2505            // Change uid and saved_uid, and check that one can set the euid to these.
2506            let mut creds = current_task.current_creds();
2507            creds.uid = 41;
2508            creds.euid = 42;
2509            creds.saved_uid = 43;
2510            current_task.set_creds(creds);
2511
2512            sys_setuid(locked, &current_task, 41).expect("setuid");
2513            let creds = current_task.current_creds();
2514            assert_eq!(creds.euid, 41);
2515            assert_eq!(creds.uid, 41);
2516            assert_eq!(creds.saved_uid, 43);
2517
2518            let mut creds = current_task.current_creds();
2519            creds.uid = 41;
2520            creds.euid = 42;
2521            creds.saved_uid = 43;
2522            current_task.set_creds(creds);
2523
2524            sys_setuid(locked, &current_task, 43).expect("setuid");
2525            let creds = current_task.current_creds();
2526            assert_eq!(creds.euid, 43);
2527            assert_eq!(creds.uid, 41);
2528            assert_eq!(creds.saved_uid, 43);
2529        })
2530        .await;
2531    }
2532
2533    #[::fuchsia::test]
2534    async fn test_read_c_string_vector() {
2535        spawn_kernel_and_run(async |locked, current_task| {
2536            let arg_addr = map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE);
2537            let arg = b"test-arg\0";
2538            current_task.write_memory(arg_addr, arg).expect("failed to write test arg");
2539            let arg_usercstr = UserCString::new(current_task, arg_addr);
2540            let null_usercstr = UserCString::null(current_task);
2541
2542            let argv_addr = UserCStringPtr::new(
2543                current_task,
2544                map_memory(locked, &current_task, UserAddress::default(), *PAGE_SIZE),
2545            );
2546            current_task
2547                .write_multi_arch_ptr(argv_addr.addr(), arg_usercstr)
2548                .expect("failed to write UserCString");
2549            current_task
2550                .write_multi_arch_ptr(argv_addr.next().unwrap().addr(), null_usercstr)
2551                .expect("failed to write UserCString");
2552
2553            // The arguments size limit should include the null terminator.
2554            assert!(read_c_string_vector(&current_task, argv_addr, 100, arg.len()).is_ok());
2555            assert_eq!(
2556                read_c_string_vector(
2557                    &current_task,
2558                    argv_addr,
2559                    100,
2560                    std::str::from_utf8(arg).unwrap().trim_matches('\0').len()
2561                ),
2562                error!(E2BIG)
2563            );
2564        })
2565        .await;
2566    }
2567}