starnix_core/task/
seccomp.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::MemoryAccessorExt;
6use crate::signals::{SignalDetail, SignalInfo, SignalSource, send_standard_signal};
7use crate::task::{
8    CurrentTask, EventHandler, ExitStatus, Kernel, Task, TaskFlags, WaitCanceler, WaitQueue, Waiter,
9};
10use crate::vfs::buffers::{InputBuffer, OutputBuffer};
11use crate::vfs::{
12    Anon, FdFlags, FdNumber, FileObject, FileObjectState, FileOps, fileops_impl_nonseekable,
13    fileops_impl_noop_sync,
14};
15use bstr::ByteSlice;
16use ebpf::{
17    BPF_ABS, BPF_LD, BPF_ST, BpfProgramContext, CbpfConfig, EbpfProgram, MemoryId, NoMap,
18    ProgramArgument, Type, bpf_addressing_mode, bpf_class, convert_and_link_cbpf,
19};
20use ebpf_api::SECCOMP_CBPF_CONFIG;
21use starnix_lifecycle::AtomicU64Counter;
22use starnix_logging::{log_warn, track_stub};
23use starnix_sync::{FileOpsCore, Locked, Mutex, Unlocked};
24use starnix_syscalls::decls::Syscall;
25use starnix_syscalls::{SyscallArg, SyscallResult};
26use starnix_uapi::errors::Errno;
27use starnix_uapi::open_flags::OpenFlags;
28use starnix_uapi::signals::{SIGKILL, SIGSYS};
29#[cfg(target_arch = "aarch64")]
30use starnix_uapi::user_address::ArchSpecific;
31use starnix_uapi::user_address::{UserAddress, UserRef};
32use starnix_uapi::vfs::FdEvents;
33use starnix_uapi::{
34    __NR_exit, __NR_read, __NR_write, SECCOMP_IOCTL_NOTIF_ADDFD, SECCOMP_IOCTL_NOTIF_ID_VALID,
35    SECCOMP_IOCTL_NOTIF_RECV, SECCOMP_IOCTL_NOTIF_SEND, SECCOMP_MODE_DISABLED, SECCOMP_MODE_FILTER,
36    SECCOMP_MODE_STRICT, SECCOMP_RET_ACTION_FULL, SECCOMP_RET_DATA,
37    SECCOMP_USER_NOTIF_FLAG_CONTINUE, SYS_SECCOMP, errno, errno_from_code, error, seccomp_data,
38    seccomp_notif, seccomp_notif_resp, sock_filter,
39};
40use std::collections::HashMap;
41use std::sync::atomic::{AtomicU8, Ordering};
42use std::sync::{Arc, LazyLock};
43use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
44
45#[cfg(target_arch = "aarch64")]
46use starnix_uapi::__NR_clock_getres;
47#[cfg(target_arch = "aarch64")]
48use starnix_uapi::__NR_clock_gettime;
49#[cfg(target_arch = "aarch64")]
50use starnix_uapi::__NR_gettimeofday;
51#[cfg(target_arch = "aarch64")]
52use starnix_uapi::{AUDIT_ARCH_AARCH64, AUDIT_ARCH_ARM};
53
54#[cfg(target_arch = "x86_64")]
55use starnix_uapi::__NR_clock_gettime;
56#[cfg(target_arch = "x86_64")]
57use starnix_uapi::__NR_getcpu;
58#[cfg(target_arch = "x86_64")]
59use starnix_uapi::__NR_gettimeofday;
60#[cfg(target_arch = "x86_64")]
61use starnix_uapi::__NR_time;
62#[cfg(target_arch = "x86_64")]
63use starnix_uapi::AUDIT_ARCH_X86_64;
64
65#[cfg(target_arch = "riscv64")]
66use starnix_uapi::AUDIT_ARCH_RISCV64;
67
/// A single installed seccomp filter: the compiled BPF program plus the per-filter bookkeeping
/// (identity, notification cookie counter, logging flag) that accompanies it.
pub struct SeccompFilter {
    /// The BPF program associated with this filter.
    program: EbpfProgram<SeccompFilter>,

    /// The unique-to-this-process id of this filter.  SECCOMP_FILTER_FLAG_TSYNC only works if all
    /// threads in this process have filters that are a prefix of the filters of the thread
    /// attempting to do the TSYNC. Identical filters attached in separate seccomp calls are treated
    /// as different from each other for this purpose, so we need a way of distinguishing them.
    unique_id: u64,

    /// The next cookie (unique id for this syscall), as used by SECCOMP_RET_USER_NOTIF.
    cookie: AtomicU64Counter,

    /// Whether to log the results of this filter.
    log: bool,
}
84
/// The result of running a set of seccomp filters.
pub struct SeccompFilterResult {
    /// The action indicated by the seccomp filter with the highest priority result.
    action: SeccompAction,

    /// The filter that returned the highest priority result.  SECCOMP_RET_USER_NOTIF needs it
    /// in order to draw a cookie from that filter.  `None` when no filter overrode the
    /// initial result.
    filter: Option<Arc<SeccompFilter>>,
}
94
95impl SeccompFilter {
96    /// Creates a SeccompFilter object from the given sock_filter.  Associates the user-provided
97    /// id with it, which is intended to be unique to this process.
98    pub fn from_cbpf(
99        code: &Vec<sock_filter>,
100        maybe_unique_id: u64,
101        should_log: bool,
102    ) -> Result<Self, Errno> {
103        // If an instruction loads from / stores to an absolute address, that address has to be
104        // 32-bit aligned and inside the struct seccomp_data passed in.
105        for insn in code {
106            if (bpf_class(insn) == BPF_LD || bpf_class(insn) == BPF_ST)
107                && (bpf_addressing_mode(insn) == BPF_ABS)
108                && (insn.k & 0x3 != 0 || std::mem::size_of::<seccomp_data>() < insn.k as usize)
109            {
110                return error!(EINVAL);
111            }
112        }
113
114        let program = convert_and_link_cbpf::<SeccompFilter>(code).map_err(|errmsg| {
115            log_warn!("{}", errmsg);
116            errno!(EINVAL)
117        })?;
118
119        Ok(SeccompFilter {
120            program,
121            unique_id: maybe_unique_id,
122            cookie: AtomicU64Counter::new(0),
123            log: should_log,
124        })
125    }
126
127    pub fn run(&self, data: &seccomp_data) -> u32 {
128        self.program.run(&mut (), &SeccompData(*data)) as u32
129    }
130}
131
/// Wrapper for `seccomp_data`. Required in order to implement the `ProgramArgument` trait for a
/// reference to it (see the `ProgramArgument` impl in this file).
#[repr(C)]
#[derive(Debug, Default, Clone, IntoBytes, FromBytes, KnownLayout, Immutable)]
pub struct SeccompData(seccomp_data);
136
/// Describes the execution environment of seccomp BPF programs: no run context, a
/// `SeccompData` reference as the packet, no maps, and the seccomp cBPF configuration.
impl BpfProgramContext for SeccompFilter {
    // Seccomp programs carry no extra per-run state.
    type RunContext<'a> = ();
    // The program's input is the wrapped `seccomp_data` of the syscall being filtered.
    type Packet<'a> = &'a SeccompData;
    type Map = NoMap;
    const CBPF_CONFIG: &'static CbpfConfig = &SECCOMP_CBPF_CONFIG;
}
143
// Declares the static helper set for `SeccompFilter` programs.  NOTE(review): judging by the
// macro name the set is empty (seccomp programs get no helpers) — confirm against the macro.
ebpf::empty_static_helper_set!(SeccompFilter);

/// Lazily-initialized BPF type descriptor returned by the `ProgramArgument` impl for
/// `&SeccompData`: a pointer-to-memory type with a freshly allocated `MemoryId`.
static SECCOMP_DATA_TYPE: LazyLock<Type> =
    LazyLock::new(|| Type::PtrToMemory { id: MemoryId::new(), offset: 0.into(), buffer_size: 0 });
148
149impl ProgramArgument for &'_ SeccompData {
150    fn get_type() -> &'static Type {
151        &*SECCOMP_DATA_TYPE
152    }
153}
154
/// Upper bound on the running instruction total (including the per-filter overhead) that
/// `SeccompFilterContainer::add_filter` allows a container to accumulate.
const SECCOMP_MAX_INSNS_PER_PATH: u16 = 32768;
156
/// A list of seccomp filters, intended to be associated with a specific process.
#[derive(Default)]
pub struct SeccompFilterContainer {
    /// List of currently installed seccomp_filters; most recently added is last.
    pub filters: Vec<Arc<SeccompFilter>>,

    /// The total length of the provided seccomp filters, which cannot
    /// exceed SECCOMP_MAX_INSNS_PER_PATH - 4 * the number of filters.  This is stored
    /// instead of computed because we store seccomp filters in an
    /// expanded form, and it is impossible to get the original length.
    pub provided_instructions: u16,

    /// Data needed by SECCOMP_RET_USER_NOTIF.
    pub notifier: Option<SeccompNotifierHandle>,
}
172
173impl Clone for SeccompFilterContainer {
174    fn clone(&self) -> Self {
175        if let Some(n) = &self.notifier {
176            n.lock().add_thread();
177        }
178        SeccompFilterContainer {
179            filters: self.filters.clone(),
180            provided_instructions: self.provided_instructions,
181            notifier: self.notifier.clone(),
182        }
183    }
184}
185
186impl Drop for SeccompFilterContainer {
187    fn drop(&mut self) {
188        if let Some(n) = &self.notifier {
189            // Notifier needs to send threads a HUP when there is no one left
190            // referencing it.
191            n.lock().remove_thread();
192        }
193    }
194}
195
196fn make_seccomp_data(
197    #[allow(unused_variables)] current_task: &CurrentTask,
198    syscall: &Syscall,
199    ip: u64,
200) -> seccomp_data {
201    #[cfg(target_arch = "x86_64")]
202    let arch_val = AUDIT_ARCH_X86_64;
203    #[cfg(target_arch = "aarch64")]
204    let arch_val = if current_task.is_arch32() { AUDIT_ARCH_ARM } else { AUDIT_ARCH_AARCH64 };
205    #[cfg(target_arch = "riscv64")]
206    let arch_val = AUDIT_ARCH_RISCV64;
207    seccomp_data {
208        nr: syscall.decl.number as i32,
209        arch: arch_val,
210        instruction_pointer: ip,
211        args: [
212            syscall.arg0.raw(),
213            syscall.arg1.raw(),
214            syscall.arg2.raw(),
215            syscall.arg3.raw(),
216            syscall.arg4.raw(),
217            syscall.arg5.raw(),
218        ],
219    }
220}
221
222impl SeccompFilterContainer {
223    /// Ensures that this set of seccomp filters can be "synced to" the given set.
224    /// This means that our filters are a prefix of the given set of filters.
225    pub fn can_sync_to(&self, source: &SeccompFilterContainer) -> bool {
226        if source.filters.len() < self.filters.len() {
227            return false;
228        }
229        for (filter, other_filter) in self.filters.iter().zip(source.filters.iter()) {
230            if other_filter.unique_id != filter.unique_id {
231                return false;
232            }
233        }
234        true
235    }
236
237    /// Adds the given filter to this list.  The original_length parameter is the length of
238    /// the originally provided BPF (i.e., the number of sock_filter instructions), used
239    /// to ensure the total length does not exceed SECCOMP_MAX_INSNS_PER_PATH
240    pub fn add_filter(
241        &mut self,
242        filter: Arc<SeccompFilter>,
243        original_length: u16,
244    ) -> Result<(), Errno> {
245        let maybe_new_length = self.provided_instructions + original_length + 4;
246        if maybe_new_length > SECCOMP_MAX_INSNS_PER_PATH {
247            return error!(ENOMEM);
248        }
249
250        self.provided_instructions = maybe_new_length;
251        self.filters.push(filter);
252        Ok(())
253    }
254
255    /// Runs all of the seccomp filters in this container, most-to-least recent.  Returns the
256    /// highest priority result (which contains a reference to the filter that generated it)
257    pub fn run_all(&self, current_task: &CurrentTask, syscall: &Syscall) -> SeccompFilterResult {
258        let mut r = SeccompFilterResult { action: SeccompAction::Allow, filter: None };
259
260        // VDSO calls can't be caught by seccomp, so most seccomp filters forget to declare them.
261        // But our VDSO implementation is incomplete, and most of the calls forward to the actual
262        // syscalls. So seccomp should ignore them until they're implemented correctly in the VDSO.
263        #[cfg(target_arch = "x86_64")] // The set of VDSO calls is arch dependent.
264        #[allow(non_upper_case_globals)]
265        if let __NR_clock_gettime | __NR_getcpu | __NR_gettimeofday | __NR_time =
266            syscall.decl.number as u32
267        {
268            return r;
269        }
270        #[cfg(target_arch = "aarch64")]
271        #[allow(non_upper_case_globals)]
272        if let __NR_clock_gettime | __NR_clock_getres | __NR_gettimeofday =
273            syscall.decl.number as u32
274        {
275            return r;
276        }
277
278        let data = make_seccomp_data(
279            current_task,
280            syscall,
281            current_task.thread_state.registers.instruction_pointer_register(),
282        );
283
284        // Filters are executed in reverse order of addition
285        for filter in self.filters.iter().rev() {
286            let new_result = filter.run(&data);
287
288            let action = SeccompAction::from_u32(new_result).unwrap_or(SeccompAction::KillProcess);
289
290            if SeccompAction::has_prio(&action, &r.action) == std::cmp::Ordering::Less {
291                r = SeccompFilterResult { action, filter: Some(filter.clone()) };
292            }
293        }
294        r
295    }
296
297    /// Creates a new listener for use by SECCOMP_RET_USER_NOTIF.  Returns its fd.
298    pub fn create_listener(
299        locked: &mut Locked<Unlocked>,
300        current_task: &CurrentTask,
301    ) -> Result<FdNumber, Errno> {
302        // Create the `Anon` handle file before taking the write lock on the task, because
303        // `Anon::new_file()` needs to read the `current_task` SID to label the file object.
304        let the_notifier = SeccompNotifier::new();
305        let handle = Anon::new_file(
306            locked,
307            current_task,
308            Box::new(SeccompNotifierFileObject { notifier: the_notifier.clone() }),
309            OpenFlags::RDWR,
310            "seccomp notify",
311        )?;
312
313        // Take the write lock to check for an existing notifier, and initialize and store the new
314        // notifier otherwise.
315        let filters = &mut current_task.write().seccomp_filters;
316        if filters.notifier.is_some() {
317            return error!(EBUSY);
318        }
319        let fd = current_task.add_file(locked, handle, FdFlags::CLOEXEC)?;
320        {
321            let mut state = the_notifier.lock();
322            state.add_thread();
323        }
324        filters.notifier = Some(the_notifier);
325        Ok(fd)
326    }
327}
328
/// Possible values for the current status of the seccomp filters for
/// this process.  Each discriminant is the corresponding SECCOMP_MODE_* constant, so the
/// value can be stored in (and recovered from) a `u8`.
#[repr(u8)]
#[derive(Clone, Copy, PartialEq)]
pub enum SeccompStateValue {
    /// Seccomp is disabled.
    None = SECCOMP_MODE_DISABLED as u8,
    /// Strict mode (see `SeccompState::do_strict`).
    Strict = SECCOMP_MODE_STRICT as u8,
    /// Filter mode: user-provided filters decide (see `SeccompState::do_user_defined`).
    UserDefined = SECCOMP_MODE_FILTER as u8,
}
338
/// Per-process state that cannot be stored in the container (e.g., whether there is a container).
#[derive(Default)]
pub struct SeccompState {
    /// This AtomicU8 corresponds to a SeccompStateValue (it holds the enum's `u8`
    /// discriminant).
    filter_state: AtomicU8,
}
345
346impl SeccompState {
347    pub fn from(state: &SeccompState) -> SeccompState {
348        SeccompState { filter_state: AtomicU8::new(state.filter_state.load(Ordering::Acquire)) }
349    }
350
351    fn from_u8(value: u8) -> SeccompStateValue {
352        match value {
353            v if v == SECCOMP_MODE_DISABLED as u8 => SeccompStateValue::None,
354            v if v == SECCOMP_MODE_STRICT as u8 => SeccompStateValue::Strict,
355            v if v == SECCOMP_MODE_FILTER as u8 => SeccompStateValue::UserDefined,
356            _ => unreachable!(),
357        }
358    }
359
360    pub fn get(&self) -> SeccompStateValue {
361        Self::from_u8(self.filter_state.load(Ordering::Acquire))
362    }
363
364    pub fn set(&self, state: &SeccompStateValue) -> Result<(), Errno> {
365        loop {
366            let seccomp_filter_status = self.get();
367            if seccomp_filter_status == *state {
368                return Ok(());
369            }
370            if seccomp_filter_status != SeccompStateValue::None {
371                return error!(EINVAL);
372            }
373
374            if self
375                .filter_state
376                .compare_exchange(
377                    seccomp_filter_status as u8,
378                    *state as u8,
379                    Ordering::Release,
380                    Ordering::Acquire,
381                )
382                .is_ok()
383            {
384                return Ok(());
385            }
386        }
387    }
388
389    /// Check to see if this syscall is allowed in STRICT mode, and, if not,
390    /// send the current task a SIGKILL.
391    pub fn do_strict(
392        locked: &mut Locked<Unlocked>,
393        task: &Task,
394        syscall: &Syscall,
395    ) -> Option<Result<SyscallResult, Errno>> {
396        if syscall.decl.number as u32 != __NR_exit
397            && syscall.decl.number as u32 != __NR_read
398            && syscall.decl.number as u32 != __NR_write
399        {
400            send_standard_signal(locked, task, SignalInfo::default(SIGKILL));
401            return Some(Err(errno_from_code!(0)));
402        }
403        None
404    }
405
406    // This is supposed to be put in the audit log, but starnix does not yet have an
407    // audit log.  Also, it does not match the Linux format.  Still, the machinery
408    // is in place for when we have to support it for real.
409    fn log_action(task: &CurrentTask, syscall: &Syscall) {
410        let (uid, gid) = task.with_current_creds(|creds| (creds.uid, creds.gid));
411        let arch = if cfg!(target_arch = "x86_64") {
412            "x86_64"
413        } else if cfg!(target_arch = "aarch64") {
414            "aarch64"
415        } else {
416            "unknown"
417        };
418        starnix_logging::log_info!(
419            "type=SECCOMP: uid={} gid={} pid={} comm={} syscall={} ip={} ARCH={} SYSCALL={}",
420            uid,
421            gid,
422            task.thread_group().leader,
423            task.command(),
424            syscall.decl.number,
425            task.thread_state.registers.instruction_pointer_register(),
426            arch,
427            syscall.decl.name(),
428        );
429    }
430
431    /// Take the given |action| on the given |task|.  The action is one of the SECCOMP_RET values
432    /// (ALLOW, LOG, KILL, KILL_PROCESS, TRAP, ERRNO, USER_NOTIF, TRACE).  |task| is the thread that
433    /// invoked the syscall, and |syscall| is the syscall that was invoked.
434    /// Returns the result that the syscall will be forced to return by this
435    /// filter, or None, if the syscall should return its actual return value.
436    // NB: Allow warning below so that it is clear what we are doing on KILL_PROCESS
437    #[allow(clippy::wildcard_in_or_patterns)]
438    pub fn do_user_defined(
439        locked: &mut Locked<Unlocked>,
440        result: SeccompFilterResult,
441        current_task: &mut CurrentTask,
442        syscall: &Syscall,
443    ) -> Option<Result<SyscallResult, Errno>> {
444        let action = result.action;
445        if let Some(filter) = result.filter.as_ref() {
446            if action.is_logged(current_task.kernel(), filter.log) {
447                Self::log_action(current_task, syscall);
448            }
449        }
450        match action {
451            SeccompAction::Allow => None,
452            SeccompAction::Errno(code) => Some(Err(errno_from_code!(code as i16))),
453            SeccompAction::KillThread => {
454                let siginfo = SignalInfo::default(SIGSYS);
455
456                let is_last_thread = current_task.thread_group().read().tasks_count() == 1;
457                let mut task_state = current_task.write();
458
459                if is_last_thread {
460                    task_state.set_flags(TaskFlags::DUMP_ON_EXIT, true);
461                    task_state.set_exit_status_if_not_already(ExitStatus::CoreDump(siginfo));
462                } else {
463                    task_state.set_exit_status_if_not_already(ExitStatus::Kill(siginfo));
464                }
465                Some(Err(errno_from_code!(0)))
466            }
467            SeccompAction::KillProcess => {
468                current_task
469                    .thread_group_exit(locked, ExitStatus::CoreDump(SignalInfo::default(SIGSYS)));
470                Some(Err(errno_from_code!(0)))
471            }
472            SeccompAction::Log => {
473                Self::log_action(current_task, syscall);
474                None
475            }
476            SeccompAction::Trace => {
477                track_stub!(TODO("https://fxbug.dev/297311898"), "ptrace seccomp support");
478                Some(error!(ENOSYS))
479            }
480            SeccompAction::Trap(errno) => {
481                #[cfg(target_arch = "x86_64")]
482                let arch_val = AUDIT_ARCH_X86_64;
483                #[cfg(target_arch = "aarch64")]
484                let arch_val = AUDIT_ARCH_AARCH64;
485                #[cfg(target_arch = "riscv64")]
486                let arch_val = AUDIT_ARCH_RISCV64;
487
488                let siginfo = SignalInfo {
489                    signal: SIGSYS,
490                    errno: errno as i32,
491                    code: SYS_SECCOMP as i32,
492                    detail: SignalDetail::SIGSYS {
493                        call_addr: current_task
494                            .thread_state
495                            .registers
496                            .instruction_pointer_register()
497                            .into(),
498                        syscall: syscall.decl.number as i32,
499                        arch: arch_val,
500                    },
501                    force: true,
502                    source: SignalSource::capture(),
503                };
504
505                send_standard_signal(locked, current_task, siginfo);
506                Some(Err(errno_from_code!(-(syscall.decl.number as i16))))
507            }
508            SeccompAction::UserNotif => {
509                if let Some(notifier) = current_task.get_seccomp_notifier() {
510                    let cookie = result.filter.as_ref().unwrap().cookie.next();
511                    let msg = seccomp_notif {
512                        id: cookie,
513                        pid: current_task.tid as u32,
514                        flags: 0,
515                        data: make_seccomp_data(
516                            current_task,
517                            syscall,
518                            current_task.thread_state.registers.instruction_pointer_register(),
519                        ),
520                    };
521                    // First, add a pending notification, and wake up the supervisor waiting for it.
522                    let waiter = Waiter::new();
523                    {
524                        let mut notifier = notifier.lock();
525                        if notifier.is_closed {
526                            // Someone explicitly close()d the fd with the notifier, which does not
527                            // clear the thread-local notifier.  Do it now.
528                            drop(notifier);
529                            current_task.set_seccomp_notifier(None);
530                            return Some(error!(ENOSYS));
531                        }
532                        notifier.create_notification(cookie, msg);
533                        notifier.waiters.wait_async_value(&waiter, cookie);
534                    }
535
536                    // Next, wait for a response from the supervisor
537                    if let Err(e) = waiter.wait(locked, current_task) {
538                        return Some(Err(e));
539                    }
540
541                    // Fetch the response.
542                    let resp: Option<seccomp_notif_resp>;
543                    {
544                        let mut notifier = notifier.lock();
545                        resp = notifier.get_response(cookie);
546                        notifier.delete_notification(cookie);
547                    }
548
549                    // The response indicates what you are supposed to do with this syscall.
550                    if let Some(response) = resp {
551                        if response.val != 0 {
552                            return Some(Ok(response.val.into()));
553                        }
554                        if response.error != 0 {
555                            if response.error > 0 {
556                                return Some(Ok(response.error.into()));
557                            } else {
558                                return Some(Err(errno_from_code!(-response.error as i16)));
559                            }
560                        }
561                        if response.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0 {
562                            return None;
563                        }
564                    }
565                    Some(Ok(0.into()))
566                } else {
567                    Some(error!(ENOSYS))
568                }
569            }
570        }
571    }
572}
573
/// The action a seccomp filter asks for, decoded from a SECCOMP_RET_* return value (see
/// `SeccompAction::from_u32`).
#[derive(Clone, Copy, PartialEq)]
pub enum SeccompAction {
    Allow,
    /// Carries the data bits of the return value, capped at 0xfff by `from_u32`.
    Errno(u32),
    KillProcess,
    KillThread,
    Log,
    /// Carries the data bits of the return value.
    Trap(u32),
    Trace,
    UserNotif,
}
585
586impl SeccompAction {
587    pub fn is_action_available(action: u32) -> Result<SyscallResult, Errno> {
588        if SeccompAction::from_u32(action).is_none() {
589            return error!(EOPNOTSUPP);
590        }
591        Ok(0.into())
592    }
593
594    pub fn from_u32(action: u32) -> Option<SeccompAction> {
595        match action & !SECCOMP_RET_DATA {
596            linux_uapi::SECCOMP_RET_ALLOW => Some(Self::Allow),
597            linux_uapi::SECCOMP_RET_ERRNO => {
598                let mut action = action & SECCOMP_RET_DATA;
599                // Linux kernel compatibility: if errno exceeds 0xfff, it is capped at 0xfff.
600                action = std::cmp::min(action & 0xffff, 0xfff);
601                Some(Self::Errno(action))
602            }
603            linux_uapi::SECCOMP_RET_KILL_PROCESS => Some(Self::KillProcess),
604            linux_uapi::SECCOMP_RET_KILL_THREAD => Some(Self::KillThread),
605            linux_uapi::SECCOMP_RET_LOG => Some(Self::Log),
606            linux_uapi::SECCOMP_RET_TRACE => Some(Self::Trace),
607            linux_uapi::SECCOMP_RET_TRAP => Some(Self::Trap(action & SECCOMP_RET_DATA)),
608
609            linux_uapi::SECCOMP_RET_USER_NOTIF => Some(Self::UserNotif),
610            _ => None,
611        }
612    }
613
614    pub fn to_isize(self) -> isize {
615        match self {
616            Self::Allow => linux_uapi::SECCOMP_RET_ALLOW as isize,
617            Self::Errno(x) => (linux_uapi::SECCOMP_RET_ERRNO | x) as isize,
618            Self::KillProcess => linux_uapi::SECCOMP_RET_KILL_PROCESS as isize,
619            Self::KillThread => linux_uapi::SECCOMP_RET_KILL_THREAD as isize,
620            Self::Log => linux_uapi::SECCOMP_RET_LOG as isize,
621            Self::Trace => linux_uapi::SECCOMP_RET_TRACE as isize,
622            Self::Trap(x) => (linux_uapi::SECCOMP_RET_TRAP | x) as isize,
623            Self::UserNotif => linux_uapi::SECCOMP_RET_USER_NOTIF as isize,
624        }
625    }
626
627    pub fn canonical_name(self) -> &'static str {
628        match self {
629            Self::Allow => &"allow",
630            Self::Errno(_) => &"errno",
631            Self::KillProcess => &"kill_process",
632            Self::KillThread => &"kill_thread",
633            Self::Log => &"log",
634            Self::Trace => &"trace",
635            Self::Trap(_) => &"trap",
636            Self::UserNotif => &"user_notif",
637        }
638    }
639
640    pub fn has_prio(a: &SeccompAction, b: &SeccompAction) -> std::cmp::Ordering {
641        let anum = a.to_isize() as i32;
642        let bnum = b.to_isize() as i32;
643        let fullnum = SECCOMP_RET_ACTION_FULL as i32;
644        let aval = anum & fullnum;
645        let bval = bnum & fullnum;
646        aval.cmp(&bval)
647    }
648
649    /// Returns a vector of all available actions, sorted by priority.
650    pub fn all_actions() -> Vec<SeccompAction> {
651        let mut result = vec![
652            Self::Allow,
653            Self::Errno(0),
654            Self::KillProcess,
655            Self::KillThread,
656            Self::Log,
657            Self::Trace,
658            Self::Trap(0),
659            Self::UserNotif,
660        ];
661
662        result.sort_by(Self::has_prio);
663        result
664    }
665
666    /// Gets the contents of /proc/sys/kernel/seccomp/actions_avail
667    pub fn get_actions_avail_file() -> Vec<u8> {
668        let all_actions = Self::all_actions();
669        if all_actions.len() == 0 {
670            return vec![];
671        }
672        let mut result = String::from(all_actions[0].canonical_name());
673        for i in 1..all_actions.len() {
674            result.push_str(" ");
675            result.push_str(all_actions[i].canonical_name());
676        }
677        result.push('\n');
678        result.into_bytes()
679    }
680
681    fn logged_bit_offset(&self) -> u32 {
682        match self {
683            Self::Allow => 1,
684            Self::Errno(_) => 2,
685            Self::KillProcess => 3,
686            Self::KillThread => 4,
687            Self::Log => 5,
688            Self::Trace => 6,
689            Self::Trap(_) => 7,
690            Self::UserNotif => 8,
691        }
692    }
693
694    fn set_logged_bit(&self, dst: &mut u16) {
695        *dst |= 1 << self.logged_bit_offset();
696    }
697
698    pub fn is_logged(&self, kernel: &Kernel, filter_flag: bool) -> bool {
699        if kernel.actions_logged.load(Ordering::Relaxed) & (1 << self.logged_bit_offset()) != 0 {
700            match self {
701                // Per the documentation on audit logging of seccomp actions in
702                // seccomp(2), just because it is listed as logged, that doesn't
703                // mean we actually log it.
704
705                // If it is KILL_PROCESS or KILL_THREAD, return true
706                Self::KillProcess | Self::KillThread => true,
707                // If it is one of these and the filter flag was set, return true.
708                Self::Errno(_) | Self::Log | Self::Trap(_) | Self::UserNotif => filter_flag,
709                // Never log ALLOW
710                _ => false,
711            }
712        } else {
713            false
714        }
715    }
716
717    pub fn set_actions_logged(kernel: &Kernel, data: &[u8]) -> Result<(), Errno> {
718        let mut new_actions_logged: u16 = 0;
719        for action_res in data.fields_with(|c| c.is_ascii_whitespace()) {
720            if let Ok(action) = action_res.to_str() {
721                match action {
722                    "errno" => Self::Errno(0).set_logged_bit(&mut new_actions_logged),
723                    "kill_process" => Self::KillProcess.set_logged_bit(&mut new_actions_logged),
724                    "kill_thread" => Self::KillThread.set_logged_bit(&mut new_actions_logged),
725                    "log" => Self::Log.set_logged_bit(&mut new_actions_logged),
726                    "trace" => Self::Trace.set_logged_bit(&mut new_actions_logged),
727                    "trap" => Self::Trap(0).set_logged_bit(&mut new_actions_logged),
728                    "user_notif" => Self::UserNotif.set_logged_bit(&mut new_actions_logged),
729                    // Not allowed to write anything other than the approved actions to that list.
730                    _ => return error!(EINVAL),
731                }
732            } else {
733                return error!(EINVAL);
734            }
735        }
736        kernel.actions_logged.store(new_actions_logged, Ordering::Relaxed);
737        Ok(())
738    }
739
740    pub fn get_actions_logged(kernel: &Kernel) -> Vec<u8> {
741        let al = kernel.actions_logged.load(Ordering::Relaxed);
742        let mut result: String = "".to_string();
743        for action in Self::all_actions() {
744            if (al & (1 << action.logged_bit_offset())) != 0 {
745                result.push_str(action.canonical_name());
746                result.push(' ');
747            }
748        }
749        if !result.is_empty() {
750            // remove trailing whitespace.
751            result.pop();
752        }
753
754        result.into_bytes()
755    }
756}
757
/// This struct contains data that needs to be shuttled back and forth between the thread doing
/// a USER_NOTIF and the supervisor thread responding to it.
#[derive(Default)]
struct SeccompNotification {
    /// notif is the notification set by the filter.  When this is set, the associated fd will
    /// be set to POLLIN.
    notif: seccomp_notif,

    /// Consumed indicates whether a supervisor process has read this notification (and so it
    /// can no longer be consumed by any other SECCOMP_IOCTL_NOTIF_RECV ioctl).  When the notif
    /// is consumed, the associated fd will be set to POLLOUT, indicating that it is ready to
    /// receive a response.
    consumed: bool,

    /// resp is the response that the supervisor sends.  When this is set, an event will be sent
    /// to SeccompNotifier::waiters corresponding to the unique id of the notification.  This
    /// will wake up the filter that is waiting for this particular response.
    resp: Option<seccomp_notif_resp>,
}
777
778impl SeccompNotification {
779    fn new(data: seccomp_notif) -> SeccompNotification {
780        SeccompNotification { notif: data, resp: None, consumed: false }
781    }
782}
783
784/// The underlying implementation of the file descriptor that connects a process that triggers a
785/// SECCOMP_RET_USER_NOTIF with the monitoring process. This support seccomp's ability to notify a
786/// user-space process on specific syscall triggers. See seccomp_unotify(2) for the semantics.
787pub struct SeccompNotifier {
788    waiters: WaitQueue,
789
790    pending_notifications: HashMap<u64, SeccompNotification>,
791
792    // This keeps track of the number of threads using this notifier as a filter.  If that hits
793    // zero, the listeners need to receive a HUP.
794    num_active_threads: u64,
795
796    // notifiers are referenced both by fds and in SeccompFilterContainer. If the file no longer
797    // has fds referring to it, it will be closed, and the SeccompFilterContainers should stop
798    // using it.
799    pub is_closed: bool,
800}
801
802pub type SeccompNotifierHandle = Arc<Mutex<SeccompNotifier>>;
803
804impl SeccompNotifier {
805    pub fn new() -> SeccompNotifierHandle {
806        Arc::new(Mutex::new(SeccompNotifier {
807            waiters: WaitQueue::default(),
808            pending_notifications: HashMap::default(),
809            num_active_threads: 0,
810            is_closed: false,
811        }))
812    }
813
814    fn add_thread(&mut self) {
815        self.num_active_threads += 1;
816    }
817
818    fn remove_thread(&mut self) {
819        self.num_active_threads -= 1;
820        if self.num_active_threads == 0 {
821            self.waiters.notify_fd_events(FdEvents::POLLHUP);
822        }
823    }
824
825    // Creates a pending notification for communication between the
826    // target thread and a supervisor, and notifies readers there is
827    // an opportunity to read.
828    fn create_notification(&mut self, cookie: u64, notif: seccomp_notif) {
829        self.pending_notifications.insert(cookie, SeccompNotification::new(notif));
830        self.waiters.notify_fd_events(FdEvents::POLLIN | FdEvents::POLLRDNORM);
831    }
832
833    // Gets a notification that needs to be handled by a supervisor,
834    // and notifies waiters that there is an opportunity to write.
835    fn consume_some_notification(&mut self) -> Option<seccomp_notif> {
836        for (_, notif) in self.pending_notifications.iter_mut() {
837            if !notif.consumed {
838                notif.consumed = true;
839                self.waiters.notify_fd_events(FdEvents::POLLOUT | FdEvents::POLLWRNORM);
840                return Some(notif.notif);
841            }
842        }
843        None
844    }
845
846    // In case something goes wrong after we consume the notification.
847    fn unconsume(&mut self, cookie: u64) {
848        if let Some(n) = self.pending_notifications.get_mut(&cookie).as_mut() {
849            n.consumed = false;
850        }
851    }
852
853    // Returns the appropriate notifications if someone is waiting with poll/epoll/select.
854    fn get_fd_notifications(&self) -> FdEvents {
855        let mut events = FdEvents::empty();
856
857        for (_, notification) in self.pending_notifications.iter() {
858            if !notification.consumed {
859                events |= FdEvents::POLLIN | FdEvents::POLLRDNORM;
860            } else if notification.resp.is_none() {
861                events |= FdEvents::POLLOUT | FdEvents::POLLWRNORM;
862            }
863        }
864
865        if self.num_active_threads == 0 {
866            events |= FdEvents::POLLHUP;
867        }
868        events
869    }
870
871    // Sets the value read by the target in response to this notification.  Intended for use by the
872    // supervisor.  Notifies the filter there is a response to this request.
873    fn set_response(&mut self, cookie: u64, resp: seccomp_notif_resp) -> Option<Errno> {
874        if let Some(entry) = self.pending_notifications.get_mut(&cookie) {
875            if entry.resp.is_some() {
876                return Some(errno!(EINPROGRESS));
877            }
878            entry.resp = Some(resp);
879            self.waiters.notify_value(resp.id);
880            None
881        } else {
882            Some(errno!(EINVAL))
883        }
884    }
885
886    // Gets the value set by the supervisor for the target to read.
887    fn get_response(&self, cookie: u64) -> Option<seccomp_notif_resp> {
888        if let Some(value) = self.pending_notifications.get(&cookie) {
889            return value.resp;
890        }
891        None
892    }
893
894    // Returns whether the cookie represents an active notification.
895    fn notification_pending(&self, cookie: u64) -> bool {
896        self.pending_notifications.contains_key(&cookie)
897    }
898
899    // Deletes the notification, when the target is done processing it.
900    fn delete_notification(&mut self, cookie: u64) {
901        let _ = self.pending_notifications.remove(&cookie);
902    }
903}
904
905struct SeccompNotifierFileObject {
906    notifier: SeccompNotifierHandle,
907}
908
909impl FileOps for SeccompNotifierFileObject {
910    fileops_impl_nonseekable!();
911    fileops_impl_noop_sync!();
912
913    fn close(
914        self: Box<Self>,
915        _locked: &mut Locked<FileOpsCore>,
916        _file: &FileObjectState,
917        _current_task: &CurrentTask,
918    ) {
919        let mut state = self.notifier.lock();
920
921        for (cookie, notification) in state.pending_notifications.iter() {
922            if !notification.consumed {
923                state.waiters.notify_value(*cookie);
924                state.waiters.notify_fd_events(FdEvents::POLLIN | FdEvents::POLLRDNORM);
925            } else if notification.resp.is_none() {
926                state.waiters.notify_fd_events(FdEvents::POLLOUT | FdEvents::POLLWRNORM);
927            }
928        }
929        state.waiters.notify_fd_events(FdEvents::POLLHUP);
930
931        state.pending_notifications.clear();
932
933        state.is_closed = true;
934    }
935
936    fn read(
937        &self,
938        _locked: &mut Locked<FileOpsCore>,
939        _file: &FileObject,
940        _current_task: &CurrentTask,
941        _offset: usize,
942        _usize: &mut dyn OutputBuffer,
943    ) -> Result<usize, Errno> {
944        error!(EINVAL)
945    }
946
947    fn write(
948        &self,
949        _locked: &mut Locked<FileOpsCore>,
950        _file: &FileObject,
951        _current_task: &CurrentTask,
952        _offset: usize,
953        _buffer: &mut dyn InputBuffer,
954    ) -> Result<usize, Errno> {
955        error!(EINVAL)
956    }
957
958    fn ioctl(
959        &self,
960        locked: &mut Locked<Unlocked>,
961        _file: &FileObject,
962        current_task: &CurrentTask,
963        request: u32,
964        arg: SyscallArg,
965    ) -> Result<SyscallResult, Errno> {
966        let user_addr = UserAddress::from(arg);
967        match request {
968            SECCOMP_IOCTL_NOTIF_RECV => {
969                if let Ok(notif) =
970                    current_task.read_memory_to_vec(user_addr, std::mem::size_of::<seccomp_notif>())
971                {
972                    for value in notif.iter() {
973                        if *value != 0 {
974                            return error!(EINVAL);
975                        }
976                    }
977                }
978                // A RECV reads a notification, optionally waiting for one to become available.
979                let mut notif: Option<seccomp_notif>;
980                loop {
981                    // Grab a notification or wait for one to become readable.
982                    let waiter = Waiter::new();
983                    {
984                        let mut notifier = self.notifier.lock();
985                        notif = notifier.consume_some_notification();
986                        if notif.is_some() {
987                            break;
988                        }
989                        notifier.waiters.wait_async_fd_events(
990                            &waiter,
991                            FdEvents::POLLIN | FdEvents::POLLHUP,
992                            EventHandler::None,
993                        );
994                    }
995                    waiter.wait(locked, current_task)?;
996                }
997                if let Some(notif) = notif {
998                    if let Err(e) =
999                        current_task.write_object(UserRef::<seccomp_notif>::new(user_addr), &notif)
1000                    {
1001                        self.notifier.lock().unconsume(notif.id);
1002                        return Err(e);
1003                    }
1004                }
1005
1006                Ok(0.into())
1007            }
1008            SECCOMP_IOCTL_NOTIF_SEND => {
1009                // A SEND sends a response to a previously received notification.
1010                let resp: seccomp_notif_resp = current_task.read_object(UserRef::new(user_addr))?;
1011                if resp.flags & !SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0 {
1012                    return error!(EINVAL);
1013                }
1014                if resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0
1015                    && (resp.error != 0 || resp.val != 0)
1016                {
1017                    return error!(EINVAL);
1018                }
1019                {
1020                    let mut notifier = self.notifier.lock();
1021                    if let Some(err) = notifier.set_response(resp.id, resp) {
1022                        return Err(err);
1023                    }
1024                }
1025                Ok(0.into())
1026            }
1027            SECCOMP_IOCTL_NOTIF_ID_VALID => {
1028                // An ID_VALID indicates that the notification is still in progress.
1029                let cookie: u64 = current_task.read_object(UserRef::new(user_addr))?;
1030                {
1031                    let notifier = self.notifier.lock();
1032                    if notifier.notification_pending(cookie) {
1033                        Ok(0.into())
1034                    } else {
1035                        error!(ENOENT)
1036                    }
1037                }
1038            }
1039            SECCOMP_IOCTL_NOTIF_ADDFD => error!(EINVAL),
1040            _ => error!(EINVAL),
1041        }
1042    }
1043
1044    fn wait_async(
1045        &self,
1046        _locked: &mut Locked<FileOpsCore>,
1047        _file: &FileObject,
1048        _current_task: &CurrentTask,
1049        waiter: &Waiter,
1050        events: FdEvents,
1051        handler: EventHandler,
1052    ) -> Option<WaitCanceler> {
1053        let notifier = self.notifier.lock();
1054        Some(notifier.waiters.wait_async_fd_events(waiter, events, handler))
1055    }
1056
1057    fn query_events(
1058        &self,
1059        _locked: &mut Locked<FileOpsCore>,
1060        _file: &FileObject,
1061        _current_task: &CurrentTask,
1062    ) -> Result<FdEvents, Errno> {
1063        Ok(self.notifier.lock().get_fd_notifications())
1064    }
1065}
1066
#[cfg(test)]
mod test {
    use crate::task::SeccompAction;
    use crate::testing::spawn_kernel_and_run;

    /// Writing the full list of available actions to actions_logged must be rejected (the test
    /// expects it to contain "allow"), while the same list with "allow" removed must be
    /// accepted.
    #[::fuchsia::test]
    async fn test_actions_logged_accepts_legal_string() {
        spawn_kernel_and_run(async |_, current_task| {
            let kernel = current_task.kernel();
            // This is exercised from Rust rather than from a syscall test because a syscall test
            // would have to change the global config.
            let mut actions = SeccompAction::get_actions_avail_file();
            let full_list_result = SeccompAction::set_actions_logged(&kernel, &actions[..]);
            assert!(
                full_list_result.is_err(),
                "Should not be able to write allow to actions_logged file"
            );

            // Cut "allow" out of the list, if present.
            let allow_start =
                std::string::String::from_utf8(actions.clone()).unwrap().find("allow");
            if let Some(start) = allow_start {
                let end = start + "allow".len();
                actions.drain(start..end);
            }

            if let Err(write_error) = SeccompAction::set_actions_logged(&kernel, &actions[..]) {
                panic!(
                    "Could not write legal string \"{}\" to actions_logged file: error {}",
                    std::string::String::from_utf8(actions.clone()).unwrap(),
                    write_error
                );
            }
        })
        .await;
    }
}