Skip to main content

starnix_core/task/
seccomp.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::MemoryAccessorExt;
6use crate::signals::{SignalDetail, SignalInfo, send_standard_signal};
7use crate::task::{
8    CurrentTask, EventHandler, ExitStatus, Kernel, Task, TaskFlags, WaitCanceler, WaitQueue, Waiter,
9};
10use crate::vfs::buffers::{InputBuffer, OutputBuffer};
11use crate::vfs::{
12    Anon, FdFlags, FdNumber, FileObject, FileObjectState, FileOps, fileops_impl_nonseekable,
13    fileops_impl_noop_sync,
14};
15use bstr::ByteSlice;
16use ebpf::{
17    BPF_ABS, BPF_IND, BPF_LD, BPF_ST, BPF_W, BpfProgramContext, CbpfConfig, EbpfProgram, MemoryId,
18    NoMap, ProgramArgument, Type, bpf_addressing_mode, bpf_class, bpf_size, convert_and_link_cbpf,
19};
20use ebpf_api::SECCOMP_CBPF_CONFIG;
21use linux_uapi::AUDIT_SECCOMP;
22use starnix_lifecycle::AtomicU64Counter;
23use starnix_logging::{log_warn, track_stub};
24use starnix_sync::{FileOpsCore, Locked, Mutex, Unlocked};
25use starnix_syscalls::decls::Syscall;
26use starnix_syscalls::{SyscallArg, SyscallResult};
27use starnix_uapi::errors::Errno;
28use starnix_uapi::open_flags::OpenFlags;
29use starnix_uapi::signals::{SIGKILL, SIGSYS};
30#[cfg(target_arch = "aarch64")]
31use starnix_uapi::user_address::ArchSpecific;
32use starnix_uapi::user_address::{UserAddress, UserRef};
33use starnix_uapi::vfs::FdEvents;
34use starnix_uapi::{
35    __NR_exit, __NR_read, __NR_write, SECCOMP_IOCTL_NOTIF_ADDFD, SECCOMP_IOCTL_NOTIF_ID_VALID,
36    SECCOMP_IOCTL_NOTIF_RECV, SECCOMP_IOCTL_NOTIF_SEND, SECCOMP_MODE_DISABLED, SECCOMP_MODE_FILTER,
37    SECCOMP_MODE_STRICT, SECCOMP_RET_ACTION_FULL, SECCOMP_RET_DATA,
38    SECCOMP_USER_NOTIF_FLAG_CONTINUE, SYS_SECCOMP, errno, errno_from_code, error, seccomp_data,
39    seccomp_notif, seccomp_notif_resp, sock_filter,
40};
41use std::collections::HashMap;
42use std::sync::atomic::{AtomicU8, Ordering};
43use std::sync::{Arc, LazyLock};
44use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
45
46#[cfg(target_arch = "aarch64")]
47use starnix_uapi::__NR_clock_getres;
48#[cfg(target_arch = "aarch64")]
49use starnix_uapi::__NR_clock_gettime;
50#[cfg(target_arch = "aarch64")]
51use starnix_uapi::__NR_gettimeofday;
52#[cfg(target_arch = "aarch64")]
53use starnix_uapi::{AUDIT_ARCH_AARCH64, AUDIT_ARCH_ARM};
54
55#[cfg(target_arch = "x86_64")]
56use starnix_uapi::__NR_clock_gettime;
57#[cfg(target_arch = "x86_64")]
58use starnix_uapi::__NR_getcpu;
59#[cfg(target_arch = "x86_64")]
60use starnix_uapi::__NR_gettimeofday;
61#[cfg(target_arch = "x86_64")]
62use starnix_uapi::__NR_time;
63#[cfg(target_arch = "x86_64")]
64use starnix_uapi::AUDIT_ARCH_X86_64;
65
66#[cfg(target_arch = "riscv64")]
67use starnix_uapi::AUDIT_ARCH_RISCV64;
68
/// A single installed seccomp filter: the converted BPF program together with the bookkeeping
/// that is kept per filter (identity, notification cookies, logging flag).
pub struct SeccompFilter {
    /// The BPF program associated with this filter.
    program: EbpfProgram<SeccompFilter>,

    /// The unique-to-this-process id of this filter.  SECCOMP_FILTER_FLAG_TSYNC only works if all
    /// threads in this process have filters that are a prefix of the filters of the thread
    /// attempting to do the TSYNC. Identical filters attached in separate seccomp calls are treated
    /// as different from each other for this purpose, so we need a way of distinguishing them.
    unique_id: u64,

    /// The next cookie (unique id for this syscall), as used by SECCOMP_RET_USER_NOTIF
    cookie: AtomicU64Counter,

    /// Whether to log the results of this filter
    log: bool,
}
85
/// The result of running a set of seccomp filters.
pub struct SeccompFilterResult {
    /// The action indicated by the seccomp filter with the highest priority result.
    action: SeccompAction,

    /// The filter that returned the highest priority result, as used by SECCOMP_RET_USER_NOTIF,
    /// which has to have access to its cookie value.  `None` when no filter produced the
    /// result (for example, when the default action was kept).
    filter: Option<Arc<SeccompFilter>>,
}
95
96impl SeccompFilter {
97    /// Creates a SeccompFilter object from the given sock_filter.  Associates the user-provided
98    /// id with it, which is intended to be unique to this process.
99    pub fn from_cbpf(
100        code: &Vec<sock_filter>,
101        maybe_unique_id: u64,
102        should_log: bool,
103    ) -> Result<Self, Errno> {
104        for insn in code {
105            // If an instruction loads from / stores to an absolute address, that address has to be
106            // 32-bit aligned and inside the struct seccomp_data passed in.
107            if (bpf_class(insn) == BPF_LD || bpf_class(insn) == BPF_ST)
108                && (bpf_addressing_mode(insn) == BPF_ABS)
109                && (insn.k & 0x3 != 0 || std::mem::size_of::<seccomp_data>() < insn.k as usize)
110            {
111                return error!(EINVAL);
112            }
113            // Indirect loads (BPF_IND) are strictly forbidden.
114            if (bpf_class(insn) == BPF_LD || bpf_class(insn) == BPF_ST)
115                && bpf_addressing_mode(insn) == BPF_IND
116            {
117                return error!(EINVAL);
118            }
119            // 8 and 16 bits read and write are strictly forbidden.
120            if (bpf_class(insn) == BPF_LD || bpf_class(insn) == BPF_ST) && bpf_size(insn) != BPF_W {
121                return error!(EINVAL);
122            }
123        }
124
125        let program = convert_and_link_cbpf::<SeccompFilter>(code).map_err(|errmsg| {
126            log_warn!("{}", errmsg);
127            errno!(EINVAL)
128        })?;
129
130        Ok(SeccompFilter {
131            program,
132            unique_id: maybe_unique_id,
133            cookie: AtomicU64Counter::new(0),
134            log: should_log,
135        })
136    }
137
138    pub fn run(&self, data: &seccomp_data) -> u32 {
139        self.program.run(&mut (), &SeccompData(*data)) as u32
140    }
141}
142
// Wrapper for `seccomp_data`. Required in order to implement the `ProgramArgument` trait below.
// `SeccompFilter::run` wraps a copy of the syscall's `seccomp_data` in this type before handing
// it to the BPF program.
#[repr(C)]
#[derive(Debug, Default, Clone, IntoBytes, FromBytes, KnownLayout, Immutable)]
pub struct SeccompData(seccomp_data);
147
// Describes how seccomp programs are run: no run context, a borrowed `SeccompData` as the
// packet, no maps, and the seccomp-specific cBPF configuration.
impl BpfProgramContext for SeccompFilter {
    type RunContext<'a> = ();
    type Packet<'a> = &'a SeccompData;
    type Map = NoMap;
    const CBPF_CONFIG: &'static CbpfConfig = &SECCOMP_CBPF_CONFIG;
}

// Going by the macro name, this declares an empty helper set for `SeccompFilter` programs.
ebpf::empty_static_helper_set!(SeccompFilter);
156
// The verifier type reported for the `&SeccompData` program argument; built lazily on first use.
// NOTE(review): `buffer_size` is 0 here -- confirm the verifier does not rely on it to bound
// accesses into `seccomp_data`.
static SECCOMP_DATA_TYPE: LazyLock<Type> =
    LazyLock::new(|| Type::PtrToMemory { id: MemoryId::new(), offset: 0.into(), buffer_size: 0 });

impl ProgramArgument for &'_ SeccompData {
    /// Returns the shared `SECCOMP_DATA_TYPE` descriptor.
    fn get_type() -> &'static Type {
        &*SECCOMP_DATA_TYPE
    }
}
165
/// Upper bound on the accounted instruction total of one filter container; see
/// `SeccompFilterContainer::add_filter`, which rejects a filter that would exceed it.
const SECCOMP_MAX_INSNS_PER_PATH: u16 = 32768;
167
/// A list of seccomp filters, intended to be associated with a specific process.
#[derive(Default)]
pub struct SeccompFilterContainer {
    /// List of currently installed seccomp_filters; most recently added is last.
    pub filters: Vec<Arc<SeccompFilter>>,

    /// The total length of the provided seccomp filters, which cannot
    /// exceed SECCOMP_MAX_INSNS_PER_PATH - 4 * the number of filters.  This is stored
    /// instead of computed because we store seccomp filters in an
    /// expanded form, and it is impossible to get the original length.
    pub provided_instructions: u16,

    /// Data needed by SECCOMP_RET_USER_NOTIF.  `None` until a listener has been created.
    pub notifier: Option<SeccompNotifierHandle>,
}
183
184impl Clone for SeccompFilterContainer {
185    fn clone(&self) -> Self {
186        if let Some(n) = &self.notifier {
187            n.lock().add_thread();
188        }
189        SeccompFilterContainer {
190            filters: self.filters.clone(),
191            provided_instructions: self.provided_instructions,
192            notifier: self.notifier.clone(),
193        }
194    }
195}
196
197impl Drop for SeccompFilterContainer {
198    fn drop(&mut self) {
199        if let Some(n) = &self.notifier {
200            // Notifier needs to send threads a HUP when there is no one left
201            // referencing it.
202            n.lock().remove_thread();
203        }
204    }
205}
206
207fn make_seccomp_data(
208    #[allow(unused_variables)] current_task: &CurrentTask,
209    syscall: &Syscall,
210    ip: u64,
211) -> seccomp_data {
212    #[cfg(target_arch = "x86_64")]
213    let arch_val = AUDIT_ARCH_X86_64;
214    #[cfg(target_arch = "aarch64")]
215    let arch_val = if current_task.is_arch32() { AUDIT_ARCH_ARM } else { AUDIT_ARCH_AARCH64 };
216    #[cfg(target_arch = "riscv64")]
217    let arch_val = AUDIT_ARCH_RISCV64;
218    seccomp_data {
219        nr: syscall.decl.number as i32,
220        arch: arch_val,
221        instruction_pointer: ip,
222        args: [
223            syscall.arg0.raw(),
224            syscall.arg1.raw(),
225            syscall.arg2.raw(),
226            syscall.arg3.raw(),
227            syscall.arg4.raw(),
228            syscall.arg5.raw(),
229        ],
230    }
231}
232
233impl SeccompFilterContainer {
234    /// Ensures that this set of seccomp filters can be "synced to" the given set.
235    /// This means that our filters are a prefix of the given set of filters.
236    pub fn can_sync_to(&self, source: &SeccompFilterContainer) -> bool {
237        if source.filters.len() < self.filters.len() {
238            return false;
239        }
240        for (filter, other_filter) in self.filters.iter().zip(source.filters.iter()) {
241            if other_filter.unique_id != filter.unique_id {
242                return false;
243            }
244        }
245        true
246    }
247
248    /// Adds the given filter to this list.  The original_length parameter is the length of
249    /// the originally provided BPF (i.e., the number of sock_filter instructions), used
250    /// to ensure the total length does not exceed SECCOMP_MAX_INSNS_PER_PATH
251    pub fn add_filter(
252        &mut self,
253        filter: Arc<SeccompFilter>,
254        original_length: u16,
255    ) -> Result<(), Errno> {
256        let maybe_new_length = self.provided_instructions + original_length + 4;
257        if maybe_new_length > SECCOMP_MAX_INSNS_PER_PATH {
258            return error!(ENOMEM);
259        }
260
261        self.provided_instructions = maybe_new_length;
262        self.filters.push(filter);
263        Ok(())
264    }
265
266    /// Runs all of the seccomp filters in this container, most-to-least recent.  Returns the
267    /// highest priority result (which contains a reference to the filter that generated it)
268    pub fn run_all(&self, current_task: &CurrentTask, syscall: &Syscall) -> SeccompFilterResult {
269        let mut r = SeccompFilterResult { action: SeccompAction::Allow, filter: None };
270
271        // VDSO calls can't be caught by seccomp, so most seccomp filters forget to declare them.
272        // But our VDSO implementation is incomplete, and most of the calls forward to the actual
273        // syscalls. So seccomp should ignore them until they're implemented correctly in the VDSO.
274        #[cfg(target_arch = "x86_64")] // The set of VDSO calls is arch dependent.
275        #[allow(non_upper_case_globals)]
276        if let __NR_clock_gettime | __NR_getcpu | __NR_gettimeofday | __NR_time =
277            syscall.decl.number as u32
278        {
279            return r;
280        }
281        #[cfg(target_arch = "aarch64")]
282        #[allow(non_upper_case_globals)]
283        if let __NR_clock_gettime | __NR_clock_getres | __NR_gettimeofday =
284            syscall.decl.number as u32
285        {
286            return r;
287        }
288
289        let data = make_seccomp_data(
290            current_task,
291            syscall,
292            current_task.thread_state.registers.instruction_pointer_register(),
293        );
294
295        // Filters are executed in reverse order of addition
296        for filter in self.filters.iter().rev() {
297            let new_result = filter.run(&data);
298
299            let action = SeccompAction::from_u32(new_result).unwrap_or(SeccompAction::KillProcess);
300
301            if SeccompAction::has_prio(&action, &r.action) == std::cmp::Ordering::Less {
302                r = SeccompFilterResult { action, filter: Some(filter.clone()) };
303            }
304        }
305        r
306    }
307
    /// Creates a new listener for use by SECCOMP_RET_USER_NOTIF.  Returns its fd.
    ///
    /// Fails with `EBUSY` if the current task already has a notifier; errors from creating
    /// the anonymous file or from adding it to the fd table are propagated.
    pub fn create_listener(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
    ) -> Result<FdNumber, Errno> {
        // Create the `Anon` handle file before taking the write lock on the task, because
        // `Anon::new_file()` needs to read the `current_task` SID to label the file object.
        let the_notifier = SeccompNotifier::new();
        let handle = Anon::new_file(
            locked,
            current_task,
            Box::new(SeccompNotifierFileObject { notifier: the_notifier.clone() }),
            OpenFlags::RDWR,
            "seccomp notify",
        )?;

        // Take the write lock to check for an existing notifier, and initialize and store the new
        // notifier otherwise.
        let filters = &mut current_task.write().seccomp_filters;
        if filters.notifier.is_some() {
            return error!(EBUSY);
        }
        // The listener fd is installed close-on-exec.
        let fd = current_task.add_file(locked, handle, FdFlags::CLOEXEC)?;
        {
            // Count the current task as a user of the notifier before publishing it.
            let mut state = the_notifier.lock();
            state.add_thread();
        }
        filters.notifier = Some(the_notifier);
        Ok(fd)
    }
338}
339
/// Possible values for the current status of the seccomp filters for
/// this process.  Each variant's discriminant is the corresponding SECCOMP_MODE_* constant,
/// narrowed to `u8` so that it can be stored in `SeccompState`'s atomic.
#[repr(u8)]
#[derive(Clone, Copy, PartialEq)]
pub enum SeccompStateValue {
    /// Seccomp is disabled (SECCOMP_MODE_DISABLED).
    None = SECCOMP_MODE_DISABLED as u8,
    /// Strict mode (SECCOMP_MODE_STRICT).
    Strict = SECCOMP_MODE_STRICT as u8,
    /// Filter mode (SECCOMP_MODE_FILTER).
    UserDefined = SECCOMP_MODE_FILTER as u8,
}
349
/// Per-process state that cannot be stored in the container (e.g., whether there is a container).
#[derive(Default)]
pub struct SeccompState {
    // This AtomicU8 corresponds to a SeccompStateValue.  It is only ever read and written
    // through `get`/`set`/`from`, which use the u8 discriminants of that enum.
    filter_state: AtomicU8,
}
356
357impl SeccompState {
358    pub fn from(state: &SeccompState) -> SeccompState {
359        SeccompState { filter_state: AtomicU8::new(state.filter_state.load(Ordering::Acquire)) }
360    }
361
362    fn from_u8(value: u8) -> SeccompStateValue {
363        match value {
364            v if v == SECCOMP_MODE_DISABLED as u8 => SeccompStateValue::None,
365            v if v == SECCOMP_MODE_STRICT as u8 => SeccompStateValue::Strict,
366            v if v == SECCOMP_MODE_FILTER as u8 => SeccompStateValue::UserDefined,
367            _ => unreachable!(),
368        }
369    }
370
371    pub fn get(&self) -> SeccompStateValue {
372        Self::from_u8(self.filter_state.load(Ordering::Acquire))
373    }
374
375    pub fn set(&self, state: &SeccompStateValue) -> Result<(), Errno> {
376        loop {
377            let seccomp_filter_status = self.get();
378            if seccomp_filter_status == *state {
379                return Ok(());
380            }
381            if seccomp_filter_status != SeccompStateValue::None {
382                return error!(EINVAL);
383            }
384
385            if self
386                .filter_state
387                .compare_exchange(
388                    seccomp_filter_status as u8,
389                    *state as u8,
390                    Ordering::Release,
391                    Ordering::Acquire,
392                )
393                .is_ok()
394            {
395                return Ok(());
396            }
397        }
398    }
399
400    /// Check to see if this syscall is allowed in STRICT mode, and, if not,
401    /// send the current task a SIGKILL.
402    pub fn do_strict(
403        locked: &mut Locked<Unlocked>,
404        task: &Task,
405        syscall: &Syscall,
406    ) -> Option<Result<SyscallResult, Errno>> {
407        if syscall.decl.number as u32 != __NR_exit
408            && syscall.decl.number as u32 != __NR_read
409            && syscall.decl.number as u32 != __NR_write
410        {
411            send_standard_signal(locked, task, SignalInfo::kernel(SIGKILL));
412            return Some(Err(errno_from_code!(0)));
413        }
414        None
415    }
416
417    // This is supposed to be put in the audit log, but starnix does not yet have an
418    // audit log.  Also, it does not match the Linux format.  Still, the machinery
419    // is in place for when we have to support it for real.
420    fn log_action(task: &CurrentTask, syscall: &Syscall) {
421        let creds = task.current_creds();
422        let (uid, gid) = (creds.uid, creds.gid);
423        let arch = if cfg!(target_arch = "x86_64") {
424            "x86_64"
425        } else if cfg!(target_arch = "aarch64") {
426            "aarch64"
427        } else {
428            "unknown"
429        };
430        task.kernel().audit_logger().audit_log(AUDIT_SECCOMP as u16, || {
431            format!(
432                "uid={} gid={} pid={} comm={} syscall={} ip={} ARCH={} SYSCALL={}",
433                uid,
434                gid,
435                task.thread_group().leader,
436                task.command(),
437                syscall.decl.number,
438                task.thread_state.registers.instruction_pointer_register(),
439                arch,
440                syscall.decl.name(),
441            )
442        });
443    }
444
    /// Take the given |action| on the given |task|.  The action is one of the SECCOMP_RET values
    /// (ALLOW, LOG, KILL, KILL_PROCESS, TRAP, ERRNO, USER_NOTIF, TRACE).  |task| is the thread that
    /// invoked the syscall, and |syscall| is the syscall that was invoked.
    /// Returns the result that the syscall will be forced to return by this
    /// filter, or None, if the syscall should return its actual return value.
    ///
    /// Before acting, the action is written to the audit log if a filter produced it and
    /// `SeccompAction::is_logged` says so for that filter's log flag.
    // NB: Allow warning below so that it is clear what we are doing on KILL_PROCESS
    #[allow(clippy::wildcard_in_or_patterns)]
    pub fn do_user_defined(
        locked: &mut Locked<Unlocked>,
        result: SeccompFilterResult,
        current_task: &mut CurrentTask,
        syscall: &Syscall,
    ) -> Option<Result<SyscallResult, Errno>> {
        let action = result.action;
        if let Some(filter) = result.filter.as_ref() {
            if action.is_logged(current_task.kernel(), filter.log) {
                Self::log_action(current_task, syscall);
            }
        }
        match action {
            SeccompAction::Allow => None,
            // The syscall is not run; it fails with the errno carried by the action.
            SeccompAction::Errno(code) => Some(Err(errno_from_code!(code as i16))),
            SeccompAction::KillThread => {
                let siginfo = SignalInfo::kernel(SIGSYS);

                let is_last_thread = current_task.thread_group().read().tasks_count() == 1;
                let mut task_state = current_task.write();

                // The last thread of the group exits with a core dump status; any other
                // thread exits with a plain kill status.
                if is_last_thread {
                    task_state.set_flags(TaskFlags::DUMP_ON_EXIT, true);
                    task_state.set_exit_status_if_not_already(ExitStatus::CoreDump(siginfo));
                } else {
                    task_state.set_exit_status_if_not_already(ExitStatus::Kill(siginfo));
                }
                Some(Err(errno_from_code!(0)))
            }
            SeccompAction::KillProcess => {
                current_task
                    .thread_group_exit(locked, ExitStatus::CoreDump(SignalInfo::kernel(SIGSYS)));
                Some(Err(errno_from_code!(0)))
            }
            // LOG always writes a log entry here and then lets the syscall proceed.
            SeccompAction::Log => {
                Self::log_action(current_task, syscall);
                None
            }
            SeccompAction::Trace => {
                track_stub!(TODO("https://fxbug.dev/297311898"), "ptrace seccomp support");
                Some(error!(ENOSYS))
            }
            SeccompAction::Trap(errno) => {
                #[cfg(target_arch = "x86_64")]
                let arch_val = AUDIT_ARCH_X86_64;
                #[cfg(target_arch = "aarch64")]
                let arch_val =
                    if current_task.is_arch32() { AUDIT_ARCH_ARM } else { AUDIT_ARCH_AARCH64 };
                #[cfg(target_arch = "riscv64")]
                let arch_val = AUDIT_ARCH_RISCV64;

                // Deliver a SIGSYS describing the trapped syscall; the data carried by the
                // action is passed as the signal's errno.
                let siginfo = SignalInfo::new(
                    SIGSYS,
                    errno as i32,
                    SYS_SECCOMP as i32,
                    SignalDetail::SIGSYS {
                        call_addr: current_task
                            .thread_state
                            .registers
                            .instruction_pointer_register()
                            .into(),
                        syscall: syscall.decl.number as i32,
                        arch: arch_val,
                    },
                    true,
                    None,
                );

                send_standard_signal(locked, current_task, siginfo);
                // NOTE(review): the forced result is built from the negated syscall number
                // narrowed to i16 -- confirm this is the intended return value.
                Some(Err(errno_from_code!(-(syscall.decl.number as i16))))
            }
            SeccompAction::UserNotif => {
                if let Some(notifier) = current_task.get_seccomp_notifier() {
                    // The cookie identifies this notification; it comes from the filter that
                    // produced the USER_NOTIF result.
                    let cookie = result.filter.as_ref().unwrap().cookie.next();
                    let msg = seccomp_notif {
                        id: cookie,
                        pid: current_task.tid as u32,
                        flags: 0,
                        data: make_seccomp_data(
                            current_task,
                            syscall,
                            current_task.thread_state.registers.instruction_pointer_register(),
                        ),
                    };
                    // First, add a pending notification, and wake up the supervisor waiting for it.
                    let waiter = Waiter::new();
                    {
                        let mut notifier = notifier.lock();
                        if notifier.is_closed {
                            // Someone explicitly close()d the fd with the notifier, which does not
                            // clear the thread-local notifier.  Do it now.
                            drop(notifier);
                            current_task.set_seccomp_notifier(None);
                            return Some(error!(ENOSYS));
                        }
                        notifier.create_notification(cookie, msg);
                        notifier.waiters.wait_async_value(&waiter, cookie);
                    }

                    // Next, wait for a response from the supervisor
                    if let Err(e) = waiter.wait(locked, current_task) {
                        return Some(Err(e));
                    }

                    // Fetch the response.
                    let resp: Option<seccomp_notif_resp>;
                    {
                        let mut notifier = notifier.lock();
                        resp = notifier.get_response(cookie);
                        notifier.delete_notification(cookie);
                    }

                    // The response indicates what you are supposed to do with this syscall.
                    // A non-zero `val` is checked first, then a non-zero `error`, and only
                    // then the CONTINUE flag.
                    if let Some(response) = resp {
                        if response.val != 0 {
                            return Some(Ok(response.val.into()));
                        }
                        if response.error != 0 {
                            // A positive `error` is returned as a success value; a negative
                            // one is negated and returned as an errno.
                            if response.error > 0 {
                                return Some(Ok(response.error.into()));
                            } else {
                                return Some(Err(errno_from_code!(-response.error as i16)));
                            }
                        }
                        if response.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0 {
                            return None;
                        }
                    }
                    // No response, or an all-zero response: the syscall returns 0.
                    Some(Ok(0.into()))
                } else {
                    // No notifier is attached to this task.
                    Some(error!(ENOSYS))
                }
            }
        }
    }
587}
588
/// A decoded seccomp filter return value: one variant per SECCOMP_RET_* action.  `Errno` and
/// `Trap` additionally carry the data bits (SECCOMP_RET_DATA) of the raw return value.
#[derive(Clone, Copy, PartialEq)]
pub enum SeccompAction {
    Allow,
    /// Carries the errno to return, capped at 0xfff by `from_u32`.
    Errno(u32),
    KillProcess,
    KillThread,
    Log,
    /// Carries the data bits of the raw return value.
    Trap(u32),
    Trace,
    UserNotif,
}
600
601impl SeccompAction {
602    pub fn is_action_available(action: u32) -> Result<SyscallResult, Errno> {
603        if SeccompAction::from_u32(action).is_none() {
604            return error!(EOPNOTSUPP);
605        }
606        Ok(0.into())
607    }
608
609    pub fn from_u32(action: u32) -> Option<SeccompAction> {
610        match action & !SECCOMP_RET_DATA {
611            linux_uapi::SECCOMP_RET_ALLOW => Some(Self::Allow),
612            linux_uapi::SECCOMP_RET_ERRNO => {
613                let mut action = action & SECCOMP_RET_DATA;
614                // Linux kernel compatibility: if errno exceeds 0xfff, it is capped at 0xfff.
615                action = std::cmp::min(action & 0xffff, 0xfff);
616                Some(Self::Errno(action))
617            }
618            linux_uapi::SECCOMP_RET_KILL_PROCESS => Some(Self::KillProcess),
619            linux_uapi::SECCOMP_RET_KILL_THREAD => Some(Self::KillThread),
620            linux_uapi::SECCOMP_RET_LOG => Some(Self::Log),
621            linux_uapi::SECCOMP_RET_TRACE => Some(Self::Trace),
622            linux_uapi::SECCOMP_RET_TRAP => Some(Self::Trap(action & SECCOMP_RET_DATA)),
623
624            linux_uapi::SECCOMP_RET_USER_NOTIF => Some(Self::UserNotif),
625            _ => None,
626        }
627    }
628
629    pub fn to_isize(self) -> isize {
630        match self {
631            Self::Allow => linux_uapi::SECCOMP_RET_ALLOW as isize,
632            Self::Errno(x) => (linux_uapi::SECCOMP_RET_ERRNO | x) as isize,
633            Self::KillProcess => linux_uapi::SECCOMP_RET_KILL_PROCESS as isize,
634            Self::KillThread => linux_uapi::SECCOMP_RET_KILL_THREAD as isize,
635            Self::Log => linux_uapi::SECCOMP_RET_LOG as isize,
636            Self::Trace => linux_uapi::SECCOMP_RET_TRACE as isize,
637            Self::Trap(x) => (linux_uapi::SECCOMP_RET_TRAP | x) as isize,
638            Self::UserNotif => linux_uapi::SECCOMP_RET_USER_NOTIF as isize,
639        }
640    }
641
642    pub fn canonical_name(self) -> &'static str {
643        match self {
644            Self::Allow => &"allow",
645            Self::Errno(_) => &"errno",
646            Self::KillProcess => &"kill_process",
647            Self::KillThread => &"kill_thread",
648            Self::Log => &"log",
649            Self::Trace => &"trace",
650            Self::Trap(_) => &"trap",
651            Self::UserNotif => &"user_notif",
652        }
653    }
654
655    pub fn has_prio(a: &SeccompAction, b: &SeccompAction) -> std::cmp::Ordering {
656        let anum = a.to_isize() as i32;
657        let bnum = b.to_isize() as i32;
658        let fullnum = SECCOMP_RET_ACTION_FULL as i32;
659        let aval = anum & fullnum;
660        let bval = bnum & fullnum;
661        aval.cmp(&bval)
662    }
663
664    /// Returns a vector of all available actions, sorted by priority.
665    pub fn all_actions() -> Vec<SeccompAction> {
666        let mut result = vec![
667            Self::Allow,
668            Self::Errno(0),
669            Self::KillProcess,
670            Self::KillThread,
671            Self::Log,
672            Self::Trace,
673            Self::Trap(0),
674            Self::UserNotif,
675        ];
676
677        result.sort_by(Self::has_prio);
678        result
679    }
680
681    /// Gets the contents of /proc/sys/kernel/seccomp/actions_avail
682    pub fn get_actions_avail_file() -> Vec<u8> {
683        let all_actions = Self::all_actions();
684        if all_actions.len() == 0 {
685            return vec![];
686        }
687        let mut result = String::from(all_actions[0].canonical_name());
688        for i in 1..all_actions.len() {
689            result.push_str(" ");
690            result.push_str(all_actions[i].canonical_name());
691        }
692        result.push('\n');
693        result.into_bytes()
694    }
695
    /// Returns the position of this action's bit within the actions-logged bitmask
    /// (`Kernel::actions_logged`).  Payloads are ignored, and bit 0 is not assigned to any
    /// action.
    fn logged_bit_offset(&self) -> u32 {
        match self {
            Self::Allow => 1,
            Self::Errno(_) => 2,
            Self::KillProcess => 3,
            Self::KillThread => 4,
            Self::Log => 5,
            Self::Trace => 6,
            Self::Trap(_) => 7,
            Self::UserNotif => 8,
        }
    }
708
709    fn set_logged_bit(&self, dst: &mut u16) {
710        *dst |= 1 << self.logged_bit_offset();
711    }
712
713    pub fn is_logged(&self, kernel: &Kernel, filter_flag: bool) -> bool {
714        if kernel.actions_logged.load(Ordering::Relaxed) & (1 << self.logged_bit_offset()) != 0 {
715            match self {
716                // Per the documentation on audit logging of seccomp actions in
717                // seccomp(2), just because it is listed as logged, that doesn't
718                // mean we actually log it.
719
720                // If it is KILL_PROCESS or KILL_THREAD, return true
721                Self::KillProcess | Self::KillThread => true,
722                // If it is one of these and the filter flag was set, return true.
723                Self::Errno(_) | Self::Log | Self::Trap(_) | Self::UserNotif => filter_flag,
724                // Never log ALLOW
725                _ => false,
726            }
727        } else {
728            false
729        }
730    }
731
732    pub fn set_actions_logged(kernel: &Kernel, data: &[u8]) -> Result<(), Errno> {
733        let mut new_actions_logged: u16 = 0;
734        for action_res in data.fields_with(|c| c.is_ascii_whitespace()) {
735            if let Ok(action) = action_res.to_str() {
736                match action {
737                    "errno" => Self::Errno(0).set_logged_bit(&mut new_actions_logged),
738                    "kill_process" => Self::KillProcess.set_logged_bit(&mut new_actions_logged),
739                    "kill_thread" => Self::KillThread.set_logged_bit(&mut new_actions_logged),
740                    "log" => Self::Log.set_logged_bit(&mut new_actions_logged),
741                    "trace" => Self::Trace.set_logged_bit(&mut new_actions_logged),
742                    "trap" => Self::Trap(0).set_logged_bit(&mut new_actions_logged),
743                    "user_notif" => Self::UserNotif.set_logged_bit(&mut new_actions_logged),
744                    // Not allowed to write anything other than the approved actions to that list.
745                    _ => return error!(EINVAL),
746                }
747            } else {
748                return error!(EINVAL);
749            }
750        }
751        kernel.actions_logged.store(new_actions_logged, Ordering::Relaxed);
752        Ok(())
753    }
754
755    pub fn get_actions_logged(kernel: &Kernel) -> Vec<u8> {
756        let al = kernel.actions_logged.load(Ordering::Relaxed);
757        let mut result: String = "".to_string();
758        for action in Self::all_actions() {
759            if (al & (1 << action.logged_bit_offset())) != 0 {
760                result.push_str(action.canonical_name());
761                result.push(' ');
762            }
763        }
764        if !result.is_empty() {
765            // remove trailing whitespace.
766            result.pop();
767        }
768
769        result.into_bytes()
770    }
771}
772
/// This struct contains data that needs to be shuttled back and forth between the thread doing
/// a USER_NOTIF and the supervisor thread responding to it.
#[derive(Default)]
struct SeccompNotification {
    /// notif is the notification set by the filter.  When this is set, the associated fd will
    /// be set to POLLIN.
    notif: seccomp_notif,

    /// Consumed indicates whether a supervisor process has read this notification (and so it
    /// can no longer be consumed by any other SECCOMP_IOCTL_NOTIF_RECV ioctl).  When the notif
    /// is consumed, the associated fd will be set to POLLOUT, indicating that it is ready to
    /// receive a response.
    consumed: bool,

    /// resp is the response that the supervisor sends.  When this is set, an event will be sent
    /// to SeccompNotifier::waiters corresponding to the unique id of the notification.  This
    /// will wake up the filter that is waiting for this particular response.
    resp: Option<seccomp_notif_resp>,
}
792
793impl SeccompNotification {
794    fn new(data: seccomp_notif) -> SeccompNotification {
795        SeccompNotification { notif: data, resp: None, consumed: false }
796    }
797}
798
/// The underlying implementation of the file descriptor that connects a process that triggers a
/// SECCOMP_RET_USER_NOTIF with the monitoring process. This supports seccomp's ability to notify
/// a user-space process on specific syscall triggers. See seccomp_unotify(2) for the semantics.
pub struct SeccompNotifier {
    /// Wait queue used both for fd events (POLLIN / POLLOUT / POLLHUP on the notifier fd) and
    /// for per-notification value events keyed by the notification's unique id.
    waiters: WaitQueue,

    /// Notifications that have been created and not yet deleted, keyed by their unique id.
    pending_notifications: HashMap<u64, SeccompNotification>,

    /// The number of threads using this notifier as a filter. If that hits zero, the
    /// listeners need to receive a HUP.
    num_active_threads: u64,

    /// Notifiers are referenced both by fds and in SeccompFilterContainer. If the file no
    /// longer has fds referring to it, it will be closed, and the SeccompFilterContainers
    /// should stop using it.
    pub is_closed: bool,
}
816
/// Shared, mutex-protected handle to a `SeccompNotifier`, as returned by
/// `SeccompNotifier::new`.
pub type SeccompNotifierHandle = Arc<Mutex<SeccompNotifier>>;
818
819impl SeccompNotifier {
820    pub fn new() -> SeccompNotifierHandle {
821        Arc::new(Mutex::new(SeccompNotifier {
822            waiters: WaitQueue::default(),
823            pending_notifications: HashMap::default(),
824            num_active_threads: 0,
825            is_closed: false,
826        }))
827    }
828
829    fn add_thread(&mut self) {
830        self.num_active_threads += 1;
831    }
832
833    fn remove_thread(&mut self) {
834        self.num_active_threads -= 1;
835        if self.num_active_threads == 0 {
836            self.waiters.notify_fd_events(FdEvents::POLLHUP);
837        }
838    }
839
840    // Creates a pending notification for communication between the
841    // target thread and a supervisor, and notifies readers there is
842    // an opportunity to read.
843    fn create_notification(&mut self, cookie: u64, notif: seccomp_notif) {
844        self.pending_notifications.insert(cookie, SeccompNotification::new(notif));
845        self.waiters.notify_fd_events(FdEvents::POLLIN | FdEvents::POLLRDNORM);
846    }
847
848    // Gets a notification that needs to be handled by a supervisor,
849    // and notifies waiters that there is an opportunity to write.
850    fn consume_some_notification(&mut self) -> Option<seccomp_notif> {
851        for (_, notif) in self.pending_notifications.iter_mut() {
852            if !notif.consumed {
853                notif.consumed = true;
854                self.waiters.notify_fd_events(FdEvents::POLLOUT | FdEvents::POLLWRNORM);
855                return Some(notif.notif);
856            }
857        }
858        None
859    }
860
861    // In case something goes wrong after we consume the notification.
862    fn unconsume(&mut self, cookie: u64) {
863        if let Some(n) = self.pending_notifications.get_mut(&cookie).as_mut() {
864            n.consumed = false;
865        }
866    }
867
868    // Returns the appropriate notifications if someone is waiting with poll/epoll/select.
869    fn get_fd_notifications(&self) -> FdEvents {
870        let mut events = FdEvents::empty();
871
872        for (_, notification) in self.pending_notifications.iter() {
873            if !notification.consumed {
874                events |= FdEvents::POLLIN | FdEvents::POLLRDNORM;
875            } else if notification.resp.is_none() {
876                events |= FdEvents::POLLOUT | FdEvents::POLLWRNORM;
877            }
878        }
879
880        if self.num_active_threads == 0 {
881            events |= FdEvents::POLLHUP;
882        }
883        events
884    }
885
886    // Sets the value read by the target in response to this notification.  Intended for use by the
887    // supervisor.  Notifies the filter there is a response to this request.
888    fn set_response(&mut self, cookie: u64, resp: seccomp_notif_resp) -> Option<Errno> {
889        if let Some(entry) = self.pending_notifications.get_mut(&cookie) {
890            if entry.resp.is_some() {
891                return Some(errno!(EINPROGRESS));
892            }
893            entry.resp = Some(resp);
894            self.waiters.notify_value(resp.id);
895            None
896        } else {
897            Some(errno!(EINVAL))
898        }
899    }
900
901    // Gets the value set by the supervisor for the target to read.
902    fn get_response(&self, cookie: u64) -> Option<seccomp_notif_resp> {
903        if let Some(value) = self.pending_notifications.get(&cookie) {
904            return value.resp;
905        }
906        None
907    }
908
909    // Returns whether the cookie represents an active notification.
910    fn notification_pending(&self, cookie: u64) -> bool {
911        self.pending_notifications.contains_key(&cookie)
912    }
913
914    // Deletes the notification, when the target is done processing it.
915    fn delete_notification(&mut self, cookie: u64) {
916        let _ = self.pending_notifications.remove(&cookie);
917    }
918}
919
/// File operations backing the seccomp notifier file descriptor. Every operation acts on the
/// shared notifier state behind `notifier`.
struct SeccompNotifierFileObject {
    /// Handle to the notifier state this file exposes.
    notifier: SeccompNotifierHandle,
}
923
924impl FileOps for SeccompNotifierFileObject {
925    fileops_impl_nonseekable!();
926    fileops_impl_noop_sync!();
927
928    fn close(
929        self: Box<Self>,
930        _locked: &mut Locked<FileOpsCore>,
931        _file: &FileObjectState,
932        _current_task: &CurrentTask,
933    ) {
934        let mut state = self.notifier.lock();
935
936        for (cookie, notification) in state.pending_notifications.iter() {
937            if !notification.consumed {
938                state.waiters.notify_value(*cookie);
939                state.waiters.notify_fd_events(FdEvents::POLLIN | FdEvents::POLLRDNORM);
940            } else if notification.resp.is_none() {
941                state.waiters.notify_fd_events(FdEvents::POLLOUT | FdEvents::POLLWRNORM);
942            }
943        }
944        state.waiters.notify_fd_events(FdEvents::POLLHUP);
945
946        state.pending_notifications.clear();
947
948        state.is_closed = true;
949    }
950
951    fn read(
952        &self,
953        _locked: &mut Locked<FileOpsCore>,
954        _file: &FileObject,
955        _current_task: &CurrentTask,
956        _offset: usize,
957        _usize: &mut dyn OutputBuffer,
958    ) -> Result<usize, Errno> {
959        error!(EINVAL)
960    }
961
962    fn write(
963        &self,
964        _locked: &mut Locked<FileOpsCore>,
965        _file: &FileObject,
966        _current_task: &CurrentTask,
967        _offset: usize,
968        _buffer: &mut dyn InputBuffer,
969    ) -> Result<usize, Errno> {
970        error!(EINVAL)
971    }
972
973    fn ioctl(
974        &self,
975        locked: &mut Locked<Unlocked>,
976        _file: &FileObject,
977        current_task: &CurrentTask,
978        request: u32,
979        arg: SyscallArg,
980    ) -> Result<SyscallResult, Errno> {
981        let user_addr = UserAddress::from(arg);
982        match request {
983            SECCOMP_IOCTL_NOTIF_RECV => {
984                if let Ok(notif) =
985                    current_task.read_memory_to_vec(user_addr, std::mem::size_of::<seccomp_notif>())
986                {
987                    for value in notif.iter() {
988                        if *value != 0 {
989                            return error!(EINVAL);
990                        }
991                    }
992                }
993                // A RECV reads a notification, optionally waiting for one to become available.
994                let mut notif: Option<seccomp_notif>;
995                loop {
996                    // Grab a notification or wait for one to become readable.
997                    let waiter = Waiter::new();
998                    {
999                        let mut notifier = self.notifier.lock();
1000                        notif = notifier.consume_some_notification();
1001                        if notif.is_some() {
1002                            break;
1003                        }
1004                        notifier.waiters.wait_async_fd_events(
1005                            &waiter,
1006                            FdEvents::POLLIN | FdEvents::POLLHUP,
1007                            EventHandler::None,
1008                        );
1009                    }
1010                    waiter.wait(locked, current_task)?;
1011                }
1012                if let Some(notif) = notif {
1013                    if let Err(e) =
1014                        current_task.write_object(UserRef::<seccomp_notif>::new(user_addr), &notif)
1015                    {
1016                        self.notifier.lock().unconsume(notif.id);
1017                        return Err(e);
1018                    }
1019                }
1020
1021                Ok(0.into())
1022            }
1023            SECCOMP_IOCTL_NOTIF_SEND => {
1024                // A SEND sends a response to a previously received notification.
1025                let resp: seccomp_notif_resp = current_task.read_object(UserRef::new(user_addr))?;
1026                if resp.flags & !SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0 {
1027                    return error!(EINVAL);
1028                }
1029                if resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0
1030                    && (resp.error != 0 || resp.val != 0)
1031                {
1032                    return error!(EINVAL);
1033                }
1034                {
1035                    let mut notifier = self.notifier.lock();
1036                    if let Some(err) = notifier.set_response(resp.id, resp) {
1037                        return Err(err);
1038                    }
1039                }
1040                Ok(0.into())
1041            }
1042            SECCOMP_IOCTL_NOTIF_ID_VALID => {
1043                // An ID_VALID indicates that the notification is still in progress.
1044                let cookie: u64 = current_task.read_object(UserRef::new(user_addr))?;
1045                {
1046                    let notifier = self.notifier.lock();
1047                    if notifier.notification_pending(cookie) {
1048                        Ok(0.into())
1049                    } else {
1050                        error!(ENOENT)
1051                    }
1052                }
1053            }
1054            SECCOMP_IOCTL_NOTIF_ADDFD => error!(EINVAL),
1055            _ => error!(EINVAL),
1056        }
1057    }
1058
1059    fn wait_async(
1060        &self,
1061        _locked: &mut Locked<FileOpsCore>,
1062        _file: &FileObject,
1063        _current_task: &CurrentTask,
1064        waiter: &Waiter,
1065        events: FdEvents,
1066        handler: EventHandler,
1067    ) -> Option<WaitCanceler> {
1068        let notifier = self.notifier.lock();
1069        Some(notifier.waiters.wait_async_fd_events(waiter, events, handler))
1070    }
1071
1072    fn query_events(
1073        &self,
1074        _locked: &mut Locked<FileOpsCore>,
1075        _file: &FileObject,
1076        _current_task: &CurrentTask,
1077    ) -> Result<FdEvents, Errno> {
1078        Ok(self.notifier.lock().get_fd_notifications())
1079    }
1080}
1081
#[cfg(test)]
mod test {
    use crate::task::SeccompAction;
    use crate::testing::spawn_kernel_and_run;

    /// Writing the full list of available actions must be rejected (it is expected to name
    /// "allow"), while the same list with "allow" removed must be accepted.
    #[::fuchsia::test]
    async fn test_actions_logged_accepts_legal_string() {
        spawn_kernel_and_run(async |_, current_task| {
            let kernel = current_task.kernel();
            let mut available = SeccompAction::get_actions_avail_file();
            // This is a test in Rust instead of a syscall test because we don't want to change
            // the global config in a test.
            let rejected = SeccompAction::set_actions_logged(&kernel, &available[..]);
            assert!(
                rejected.is_err(),
                "Should not be able to write allow to actions_logged file"
            );

            // Strip the one action name that may not be logged.
            let text = std::string::String::from_utf8(available.clone()).unwrap();
            if let Some(start) = text.find("allow") {
                let end = start + "allow".len();
                available.drain(start..end);
            }

            if let Err(err) = SeccompAction::set_actions_logged(&kernel, &available[..]) {
                panic!(
                    "Could not write legal string \"{}\" to actions_logged file: error {}",
                    std::string::String::from_utf8(available.clone()).unwrap(),
                    err
                );
            }
        })
        .await;
    }
}