starnix_core/task/
seccomp.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::mm::MemoryAccessorExt;
6use crate::signals::{SignalDetail, SignalInfo, SignalSource, send_standard_signal};
7use crate::task::{
8    CurrentTask, EventHandler, ExitStatus, Kernel, Task, TaskFlags, WaitCanceler, WaitQueue, Waiter,
9};
10use crate::vfs::buffers::{InputBuffer, OutputBuffer};
11use crate::vfs::{
12    Anon, FdFlags, FdNumber, FileObject, FileObjectState, FileOps, fileops_impl_nonseekable,
13    fileops_impl_noop_sync,
14};
15use bstr::ByteSlice;
16use ebpf::{
17    BPF_ABS, BPF_LD, BPF_ST, BpfProgramContext, CbpfConfig, EbpfProgram, MemoryId, NoMap,
18    ProgramArgument, Type, bpf_addressing_mode, bpf_class, convert_and_link_cbpf,
19};
20use ebpf_api::SECCOMP_CBPF_CONFIG;
21use linux_uapi::AUDIT_SECCOMP;
22use starnix_lifecycle::AtomicU64Counter;
23use starnix_logging::{log_warn, track_stub};
24use starnix_sync::{FileOpsCore, Locked, Mutex, Unlocked};
25use starnix_syscalls::decls::Syscall;
26use starnix_syscalls::{SyscallArg, SyscallResult};
27use starnix_uapi::errors::Errno;
28use starnix_uapi::open_flags::OpenFlags;
29use starnix_uapi::signals::{SIGKILL, SIGSYS};
30#[cfg(target_arch = "aarch64")]
31use starnix_uapi::user_address::ArchSpecific;
32use starnix_uapi::user_address::{UserAddress, UserRef};
33use starnix_uapi::vfs::FdEvents;
34use starnix_uapi::{
35    __NR_exit, __NR_read, __NR_write, SECCOMP_IOCTL_NOTIF_ADDFD, SECCOMP_IOCTL_NOTIF_ID_VALID,
36    SECCOMP_IOCTL_NOTIF_RECV, SECCOMP_IOCTL_NOTIF_SEND, SECCOMP_MODE_DISABLED, SECCOMP_MODE_FILTER,
37    SECCOMP_MODE_STRICT, SECCOMP_RET_ACTION_FULL, SECCOMP_RET_DATA,
38    SECCOMP_USER_NOTIF_FLAG_CONTINUE, SYS_SECCOMP, errno, errno_from_code, error, seccomp_data,
39    seccomp_notif, seccomp_notif_resp, sock_filter,
40};
41use std::collections::HashMap;
42use std::sync::atomic::{AtomicU8, Ordering};
43use std::sync::{Arc, LazyLock};
44use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout};
45
46#[cfg(target_arch = "aarch64")]
47use starnix_uapi::__NR_clock_getres;
48#[cfg(target_arch = "aarch64")]
49use starnix_uapi::__NR_clock_gettime;
50#[cfg(target_arch = "aarch64")]
51use starnix_uapi::__NR_gettimeofday;
52#[cfg(target_arch = "aarch64")]
53use starnix_uapi::{AUDIT_ARCH_AARCH64, AUDIT_ARCH_ARM};
54
55#[cfg(target_arch = "x86_64")]
56use starnix_uapi::__NR_clock_gettime;
57#[cfg(target_arch = "x86_64")]
58use starnix_uapi::__NR_getcpu;
59#[cfg(target_arch = "x86_64")]
60use starnix_uapi::__NR_gettimeofday;
61#[cfg(target_arch = "x86_64")]
62use starnix_uapi::__NR_time;
63#[cfg(target_arch = "x86_64")]
64use starnix_uapi::AUDIT_ARCH_X86_64;
65
66#[cfg(target_arch = "riscv64")]
67use starnix_uapi::AUDIT_ARCH_RISCV64;
68
/// A single installed seccomp filter: the compiled BPF program plus the bookkeeping that
/// accompanies it (identity, notification cookie source and logging flag).
pub struct SeccompFilter {
    /// The BPF program associated with this filter.
    program: EbpfProgram<SeccompFilter>,

    /// The unique-to-this-process id of this filter.  SECCOMP_FILTER_FLAG_TSYNC only works if all
    /// threads in this process have filters that are a prefix of the filters of the thread
    /// attempting to do the TSYNC. Identical filters attached in separate seccomp calls are treated
    /// as different from each other for this purpose, so we need a way of distinguishing them.
    unique_id: u64,

    /// The next cookie (unique id for this syscall), as used by SECCOMP_RET_USER_NOTIF.
    cookie: AtomicU64Counter,

    /// Whether to log the results of this filter.
    log: bool,
}
85
/// The result of running a set of seccomp filters.
pub struct SeccompFilterResult {
    /// The action indicated by the seccomp filter with the highest priority result.
    action: SeccompAction,

    /// The filter that returned the highest priority result, as used by SECCOMP_RET_USER_NOTIF,
    /// which has to have access to its cookie value.  `None` when no filter produced the
    /// result (the default "allow" outcome).
    filter: Option<Arc<SeccompFilter>>,
}
95
96impl SeccompFilter {
97    /// Creates a SeccompFilter object from the given sock_filter.  Associates the user-provided
98    /// id with it, which is intended to be unique to this process.
99    pub fn from_cbpf(
100        code: &Vec<sock_filter>,
101        maybe_unique_id: u64,
102        should_log: bool,
103    ) -> Result<Self, Errno> {
104        // If an instruction loads from / stores to an absolute address, that address has to be
105        // 32-bit aligned and inside the struct seccomp_data passed in.
106        for insn in code {
107            if (bpf_class(insn) == BPF_LD || bpf_class(insn) == BPF_ST)
108                && (bpf_addressing_mode(insn) == BPF_ABS)
109                && (insn.k & 0x3 != 0 || std::mem::size_of::<seccomp_data>() < insn.k as usize)
110            {
111                return error!(EINVAL);
112            }
113        }
114
115        let program = convert_and_link_cbpf::<SeccompFilter>(code).map_err(|errmsg| {
116            log_warn!("{}", errmsg);
117            errno!(EINVAL)
118        })?;
119
120        Ok(SeccompFilter {
121            program,
122            unique_id: maybe_unique_id,
123            cookie: AtomicU64Counter::new(0),
124            log: should_log,
125        })
126    }
127
128    pub fn run(&self, data: &seccomp_data) -> u32 {
129        self.program.run(&mut (), &SeccompData(*data)) as u32
130    }
131}
132
/// Wrapper for `seccomp_data`. Required in order to implement the `ProgramArgument` trait below.
#[repr(C)]
#[derive(Debug, Default, Clone, IntoBytes, FromBytes, KnownLayout, Immutable)]
pub struct SeccompData(seccomp_data);
137
/// Describes the execution environment of seccomp filter programs: there is no run context
/// (`()`), the packet argument is a reference to `SeccompData`, no map type is used (`NoMap`),
/// and classic-BPF conversion uses `SECCOMP_CBPF_CONFIG`.
impl BpfProgramContext for SeccompFilter {
    type RunContext<'a> = ();
    type Packet<'a> = &'a SeccompData;
    type Map = NoMap;
    const CBPF_CONFIG: &'static CbpfConfig = &SECCOMP_CBPF_CONFIG;
}
144
// NOTE(review): presumably declares that seccomp programs have no BPF helper functions
// available -- confirm against the `ebpf` crate's macro documentation.
ebpf::empty_static_helper_set!(SeccompFilter);
146
/// Lazily-initialized verifier type reported for the `&SeccompData` program argument: a pointer
/// to memory with a freshly allocated `MemoryId`.
// NOTE(review): `buffer_size` is 0 here rather than the size of `seccomp_data`; presumably the
// cBPF conversion does not rely on it -- confirm.
static SECCOMP_DATA_TYPE: LazyLock<Type> =
    LazyLock::new(|| Type::PtrToMemory { id: MemoryId::new(), offset: 0.into(), buffer_size: 0 });
149
150impl ProgramArgument for &'_ SeccompData {
151    fn get_type() -> &'static Type {
152        &*SECCOMP_DATA_TYPE
153    }
154}
155
/// Upper bound on the accounted instruction total of all filters in one
/// `SeccompFilterContainer` (each filter is charged its original length plus 4).
const SECCOMP_MAX_INSNS_PER_PATH: u16 = 32768;
157
/// A list of seccomp filters, intended to be associated with a specific process.
#[derive(Default)]
pub struct SeccompFilterContainer {
    /// List of currently installed seccomp_filters; most recently added is last.
    pub filters: Vec<Arc<SeccompFilter>>,

    /// The total length of the provided seccomp filters, which cannot
    /// exceed SECCOMP_MAX_INSNS_PER_PATH - 4 * the number of filters.  This is stored
    /// instead of computed because we store seccomp filters in an
    /// expanded form, and it is impossible to get the original length.
    pub provided_instructions: u16,

    /// Data needed by SECCOMP_RET_USER_NOTIF.  `None` until a listener has been created.
    pub notifier: Option<SeccompNotifierHandle>,
}
173
174impl Clone for SeccompFilterContainer {
175    fn clone(&self) -> Self {
176        if let Some(n) = &self.notifier {
177            n.lock().add_thread();
178        }
179        SeccompFilterContainer {
180            filters: self.filters.clone(),
181            provided_instructions: self.provided_instructions,
182            notifier: self.notifier.clone(),
183        }
184    }
185}
186
187impl Drop for SeccompFilterContainer {
188    fn drop(&mut self) {
189        if let Some(n) = &self.notifier {
190            // Notifier needs to send threads a HUP when there is no one left
191            // referencing it.
192            n.lock().remove_thread();
193        }
194    }
195}
196
197fn make_seccomp_data(
198    #[allow(unused_variables)] current_task: &CurrentTask,
199    syscall: &Syscall,
200    ip: u64,
201) -> seccomp_data {
202    #[cfg(target_arch = "x86_64")]
203    let arch_val = AUDIT_ARCH_X86_64;
204    #[cfg(target_arch = "aarch64")]
205    let arch_val = if current_task.is_arch32() { AUDIT_ARCH_ARM } else { AUDIT_ARCH_AARCH64 };
206    #[cfg(target_arch = "riscv64")]
207    let arch_val = AUDIT_ARCH_RISCV64;
208    seccomp_data {
209        nr: syscall.decl.number as i32,
210        arch: arch_val,
211        instruction_pointer: ip,
212        args: [
213            syscall.arg0.raw(),
214            syscall.arg1.raw(),
215            syscall.arg2.raw(),
216            syscall.arg3.raw(),
217            syscall.arg4.raw(),
218            syscall.arg5.raw(),
219        ],
220    }
221}
222
223impl SeccompFilterContainer {
224    /// Ensures that this set of seccomp filters can be "synced to" the given set.
225    /// This means that our filters are a prefix of the given set of filters.
226    pub fn can_sync_to(&self, source: &SeccompFilterContainer) -> bool {
227        if source.filters.len() < self.filters.len() {
228            return false;
229        }
230        for (filter, other_filter) in self.filters.iter().zip(source.filters.iter()) {
231            if other_filter.unique_id != filter.unique_id {
232                return false;
233            }
234        }
235        true
236    }
237
238    /// Adds the given filter to this list.  The original_length parameter is the length of
239    /// the originally provided BPF (i.e., the number of sock_filter instructions), used
240    /// to ensure the total length does not exceed SECCOMP_MAX_INSNS_PER_PATH
241    pub fn add_filter(
242        &mut self,
243        filter: Arc<SeccompFilter>,
244        original_length: u16,
245    ) -> Result<(), Errno> {
246        let maybe_new_length = self.provided_instructions + original_length + 4;
247        if maybe_new_length > SECCOMP_MAX_INSNS_PER_PATH {
248            return error!(ENOMEM);
249        }
250
251        self.provided_instructions = maybe_new_length;
252        self.filters.push(filter);
253        Ok(())
254    }
255
256    /// Runs all of the seccomp filters in this container, most-to-least recent.  Returns the
257    /// highest priority result (which contains a reference to the filter that generated it)
258    pub fn run_all(&self, current_task: &CurrentTask, syscall: &Syscall) -> SeccompFilterResult {
259        let mut r = SeccompFilterResult { action: SeccompAction::Allow, filter: None };
260
261        // VDSO calls can't be caught by seccomp, so most seccomp filters forget to declare them.
262        // But our VDSO implementation is incomplete, and most of the calls forward to the actual
263        // syscalls. So seccomp should ignore them until they're implemented correctly in the VDSO.
264        #[cfg(target_arch = "x86_64")] // The set of VDSO calls is arch dependent.
265        #[allow(non_upper_case_globals)]
266        if let __NR_clock_gettime | __NR_getcpu | __NR_gettimeofday | __NR_time =
267            syscall.decl.number as u32
268        {
269            return r;
270        }
271        #[cfg(target_arch = "aarch64")]
272        #[allow(non_upper_case_globals)]
273        if let __NR_clock_gettime | __NR_clock_getres | __NR_gettimeofday =
274            syscall.decl.number as u32
275        {
276            return r;
277        }
278
279        let data = make_seccomp_data(
280            current_task,
281            syscall,
282            current_task.thread_state.registers.instruction_pointer_register(),
283        );
284
285        // Filters are executed in reverse order of addition
286        for filter in self.filters.iter().rev() {
287            let new_result = filter.run(&data);
288
289            let action = SeccompAction::from_u32(new_result).unwrap_or(SeccompAction::KillProcess);
290
291            if SeccompAction::has_prio(&action, &r.action) == std::cmp::Ordering::Less {
292                r = SeccompFilterResult { action, filter: Some(filter.clone()) };
293            }
294        }
295        r
296    }
297
    /// Creates a new listener for use by SECCOMP_RET_USER_NOTIF.  Returns its fd.
    ///
    /// Returns `EBUSY` if the current task's filter container already has a notifier, and
    /// propagates any error from creating the anonymous file or installing the fd.
    pub fn create_listener(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
    ) -> Result<FdNumber, Errno> {
        // Create the `Anon` handle file before taking the write lock on the task, because
        // `Anon::new_file()` needs to read the `current_task` SID to label the file object.
        let the_notifier = SeccompNotifier::new();
        let handle = Anon::new_file(
            locked,
            current_task,
            Box::new(SeccompNotifierFileObject { notifier: the_notifier.clone() }),
            OpenFlags::RDWR,
            "seccomp notify",
        )?;

        // Take the write lock to check for an existing notifier, and initialize and store the new
        // notifier otherwise.
        let filters = &mut current_task.write().seccomp_filters;
        if filters.notifier.is_some() {
            return error!(EBUSY);
        }
        // The fd is installed with close-on-exec set.
        let fd = current_task.add_file(locked, handle, FdFlags::CLOEXEC)?;
        {
            // Count the current task's container as the first user of the notifier.
            let mut state = the_notifier.lock();
            state.add_thread();
        }
        filters.notifier = Some(the_notifier);
        Ok(fd)
    }
328}
329
/// Possible values for the current status of the seccomp filters for
/// this process.  The discriminants are the corresponding SECCOMP_MODE_* constants.
#[repr(u8)]
#[derive(Clone, Copy, PartialEq)]
pub enum SeccompStateValue {
    /// Seccomp is disabled (SECCOMP_MODE_DISABLED).
    None = SECCOMP_MODE_DISABLED as u8,
    /// Strict mode (SECCOMP_MODE_STRICT).
    Strict = SECCOMP_MODE_STRICT as u8,
    /// Filter mode with user-provided filters (SECCOMP_MODE_FILTER).
    UserDefined = SECCOMP_MODE_FILTER as u8,
}
339
/// Per-process state that cannot be stored in the container (e.g., whether there is a container).
#[derive(Default)]
pub struct SeccompState {
    /// This AtomicU8 corresponds to a SeccompStateValue.
    filter_state: AtomicU8,
}
346
347impl SeccompState {
348    pub fn from(state: &SeccompState) -> SeccompState {
349        SeccompState { filter_state: AtomicU8::new(state.filter_state.load(Ordering::Acquire)) }
350    }
351
352    fn from_u8(value: u8) -> SeccompStateValue {
353        match value {
354            v if v == SECCOMP_MODE_DISABLED as u8 => SeccompStateValue::None,
355            v if v == SECCOMP_MODE_STRICT as u8 => SeccompStateValue::Strict,
356            v if v == SECCOMP_MODE_FILTER as u8 => SeccompStateValue::UserDefined,
357            _ => unreachable!(),
358        }
359    }
360
361    pub fn get(&self) -> SeccompStateValue {
362        Self::from_u8(self.filter_state.load(Ordering::Acquire))
363    }
364
    /// Transitions the seccomp mode to `state`.
    ///
    /// Succeeds if the mode already equals `state`, or if the mode is currently `None` and
    /// the switch to `state` wins the compare-and-exchange.  Returns `EINVAL` if a different,
    /// non-`None` mode is already set.
    pub fn set(&self, state: &SeccompStateValue) -> Result<(), Errno> {
        loop {
            let seccomp_filter_status = self.get();
            // Setting the mode that is already active is a no-op.
            if seccomp_filter_status == *state {
                return Ok(());
            }
            // Only a transition away from `None` is permitted.
            if seccomp_filter_status != SeccompStateValue::None {
                return error!(EINVAL);
            }

            // Another thread may have changed the value since it was read above; on failure,
            // loop to re-read and re-validate.
            if self
                .filter_state
                .compare_exchange(
                    seccomp_filter_status as u8,
                    *state as u8,
                    Ordering::Release,
                    Ordering::Acquire,
                )
                .is_ok()
            {
                return Ok(());
            }
        }
    }
389
390    /// Check to see if this syscall is allowed in STRICT mode, and, if not,
391    /// send the current task a SIGKILL.
392    pub fn do_strict(
393        locked: &mut Locked<Unlocked>,
394        task: &Task,
395        syscall: &Syscall,
396    ) -> Option<Result<SyscallResult, Errno>> {
397        if syscall.decl.number as u32 != __NR_exit
398            && syscall.decl.number as u32 != __NR_read
399            && syscall.decl.number as u32 != __NR_write
400        {
401            send_standard_signal(locked, task, SignalInfo::default(SIGKILL));
402            return Some(Err(errno_from_code!(0)));
403        }
404        None
405    }
406
407    // This is supposed to be put in the audit log, but starnix does not yet have an
408    // audit log.  Also, it does not match the Linux format.  Still, the machinery
409    // is in place for when we have to support it for real.
410    fn log_action(task: &CurrentTask, syscall: &Syscall) {
411        let creds = task.current_creds();
412        let (uid, gid) = (creds.uid, creds.gid);
413        let arch = if cfg!(target_arch = "x86_64") {
414            "x86_64"
415        } else if cfg!(target_arch = "aarch64") {
416            "aarch64"
417        } else {
418            "unknown"
419        };
420        task.kernel().audit_logger().audit_log(AUDIT_SECCOMP as u16, || {
421            format!(
422                "uid={} gid={} pid={} comm={} syscall={} ip={} ARCH={} SYSCALL={}",
423                uid,
424                gid,
425                task.thread_group().leader,
426                task.command(),
427                syscall.decl.number,
428                task.thread_state.registers.instruction_pointer_register(),
429                arch,
430                syscall.decl.name(),
431            )
432        });
433    }
434
    /// Take the given |action| on the given |task|.  The action is one of the SECCOMP_RET values
    /// (ALLOW, LOG, KILL, KILL_PROCESS, TRAP, ERRNO, USER_NOTIF, TRACE).  |task| is the thread that
    /// invoked the syscall, and |syscall| is the syscall that was invoked.
    /// Returns the result that the syscall will be forced to return by this
    /// filter, or None, if the syscall should return its actual return value.
    // NB: Allow warning below so that it is clear what we are doing on KILL_PROCESS
    #[allow(clippy::wildcard_in_or_patterns)]
    pub fn do_user_defined(
        locked: &mut Locked<Unlocked>,
        result: SeccompFilterResult,
        current_task: &mut CurrentTask,
        syscall: &Syscall,
    ) -> Option<Result<SyscallResult, Errno>> {
        let action = result.action;
        // Audit-log the action when a filter produced it and both the kernel-wide setting and
        // the filter's own log flag call for it.
        // NOTE(review): a `Log` action is logged again unconditionally in its match arm below,
        // so it looks like it can be logged twice -- confirm whether that is intended.
        if let Some(filter) = result.filter.as_ref() {
            if action.is_logged(current_task.kernel(), filter.log) {
                Self::log_action(current_task, syscall);
            }
        }
        match action {
            SeccompAction::Allow => None,
            // Fail the syscall with the errno carried by the filter's return value.
            SeccompAction::Errno(code) => Some(Err(errno_from_code!(code as i16))),
            SeccompAction::KillThread => {
                let siginfo = SignalInfo::default(SIGSYS);

                // If this is the only thread left in the thread group, the exit is recorded as
                // a core dump; otherwise only this thread is marked as killed.
                let is_last_thread = current_task.thread_group().read().tasks_count() == 1;
                let mut task_state = current_task.write();

                if is_last_thread {
                    task_state.set_flags(TaskFlags::DUMP_ON_EXIT, true);
                    task_state.set_exit_status_if_not_already(ExitStatus::CoreDump(siginfo));
                } else {
                    task_state.set_exit_status_if_not_already(ExitStatus::Kill(siginfo));
                }
                Some(Err(errno_from_code!(0)))
            }
            SeccompAction::KillProcess => {
                // Exit the whole thread group with a SIGSYS core-dump status.
                current_task
                    .thread_group_exit(locked, ExitStatus::CoreDump(SignalInfo::default(SIGSYS)));
                Some(Err(errno_from_code!(0)))
            }
            SeccompAction::Log => {
                Self::log_action(current_task, syscall);
                None
            }
            SeccompAction::Trace => {
                track_stub!(TODO("https://fxbug.dev/297311898"), "ptrace seccomp support");
                Some(error!(ENOSYS))
            }
            SeccompAction::Trap(errno) => {
                #[cfg(target_arch = "x86_64")]
                let arch_val = AUDIT_ARCH_X86_64;
                #[cfg(target_arch = "aarch64")]
                let arch_val =
                    if current_task.is_arch32() { AUDIT_ARCH_ARM } else { AUDIT_ARCH_AARCH64 };
                #[cfg(target_arch = "riscv64")]
                let arch_val = AUDIT_ARCH_RISCV64;

                // Forced SIGSYS carrying the filter's data value as errno, plus the call
                // address, syscall number and architecture of the trapped syscall.
                let siginfo = SignalInfo {
                    signal: SIGSYS,
                    errno: errno as i32,
                    code: SYS_SECCOMP as i32,
                    detail: SignalDetail::SIGSYS {
                        call_addr: current_task
                            .thread_state
                            .registers
                            .instruction_pointer_register()
                            .into(),
                        syscall: syscall.decl.number as i32,
                        arch: arch_val,
                    },
                    force: true,
                    source: SignalSource::capture(),
                };

                send_standard_signal(locked, current_task, siginfo);
                Some(Err(errno_from_code!(-(syscall.decl.number as i16))))
            }
            SeccompAction::UserNotif => {
                if let Some(notifier) = current_task.get_seccomp_notifier() {
                    // Each notification is identified by a fresh cookie taken from the filter
                    // that requested it.
                    let cookie = result.filter.as_ref().unwrap().cookie.next();
                    let msg = seccomp_notif {
                        id: cookie,
                        pid: current_task.tid as u32,
                        flags: 0,
                        data: make_seccomp_data(
                            current_task,
                            syscall,
                            current_task.thread_state.registers.instruction_pointer_register(),
                        ),
                    };
                    // First, add a pending notification, and wake up the supervisor waiting for it.
                    let waiter = Waiter::new();
                    {
                        let mut notifier = notifier.lock();
                        if notifier.is_closed {
                            // Someone explicitly close()d the fd with the notifier, which does not
                            // clear the thread-local notifier.  Do it now.
                            drop(notifier);
                            current_task.set_seccomp_notifier(None);
                            return Some(error!(ENOSYS));
                        }
                        notifier.create_notification(cookie, msg);
                        notifier.waiters.wait_async_value(&waiter, cookie);
                    }

                    // Next, wait for a response from the supervisor
                    // NOTE(review): on a wait error the pending notification created above is
                    // not deleted here -- confirm it is cleaned up elsewhere.
                    if let Err(e) = waiter.wait(locked, current_task) {
                        return Some(Err(e));
                    }

                    // Fetch the response.
                    let resp: Option<seccomp_notif_resp>;
                    {
                        let mut notifier = notifier.lock();
                        resp = notifier.get_response(cookie);
                        notifier.delete_notification(cookie);
                    }

                    // The response indicates what you are supposed to do with this syscall.
                    // Precedence: a non-zero `val` is returned as the syscall result; otherwise
                    // a non-zero `error` is returned (as a value when positive, as an errno
                    // when negative); otherwise the CONTINUE flag lets the syscall run.
                    if let Some(response) = resp {
                        if response.val != 0 {
                            return Some(Ok(response.val.into()));
                        }
                        if response.error != 0 {
                            if response.error > 0 {
                                return Some(Ok(response.error.into()));
                            } else {
                                return Some(Err(errno_from_code!(-response.error as i16)));
                            }
                        }
                        if response.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0 {
                            return None;
                        }
                    }
                    // No response, or an all-zero response without CONTINUE: report success (0).
                    Some(Ok(0.into()))
                } else {
                    // No notifier is attached to this task.
                    Some(error!(ENOSYS))
                }
            }
        }
    }
577}
578
/// A decoded seccomp filter return value (one of the SECCOMP_RET_* actions).
#[derive(Clone, Copy, PartialEq)]
pub enum SeccompAction {
    /// SECCOMP_RET_ALLOW.
    Allow,
    /// SECCOMP_RET_ERRNO; the payload is the errno taken from the data bits (capped at 0xfff).
    Errno(u32),
    /// SECCOMP_RET_KILL_PROCESS.
    KillProcess,
    /// SECCOMP_RET_KILL_THREAD.
    KillThread,
    /// SECCOMP_RET_LOG.
    Log,
    /// SECCOMP_RET_TRAP; the payload is the data bits of the filter's return value.
    Trap(u32),
    /// SECCOMP_RET_TRACE.
    Trace,
    /// SECCOMP_RET_USER_NOTIF.
    UserNotif,
}
590
591impl SeccompAction {
592    pub fn is_action_available(action: u32) -> Result<SyscallResult, Errno> {
593        if SeccompAction::from_u32(action).is_none() {
594            return error!(EOPNOTSUPP);
595        }
596        Ok(0.into())
597    }
598
599    pub fn from_u32(action: u32) -> Option<SeccompAction> {
600        match action & !SECCOMP_RET_DATA {
601            linux_uapi::SECCOMP_RET_ALLOW => Some(Self::Allow),
602            linux_uapi::SECCOMP_RET_ERRNO => {
603                let mut action = action & SECCOMP_RET_DATA;
604                // Linux kernel compatibility: if errno exceeds 0xfff, it is capped at 0xfff.
605                action = std::cmp::min(action & 0xffff, 0xfff);
606                Some(Self::Errno(action))
607            }
608            linux_uapi::SECCOMP_RET_KILL_PROCESS => Some(Self::KillProcess),
609            linux_uapi::SECCOMP_RET_KILL_THREAD => Some(Self::KillThread),
610            linux_uapi::SECCOMP_RET_LOG => Some(Self::Log),
611            linux_uapi::SECCOMP_RET_TRACE => Some(Self::Trace),
612            linux_uapi::SECCOMP_RET_TRAP => Some(Self::Trap(action & SECCOMP_RET_DATA)),
613
614            linux_uapi::SECCOMP_RET_USER_NOTIF => Some(Self::UserNotif),
615            _ => None,
616        }
617    }
618
619    pub fn to_isize(self) -> isize {
620        match self {
621            Self::Allow => linux_uapi::SECCOMP_RET_ALLOW as isize,
622            Self::Errno(x) => (linux_uapi::SECCOMP_RET_ERRNO | x) as isize,
623            Self::KillProcess => linux_uapi::SECCOMP_RET_KILL_PROCESS as isize,
624            Self::KillThread => linux_uapi::SECCOMP_RET_KILL_THREAD as isize,
625            Self::Log => linux_uapi::SECCOMP_RET_LOG as isize,
626            Self::Trace => linux_uapi::SECCOMP_RET_TRACE as isize,
627            Self::Trap(x) => (linux_uapi::SECCOMP_RET_TRAP | x) as isize,
628            Self::UserNotif => linux_uapi::SECCOMP_RET_USER_NOTIF as isize,
629        }
630    }
631
632    pub fn canonical_name(self) -> &'static str {
633        match self {
634            Self::Allow => &"allow",
635            Self::Errno(_) => &"errno",
636            Self::KillProcess => &"kill_process",
637            Self::KillThread => &"kill_thread",
638            Self::Log => &"log",
639            Self::Trace => &"trace",
640            Self::Trap(_) => &"trap",
641            Self::UserNotif => &"user_notif",
642        }
643    }
644
645    pub fn has_prio(a: &SeccompAction, b: &SeccompAction) -> std::cmp::Ordering {
646        let anum = a.to_isize() as i32;
647        let bnum = b.to_isize() as i32;
648        let fullnum = SECCOMP_RET_ACTION_FULL as i32;
649        let aval = anum & fullnum;
650        let bval = bnum & fullnum;
651        aval.cmp(&bval)
652    }
653
654    /// Returns a vector of all available actions, sorted by priority.
655    pub fn all_actions() -> Vec<SeccompAction> {
656        let mut result = vec![
657            Self::Allow,
658            Self::Errno(0),
659            Self::KillProcess,
660            Self::KillThread,
661            Self::Log,
662            Self::Trace,
663            Self::Trap(0),
664            Self::UserNotif,
665        ];
666
667        result.sort_by(Self::has_prio);
668        result
669    }
670
671    /// Gets the contents of /proc/sys/kernel/seccomp/actions_avail
672    pub fn get_actions_avail_file() -> Vec<u8> {
673        let all_actions = Self::all_actions();
674        if all_actions.len() == 0 {
675            return vec![];
676        }
677        let mut result = String::from(all_actions[0].canonical_name());
678        for i in 1..all_actions.len() {
679            result.push_str(" ");
680            result.push_str(all_actions[i].canonical_name());
681        }
682        result.push('\n');
683        result.into_bytes()
684    }
685
686    fn logged_bit_offset(&self) -> u32 {
687        match self {
688            Self::Allow => 1,
689            Self::Errno(_) => 2,
690            Self::KillProcess => 3,
691            Self::KillThread => 4,
692            Self::Log => 5,
693            Self::Trace => 6,
694            Self::Trap(_) => 7,
695            Self::UserNotif => 8,
696        }
697    }
698
699    fn set_logged_bit(&self, dst: &mut u16) {
700        *dst |= 1 << self.logged_bit_offset();
701    }
702
703    pub fn is_logged(&self, kernel: &Kernel, filter_flag: bool) -> bool {
704        if kernel.actions_logged.load(Ordering::Relaxed) & (1 << self.logged_bit_offset()) != 0 {
705            match self {
706                // Per the documentation on audit logging of seccomp actions in
707                // seccomp(2), just because it is listed as logged, that doesn't
708                // mean we actually log it.
709
710                // If it is KILL_PROCESS or KILL_THREAD, return true
711                Self::KillProcess | Self::KillThread => true,
712                // If it is one of these and the filter flag was set, return true.
713                Self::Errno(_) | Self::Log | Self::Trap(_) | Self::UserNotif => filter_flag,
714                // Never log ALLOW
715                _ => false,
716            }
717        } else {
718            false
719        }
720    }
721
722    pub fn set_actions_logged(kernel: &Kernel, data: &[u8]) -> Result<(), Errno> {
723        let mut new_actions_logged: u16 = 0;
724        for action_res in data.fields_with(|c| c.is_ascii_whitespace()) {
725            if let Ok(action) = action_res.to_str() {
726                match action {
727                    "errno" => Self::Errno(0).set_logged_bit(&mut new_actions_logged),
728                    "kill_process" => Self::KillProcess.set_logged_bit(&mut new_actions_logged),
729                    "kill_thread" => Self::KillThread.set_logged_bit(&mut new_actions_logged),
730                    "log" => Self::Log.set_logged_bit(&mut new_actions_logged),
731                    "trace" => Self::Trace.set_logged_bit(&mut new_actions_logged),
732                    "trap" => Self::Trap(0).set_logged_bit(&mut new_actions_logged),
733                    "user_notif" => Self::UserNotif.set_logged_bit(&mut new_actions_logged),
734                    // Not allowed to write anything other than the approved actions to that list.
735                    _ => return error!(EINVAL),
736                }
737            } else {
738                return error!(EINVAL);
739            }
740        }
741        kernel.actions_logged.store(new_actions_logged, Ordering::Relaxed);
742        Ok(())
743    }
744
745    pub fn get_actions_logged(kernel: &Kernel) -> Vec<u8> {
746        let al = kernel.actions_logged.load(Ordering::Relaxed);
747        let mut result: String = "".to_string();
748        for action in Self::all_actions() {
749            if (al & (1 << action.logged_bit_offset())) != 0 {
750                result.push_str(action.canonical_name());
751                result.push(' ');
752            }
753        }
754        if !result.is_empty() {
755            // remove trailing whitespace.
756            result.pop();
757        }
758
759        result.into_bytes()
760    }
761}
762
/// This struct contains data that needs to be shuttled back and forth between the thread doing
/// a USER_NOTIF and the supervisor thread responding to it.
#[derive(Default)]
struct SeccompNotification {
    /// notif is the notification set by the filter.  When this is set, the associated fd will
    /// be set to POLLIN.
    notif: seccomp_notif,

    /// Consumed indicates whether a supervisor process has read this notification (and so it
    /// can no longer be consumed by any other SECCOMP_IOCTL_NOTIF_RECV ioctl).  When the notif
    /// is consumed, the associated fd will be set to POLLOUT, indicating that it is ready to
    /// receive a response.
    consumed: bool,

    /// resp is the response that the supervisor sends.  When this is set, an event will be sent
    /// to SeccompNotifier::waiters corresponding to the unique id of the notification.  This
    /// will wake up the filter that is waiting for this particular response.
    resp: Option<seccomp_notif_resp>,
}
782
783impl SeccompNotification {
784    fn new(data: seccomp_notif) -> SeccompNotification {
785        SeccompNotification { notif: data, resp: None, consumed: false }
786    }
787}
788
789/// The underlying implementation of the file descriptor that connects a process that triggers a
790/// SECCOMP_RET_USER_NOTIF with the monitoring process. This support seccomp's ability to notify a
791/// user-space process on specific syscall triggers. See seccomp_unotify(2) for the semantics.
792pub struct SeccompNotifier {
793    waiters: WaitQueue,
794
795    pending_notifications: HashMap<u64, SeccompNotification>,
796
797    // This keeps track of the number of threads using this notifier as a filter.  If that hits
798    // zero, the listeners need to receive a HUP.
799    num_active_threads: u64,
800
801    // notifiers are referenced both by fds and in SeccompFilterContainer. If the file no longer
802    // has fds referring to it, it will be closed, and the SeccompFilterContainers should stop
803    // using it.
804    pub is_closed: bool,
805}
806
807pub type SeccompNotifierHandle = Arc<Mutex<SeccompNotifier>>;
808
809impl SeccompNotifier {
810    pub fn new() -> SeccompNotifierHandle {
811        Arc::new(Mutex::new(SeccompNotifier {
812            waiters: WaitQueue::default(),
813            pending_notifications: HashMap::default(),
814            num_active_threads: 0,
815            is_closed: false,
816        }))
817    }
818
819    fn add_thread(&mut self) {
820        self.num_active_threads += 1;
821    }
822
823    fn remove_thread(&mut self) {
824        self.num_active_threads -= 1;
825        if self.num_active_threads == 0 {
826            self.waiters.notify_fd_events(FdEvents::POLLHUP);
827        }
828    }
829
830    // Creates a pending notification for communication between the
831    // target thread and a supervisor, and notifies readers there is
832    // an opportunity to read.
833    fn create_notification(&mut self, cookie: u64, notif: seccomp_notif) {
834        self.pending_notifications.insert(cookie, SeccompNotification::new(notif));
835        self.waiters.notify_fd_events(FdEvents::POLLIN | FdEvents::POLLRDNORM);
836    }
837
838    // Gets a notification that needs to be handled by a supervisor,
839    // and notifies waiters that there is an opportunity to write.
840    fn consume_some_notification(&mut self) -> Option<seccomp_notif> {
841        for (_, notif) in self.pending_notifications.iter_mut() {
842            if !notif.consumed {
843                notif.consumed = true;
844                self.waiters.notify_fd_events(FdEvents::POLLOUT | FdEvents::POLLWRNORM);
845                return Some(notif.notif);
846            }
847        }
848        None
849    }
850
851    // In case something goes wrong after we consume the notification.
852    fn unconsume(&mut self, cookie: u64) {
853        if let Some(n) = self.pending_notifications.get_mut(&cookie).as_mut() {
854            n.consumed = false;
855        }
856    }
857
858    // Returns the appropriate notifications if someone is waiting with poll/epoll/select.
859    fn get_fd_notifications(&self) -> FdEvents {
860        let mut events = FdEvents::empty();
861
862        for (_, notification) in self.pending_notifications.iter() {
863            if !notification.consumed {
864                events |= FdEvents::POLLIN | FdEvents::POLLRDNORM;
865            } else if notification.resp.is_none() {
866                events |= FdEvents::POLLOUT | FdEvents::POLLWRNORM;
867            }
868        }
869
870        if self.num_active_threads == 0 {
871            events |= FdEvents::POLLHUP;
872        }
873        events
874    }
875
876    // Sets the value read by the target in response to this notification.  Intended for use by the
877    // supervisor.  Notifies the filter there is a response to this request.
878    fn set_response(&mut self, cookie: u64, resp: seccomp_notif_resp) -> Option<Errno> {
879        if let Some(entry) = self.pending_notifications.get_mut(&cookie) {
880            if entry.resp.is_some() {
881                return Some(errno!(EINPROGRESS));
882            }
883            entry.resp = Some(resp);
884            self.waiters.notify_value(resp.id);
885            None
886        } else {
887            Some(errno!(EINVAL))
888        }
889    }
890
891    // Gets the value set by the supervisor for the target to read.
892    fn get_response(&self, cookie: u64) -> Option<seccomp_notif_resp> {
893        if let Some(value) = self.pending_notifications.get(&cookie) {
894            return value.resp;
895        }
896        None
897    }
898
899    // Returns whether the cookie represents an active notification.
900    fn notification_pending(&self, cookie: u64) -> bool {
901        self.pending_notifications.contains_key(&cookie)
902    }
903
904    // Deletes the notification, when the target is done processing it.
905    fn delete_notification(&mut self, cookie: u64) {
906        let _ = self.pending_notifications.remove(&cookie);
907    }
908}
909
910struct SeccompNotifierFileObject {
911    notifier: SeccompNotifierHandle,
912}
913
914impl FileOps for SeccompNotifierFileObject {
915    fileops_impl_nonseekable!();
916    fileops_impl_noop_sync!();
917
918    fn close(
919        self: Box<Self>,
920        _locked: &mut Locked<FileOpsCore>,
921        _file: &FileObjectState,
922        _current_task: &CurrentTask,
923    ) {
924        let mut state = self.notifier.lock();
925
926        for (cookie, notification) in state.pending_notifications.iter() {
927            if !notification.consumed {
928                state.waiters.notify_value(*cookie);
929                state.waiters.notify_fd_events(FdEvents::POLLIN | FdEvents::POLLRDNORM);
930            } else if notification.resp.is_none() {
931                state.waiters.notify_fd_events(FdEvents::POLLOUT | FdEvents::POLLWRNORM);
932            }
933        }
934        state.waiters.notify_fd_events(FdEvents::POLLHUP);
935
936        state.pending_notifications.clear();
937
938        state.is_closed = true;
939    }
940
941    fn read(
942        &self,
943        _locked: &mut Locked<FileOpsCore>,
944        _file: &FileObject,
945        _current_task: &CurrentTask,
946        _offset: usize,
947        _usize: &mut dyn OutputBuffer,
948    ) -> Result<usize, Errno> {
949        error!(EINVAL)
950    }
951
952    fn write(
953        &self,
954        _locked: &mut Locked<FileOpsCore>,
955        _file: &FileObject,
956        _current_task: &CurrentTask,
957        _offset: usize,
958        _buffer: &mut dyn InputBuffer,
959    ) -> Result<usize, Errno> {
960        error!(EINVAL)
961    }
962
963    fn ioctl(
964        &self,
965        locked: &mut Locked<Unlocked>,
966        _file: &FileObject,
967        current_task: &CurrentTask,
968        request: u32,
969        arg: SyscallArg,
970    ) -> Result<SyscallResult, Errno> {
971        let user_addr = UserAddress::from(arg);
972        match request {
973            SECCOMP_IOCTL_NOTIF_RECV => {
974                if let Ok(notif) =
975                    current_task.read_memory_to_vec(user_addr, std::mem::size_of::<seccomp_notif>())
976                {
977                    for value in notif.iter() {
978                        if *value != 0 {
979                            return error!(EINVAL);
980                        }
981                    }
982                }
983                // A RECV reads a notification, optionally waiting for one to become available.
984                let mut notif: Option<seccomp_notif>;
985                loop {
986                    // Grab a notification or wait for one to become readable.
987                    let waiter = Waiter::new();
988                    {
989                        let mut notifier = self.notifier.lock();
990                        notif = notifier.consume_some_notification();
991                        if notif.is_some() {
992                            break;
993                        }
994                        notifier.waiters.wait_async_fd_events(
995                            &waiter,
996                            FdEvents::POLLIN | FdEvents::POLLHUP,
997                            EventHandler::None,
998                        );
999                    }
1000                    waiter.wait(locked, current_task)?;
1001                }
1002                if let Some(notif) = notif {
1003                    if let Err(e) =
1004                        current_task.write_object(UserRef::<seccomp_notif>::new(user_addr), &notif)
1005                    {
1006                        self.notifier.lock().unconsume(notif.id);
1007                        return Err(e);
1008                    }
1009                }
1010
1011                Ok(0.into())
1012            }
1013            SECCOMP_IOCTL_NOTIF_SEND => {
1014                // A SEND sends a response to a previously received notification.
1015                let resp: seccomp_notif_resp = current_task.read_object(UserRef::new(user_addr))?;
1016                if resp.flags & !SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0 {
1017                    return error!(EINVAL);
1018                }
1019                if resp.flags & SECCOMP_USER_NOTIF_FLAG_CONTINUE != 0
1020                    && (resp.error != 0 || resp.val != 0)
1021                {
1022                    return error!(EINVAL);
1023                }
1024                {
1025                    let mut notifier = self.notifier.lock();
1026                    if let Some(err) = notifier.set_response(resp.id, resp) {
1027                        return Err(err);
1028                    }
1029                }
1030                Ok(0.into())
1031            }
1032            SECCOMP_IOCTL_NOTIF_ID_VALID => {
1033                // An ID_VALID indicates that the notification is still in progress.
1034                let cookie: u64 = current_task.read_object(UserRef::new(user_addr))?;
1035                {
1036                    let notifier = self.notifier.lock();
1037                    if notifier.notification_pending(cookie) {
1038                        Ok(0.into())
1039                    } else {
1040                        error!(ENOENT)
1041                    }
1042                }
1043            }
1044            SECCOMP_IOCTL_NOTIF_ADDFD => error!(EINVAL),
1045            _ => error!(EINVAL),
1046        }
1047    }
1048
1049    fn wait_async(
1050        &self,
1051        _locked: &mut Locked<FileOpsCore>,
1052        _file: &FileObject,
1053        _current_task: &CurrentTask,
1054        waiter: &Waiter,
1055        events: FdEvents,
1056        handler: EventHandler,
1057    ) -> Option<WaitCanceler> {
1058        let notifier = self.notifier.lock();
1059        Some(notifier.waiters.wait_async_fd_events(waiter, events, handler))
1060    }
1061
1062    fn query_events(
1063        &self,
1064        _locked: &mut Locked<FileOpsCore>,
1065        _file: &FileObject,
1066        _current_task: &CurrentTask,
1067    ) -> Result<FdEvents, Errno> {
1068        Ok(self.notifier.lock().get_fd_notifications())
1069    }
1070}
1071
#[cfg(test)]
mod test {
    use crate::task::SeccompAction;
    use crate::testing::spawn_kernel_and_run;

    /// Writing the full available-actions list to actions_logged must fail, while the same
    /// list with "allow" removed must be accepted.
    #[::fuchsia::test]
    async fn test_actions_logged_accepts_legal_string() {
        spawn_kernel_and_run(async |_, current_task| {
            let kernel = current_task.kernel();
            let mut actions = SeccompAction::get_actions_avail_file();

            // This is a test in Rust instead of a syscall test because we don't want to change
            // the global config in a test.
            let rejected = SeccompAction::set_actions_logged(&kernel, &actions[..]).is_err();
            assert!(rejected, "Should not be able to write allow to actions_logged file");

            // Cut "allow" out of the raw bytes, leaving the rest of the list untouched.
            let listing = String::from_utf8(actions.clone()).unwrap();
            if let Some(start) = listing.find("allow") {
                let end = start + "allow".len();
                actions.drain(start..end);
            }

            let outcome = SeccompAction::set_actions_logged(&kernel, &actions[..]);
            assert!(
                outcome.is_ok(),
                "Could not write legal string \"{}\" to actions_logged file: error {}",
                String::from_utf8(actions.clone()).unwrap(),
                outcome.unwrap_err()
            );
        })
        .await;
    }
}