starnix_core/bpf/
fs.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// TODO(https://github.com/rust-lang/rust/issues/39371): remove
6#![allow(non_upper_case_globals)]
7
8use crate::bpf::syscalls::BpfTypeFormat;
9use crate::bpf::{BpfMapHandle, ProgramHandle};
10use crate::mm::memory::MemoryObject;
11use crate::mm::{PAGE_SIZE, ProtectionFlags};
12use crate::security::{self, PermissionFlags};
13use crate::task::{
14    CurrentTask, EventHandler, SignalHandler, SignalHandlerInner, Task, WaitCanceler, Waiter,
15};
16use crate::vfs::buffers::{InputBuffer, OutputBuffer};
17use crate::vfs::{
18    CacheMode, CheckAccessReason, FdNumber, FileObject, FileOps, FileSystem, FileSystemHandle,
19    FileSystemOps, FileSystemOptions, FsNode, FsNodeHandle, FsNodeInfo, FsNodeOps, FsStr,
20    MemoryDirectoryFile, MemoryXattrStorage, NamespaceNode, XattrStorage as _,
21    fileops_impl_nonseekable, fileops_impl_noop_sync, fs_node_impl_not_dir,
22    fs_node_impl_xattr_delegate,
23};
24use bstr::BStr;
25use ebpf::{MapFlags, MapSchema};
26use ebpf_api::{RINGBUF_SIGNAL, compute_map_storage_size};
27use starnix_logging::track_stub;
28use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
29use starnix_types::vfs::default_statfs;
30use starnix_uapi::auth::FsCred;
31use starnix_uapi::device_type::DeviceType;
32use starnix_uapi::errors::Errno;
33use starnix_uapi::file_mode::{FileMode, mode};
34use starnix_uapi::math::round_up_to_increment;
35use starnix_uapi::open_flags::OpenFlags;
36use starnix_uapi::vfs::FdEvents;
37use starnix_uapi::{
38    BPF_FS_MAGIC, bpf_map_type_BPF_MAP_TYPE_ARRAY, bpf_map_type_BPF_MAP_TYPE_RINGBUF, errno, error,
39    statfs,
40};
41use std::sync::Arc;
42
43/// A reference to a BPF object that can be stored in either an FD or an entry in the /sys/fs/bpf
44/// filesystem.
45#[derive(Debug, Clone)]
46pub enum BpfHandle {
47    Program(ProgramHandle),
48
49    // Stub used to fake loading of programs of unknown types.
50    ProgramStub(u32),
51
52    Map(BpfMapHandle),
53    BpfTypeFormat(Arc<BpfTypeFormat>),
54}
55
56impl BpfHandle {
57    pub fn as_map(&self) -> Result<&BpfMapHandle, Errno> {
58        match self {
59            Self::Map(map) => Ok(map),
60            _ => error!(EINVAL),
61        }
62    }
63    pub fn as_program(&self) -> Result<&ProgramHandle, Errno> {
64        match self {
65            Self::Program(program) => Ok(program),
66            _ => error!(EINVAL),
67        }
68    }
69
70    pub fn into_program(self) -> Result<ProgramHandle, Errno> {
71        match self {
72            Self::Program(program) => Ok(program),
73            _ => error!(EINVAL),
74        }
75    }
76
77    // Returns VMO and schema if this handle references a map.
78    fn get_map_vmo(&self) -> Result<(&Arc<zx::Vmo>, MapSchema), Errno> {
79        match self {
80            Self::Map(map) => Ok((map.vmo(), map.schema)),
81            _ => error!(ENODEV),
82        }
83    }
84
85    pub fn type_name(&self) -> &'static str {
86        match self {
87            Self::Map(_) => "bpf-map",
88            Self::Program(_) | Self::ProgramStub(_) => "bpf-prog",
89            Self::BpfTypeFormat(_) => "bpf-type",
90        }
91    }
92
93    /// Performs security-related checks when opening a BPF map. If
94    /// `permission_flags` is `None`, then they are inferred from the map's
95    /// schema. `permission_flags` is ignored for programs.
96    pub(super) fn security_check_open_fd(
97        &self,
98        current_task: &CurrentTask,
99        permission_flags: Option<PermissionFlags>,
100    ) -> Result<(), Errno> {
101        match self {
102            Self::Map(bpf_map) => security::check_bpf_map_access(
103                current_task,
104                &bpf_map,
105                permission_flags.unwrap_or_else(|| bpf_map.schema.flags.into()),
106            ),
107            Self::Program(program) => security::check_bpf_prog_access(current_task, &program),
108            _ => Ok(()),
109        }
110    }
111}
112
113impl From<ProgramHandle> for BpfHandle {
114    fn from(program: ProgramHandle) -> Self {
115        Self::Program(program)
116    }
117}
118
119impl From<BpfMapHandle> for BpfHandle {
120    fn from(map: BpfMapHandle) -> Self {
121        Self::Map(map)
122    }
123}
124
125impl From<BpfTypeFormat> for BpfHandle {
126    fn from(format: BpfTypeFormat) -> Self {
127        Self::BpfTypeFormat(Arc::new(format))
128    }
129}
130
131impl FileOps for BpfHandle {
132    fileops_impl_nonseekable!();
133    fileops_impl_noop_sync!();
134    fn read(
135        &self,
136        _locked: &mut Locked<FileOpsCore>,
137        _file: &FileObject,
138        _current_task: &crate::task::CurrentTask,
139        _offset: usize,
140        _data: &mut dyn OutputBuffer,
141    ) -> Result<usize, Errno> {
142        track_stub!(TODO("https://fxbug.dev/322874229"), "bpf handle read");
143        error!(EINVAL)
144    }
145    fn write(
146        &self,
147        _locked: &mut Locked<FileOpsCore>,
148        _file: &FileObject,
149        _current_task: &crate::task::CurrentTask,
150        _offset: usize,
151        _data: &mut dyn InputBuffer,
152    ) -> Result<usize, Errno> {
153        track_stub!(TODO("https://fxbug.dev/322873841"), "bpf handle write");
154        error!(EINVAL)
155    }
156
157    fn get_memory(
158        &self,
159        locked: &mut Locked<FileOpsCore>,
160        _file: &FileObject,
161        _current_task: &CurrentTask,
162        length: Option<usize>,
163        prot: ProtectionFlags,
164    ) -> Result<Arc<MemoryObject>, Errno> {
165        let (vmo, schema) = self.get_map_vmo()?;
166
167        // Because of the specific condition needed to map this object, the size must be known.
168        let length = length.ok_or_else(|| errno!(EINVAL))?;
169
170        // This cannot be mapped executable.
171        if prot.contains(ProtectionFlags::EXEC) {
172            return error!(EPERM);
173        }
174
175        match schema.map_type {
176            bpf_map_type_BPF_MAP_TYPE_RINGBUF => {
177                let page_size = *PAGE_SIZE as usize;
178                // Starting from the second page, this cannot be mapped writable.
179                if length > page_size {
180                    if prot.contains(ProtectionFlags::WRITE) {
181                        return error!(EPERM);
182                    }
183                    // This cannot be mapped outside of the 2 control pages and the 2 data sections.
184                    if length > 2 * page_size + 2 * schema.max_entries as usize {
185                        return error!(EINVAL);
186                    }
187                }
188
189                self.as_map()?.get_memory(locked, || {
190                    // The first page of the ring buffer VMO is not visible to
191                    // user-space processes. Return a VMO slice that doesn't
192                    // include the first page.
193                    let clone_size = 2 * page_size + schema.max_entries as usize;
194                    let vmo_dup = vmo
195                        .create_child(
196                            zx::VmoChildOptions::SLICE,
197                            page_size as u64,
198                            clone_size as u64,
199                        )
200                        .map_err(|_| errno!(EIO))?
201                        .into();
202                    Ok(Arc::new(MemoryObject::RingBuf(vmo_dup)))
203                })
204            }
205
206            bpf_map_type_BPF_MAP_TYPE_ARRAY => {
207                if !schema.flags.contains(MapFlags::Mmapable) {
208                    return error!(EPERM);
209                }
210
211                let array_size = round_up_to_increment(
212                    compute_map_storage_size(&schema).map_err(|_| errno!(EINVAL))?,
213                    *PAGE_SIZE as usize,
214                )?;
215                if length > array_size {
216                    return error!(EINVAL);
217                }
218
219                self.as_map()?.get_memory(locked, || {
220                    let vmo_dup = vmo
221                        .as_handle_ref()
222                        .duplicate(zx::Rights::SAME_RIGHTS)
223                        .map_err(|_| errno!(EIO))?
224                        .into();
225                    Ok(Arc::new(MemoryObject::Vmo(vmo_dup)))
226                })
227            }
228
229            // Other maps cannot be mmap'ed.
230            _ => error!(ENODEV),
231        }
232    }
233
234    fn wait_async(
235        &self,
236        _locked: &mut Locked<FileOpsCore>,
237        _file: &FileObject,
238        _current_task: &CurrentTask,
239        waiter: &Waiter,
240        events: FdEvents,
241        handler: EventHandler,
242    ) -> Option<WaitCanceler> {
243        let (vmo, schema) = self.get_map_vmo().ok()?;
244
245        // Only ringbuffers can be polled for POLLIN.
246        if schema.map_type != bpf_map_type_BPF_MAP_TYPE_RINGBUF
247            || !events.contains(FdEvents::POLLIN)
248        {
249            return Some(WaitCanceler::new_noop());
250        }
251
252        let handler = SignalHandler {
253            inner: SignalHandlerInner::ZxHandle(|signals| {
254                if signals.contains(RINGBUF_SIGNAL) { FdEvents::POLLIN } else { FdEvents::empty() }
255            }),
256            event_handler: handler,
257            err_code: None,
258        };
259
260        // Reset the signal before waiting. The case when the ring buffer already has some data
261        // is handled by the caller: it should call `query_events` after starting the waiter.
262        vmo.as_handle_ref()
263            .signal(RINGBUF_SIGNAL, zx::Signals::empty())
264            .expect("Failed to set signal or a ring buffer VMO");
265
266        let canceler = waiter
267            .wake_on_zircon_signals(&vmo.as_handle_ref(), RINGBUF_SIGNAL, handler)
268            .expect("Failed to wait for signals on ringbuf VMO");
269        Some(WaitCanceler::new_port(canceler))
270    }
271
272    fn query_events(
273        &self,
274        _locked: &mut Locked<FileOpsCore>,
275        _file: &FileObject,
276        _current_task: &CurrentTask,
277    ) -> Result<FdEvents, Errno> {
278        match self {
279            Self::Map(map) => {
280                let events = match map.can_read() {
281                    Some(true) => FdEvents::POLLIN,
282                    Some(false) => FdEvents::empty(),
283                    None => FdEvents::POLLERR,
284                };
285                Ok(events)
286            }
287            _ => error!(EPERM),
288        }
289    }
290}
291
292pub fn get_bpf_object(task: &Task, fd: FdNumber) -> Result<BpfHandle, Errno> {
293    Ok((*task.files.get(fd)?.downcast_file::<BpfHandle>().ok_or_else(|| errno!(EBADF))?).clone())
294}
295pub struct BpfFs;
296impl BpfFs {
297    pub fn new_fs(
298        locked: &mut Locked<Unlocked>,
299        current_task: &CurrentTask,
300        options: FileSystemOptions,
301    ) -> Result<FileSystemHandle, Errno> {
302        let kernel = current_task.kernel();
303        let fs = FileSystem::new(locked, kernel, CacheMode::Permanent, BpfFs, options)?;
304        let root_ino = fs.allocate_ino();
305        fs.create_root_with_info(
306            root_ino,
307            BpfFsDir::new(),
308            FsNodeInfo::new(mode!(IFDIR, 0o777) | FileMode::ISVTX, FsCred::root()),
309        );
310        Ok(fs)
311    }
312}
313
314impl FileSystemOps for BpfFs {
315    fn statfs(
316        &self,
317        _locked: &mut Locked<FileOpsCore>,
318        _fs: &FileSystem,
319        _current_task: &CurrentTask,
320    ) -> Result<statfs, Errno> {
321        Ok(default_statfs(BPF_FS_MAGIC))
322    }
323    fn name(&self) -> &'static FsStr {
324        "bpf".into()
325    }
326
327    fn rename(
328        &self,
329        _locked: &mut Locked<FileOpsCore>,
330        _fs: &FileSystem,
331        _current_task: &CurrentTask,
332        _old_parent: &FsNodeHandle,
333        _old_name: &FsStr,
334        _new_parent: &FsNodeHandle,
335        _new_name: &FsStr,
336        _renamed: &FsNodeHandle,
337        _replaced: Option<&FsNodeHandle>,
338    ) -> Result<(), Errno> {
339        Ok(())
340    }
341}
342
343pub struct BpfFsDir {
344    xattrs: MemoryXattrStorage,
345}
346
347impl BpfFsDir {
348    fn new() -> Self {
349        Self { xattrs: MemoryXattrStorage::default() }
350    }
351
352    pub fn register_pin<L>(
353        &self,
354        locked: &mut Locked<L>,
355        current_task: &CurrentTask,
356        node: &NamespaceNode,
357        name: &FsStr,
358        object: BpfHandle,
359    ) -> Result<(), Errno>
360    where
361        L: LockEqualOrBefore<FileOpsCore>,
362    {
363        node.entry.create_entry(
364            locked,
365            current_task,
366            &node.mount,
367            name,
368            |_locked, dir, _mount, _name| {
369                Ok(dir.fs().create_node_and_allocate_node_id(
370                    BpfFsObject::new(object),
371                    FsNodeInfo::new(mode!(IFREG, 0o600), current_task.current_fscred()),
372                ))
373            },
374        )?;
375        Ok(())
376    }
377}
378
379impl FsNodeOps for BpfFsDir {
380    fs_node_impl_xattr_delegate!(self, self.xattrs);
381
382    fn create_file_ops(
383        &self,
384        _locked: &mut Locked<FileOpsCore>,
385        _node: &FsNode,
386        _current_task: &CurrentTask,
387        _flags: OpenFlags,
388    ) -> Result<Box<dyn FileOps>, Errno> {
389        Ok(Box::new(MemoryDirectoryFile::new()))
390    }
391
392    fn mkdir(
393        &self,
394        _locked: &mut Locked<FileOpsCore>,
395        node: &FsNode,
396        _current_task: &CurrentTask,
397        _name: &FsStr,
398        mode: FileMode,
399        owner: FsCred,
400    ) -> Result<FsNodeHandle, Errno> {
401        Ok(node.fs().create_node_and_allocate_node_id(
402            BpfFsDir::new(),
403            FsNodeInfo::new(mode | FileMode::ISVTX, owner),
404        ))
405    }
406
407    fn mknod(
408        &self,
409        _locked: &mut Locked<FileOpsCore>,
410        _node: &FsNode,
411        _current_task: &CurrentTask,
412        _name: &FsStr,
413        _mode: FileMode,
414        _dev: DeviceType,
415        _owner: FsCred,
416    ) -> Result<FsNodeHandle, Errno> {
417        error!(EPERM)
418    }
419
420    fn create_symlink(
421        &self,
422        _locked: &mut Locked<FileOpsCore>,
423        _node: &FsNode,
424        _current_task: &CurrentTask,
425        _name: &FsStr,
426        _target: &FsStr,
427        _owner: FsCred,
428    ) -> Result<FsNodeHandle, Errno> {
429        error!(EPERM)
430    }
431
432    fn link(
433        &self,
434        _locked: &mut Locked<FileOpsCore>,
435        _node: &FsNode,
436        _current_task: &CurrentTask,
437        _name: &FsStr,
438        _child: &FsNodeHandle,
439    ) -> Result<(), Errno> {
440        Ok(())
441    }
442
443    fn unlink(
444        &self,
445        _locked: &mut Locked<FileOpsCore>,
446        _node: &FsNode,
447        _current_task: &CurrentTask,
448        _name: &FsStr,
449        _child: &FsNodeHandle,
450    ) -> Result<(), Errno> {
451        Ok(())
452    }
453}
454
455pub struct BpfFsObject {
456    pub handle: BpfHandle,
457    xattrs: MemoryXattrStorage,
458}
459
460impl BpfFsObject {
461    fn new(handle: BpfHandle) -> Self {
462        Self { handle, xattrs: MemoryXattrStorage::default() }
463    }
464}
465
466impl FsNodeOps for BpfFsObject {
467    fs_node_impl_not_dir!();
468    fs_node_impl_xattr_delegate!(self, self.xattrs);
469
470    fn create_file_ops(
471        &self,
472        _locked: &mut Locked<FileOpsCore>,
473        _node: &FsNode,
474        _current_task: &CurrentTask,
475        _flags: OpenFlags,
476    ) -> Result<Box<dyn FileOps>, Errno> {
477        error!(EIO)
478    }
479}
480
481/// Resolves a pinned BPF object from a path, returning the handle and the node.
482/// Performs DAC and MAC checks using the specified `open_flags `. Also updates
483/// atime unless `NOATIME` flag is set.
484pub fn resolve_pinned_bpf_object(
485    locked: &mut Locked<Unlocked>,
486    current_task: &CurrentTask,
487    path: &BStr,
488    open_flags: OpenFlags,
489) -> Result<(BpfHandle, NamespaceNode), Errno> {
490    let node = current_task.lookup_path_from_root(locked, path.as_ref())?;
491
492    let permission_flags = PermissionFlags::from(open_flags);
493    node.check_access(locked, current_task, permission_flags, CheckAccessReason::Access)?;
494
495    let object = node.entry.node.downcast_ops::<BpfFsObject>().ok_or_else(|| errno!(EPERM))?;
496    object.handle.security_check_open_fd(current_task, Some(permission_flags))?;
497
498    if !open_flags.contains(OpenFlags::NOATIME) {
499        node.update_atime();
500    }
501
502    Ok((object.handle.clone(), node))
503}