starnix_core/bpf/
fs.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// TODO(https://github.com/rust-lang/rust/issues/39371): remove
6#![allow(non_upper_case_globals)]
7
8use crate::bpf::syscalls::BpfTypeFormat;
9use crate::bpf::{BpfMapHandle, ProgramHandle};
10use crate::mm::memory::MemoryObject;
11use crate::mm::{PAGE_SIZE, ProtectionFlags};
12use crate::security::{self, PermissionFlags};
13use crate::task::{
14    CurrentTask, EventHandler, SignalHandler, SignalHandlerInner, Task, WaitCanceler, Waiter,
15};
16use crate::vfs::buffers::{InputBuffer, OutputBuffer};
17use crate::vfs::{
18    CacheMode, CheckAccessReason, FdNumber, FileObject, FileOps, FileSystem, FileSystemHandle,
19    FileSystemOps, FileSystemOptions, FsNode, FsNodeHandle, FsNodeInfo, FsNodeOps, FsStr,
20    MemoryDirectoryFile, MemoryXattrStorage, NamespaceNode, XattrStorage as _,
21    fileops_impl_nonseekable, fileops_impl_noop_sync, fs_node_impl_not_dir,
22    fs_node_impl_xattr_delegate,
23};
24use bstr::BStr;
25use ebpf::{MapFlags, MapSchema};
26use ebpf_api::{RINGBUF_SIGNAL, compute_map_storage_size};
27use starnix_logging::track_stub;
28use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
29use starnix_types::vfs::default_statfs;
30use starnix_uapi::auth::FsCred;
31use starnix_uapi::device_type::DeviceType;
32use starnix_uapi::errors::Errno;
33use starnix_uapi::file_mode::{FileMode, mode};
34use starnix_uapi::math::round_up_to_increment;
35use starnix_uapi::open_flags::OpenFlags;
36use starnix_uapi::vfs::FdEvents;
37use starnix_uapi::{
38    BPF_FS_MAGIC, bpf_map_type_BPF_MAP_TYPE_ARRAY, bpf_map_type_BPF_MAP_TYPE_RINGBUF, errno, error,
39    statfs,
40};
41use std::sync::Arc;
42use zx::AsHandleRef;
43
/// A reference to a BPF object that can be stored in either an FD or an entry in the /sys/fs/bpf
/// filesystem.
///
/// The handle is `Clone`, so the same object can be referenced from several
/// places at once (for example a pinned node and an open file).
#[derive(Debug, Clone)]
pub enum BpfHandle {
    /// A handle to a BPF program.
    Program(ProgramHandle),

    /// Stub used to fake loading of programs of unknown types.
    // NOTE(review): the `u32` payload is presumably the program type - confirm against the
    // code that constructs this variant.
    ProgramStub(u32),

    /// A handle to a BPF map. This is the only variant for which `get_map_vmo` succeeds.
    Map(BpfMapHandle),

    /// A shared, reference-counted `BpfTypeFormat` object.
    BpfTypeFormat(Arc<BpfTypeFormat>),
}
56
57impl BpfHandle {
58    pub fn as_map(&self) -> Result<&BpfMapHandle, Errno> {
59        match self {
60            Self::Map(map) => Ok(map),
61            _ => error!(EINVAL),
62        }
63    }
64    pub fn as_program(&self) -> Result<&ProgramHandle, Errno> {
65        match self {
66            Self::Program(program) => Ok(program),
67            _ => error!(EINVAL),
68        }
69    }
70
71    pub fn into_program(self) -> Result<ProgramHandle, Errno> {
72        match self {
73            Self::Program(program) => Ok(program),
74            _ => error!(EINVAL),
75        }
76    }
77
78    // Returns VMO and schema if this handle references a map.
79    fn get_map_vmo(&self) -> Result<(&Arc<zx::Vmo>, MapSchema), Errno> {
80        match self {
81            Self::Map(map) => Ok((map.vmo(), map.schema)),
82            _ => error!(ENODEV),
83        }
84    }
85
86    pub fn type_name(&self) -> &'static str {
87        match self {
88            Self::Map(_) => "bpf-map",
89            Self::Program(_) | Self::ProgramStub(_) => "bpf-prog",
90            Self::BpfTypeFormat(_) => "bpf-type",
91        }
92    }
93
94    /// Performs security-related checks when opening a BPF map. If
95    /// `permission_flags` is `None`, then they are inferred from the map's
96    /// schema. `permission_flags` is ignored for programs.
97    pub(super) fn security_check_open_fd(
98        &self,
99        current_task: &CurrentTask,
100        permission_flags: Option<PermissionFlags>,
101    ) -> Result<(), Errno> {
102        match self {
103            Self::Map(bpf_map) => security::check_bpf_map_access(
104                current_task,
105                &bpf_map,
106                permission_flags.unwrap_or_else(|| bpf_map.schema.flags.into()),
107            ),
108            Self::Program(program) => security::check_bpf_prog_access(current_task, &program),
109            _ => Ok(()),
110        }
111    }
112}
113
114impl From<ProgramHandle> for BpfHandle {
115    fn from(program: ProgramHandle) -> Self {
116        Self::Program(program)
117    }
118}
119
120impl From<BpfMapHandle> for BpfHandle {
121    fn from(map: BpfMapHandle) -> Self {
122        Self::Map(map)
123    }
124}
125
126impl From<BpfTypeFormat> for BpfHandle {
127    fn from(format: BpfTypeFormat) -> Self {
128        Self::BpfTypeFormat(Arc::new(format))
129    }
130}
131
132impl FileOps for BpfHandle {
133    fileops_impl_nonseekable!();
134    fileops_impl_noop_sync!();
135    fn read(
136        &self,
137        _locked: &mut Locked<FileOpsCore>,
138        _file: &FileObject,
139        _current_task: &crate::task::CurrentTask,
140        _offset: usize,
141        _data: &mut dyn OutputBuffer,
142    ) -> Result<usize, Errno> {
143        track_stub!(TODO("https://fxbug.dev/322874229"), "bpf handle read");
144        error!(EINVAL)
145    }
146    fn write(
147        &self,
148        _locked: &mut Locked<FileOpsCore>,
149        _file: &FileObject,
150        _current_task: &crate::task::CurrentTask,
151        _offset: usize,
152        _data: &mut dyn InputBuffer,
153    ) -> Result<usize, Errno> {
154        track_stub!(TODO("https://fxbug.dev/322873841"), "bpf handle write");
155        error!(EINVAL)
156    }
157
158    fn get_memory(
159        &self,
160        locked: &mut Locked<FileOpsCore>,
161        _file: &FileObject,
162        _current_task: &CurrentTask,
163        length: Option<usize>,
164        prot: ProtectionFlags,
165    ) -> Result<Arc<MemoryObject>, Errno> {
166        let (vmo, schema) = self.get_map_vmo()?;
167
168        // Because of the specific condition needed to map this object, the size must be known.
169        let length = length.ok_or_else(|| errno!(EINVAL))?;
170
171        // This cannot be mapped executable.
172        if prot.contains(ProtectionFlags::EXEC) {
173            return error!(EPERM);
174        }
175
176        match schema.map_type {
177            bpf_map_type_BPF_MAP_TYPE_RINGBUF => {
178                let page_size = *PAGE_SIZE as usize;
179                // Starting from the second page, this cannot be mapped writable.
180                if length > page_size {
181                    if prot.contains(ProtectionFlags::WRITE) {
182                        return error!(EPERM);
183                    }
184                    // This cannot be mapped outside of the 2 control pages and the 2 data sections.
185                    if length > 2 * page_size + 2 * schema.max_entries as usize {
186                        return error!(EINVAL);
187                    }
188                }
189
190                self.as_map()?.get_memory(locked, || {
191                    // The first page of the ring buffer VMO is not visible to
192                    // user-space processes. Return a VMO slice that doesn't
193                    // include the first page.
194                    let clone_size = 2 * page_size + schema.max_entries as usize;
195                    let vmo_dup = vmo
196                        .create_child(
197                            zx::VmoChildOptions::SLICE,
198                            page_size as u64,
199                            clone_size as u64,
200                        )
201                        .map_err(|_| errno!(EIO))?
202                        .into();
203                    Ok(Arc::new(MemoryObject::RingBuf(vmo_dup)))
204                })
205            }
206
207            bpf_map_type_BPF_MAP_TYPE_ARRAY => {
208                if !schema.flags.contains(MapFlags::Mmapable) {
209                    return error!(EPERM);
210                }
211
212                let array_size = round_up_to_increment(
213                    compute_map_storage_size(&schema).map_err(|_| errno!(EINVAL))?,
214                    *PAGE_SIZE as usize,
215                )?;
216                if length > array_size {
217                    return error!(EINVAL);
218                }
219
220                self.as_map()?.get_memory(locked, || {
221                    let vmo_dup = vmo
222                        .as_handle_ref()
223                        .duplicate(zx::Rights::SAME_RIGHTS)
224                        .map_err(|_| errno!(EIO))?
225                        .into();
226                    Ok(Arc::new(MemoryObject::Vmo(vmo_dup)))
227                })
228            }
229
230            // Other maps cannot be mmap'ed.
231            _ => error!(ENODEV),
232        }
233    }
234
235    fn wait_async(
236        &self,
237        _locked: &mut Locked<FileOpsCore>,
238        _file: &FileObject,
239        _current_task: &CurrentTask,
240        waiter: &Waiter,
241        events: FdEvents,
242        handler: EventHandler,
243    ) -> Option<WaitCanceler> {
244        let (vmo, schema) = self.get_map_vmo().ok()?;
245
246        // Only ringbuffers can be polled for POLLIN.
247        if schema.map_type != bpf_map_type_BPF_MAP_TYPE_RINGBUF
248            || !events.contains(FdEvents::POLLIN)
249        {
250            return Some(WaitCanceler::new_noop());
251        }
252
253        let handler = SignalHandler {
254            inner: SignalHandlerInner::ZxHandle(|signals| {
255                if signals.contains(RINGBUF_SIGNAL) { FdEvents::POLLIN } else { FdEvents::empty() }
256            }),
257            event_handler: handler,
258            err_code: None,
259        };
260
261        // Reset the signal before waiting. The case when the ring buffer already has some data
262        // is handled by the caller: it should call `query_events` after starting the waiter.
263        vmo.as_handle_ref()
264            .signal(RINGBUF_SIGNAL, zx::Signals::empty())
265            .expect("Failed to set signal or a ring buffer VMO");
266
267        let canceler = waiter
268            .wake_on_zircon_signals(&vmo.as_handle_ref(), RINGBUF_SIGNAL, handler)
269            .expect("Failed to wait for signals on ringbuf VMO");
270        Some(WaitCanceler::new_port(canceler))
271    }
272
273    fn query_events(
274        &self,
275        _locked: &mut Locked<FileOpsCore>,
276        _file: &FileObject,
277        _current_task: &CurrentTask,
278    ) -> Result<FdEvents, Errno> {
279        match self {
280            Self::Map(map) => {
281                let events = match map.can_read() {
282                    Some(true) => FdEvents::POLLIN,
283                    Some(false) => FdEvents::empty(),
284                    None => FdEvents::POLLERR,
285                };
286                Ok(events)
287            }
288            _ => error!(EPERM),
289        }
290    }
291}
292
293pub fn get_bpf_object(task: &Task, fd: FdNumber) -> Result<BpfHandle, Errno> {
294    Ok((*task.files.get(fd)?.downcast_file::<BpfHandle>().ok_or_else(|| errno!(EBADF))?).clone())
295}
296pub struct BpfFs;
297impl BpfFs {
298    pub fn new_fs(
299        locked: &mut Locked<Unlocked>,
300        current_task: &CurrentTask,
301        options: FileSystemOptions,
302    ) -> Result<FileSystemHandle, Errno> {
303        let kernel = current_task.kernel();
304        let fs = FileSystem::new(locked, kernel, CacheMode::Permanent, BpfFs, options)?;
305        let root_ino = fs.allocate_ino();
306        fs.create_root_with_info(
307            root_ino,
308            BpfFsDir::new(),
309            FsNodeInfo::new(mode!(IFDIR, 0o777) | FileMode::ISVTX, FsCred::root()),
310        );
311        Ok(fs)
312    }
313}
314
315impl FileSystemOps for BpfFs {
316    fn statfs(
317        &self,
318        _locked: &mut Locked<FileOpsCore>,
319        _fs: &FileSystem,
320        _current_task: &CurrentTask,
321    ) -> Result<statfs, Errno> {
322        Ok(default_statfs(BPF_FS_MAGIC))
323    }
324    fn name(&self) -> &'static FsStr {
325        "bpf".into()
326    }
327
328    fn rename(
329        &self,
330        _locked: &mut Locked<FileOpsCore>,
331        _fs: &FileSystem,
332        _current_task: &CurrentTask,
333        _old_parent: &FsNodeHandle,
334        _old_name: &FsStr,
335        _new_parent: &FsNodeHandle,
336        _new_name: &FsStr,
337        _renamed: &FsNodeHandle,
338        _replaced: Option<&FsNodeHandle>,
339    ) -> Result<(), Errno> {
340        Ok(())
341    }
342}
343
344pub struct BpfFsDir {
345    xattrs: MemoryXattrStorage,
346}
347
348impl BpfFsDir {
349    fn new() -> Self {
350        Self { xattrs: MemoryXattrStorage::default() }
351    }
352
353    pub fn register_pin<L>(
354        &self,
355        locked: &mut Locked<L>,
356        current_task: &CurrentTask,
357        node: &NamespaceNode,
358        name: &FsStr,
359        object: BpfHandle,
360    ) -> Result<(), Errno>
361    where
362        L: LockEqualOrBefore<FileOpsCore>,
363    {
364        node.entry.create_entry(
365            locked,
366            current_task,
367            &node.mount,
368            name,
369            |_locked, dir, _mount, _name| {
370                Ok(dir.fs().create_node_and_allocate_node_id(
371                    BpfFsObject::new(object),
372                    FsNodeInfo::new(mode!(IFREG, 0o600), current_task.current_fscred()),
373                ))
374            },
375        )?;
376        Ok(())
377    }
378}
379
380impl FsNodeOps for BpfFsDir {
381    fs_node_impl_xattr_delegate!(self, self.xattrs);
382
383    fn create_file_ops(
384        &self,
385        _locked: &mut Locked<FileOpsCore>,
386        _node: &FsNode,
387        _current_task: &CurrentTask,
388        _flags: OpenFlags,
389    ) -> Result<Box<dyn FileOps>, Errno> {
390        Ok(Box::new(MemoryDirectoryFile::new()))
391    }
392
393    fn mkdir(
394        &self,
395        _locked: &mut Locked<FileOpsCore>,
396        node: &FsNode,
397        _current_task: &CurrentTask,
398        _name: &FsStr,
399        mode: FileMode,
400        owner: FsCred,
401    ) -> Result<FsNodeHandle, Errno> {
402        Ok(node.fs().create_node_and_allocate_node_id(
403            BpfFsDir::new(),
404            FsNodeInfo::new(mode | FileMode::ISVTX, owner),
405        ))
406    }
407
408    fn mknod(
409        &self,
410        _locked: &mut Locked<FileOpsCore>,
411        _node: &FsNode,
412        _current_task: &CurrentTask,
413        _name: &FsStr,
414        _mode: FileMode,
415        _dev: DeviceType,
416        _owner: FsCred,
417    ) -> Result<FsNodeHandle, Errno> {
418        error!(EPERM)
419    }
420
421    fn create_symlink(
422        &self,
423        _locked: &mut Locked<FileOpsCore>,
424        _node: &FsNode,
425        _current_task: &CurrentTask,
426        _name: &FsStr,
427        _target: &FsStr,
428        _owner: FsCred,
429    ) -> Result<FsNodeHandle, Errno> {
430        error!(EPERM)
431    }
432
433    fn link(
434        &self,
435        _locked: &mut Locked<FileOpsCore>,
436        _node: &FsNode,
437        _current_task: &CurrentTask,
438        _name: &FsStr,
439        _child: &FsNodeHandle,
440    ) -> Result<(), Errno> {
441        Ok(())
442    }
443
444    fn unlink(
445        &self,
446        _locked: &mut Locked<FileOpsCore>,
447        _node: &FsNode,
448        _current_task: &CurrentTask,
449        _name: &FsStr,
450        _child: &FsNodeHandle,
451    ) -> Result<(), Errno> {
452        Ok(())
453    }
454}
455
456pub struct BpfFsObject {
457    pub handle: BpfHandle,
458    xattrs: MemoryXattrStorage,
459}
460
461impl BpfFsObject {
462    fn new(handle: BpfHandle) -> Self {
463        Self { handle, xattrs: MemoryXattrStorage::default() }
464    }
465}
466
467impl FsNodeOps for BpfFsObject {
468    fs_node_impl_not_dir!();
469    fs_node_impl_xattr_delegate!(self, self.xattrs);
470
471    fn create_file_ops(
472        &self,
473        _locked: &mut Locked<FileOpsCore>,
474        _node: &FsNode,
475        _current_task: &CurrentTask,
476        _flags: OpenFlags,
477    ) -> Result<Box<dyn FileOps>, Errno> {
478        error!(EIO)
479    }
480}
481
482/// Resolves a pinned BPF object from a path, returning the handle and the node.
483/// Performs DAC and MAC checks using the specified `open_flags `. Also updates
484/// atime unless `NOATIME` flag is set.
485pub fn resolve_pinned_bpf_object(
486    locked: &mut Locked<Unlocked>,
487    current_task: &CurrentTask,
488    path: &BStr,
489    open_flags: OpenFlags,
490) -> Result<(BpfHandle, NamespaceNode), Errno> {
491    let node = current_task.lookup_path_from_root(locked, path.as_ref())?;
492
493    let permission_flags = PermissionFlags::from(open_flags);
494    node.check_access(locked, current_task, permission_flags, CheckAccessReason::Access)?;
495
496    let object = node.entry.node.downcast_ops::<BpfFsObject>().ok_or_else(|| errno!(EPERM))?;
497    object.handle.security_check_open_fd(current_task, Some(permission_flags))?;
498
499    if !open_flags.contains(OpenFlags::NOATIME) {
500        node.update_atime();
501    }
502
503    Ok((object.handle.clone(), node))
504}