Skip to main content

starnix_core/bpf/
fs.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// TODO(https://github.com/rust-lang/rust/issues/39371): remove
6#![allow(non_upper_case_globals)]
7
8use crate::bpf::syscalls::BpfTypeFormat;
9use crate::bpf::{BpfMapHandle, ProgramHandle};
10use crate::mm::memory::MemoryObject;
11use crate::mm::{DesiredAddress, MappingOptions, PAGE_SIZE, ProtectionFlags};
12use crate::security::{self, PermissionFlags};
13use crate::task::{
14    CurrentTask, EventHandler, SignalHandler, SignalHandlerInner, Task, WaitCanceler, Waiter,
15};
16use crate::vfs::buffers::{InputBuffer, OutputBuffer};
17use crate::vfs::{
18    CacheMode, CheckAccessReason, FdNumber, FileObject, FileOps, FileSystem, FileSystemHandle,
19    FileSystemOps, FileSystemOptions, FsNode, FsNodeHandle, FsNodeInfo, FsNodeOps, FsStr,
20    MemoryDirectoryFile, MemoryXattrStorage, NamespaceNode, RenameContext, XattrStorage as _,
21    default_mmap, fileops_impl_nonseekable, fileops_impl_noop_sync, fs_node_impl_not_dir,
22    fs_node_impl_xattr_delegate,
23};
24use bstr::BStr;
25use ebpf::{MapFlags, MapSchema};
26use ebpf_api::{RINGBUF_SIGNAL, compute_map_storage_size};
27use starnix_logging::track_stub;
28use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
29use starnix_types::vfs::default_statfs;
30use starnix_uapi::auth::FsCred;
31use starnix_uapi::device_id::DeviceId;
32use starnix_uapi::errors::Errno;
33use starnix_uapi::file_mode::{FileMode, mode};
34use starnix_uapi::math::round_up_to_increment;
35use starnix_uapi::open_flags::OpenFlags;
36use starnix_uapi::user_address::UserAddress;
37use starnix_uapi::vfs::FdEvents;
38use starnix_uapi::{
39    BPF_FS_MAGIC, bpf_map_type_BPF_MAP_TYPE_ARRAY, bpf_map_type_BPF_MAP_TYPE_RINGBUF, errno, error,
40    statfs,
41};
42use std::sync::Arc;
43
44/// A reference to a BPF object that can be stored in either an FD or an entry in the /sys/fs/bpf
45/// filesystem.
46#[derive(Debug, Clone)]
47pub enum BpfHandle {
48    Program(ProgramHandle),
49
50    // Stub used to fake loading of programs of unknown types.
51    ProgramStub(u32),
52
53    Map(BpfMapHandle),
54    BpfTypeFormat(Arc<BpfTypeFormat>),
55}
56
57impl BpfHandle {
58    pub fn as_map(&self) -> Result<&BpfMapHandle, Errno> {
59        match self {
60            Self::Map(map) => Ok(map),
61            _ => error!(EINVAL),
62        }
63    }
64    pub fn as_program(&self) -> Result<&ProgramHandle, Errno> {
65        match self {
66            Self::Program(program) => Ok(program),
67            _ => error!(EINVAL),
68        }
69    }
70
71    pub fn into_program(self) -> Result<ProgramHandle, Errno> {
72        match self {
73            Self::Program(program) => Ok(program),
74            _ => error!(EINVAL),
75        }
76    }
77
78    // Returns VMO and schema if this handle references a map.
79    fn get_map_vmo(&self) -> Result<(&Arc<zx::Vmo>, MapSchema), Errno> {
80        match self {
81            Self::Map(map) => Ok((map.vmo(), map.schema)),
82            _ => error!(ENODEV),
83        }
84    }
85
86    pub fn type_name(&self) -> &'static str {
87        match self {
88            Self::Map(_) => "bpf-map",
89            Self::Program(_) | Self::ProgramStub(_) => "bpf-prog",
90            Self::BpfTypeFormat(_) => "bpf-type",
91        }
92    }
93
94    /// Performs security-related checks when opening a BPF map. If
95    /// `permission_flags` is `None`, then they are inferred from the map's
96    /// schema. `permission_flags` is ignored for programs.
97    pub(super) fn security_check_open_fd(
98        &self,
99        current_task: &CurrentTask,
100        permission_flags: Option<PermissionFlags>,
101    ) -> Result<(), Errno> {
102        match self {
103            Self::Map(bpf_map) => security::check_bpf_map_access(
104                current_task,
105                &bpf_map.security_state,
106                permission_flags.unwrap_or_else(|| bpf_map.schema.flags.into()),
107            ),
108            Self::Program(program) => {
109                security::check_bpf_prog_access(current_task, &program.security_state)
110            }
111            _ => Ok(()),
112        }
113    }
114}
115
116impl From<ProgramHandle> for BpfHandle {
117    fn from(program: ProgramHandle) -> Self {
118        Self::Program(program)
119    }
120}
121
122impl From<BpfMapHandle> for BpfHandle {
123    fn from(map: BpfMapHandle) -> Self {
124        Self::Map(map)
125    }
126}
127
128impl From<BpfTypeFormat> for BpfHandle {
129    fn from(format: BpfTypeFormat) -> Self {
130        Self::BpfTypeFormat(Arc::new(format))
131    }
132}
133
134impl FileOps for BpfHandle {
135    fileops_impl_nonseekable!();
136    fileops_impl_noop_sync!();
137    fn read(
138        &self,
139        _locked: &mut Locked<FileOpsCore>,
140        _file: &FileObject,
141        _current_task: &crate::task::CurrentTask,
142        _offset: usize,
143        _data: &mut dyn OutputBuffer,
144    ) -> Result<usize, Errno> {
145        track_stub!(TODO("https://fxbug.dev/322874229"), "bpf handle read");
146        error!(EINVAL)
147    }
148    fn write(
149        &self,
150        _locked: &mut Locked<FileOpsCore>,
151        _file: &FileObject,
152        _current_task: &crate::task::CurrentTask,
153        _offset: usize,
154        _data: &mut dyn InputBuffer,
155    ) -> Result<usize, Errno> {
156        track_stub!(TODO("https://fxbug.dev/322873841"), "bpf handle write");
157        error!(EINVAL)
158    }
159
160    fn get_memory(
161        &self,
162        locked: &mut Locked<FileOpsCore>,
163        _file: &FileObject,
164        _current_task: &CurrentTask,
165        length: Option<usize>,
166        prot: ProtectionFlags,
167    ) -> Result<Arc<MemoryObject>, Errno> {
168        let (vmo, schema) = self.get_map_vmo()?;
169
170        // Because of the specific condition needed to map this object, the size must be known.
171        let length = length.ok_or_else(|| errno!(EINVAL))?;
172
173        // This cannot be mapped executable.
174        if prot.contains(ProtectionFlags::EXEC) {
175            return error!(EPERM);
176        }
177
178        match schema.map_type {
179            bpf_map_type_BPF_MAP_TYPE_RINGBUF => {
180                let page_size = *PAGE_SIZE as usize;
181                // Starting from the second page, this cannot be mapped writable.
182                if length > page_size {
183                    if prot.contains(ProtectionFlags::WRITE) {
184                        return error!(EPERM);
185                    }
186                    // This cannot be mapped outside of the 2 control pages and the 2 data sections.
187                    if length > 2 * page_size + 2 * schema.max_entries as usize {
188                        return error!(EINVAL);
189                    }
190                }
191
192                self.as_map()?.get_memory(locked, || {
193                    // The first page of the ring buffer VMO is not visible to
194                    // user-space processes. Return a VMO slice that doesn't
195                    // include the first page.
196                    let clone_size = 2 * page_size + schema.max_entries as usize;
197                    let vmo_dup = vmo
198                        .create_child(
199                            zx::VmoChildOptions::SLICE,
200                            page_size as u64,
201                            clone_size as u64,
202                        )
203                        .map_err(|_| errno!(EIO))?
204                        .into();
205                    Ok(Arc::new(MemoryObject::RingBuf(vmo_dup)))
206                })
207            }
208
209            bpf_map_type_BPF_MAP_TYPE_ARRAY => {
210                if !schema.flags.contains(MapFlags::Mmapable) {
211                    return error!(EPERM);
212                }
213
214                let array_size = round_up_to_increment(
215                    compute_map_storage_size(&schema).map_err(|_| errno!(EINVAL))?,
216                    *PAGE_SIZE as usize,
217                )?;
218                if length > array_size {
219                    return error!(EINVAL);
220                }
221
222                self.as_map()?.get_memory(locked, || {
223                    let vmo_dup: zx::Vmo = vmo
224                        .as_handle_ref()
225                        .duplicate_handle(zx::Rights::SAME_RIGHTS)
226                        .map_err(|_| errno!(EIO))?
227                        .into();
228                    Ok(Arc::new(MemoryObject::from(vmo_dup)))
229                })
230            }
231
232            // Other maps cannot be mmap'ed.
233            _ => error!(ENODEV),
234        }
235    }
236
237    fn mmap(
238        &self,
239        locked: &mut Locked<FileOpsCore>,
240        file: &FileObject,
241        current_task: &CurrentTask,
242        addr: DesiredAddress,
243        memory_offset: u64,
244        length: usize,
245        prot_flags: ProtectionFlags,
246        options: MappingOptions,
247        filename: NamespaceNode,
248    ) -> Result<UserAddress, Errno> {
249        let BpfHandle::Map(bpf_map) = &self else {
250            return error!(EINVAL);
251        };
252        security::check_bpf_map_access(
253            current_task,
254            &bpf_map.security_state,
255            PermissionFlags::READ | PermissionFlags::WRITE,
256        )?;
257        default_mmap(
258            locked,
259            file,
260            current_task,
261            addr,
262            memory_offset,
263            length,
264            prot_flags,
265            options,
266            filename,
267        )
268    }
269
270    fn wait_async(
271        &self,
272        _locked: &mut Locked<FileOpsCore>,
273        _file: &FileObject,
274        _current_task: &CurrentTask,
275        waiter: &Waiter,
276        events: FdEvents,
277        handler: EventHandler,
278    ) -> Option<WaitCanceler> {
279        let (vmo, schema) = self.get_map_vmo().ok()?;
280
281        // Only ringbuffers can be polled for POLLIN.
282        if schema.map_type != bpf_map_type_BPF_MAP_TYPE_RINGBUF
283            || !events.contains(FdEvents::POLLIN)
284        {
285            return Some(WaitCanceler::new_noop());
286        }
287
288        let handler = SignalHandler {
289            inner: SignalHandlerInner::ZxHandle(|signals| {
290                if signals.contains(RINGBUF_SIGNAL) { FdEvents::POLLIN } else { FdEvents::empty() }
291            }),
292            event_handler: handler,
293            err_code: None,
294        };
295
296        // Reset the signal before waiting. The case when the ring buffer already has some data
297        // is handled by the caller: it should call `query_events` after starting the waiter.
298        vmo.as_handle_ref()
299            .signal(RINGBUF_SIGNAL, zx::Signals::empty())
300            .expect("Failed to set signal or a ring buffer VMO");
301
302        let canceler = waiter
303            .wake_on_zircon_signals(&vmo.as_handle_ref(), RINGBUF_SIGNAL, handler)
304            .expect("Failed to wait for signals on ringbuf VMO");
305        Some(WaitCanceler::new_port(canceler))
306    }
307
308    fn query_events(
309        &self,
310        _locked: &mut Locked<FileOpsCore>,
311        _file: &FileObject,
312        _current_task: &CurrentTask,
313    ) -> Result<FdEvents, Errno> {
314        match self {
315            Self::Map(map) => {
316                let events = match map.can_read() {
317                    Some(true) => FdEvents::POLLIN,
318                    Some(false) => FdEvents::empty(),
319                    None => FdEvents::POLLERR,
320                };
321                Ok(events)
322            }
323            _ => error!(EPERM),
324        }
325    }
326}
327
328pub fn get_bpf_object(task: &Task, fd: FdNumber) -> Result<BpfHandle, Errno> {
329    Ok((*task
330        .running_state()?
331        .files
332        .get(fd)?
333        .downcast_file::<BpfHandle>()
334        .ok_or_else(|| errno!(EBADF))?)
335    .clone())
336}
337pub struct BpfFs;
338impl BpfFs {
339    pub fn new_fs(
340        locked: &mut Locked<Unlocked>,
341        current_task: &CurrentTask,
342        options: FileSystemOptions,
343    ) -> Result<FileSystemHandle, Errno> {
344        let kernel = current_task.kernel();
345        let fs = FileSystem::new(locked, kernel, CacheMode::Permanent, BpfFs, options)?;
346        let root_ino = fs.allocate_ino();
347        fs.create_root_with_info(
348            root_ino,
349            BpfFsDir::new(),
350            FsNodeInfo::new(mode!(IFDIR, 0o777) | FileMode::ISVTX, FsCred::root()),
351        );
352        Ok(fs)
353    }
354}
355
356impl FileSystemOps for BpfFs {
357    fn statfs(
358        &self,
359        _locked: &mut Locked<FileOpsCore>,
360        _fs: &FileSystem,
361        _current_task: &CurrentTask,
362    ) -> Result<statfs, Errno> {
363        Ok(default_statfs(BPF_FS_MAGIC))
364    }
365    fn name(&self) -> &'static FsStr {
366        "bpf".into()
367    }
368
369    fn rename(
370        &self,
371        _locked: &mut Locked<FileOpsCore>,
372        _fs: &FileSystem,
373        _current_task: &CurrentTask,
374        _context: &mut RenameContext<'_>,
375        _old_name: &FsStr,
376        _new_name: &FsStr,
377    ) -> Result<(), Errno> {
378        Ok(())
379    }
380}
381
382pub struct BpfFsDir {
383    xattrs: MemoryXattrStorage,
384}
385
386impl BpfFsDir {
387    fn new() -> Self {
388        Self { xattrs: MemoryXattrStorage::default() }
389    }
390
391    pub fn register_pin<L>(
392        &self,
393        locked: &mut Locked<L>,
394        current_task: &CurrentTask,
395        node: &NamespaceNode,
396        name: &FsStr,
397        object: BpfHandle,
398    ) -> Result<(), Errno>
399    where
400        L: LockEqualOrBefore<FileOpsCore>,
401    {
402        node.entry.create_entry(
403            locked,
404            current_task,
405            &node.mount,
406            name,
407            |_locked, dir, _mount, _name| {
408                Ok(dir.fs().create_node_and_allocate_node_id(
409                    BpfFsObject::new(object),
410                    FsNodeInfo::new(mode!(IFREG, 0o600), current_task.current_fscred()),
411                ))
412            },
413        )?;
414        Ok(())
415    }
416}
417
418impl FsNodeOps for BpfFsDir {
419    fs_node_impl_xattr_delegate!(self, self.xattrs);
420
421    fn create_file_ops(
422        &self,
423        _locked: &mut Locked<FileOpsCore>,
424        _node: &FsNode,
425        _current_task: &CurrentTask,
426        _flags: OpenFlags,
427    ) -> Result<Box<dyn FileOps>, Errno> {
428        Ok(Box::new(MemoryDirectoryFile::new()))
429    }
430
431    fn mkdir(
432        &self,
433        _locked: &mut Locked<FileOpsCore>,
434        node: &FsNode,
435        _current_task: &CurrentTask,
436        _name: &FsStr,
437        mode: FileMode,
438        owner: FsCred,
439    ) -> Result<FsNodeHandle, Errno> {
440        Ok(node.fs().create_node_and_allocate_node_id(
441            BpfFsDir::new(),
442            FsNodeInfo::new(mode | FileMode::ISVTX, owner),
443        ))
444    }
445
446    fn mknod(
447        &self,
448        _locked: &mut Locked<FileOpsCore>,
449        _node: &FsNode,
450        _current_task: &CurrentTask,
451        _name: &FsStr,
452        _mode: FileMode,
453        _dev: DeviceId,
454        _owner: FsCred,
455    ) -> Result<FsNodeHandle, Errno> {
456        error!(EPERM)
457    }
458
459    fn create_symlink(
460        &self,
461        _locked: &mut Locked<FileOpsCore>,
462        _node: &FsNode,
463        _current_task: &CurrentTask,
464        _name: &FsStr,
465        _target: &FsStr,
466        _owner: FsCred,
467    ) -> Result<FsNodeHandle, Errno> {
468        error!(EPERM)
469    }
470
471    fn link(
472        &self,
473        _locked: &mut Locked<FileOpsCore>,
474        _node: &FsNode,
475        _current_task: &CurrentTask,
476        _name: &FsStr,
477        _child: &FsNodeHandle,
478    ) -> Result<(), Errno> {
479        Ok(())
480    }
481
482    fn unlink(
483        &self,
484        _locked: &mut Locked<FileOpsCore>,
485        _node: &FsNode,
486        _current_task: &CurrentTask,
487        _name: &FsStr,
488        _child: &FsNodeHandle,
489    ) -> Result<(), Errno> {
490        Ok(())
491    }
492}
493
494pub struct BpfFsObject {
495    pub handle: BpfHandle,
496    xattrs: MemoryXattrStorage,
497}
498
499impl BpfFsObject {
500    fn new(handle: BpfHandle) -> Self {
501        Self { handle, xattrs: MemoryXattrStorage::default() }
502    }
503}
504
505impl FsNodeOps for BpfFsObject {
506    fs_node_impl_not_dir!();
507    fs_node_impl_xattr_delegate!(self, self.xattrs);
508
509    fn create_file_ops(
510        &self,
511        _locked: &mut Locked<FileOpsCore>,
512        _node: &FsNode,
513        _current_task: &CurrentTask,
514        _flags: OpenFlags,
515    ) -> Result<Box<dyn FileOps>, Errno> {
516        error!(EIO)
517    }
518}
519
520/// Resolves a pinned BPF object from a path, returning the underlying handle.
521/// Performs DAC and MAC checks using the specified `open_flags `. Also updates
522/// atime unless `NOATIME` flag is set.
523pub fn resolve_pinned_bpf_object(
524    locked: &mut Locked<Unlocked>,
525    current_task: &CurrentTask,
526    path: &BStr,
527    open_flags: OpenFlags,
528) -> Result<BpfHandle, Errno> {
529    let node = current_task.lookup_path_from_root(locked, path.as_ref())?;
530
531    let permission_flags = PermissionFlags::from(open_flags);
532    node.check_access(locked, current_task, permission_flags, CheckAccessReason::Access)?;
533
534    let object = node.entry.node.downcast_ops::<BpfFsObject>().ok_or_else(|| errno!(EPERM))?;
535    object.handle.security_check_open_fd(current_task, Some(permission_flags))?;
536
537    if !open_flags.contains(OpenFlags::NOATIME) {
538        node.update_atime();
539    }
540
541    Ok(object.handle.clone())
542}