Skip to main content

starnix_core/bpf/
attachments.rs

1// Copyright 2025 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// TODO(https://github.com/rust-lang/rust/issues/39371): remove
6#![allow(non_upper_case_globals)]
7
8use crate::bpf::context::EbpfRunContextImpl;
9use crate::bpf::fs::{BpfHandle, get_bpf_object};
10use crate::bpf::program::ProgramHandle;
11use crate::mm::PAGE_SIZE;
12use crate::security;
13use crate::task::CurrentTask;
14use crate::vfs::FdNumber;
15use crate::vfs::socket::{
16    SockOptValue, Socket, SocketDomain, SocketProtocol, SocketType, ZxioBackedSocket,
17};
18use ebpf::{BpfValue, EbpfProgram, EbpfProgramContext, EbpfPtr, ProgramArgument, Type};
19use ebpf_api::{
20    AttachType, BPF_SOCK_ADDR_TYPE, BPF_SOCK_TYPE, BpfSockContext, CgroupSockAddrProgramContext,
21    CgroupSockOptProgramContext, CgroupSockProgramContext, CurrentTaskContext, Map, MapValueRef,
22    MapsContext, PinnedMap, ProgramType, ReturnValueContext, SocketRef,
23};
24use fidl_fuchsia_net_filter as fnet_filter;
25use fuchsia_component::client::connect_to_protocol_sync;
26use linux_uapi::{bpf_sockopt, uaddr};
27use starnix_logging::{log_error, log_warn, track_stub};
28use starnix_sync::{EbpfStateLock, FileOpsCore, Locked, OrderedRwLock, Unlocked};
29use starnix_syscalls::{SUCCESS, SyscallResult};
30use starnix_uapi::auth::{CAP_NET_ADMIN, CAP_SYS_ADMIN, Capabilities};
31use starnix_uapi::errors::{Errno, ErrnoCode, is_error_return_value};
32use starnix_uapi::{
33    CGROUP2_SUPER_MAGIC, bpf_attr__bindgen_ty_6, bpf_sock, bpf_sock_addr, errno, error, gid_t,
34    pid_t, uid_t,
35};
36use std::ops::{Deref, DerefMut};
37use std::sync::{Arc, OnceLock};
38use zerocopy::FromBytes;
39
/// Attribute payload accepted by `bpf_prog_attach()` and `bpf_prog_detach()`;
/// an alias for the bindgen-generated `bpf_attr` member.
pub type BpfAttachAttr = bpf_attr__bindgen_ty_6;
41
42fn check_root_cgroup_fd(
43    locked: &mut Locked<Unlocked>,
44    current_task: &CurrentTask,
45    cgroup_fd: FdNumber,
46) -> Result<(), Errno> {
47    let file = current_task.get_file(cgroup_fd)?;
48
49    // Check that `cgroup_fd` is from the CGROUP2 file system.
50    let is_cgroup =
51        file.node().fs().statfs(locked, current_task)?.f_type == CGROUP2_SUPER_MAGIC as i64;
52    if !is_cgroup {
53        log_warn!("bpf_prog_attach(BPF_PROG_ATTACH) is called with an invalid cgroup2 FD.");
54        return error!(EINVAL);
55    }
56
57    // Currently cgroup attachments are supported only for the root cgroup.
58    // TODO(https://fxbug.dev//388077431) Allow attachments to any cgroup once cgroup
59    // hierarchy is moved to starnix_core.
60    let is_root = file
61        .node()
62        .fs()
63        .maybe_root()
64        .map(|root| Arc::ptr_eq(&root.node, file.node()))
65        .unwrap_or(false);
66    if !is_root {
67        log_warn!("bpf_prog_attach(BPF_PROG_ATTACH) is supported only for root cgroup.");
68        return error!(EINVAL);
69    }
70
71    Ok(())
72}
73
74pub fn bpf_prog_attach(
75    locked: &mut Locked<Unlocked>,
76    current_task: &CurrentTask,
77    attr: BpfAttachAttr,
78) -> Result<SyscallResult, Errno> {
79    // SAFETY: reading i32 field from a union is always safe.
80    let bpf_fd = FdNumber::from_raw(attr.attach_bpf_fd as i32);
81    let object = get_bpf_object(current_task, bpf_fd)?;
82    if matches!(object, BpfHandle::ProgramStub(_)) {
83        log_warn!("Stub program. Faking successful attach");
84        return Ok(SUCCESS);
85    }
86    let program = object.as_program()?.clone();
87
88    if !security::is_task_capable_noaudit(current_task, CAP_SYS_ADMIN) {
89        let required_caps = get_capability_for_program(program.info.program_type)?;
90        security::check_task_capable(current_task, required_caps)?;
91    }
92
93    let attach_type = AttachType::from(attr.attach_type);
94    let program_type = program.info.program_type;
95    if attach_type.get_program_type() != program_type {
96        log_warn!(
97            "bpf_prog_attach(BPF_PROG_ATTACH): program not compatible with attach_type \
98                   attach_type: {attach_type:?}, program_type: {program_type:?}"
99        );
100        return error!(EINVAL);
101    }
102
103    if !attach_type.is_compatible_with_expected_attach_type(program.info.expected_attach_type) {
104        log_warn!(
105            "bpf_prog_attach(BPF_PROG_ATTACH): expected_attach_type didn't match attach_type \
106                   expected_attach_type: {:?}, attach_type: {:?}",
107            program.info.expected_attach_type,
108            attach_type
109        );
110        return error!(EINVAL);
111    }
112
113    // SAFETY: reading i32 field from a union is always safe.
114    let target_fd = unsafe { attr.__bindgen_anon_1.target_fd };
115    let target_fd = FdNumber::from_raw(target_fd as i32);
116
117    current_task.kernel().ebpf_state.attachments.attach_prog(
118        locked,
119        current_task,
120        attach_type,
121        target_fd,
122        program,
123    )
124}
125
126pub fn bpf_prog_detach(
127    locked: &mut Locked<Unlocked>,
128    current_task: &CurrentTask,
129    attr: BpfAttachAttr,
130) -> Result<SyscallResult, Errno> {
131    let attach_type = AttachType::from(attr.attach_type);
132
133    // SAFETY: reading i32 field from a union is always safe.
134    let target_fd = unsafe { attr.__bindgen_anon_1.target_fd };
135    let target_fd = FdNumber::from_raw(target_fd as i32);
136
137    current_task.kernel().ebpf_state.attachments.detach_prog(
138        locked,
139        current_task,
140        attach_type,
141        target_fd,
142    )
143}
144
145// Wrapper for `bpf_sock_addr` used to implement `ProgramArgument` trait.
146#[repr(C)]
147pub struct BpfSockAddr<'a> {
148    sock_addr: bpf_sock_addr,
149
150    bpf_sock: &'a BpfSock<'a>,
151}
152
153impl<'a> Deref for BpfSockAddr<'a> {
154    type Target = bpf_sock_addr;
155    fn deref(&self) -> &Self::Target {
156        &self.sock_addr
157    }
158}
159
160impl<'a> DerefMut for BpfSockAddr<'a> {
161    fn deref_mut(&mut self) -> &mut Self::Target {
162        &mut self.sock_addr
163    }
164}
165
166impl<'a> ProgramArgument for &'_ mut BpfSockAddr<'a> {
167    fn get_type() -> &'static Type {
168        &*BPF_SOCK_ADDR_TYPE
169    }
170}
171
172impl<'a, 'b> SocketRef for &'a mut BpfSockAddr<'a> {
173    fn get_socket_cookie(&self) -> Option<u64> {
174        self.bpf_sock.get_socket_cookie()
175    }
176
177    fn get_socket_uid(&self) -> Option<uid_t> {
178        self.bpf_sock.get_socket_uid()
179    }
180}
181
// Context for eBPF programs of type BPF_PROG_TYPE_CGROUP_SOCKADDR.
// Newtype around the linked program; see `SockAddrProgram::run()`.
struct SockAddrProgram(EbpfProgram<SockAddrProgram>);

impl EbpfProgramContext for SockAddrProgram {
    type RunContext<'a> = EbpfRunContextImpl<'a>;
    type Packet<'a> = ();
    // The single program argument is a mutable `BpfSockAddr`.
    type Arg1<'a> = &'a mut BpfSockAddr<'a>;
    // Remaining argument slots are unused.
    type Arg2<'a> = ();
    type Arg3<'a> = ();
    type Arg4<'a> = ();
    type Arg5<'a> = ();

    type Map = PinnedMap;
}

ebpf_api::ebpf_program_context_type!(SockAddrProgram, CgroupSockAddrProgramContext);
198
/// Verdict produced by running a `SockAddrProgram`.
#[derive(Debug, PartialEq, Eq)]
pub enum SockAddrProgramResult {
    /// The program returned 1, no program was attached, or the program
    /// returned a value that is not valid for the operation.
    Allow,
    /// The program returned 0 for an operation that is allowed to block.
    Block,
}
204
205impl SockAddrProgram {
206    fn run<'a>(
207        &self,
208        locked: &'a mut Locked<EbpfStateLock>,
209        current_task: &'a CurrentTask,
210        addr: &'a mut BpfSockAddr<'a>,
211        can_block: bool,
212    ) -> SockAddrProgramResult {
213        let mut run_context = EbpfRunContextImpl::new(locked, current_task);
214        match self.0.run_with_1_argument(&mut run_context, addr) {
215            // UDP_RECVMSG programs are not allowed to block the packet.
216            0 if can_block => SockAddrProgramResult::Block,
217            1 => SockAddrProgramResult::Allow,
218            result => {
219                // TODO(https://fxbug.dev/413490751): Change this to panic once
220                // result validation is implemented in the eBPF verifier.
221                log_error!("eBPF program returned invalid result: {}", result);
222                SockAddrProgramResult::Allow
223            }
224        }
225    }
226}
227
228type AttachedSockAddrProgramCell = OrderedRwLock<Option<SockAddrProgram>, EbpfStateLock>;
229
230// Wrapper for `bpf_sock` used to implement `ProgramArgument` trait.
231#[repr(C)]
232pub struct BpfSock<'a> {
233    // Must be first field.
234    value: bpf_sock,
235
236    socket: Option<&'a ZxioBackedSocket>,
237}
238
239impl<'a> BpfSock<'a> {
240    fn from_socket(socket: &'a Socket) -> Self {
241        Self {
242            value: bpf_sock {
243                family: socket.domain.as_raw().into(),
244                type_: socket.socket_type.as_raw(),
245                protocol: socket.protocol.as_raw(),
246                ..Default::default()
247            },
248            socket: socket.downcast_socket(),
249        }
250    }
251}
252
253impl<'a> Deref for BpfSock<'a> {
254    type Target = bpf_sock;
255    fn deref(&self) -> &Self::Target {
256        &self.value
257    }
258}
259
260impl<'a> DerefMut for BpfSock<'a> {
261    fn deref_mut(&mut self) -> &mut Self::Target {
262        &mut self.value
263    }
264}
265
266impl<'a> ProgramArgument for &'_ BpfSock<'a> {
267    fn get_type() -> &'static Type {
268        &*BPF_SOCK_TYPE
269    }
270}
271
272impl<'a> SocketRef for &'_ BpfSock<'a> {
273    fn get_socket_cookie(&self) -> Option<u64> {
274        self.socket.and_then(|socket| {
275            socket
276                .get_socket_cookie()
277                .inspect_err(|errno| log_error!("Failed to get socket cookie: {:?}", errno))
278                .ok()
279        })
280    }
281
282    fn get_socket_uid(&self) -> Option<uid_t> {
283        self.socket.map(|socket| socket.uid())
284    }
285}
286
// Context for eBPF programs of type BPF_PROG_TYPE_CGROUP_SOCK.
// Newtype around the linked program; see `SockProgram::run()`.
struct SockProgram(EbpfProgram<SockProgram>);

impl EbpfProgramContext for SockProgram {
    type RunContext<'a> = EbpfRunContextImpl<'a>;
    type Packet<'a> = ();
    // The single program argument is a shared reference to a `BpfSock`.
    type Arg1<'a> = &'a BpfSock<'a>;
    // Remaining argument slots are unused.
    type Arg2<'a> = ();
    type Arg3<'a> = ();
    type Arg4<'a> = ();
    type Arg5<'a> = ();

    type Map = PinnedMap;
}

ebpf_api::ebpf_program_context_type!(SockProgram, CgroupSockProgramContext);
303
/// Verdict produced by running a `SockProgram`.
#[derive(Debug, PartialEq, Eq)]
pub enum SockProgramResult {
    /// The program returned a non-zero value, or no program was attached.
    Allow,
    /// The program returned 0.
    Block,
}
309
310impl SockProgram {
311    fn run<'a>(
312        &self,
313        locked: &mut Locked<EbpfStateLock>,
314        current_task: &'a CurrentTask,
315        sock: &'a BpfSock<'a>,
316    ) -> SockProgramResult {
317        let mut run_context = EbpfRunContextImpl::new(locked, current_task);
318        if self.0.run_with_1_argument(&mut run_context, sock) == 0 {
319            SockProgramResult::Block
320        } else {
321            SockProgramResult::Allow
322        }
323    }
324}
325
326type AttachedSockProgramCell = OrderedRwLock<Option<SockProgram>, EbpfStateLock>;
327
328mod internal {
329    use super::BpfSock;
330    use ebpf::{BpfValue, EbpfPtr, ProgramArgument, Type};
331    use ebpf_api::BPF_SOCKOPT_TYPE;
332    use starnix_uapi::{bpf_sockopt, uaddr};
333    use std::ops::Deref;
334    use zerocopy::{FromBytes, IntoBytes};
335
336    // Wrapper for `bpf_sockopt` that implements `ProgramArgument` trait.
337    #[repr(C)]
338    #[derive(IntoBytes, FromBytes)]
339    pub struct BpfSockOpt(bpf_sockopt);
340
341    impl ProgramArgument for &'_ mut BpfSockOpt {
342        fn get_type() -> &'static Type {
343            &*BPF_SOCKOPT_TYPE
344        }
345    }
346
347    /// Wrapper for `bpf_sockopt` that keeps a buffer for the `optval`.
348    pub struct BpfSockOptWithValue {
349        sockopt: BpfSockOpt,
350
351        // Buffer used to store the option value. A pointer to the buffer
352        // contents is stored in `sockopt`. `Vec::as_mut_ptr()` guarantees that
353        // the pointer remains valid only as long as the `Vec` is not modified,
354        // so this field should not be updated directly. `take_value()` can be
355        // used to extract the value when `BpfSockOpt` is no longer needed.
356        value_buf: Vec<u8>,
357    }
358
359    impl BpfSockOptWithValue {
360        pub fn new(
361            level: u32,
362            optname: u32,
363            value_buf: Vec<u8>,
364            optlen: u32,
365            retval: i32,
366            sock: *const BpfSock<'_>,
367        ) -> Self {
368            let mut sockopt = Self {
369                sockopt: BpfSockOpt(bpf_sockopt {
370                    level: level as i32,
371                    optname: optname as i32,
372                    optlen: optlen as i32,
373                    retval: retval as i32,
374                    ..Default::default()
375                }),
376                value_buf,
377            };
378
379            // SAFETY: Setting buffer bounds in unions is safe.
380            unsafe {
381                sockopt.sockopt.0.__bindgen_anon_2.optval =
382                    uaddr { addr: sockopt.value_buf.as_mut_ptr() as u64 };
383                sockopt.sockopt.0.__bindgen_anon_3.optval_end = uaddr {
384                    addr: sockopt.value_buf.as_mut_ptr().add(sockopt.value_buf.len()) as u64,
385                };
386            }
387
388            sockopt.sockopt.0.__bindgen_anon_1.sk =
389                (uaddr { addr: BpfValue::from(sock).into() }).into();
390
391            sockopt
392        }
393
394        pub fn as_ptr<'a>(&'a mut self) -> EbpfPtr<'a, BpfSockOpt> {
395            EbpfPtr::from(&mut self.sockopt)
396        }
397
398        // Returns the value. Consumes `self` since it's not safe to use again
399        // after the value buffer is moved.
400        pub fn take_value(self) -> Vec<u8> {
401            self.value_buf
402        }
403    }
404
405    impl Deref for BpfSockOptWithValue {
406        type Target = bpf_sockopt;
407        fn deref(&self) -> &Self::Target {
408            &self.sockopt.0
409        }
410    }
411}
412
413use internal::{BpfSockOpt, BpfSockOptWithValue};
414
// Context for eBPF programs of type BPF_PROG_TYPE_CGROUP_SOCKOPT.
// Newtype around the linked program; see `SockOptProgram::run()`.
struct SockOptProgram(EbpfProgram<SockOptProgram>);

// RunContext for eBPF programs of type BPF_PROG_TYPE_CGROUP_SOCKOPT.
// Wraps the generic `EbpfRunContextImpl` and additionally gives access to the
// `retval` field of the program argument.
pub struct SockOptEbpfRunContextImpl<'a> {
    ebpf_run_context: EbpfRunContextImpl<'a>,

    // Pointer to the BpfSockOpt passed to the program. Used for
    // `bpf_set_retval` and `bpf_get_retval`.
    sockopt: EbpfPtr<'a, BpfSockOpt>,
}

// Byte offset of `retval` within `bpf_sockopt`; used to address that field
// through `EbpfPtr::get_field()`.
const BPF_SOCKOPT_RETVAL_OFFSET: usize = std::mem::offset_of!(bpf_sockopt, retval);

impl<'a> SockOptEbpfRunContextImpl<'a> {
    /// Creates a run context for `current_task` in which `sockopt` is the
    /// object that `set_retval()` / `get_retval()` operate on.
    pub fn new(
        locked: &'a mut Locked<EbpfStateLock>,
        current_task: &'a CurrentTask,
        sockopt: EbpfPtr<'a, BpfSockOpt>,
    ) -> Self {
        Self { ebpf_run_context: EbpfRunContextImpl::new(locked, current_task), sockopt }
    }
}
438
439impl<'a> MapsContext<'a> for SockOptEbpfRunContextImpl<'a> {
440    fn on_map_access(&mut self, map: &Map) {
441        self.ebpf_run_context.on_map_access(map);
442    }
443    fn add_value_ref(&mut self, map_ref: MapValueRef<'a>) {
444        self.ebpf_run_context.add_value_ref(map_ref);
445    }
446}
447
448impl<'a> CurrentTaskContext for SockOptEbpfRunContextImpl<'a> {
449    fn get_uid_gid(&self) -> (uid_t, gid_t) {
450        self.ebpf_run_context.get_uid_gid()
451    }
452    fn get_tid_tgid(&self) -> (pid_t, pid_t) {
453        self.ebpf_run_context.get_tid_tgid()
454    }
455}
456
457impl<'a> ReturnValueContext for SockOptEbpfRunContextImpl<'a> {
458    fn set_retval(&mut self, value: i32) -> i32 {
459        let sockopt = self.sockopt.get_field::<i32, BPF_SOCKOPT_RETVAL_OFFSET>();
460        sockopt.store_relaxed(value);
461        0
462    }
463    fn get_retval(&self) -> i32 {
464        let sockopt = self.sockopt.get_field::<i32, BPF_SOCKOPT_RETVAL_OFFSET>();
465        sockopt.load_relaxed()
466    }
467}
468
469impl<'a> BpfSockContext for SockOptEbpfRunContextImpl<'a> {
470    type BpfSockRef = &'a BpfSock<'a>;
471}
472
impl EbpfProgramContext for SockOptProgram {
    // Uses the sockopt-specific run context, which provides retval access.
    type RunContext<'a> = SockOptEbpfRunContextImpl<'a>;
    type Packet<'a> = ();
    // The single program argument is a pointer to the `BpfSockOpt`.
    type Arg1<'a> = EbpfPtr<'a, BpfSockOpt>;
    // Remaining argument slots are unused.
    type Arg2<'a> = ();
    type Arg3<'a> = ();
    type Arg4<'a> = ();
    type Arg5<'a> = ();

    type Map = PinnedMap;
}

ebpf_api::ebpf_program_context_type!(SockOptProgram, CgroupSockOptProgramContext);
486
/// Outcome of `CgroupEbpfProgramSet::run_setsockopt_prog()`.
#[derive(Debug)]
pub enum SetSockOptProgramResult {
    /// Fail the syscall.
    Fail(Errno),

    /// Proceed with the specified option value. This is either the caller's
    /// original value or the value taken from the program's buffer.
    Allow(SockOptValue),

    /// Return to userspace without invoking the underlying implementation of
    /// setsockopt.
    Bypass,
}
499
500impl SockOptProgram {
501    fn run<'a>(
502        &self,
503        locked: &mut Locked<EbpfStateLock>,
504        current_task: &'a CurrentTask,
505        sockopt: &'a mut BpfSockOptWithValue,
506    ) -> u64 {
507        let sockopt_ptr = sockopt.as_ptr();
508        let mut run_context = SockOptEbpfRunContextImpl::new(locked, current_task, sockopt_ptr);
509        self.0.run_with_1_argument(&mut run_context, sockopt_ptr)
510    }
511}
512
513type AttachedSockOptProgramCell = OrderedRwLock<Option<SockOptProgram>, EbpfStateLock>;
514
/// Set of cgroup eBPF program slots, one per attach type handled in this file.
/// Each slot holds at most one program behind its own lock; the default value
/// has every slot empty.
#[derive(Default)]
pub struct CgroupEbpfProgramSet {
    // BPF_PROG_TYPE_CGROUP_SOCKADDR programs, per operation and IP version.
    inet4_bind: AttachedSockAddrProgramCell,
    inet6_bind: AttachedSockAddrProgramCell,
    inet4_connect: AttachedSockAddrProgramCell,
    inet6_connect: AttachedSockAddrProgramCell,
    udp4_sendmsg: AttachedSockAddrProgramCell,
    udp6_sendmsg: AttachedSockAddrProgramCell,
    udp4_recvmsg: AttachedSockAddrProgramCell,
    udp6_recvmsg: AttachedSockAddrProgramCell,
    // BPF_PROG_TYPE_CGROUP_SOCK programs.
    sock_create: AttachedSockProgramCell,
    sock_release: AttachedSockProgramCell,
    // BPF_PROG_TYPE_CGROUP_SOCKOPT programs.
    set_sockopt: AttachedSockOptProgramCell,
    get_sockopt: AttachedSockOptProgramCell,
}
530
/// Socket-address operation for which `run_sock_addr_prog()` selects a program.
#[derive(Eq, PartialEq, Debug, Copy, Clone)]
pub enum SockAddrOp {
    Bind,
    Connect,
    UdpSendMsg,
    // Programs run for this operation are not allowed to block.
    UdpRecvMsg,
}
538
/// Socket lifecycle operation for which `run_sock_prog()` selects a program.
#[derive(Eq, PartialEq, Debug, Copy, Clone)]
pub enum SockOp {
    Create,
    Release,
}
544
545impl CgroupEbpfProgramSet {
546    fn get_sock_addr_program(
547        &self,
548        attach_type: AttachType,
549    ) -> Result<&AttachedSockAddrProgramCell, Errno> {
550        assert!(attach_type.is_cgroup());
551
552        match attach_type {
553            AttachType::CgroupInet4Bind => Ok(&self.inet4_bind),
554            AttachType::CgroupInet6Bind => Ok(&self.inet6_bind),
555            AttachType::CgroupInet4Connect => Ok(&self.inet4_connect),
556            AttachType::CgroupInet6Connect => Ok(&self.inet6_connect),
557            AttachType::CgroupUdp4Sendmsg => Ok(&self.udp4_sendmsg),
558            AttachType::CgroupUdp6Sendmsg => Ok(&self.udp6_sendmsg),
559            AttachType::CgroupUdp4Recvmsg => Ok(&self.udp4_recvmsg),
560            AttachType::CgroupUdp6Recvmsg => Ok(&self.udp6_recvmsg),
561            _ => error!(ENOTSUP),
562        }
563    }
564
565    fn get_sock_program(&self, attach_type: AttachType) -> Result<&AttachedSockProgramCell, Errno> {
566        assert!(attach_type.is_cgroup());
567
568        match attach_type {
569            AttachType::CgroupInetSockCreate => Ok(&self.sock_create),
570            AttachType::CgroupInetSockRelease => Ok(&self.sock_release),
571            _ => error!(ENOTSUP),
572        }
573    }
574
575    fn get_sock_opt_program(
576        &self,
577        attach_type: AttachType,
578    ) -> Result<&AttachedSockOptProgramCell, Errno> {
579        assert!(attach_type.is_cgroup());
580
581        match attach_type {
582            AttachType::CgroupSetsockopt => Ok(&self.set_sockopt),
583            AttachType::CgroupGetsockopt => Ok(&self.get_sockopt),
584            _ => error!(ENOTSUP),
585        }
586    }
587
588    // Executes eBPF program for the operation `op`. `socket_address` contains
589    // socket address as a `sockaddr` struct.
590    pub fn run_sock_addr_prog(
591        &self,
592        locked: &mut Locked<FileOpsCore>,
593        current_task: &CurrentTask,
594        op: SockAddrOp,
595        domain: SocketDomain,
596        socket_type: SocketType,
597        protocol: SocketProtocol,
598        socket_address: &[u8],
599        socket: &Socket,
600    ) -> Result<SockAddrProgramResult, Errno> {
601        let prog_cell = match (domain, op) {
602            (SocketDomain::Inet, SockAddrOp::Bind) => &self.inet4_bind,
603            (SocketDomain::Inet6, SockAddrOp::Bind) => &self.inet6_bind,
604            (SocketDomain::Inet, SockAddrOp::Connect) => &self.inet4_connect,
605            (SocketDomain::Inet6, SockAddrOp::Connect) => &self.inet6_connect,
606            (SocketDomain::Inet, SockAddrOp::UdpSendMsg) => &self.udp4_sendmsg,
607            (SocketDomain::Inet6, SockAddrOp::UdpSendMsg) => &self.udp6_sendmsg,
608            (SocketDomain::Inet, SockAddrOp::UdpRecvMsg) => &self.udp4_recvmsg,
609            (SocketDomain::Inet6, SockAddrOp::UdpRecvMsg) => &self.udp6_recvmsg,
610            _ => return Ok(SockAddrProgramResult::Allow),
611        };
612
613        let (prog_guard, locked) = prog_cell.read_and(locked);
614        let Some(prog) = prog_guard.as_ref() else {
615            return Ok(SockAddrProgramResult::Allow);
616        };
617
618        let bpf_sock = BpfSock::from_socket(socket);
619
620        let mut bpf_sockaddr = BpfSockAddr { sock_addr: Default::default(), bpf_sock: &bpf_sock };
621        bpf_sockaddr.family = domain.as_raw().into();
622        bpf_sockaddr.type_ = socket_type.as_raw();
623        bpf_sockaddr.protocol = protocol.as_raw();
624
625        let (sa_family, _) = u16::read_from_prefix(socket_address).map_err(|_| errno!(EINVAL))?;
626
627        if domain.as_raw() != sa_family {
628            return error!(EAFNOSUPPORT);
629        }
630        bpf_sockaddr.user_family = sa_family.into();
631
632        match sa_family.into() {
633            linux_uapi::AF_INET => {
634                let (sockaddr, _) = linux_uapi::sockaddr_in::ref_from_prefix(socket_address)
635                    .map_err(|_| errno!(EINVAL))?;
636                bpf_sockaddr.user_port = sockaddr.sin_port.into();
637                bpf_sockaddr.user_ip4 = sockaddr.sin_addr.s_addr;
638            }
639            linux_uapi::AF_INET6 => {
640                let sockaddr = linux_uapi::sockaddr_in6::ref_from_prefix(socket_address)
641                    .map_err(|_| errno!(EINVAL))?
642                    .0;
643                bpf_sockaddr.user_port = sockaddr.sin6_port.into();
644                // SAFETY: reading an array of u32 from a union is safe.
645                bpf_sockaddr.user_ip6 = unsafe { sockaddr.sin6_addr.in6_u.u6_addr32 };
646            }
647            _ => return error!(EAFNOSUPPORT),
648        }
649
650        bpf_sockaddr.__bindgen_anon_1.sk =
651            (uaddr { addr: BpfValue::from(&bpf_sock).into() }).into();
652
653        // UDP recvmsg programs are not allowed to filter packets.
654        let can_block = op != SockAddrOp::UdpRecvMsg;
655        Ok(prog.run(locked, current_task, &mut bpf_sockaddr, can_block))
656    }
657
658    pub fn run_sock_prog(
659        &self,
660        locked: &mut Locked<FileOpsCore>,
661        current_task: &CurrentTask,
662        op: SockOp,
663        domain: SocketDomain,
664        socket_type: SocketType,
665        protocol: SocketProtocol,
666        socket: &ZxioBackedSocket,
667    ) -> SockProgramResult {
668        let prog_cell = match op {
669            SockOp::Create => &self.sock_create,
670            SockOp::Release => &self.sock_release,
671        };
672        let (prog_guard, locked) = prog_cell.read_and(locked);
673        let Some(prog) = prog_guard.as_ref() else {
674            return SockProgramResult::Allow;
675        };
676
677        let bpf_sock = BpfSock {
678            value: bpf_sock {
679                family: domain.as_raw().into(),
680                type_: socket_type.as_raw(),
681                protocol: protocol.as_raw(),
682                ..Default::default()
683            },
684            socket: Some(socket),
685        };
686
687        prog.run(locked, current_task, &bpf_sock)
688    }
689
690    pub fn run_getsockopt_prog(
691        &self,
692        locked: &mut Locked<FileOpsCore>,
693        current_task: &CurrentTask,
694        level: u32,
695        optname: u32,
696        value_buf: Vec<u8>,
697        optlen: usize,
698        error: Option<Errno>,
699        socket: &Socket,
700    ) -> Result<(Vec<u8>, usize), Errno> {
701        let (prog_guard, locked) = self.get_sockopt.read_and(locked);
702        let Some(prog) = prog_guard.as_ref() else {
703            return error.map(|e| Err(e)).unwrap_or_else(|| Ok((value_buf, optlen)));
704        };
705
706        let bpf_sock = BpfSock::from_socket(socket);
707
708        let retval = error.as_ref().map(|e| -(e.code.error_code() as i32)).unwrap_or(0);
709        let mut bpf_sockopt = BpfSockOptWithValue::new(
710            level,
711            optname,
712            value_buf.clone(),
713            optlen as u32,
714            retval,
715            &bpf_sock,
716        );
717
718        // Run the program.
719        let result = prog.run(locked, current_task, &mut bpf_sockopt);
720
721        let retval = bpf_sockopt.retval;
722
723        let retval = match result {
724            0 if is_error_return_value(retval) => retval,
725            0 => -(linux_uapi::EPERM as i32),
726            1 => retval,
727            _ => {
728                // TODO(https://fxbug.dev/413490751): Change this to panic once
729                // result validation is implemented in the verifier.
730                log_error!("eBPF getsockopt program returned invalid result: {}", result);
731                retval
732            }
733        };
734
735        if retval < 0 {
736            return Err(Errno::new(ErrnoCode::from_error_code(-retval as i16)));
737        }
738
739        let new_optlen = bpf_sockopt.optlen;
740
741        match usize::try_from(new_optlen) {
742            // Fail if the program set an invalid `optlen`.
743            Err(_) => error!(EFAULT),
744            Ok(new_optlen) if new_optlen > value_buf.len() => error!(EFAULT),
745
746            // If `optlen` is set to 0 then proceed with the original value.
747            Ok(0) => Ok((value_buf, optlen)),
748
749            Ok(new_optlen) => Ok((bpf_sockopt.take_value(), new_optlen)),
750        }
751    }
752
753    pub fn run_setsockopt_prog(
754        &self,
755        locked: &mut Locked<FileOpsCore>,
756        current_task: &CurrentTask,
757        level: u32,
758        optname: u32,
759        value: SockOptValue,
760        socket: &Socket,
761    ) -> SetSockOptProgramResult {
762        let (prog_guard, locked) = self.set_sockopt.read_and(locked);
763        let Some(prog) = prog_guard.as_ref() else {
764            return SetSockOptProgramResult::Allow(value);
765        };
766
767        let page_size = *PAGE_SIZE as usize;
768
769        // Read only the first page from the user-specified buffer in case it's
770        // larger than that.
771        let buffer = match value.read_bytes(current_task, page_size) {
772            Ok(buffer) => buffer,
773            Err(err) => return SetSockOptProgramResult::Fail(err),
774        };
775
776        let bpf_sock = BpfSock::from_socket(socket);
777
778        let buffer_len = buffer.len();
779        let optlen = value.len();
780        let mut bpf_sockopt =
781            BpfSockOptWithValue::new(level, optname, buffer, optlen as u32, 0, &bpf_sock);
782        let result = prog.run(locked.cast_locked(), current_task, &mut bpf_sockopt);
783
784        let retval = bpf_sockopt.retval;
785
786        let retval = match result {
787            0 if is_error_return_value(retval) => retval,
788            0 => -(linux_uapi::EPERM as i32),
789            1 => retval,
790            _ => {
791                // TODO(https://fxbug.dev/413490751): Change this to panic once
792                // result validation is implemented in the verifier.
793                log_error!("eBPF getsockopt program returned invalid result: {}", result);
794                retval
795            }
796        };
797
798        if retval < 0 {
799            return SetSockOptProgramResult::Fail(Errno::new(ErrnoCode::from_error_code(
800                -retval as i16,
801            )));
802        }
803
804        match bpf_sockopt.optlen {
805            // `setsockopt` programs can bypass the platform implementation by
806            // setting `optlen` to -1.
807            -1 => SetSockOptProgramResult::Bypass,
808
809            // If the original value is larger than a page and the program
810            // didn't change `optlen` then return the original value. This
811            // allows to avoid `EFAULT` below with a no-op program.
812            new_optlen if optlen > page_size && (new_optlen as usize) == optlen => {
813                SetSockOptProgramResult::Allow(value)
814            }
815
816            // Fail if the program has set an invalid `optlen` (except for the
817            // case handled above).
818            optlen if optlen < 0 || (optlen as usize) > buffer_len => {
819                SetSockOptProgramResult::Fail(errno!(EFAULT))
820            }
821
822            // If `optlen` is set to 0 then proceed with the original value.
823            0 => SetSockOptProgramResult::Allow(value),
824
825            // Return value from `bpf_sockbuf` - it may be different from the
826            // original value.
827            optlen => {
828                let mut value = bpf_sockopt.take_value();
829                value.resize(optlen as usize, 0);
830                SetSockOptProgramResult::Allow(value.into())
831            }
832        }
833    }
834}
835
836fn attach_type_to_netstack_hook(attach_type: AttachType) -> Option<fnet_filter::SocketHook> {
837    let hook = match attach_type {
838        AttachType::CgroupInetEgress => fnet_filter::SocketHook::Egress,
839        AttachType::CgroupInetIngress => fnet_filter::SocketHook::Ingress,
840        _ => return None,
841    };
842    Some(hook)
843}
844
// Defines a location where eBPF programs can be attached.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum AttachLocation {
    // Attached in Starnix kernel.
    Kernel,

    // Attached in Netstack.
    Netstack,
}
854
855impl TryFrom<AttachType> for AttachLocation {
856    type Error = Errno;
857
858    fn try_from(attach_type: AttachType) -> Result<Self, Self::Error> {
859        match attach_type {
860            AttachType::CgroupInet4Bind
861            | AttachType::CgroupInet6Bind
862            | AttachType::CgroupInet4Connect
863            | AttachType::CgroupInet6Connect
864            | AttachType::CgroupUdp4Sendmsg
865            | AttachType::CgroupUdp6Sendmsg
866            | AttachType::CgroupUdp4Recvmsg
867            | AttachType::CgroupUdp6Recvmsg
868            | AttachType::CgroupInetSockCreate
869            | AttachType::CgroupInetSockRelease
870            | AttachType::CgroupGetsockopt
871            | AttachType::CgroupSetsockopt => Ok(AttachLocation::Kernel),
872
873            AttachType::CgroupInetEgress | AttachType::CgroupInetIngress => {
874                Ok(AttachLocation::Netstack)
875            }
876
877            AttachType::CgroupDevice
878            | AttachType::CgroupInet4Getpeername
879            | AttachType::CgroupInet4Getsockname
880            | AttachType::CgroupInet4PostBind
881            | AttachType::CgroupInet6Getpeername
882            | AttachType::CgroupInet6Getsockname
883            | AttachType::CgroupInet6PostBind
884            | AttachType::CgroupSysctl
885            | AttachType::CgroupUnixConnect
886            | AttachType::CgroupUnixGetpeername
887            | AttachType::CgroupUnixGetsockname
888            | AttachType::CgroupUnixRecvmsg
889            | AttachType::CgroupUnixSendmsg
890            | AttachType::CgroupSockOps
891            | AttachType::SkSkbStreamParser
892            | AttachType::SkSkbStreamVerdict
893            | AttachType::SkMsgVerdict
894            | AttachType::LircMode2
895            | AttachType::FlowDissector
896            | AttachType::TraceRawTp
897            | AttachType::TraceFentry
898            | AttachType::TraceFexit
899            | AttachType::ModifyReturn
900            | AttachType::LsmMac
901            | AttachType::TraceIter
902            | AttachType::XdpDevmap
903            | AttachType::XdpCpumap
904            | AttachType::SkLookup
905            | AttachType::Xdp
906            | AttachType::SkSkbVerdict
907            | AttachType::SkReuseportSelect
908            | AttachType::SkReuseportSelectOrMigrate
909            | AttachType::PerfEvent
910            | AttachType::TraceKprobeMulti
911            | AttachType::LsmCgroup
912            | AttachType::StructOps
913            | AttachType::Netfilter
914            | AttachType::TcxIngress
915            | AttachType::TcxEgress
916            | AttachType::TraceUprobeMulti
917            | AttachType::NetkitPrimary
918            | AttachType::NetkitPeer
919            | AttachType::TraceKprobeSession => {
920                track_stub!(TODO("https://fxbug.dev/322873416"), "BPF_PROG_ATTACH", attach_type);
921                error!(ENOTSUP)
922            }
923
924            AttachType::Unspecified | AttachType::Invalid(_) => {
925                error!(EINVAL)
926            }
927        }
928    }
929}
930
931fn get_capability_for_program(program_type: ProgramType) -> Result<Capabilities, Errno> {
932    match program_type {
933        ProgramType::CgroupSkb
934        | ProgramType::CgroupSock
935        | ProgramType::CgroupSockAddr
936        | ProgramType::CgroupSockopt
937        | ProgramType::CgroupSysctl => Ok(CAP_NET_ADMIN),
938
939        // The following program types cannot be attached with
940        // `bpf(BPF_PROG_ATTACH)` yet.
941        ProgramType::CgroupDevice
942        | ProgramType::Ext
943        | ProgramType::FlowDissector
944        | ProgramType::Kprobe
945        | ProgramType::LircMode2
946        | ProgramType::Lsm
947        | ProgramType::LwtIn
948        | ProgramType::LwtOut
949        | ProgramType::LwtSeg6Local
950        | ProgramType::LwtXmit
951        | ProgramType::Netfilter
952        | ProgramType::PerfEvent
953        | ProgramType::RawTracepoint
954        | ProgramType::RawTracepointWritable
955        | ProgramType::SchedAct
956        | ProgramType::SchedCls
957        | ProgramType::SkLookup
958        | ProgramType::SkMsg
959        | ProgramType::SkReuseport
960        | ProgramType::SkSkb
961        | ProgramType::SocketFilter
962        | ProgramType::SockOps
963        | ProgramType::StructOps
964        | ProgramType::Syscall
965        | ProgramType::Tracepoint
966        | ProgramType::Tracing
967        | ProgramType::Unspec
968        | ProgramType::Xdp
969        | ProgramType::Fuse => error!(ENOTSUP),
970    }
971}
972
973#[derive(Default)]
974pub struct EbpfAttachments {
975    root_cgroup: CgroupEbpfProgramSet,
976    socket_control: OnceLock<fnet_filter::SocketControlSynchronousProxy>,
977}
978
979impl EbpfAttachments {
980    pub fn root_cgroup(&self) -> &CgroupEbpfProgramSet {
981        &self.root_cgroup
982    }
983
984    fn socket_control(&self) -> &fnet_filter::SocketControlSynchronousProxy {
985        self.socket_control.get_or_init(|| {
986            connect_to_protocol_sync::<fnet_filter::SocketControlMarker>()
987                .expect("Failed to connect to fuchsia.net.filter.SocketControl.")
988        })
989    }
990
991    fn attach_prog(
992        &self,
993        locked: &mut Locked<Unlocked>,
994        current_task: &CurrentTask,
995        attach_type: AttachType,
996        target_fd: FdNumber,
997        program: ProgramHandle,
998    ) -> Result<SyscallResult, Errno> {
999        let location: AttachLocation = attach_type.try_into()?;
1000        let program_type = attach_type.get_program_type();
1001        match (location, program_type) {
1002            (AttachLocation::Kernel, ProgramType::CgroupSockAddr) => {
1003                check_root_cgroup_fd(locked, current_task, target_fd)?;
1004
1005                let linked_program = SockAddrProgram(program.link(attach_type.get_program_type())?);
1006                *self.root_cgroup.get_sock_addr_program(attach_type)?.write(locked) =
1007                    Some(linked_program);
1008
1009                Ok(SUCCESS)
1010            }
1011
1012            (AttachLocation::Kernel, ProgramType::CgroupSock) => {
1013                check_root_cgroup_fd(locked, current_task, target_fd)?;
1014
1015                let linked_program = SockProgram(program.link(attach_type.get_program_type())?);
1016                *self.root_cgroup.get_sock_program(attach_type)?.write(locked) =
1017                    Some(linked_program);
1018
1019                Ok(SUCCESS)
1020            }
1021
1022            (AttachLocation::Kernel, ProgramType::CgroupSockopt) => {
1023                check_root_cgroup_fd(locked, current_task, target_fd)?;
1024
1025                let linked_program = SockOptProgram(program.link(attach_type.get_program_type())?);
1026                *self.root_cgroup.get_sock_opt_program(attach_type)?.write(locked) =
1027                    Some(linked_program);
1028
1029                Ok(SUCCESS)
1030            }
1031
1032            (AttachLocation::Kernel, _) => {
1033                unreachable!();
1034            }
1035
1036            (AttachLocation::Netstack, _) => {
1037                check_root_cgroup_fd(locked, current_task, target_fd)?;
1038                self.attach_prog_in_netstack(attach_type, program)
1039            }
1040        }
1041    }
1042
1043    fn detach_prog(
1044        &self,
1045        locked: &mut Locked<Unlocked>,
1046        current_task: &CurrentTask,
1047        attach_type: AttachType,
1048        target_fd: FdNumber,
1049    ) -> Result<SyscallResult, Errno> {
1050        let location = attach_type.try_into()?;
1051        let program_type = attach_type.get_program_type();
1052        match (location, program_type) {
1053            (AttachLocation::Kernel, ProgramType::CgroupSockAddr) => {
1054                check_root_cgroup_fd(locked, current_task, target_fd)?;
1055
1056                let mut prog_guard =
1057                    self.root_cgroup.get_sock_addr_program(attach_type)?.write(locked);
1058                if prog_guard.is_none() {
1059                    return error!(ENOENT);
1060                }
1061
1062                *prog_guard = None;
1063
1064                Ok(SUCCESS)
1065            }
1066
1067            (AttachLocation::Kernel, ProgramType::CgroupSock) => {
1068                check_root_cgroup_fd(locked, current_task, target_fd)?;
1069
1070                let mut prog_guard = self.root_cgroup.get_sock_program(attach_type)?.write(locked);
1071                if prog_guard.is_none() {
1072                    return error!(ENOENT);
1073                }
1074
1075                *prog_guard = None;
1076
1077                Ok(SUCCESS)
1078            }
1079
1080            (AttachLocation::Kernel, ProgramType::CgroupSockopt) => {
1081                check_root_cgroup_fd(locked, current_task, target_fd)?;
1082
1083                let mut prog_guard =
1084                    self.root_cgroup.get_sock_opt_program(attach_type)?.write(locked);
1085                if prog_guard.is_none() {
1086                    return error!(ENOENT);
1087                }
1088
1089                *prog_guard = None;
1090
1091                Ok(SUCCESS)
1092            }
1093
1094            (AttachLocation::Kernel, _) => {
1095                unreachable!();
1096            }
1097
1098            (AttachLocation::Netstack, _) => {
1099                check_root_cgroup_fd(locked, current_task, target_fd)?;
1100                self.detach_prog_in_netstack(attach_type)
1101            }
1102        }
1103    }
1104
1105    fn attach_prog_in_netstack(
1106        &self,
1107        attach_type: AttachType,
1108        program: ProgramHandle,
1109    ) -> Result<SyscallResult, Errno> {
1110        let hook = attach_type_to_netstack_hook(attach_type).ok_or_else(|| errno!(ENOTSUP))?;
1111        let opts = fnet_filter::AttachEbpfProgramOptions {
1112            hook: Some(hook),
1113            program: Some((&**program).try_into()?),
1114            ..Default::default()
1115        };
1116        self.socket_control()
1117            .attach_ebpf_program(opts, zx::MonotonicInstant::INFINITE)
1118            .map_err(|e| {
1119                log_error!(
1120                    "failed to send fuchsia.net.filter/SocketControl.AttachEbpfProgram: {}",
1121                    e
1122                );
1123                errno!(EIO)
1124            })?
1125            .map_err(|e| {
1126                use fnet_filter::SocketControlAttachEbpfProgramError as Error;
1127                match e {
1128                    Error::NotSupported => errno!(ENOTSUP),
1129                    Error::LinkFailed => errno!(EINVAL),
1130                    Error::MapFailed => errno!(EIO),
1131                    Error::DuplicateAttachment => errno!(EEXIST),
1132                }
1133            })?;
1134
1135        Ok(SUCCESS)
1136    }
1137
1138    fn detach_prog_in_netstack(&self, attach_type: AttachType) -> Result<SyscallResult, Errno> {
1139        let hook = attach_type_to_netstack_hook(attach_type).ok_or_else(|| errno!(ENOTSUP))?;
1140        self.socket_control()
1141            .detach_ebpf_program(hook, zx::MonotonicInstant::INFINITE)
1142            .map_err(|e| {
1143                log_error!(
1144                    "failed to send fuchsia.net.filter/SocketControl.DetachEbpfProgram: {}",
1145                    e
1146                );
1147                errno!(EIO)
1148            })?
1149            .map_err(|e| {
1150                use fnet_filter::SocketControlDetachEbpfProgramError as Error;
1151                match e {
1152                    Error::NotFound => errno!(ENOENT),
1153                }
1154            })?;
1155        Ok(SUCCESS)
1156    }
1157}