Skip to main content

starnix_core/vfs/socket/
syscalls.rs

1// Copyright 2021 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::bpf::attachments::SetSockOptProgramResult;
6use crate::mm::{IOVecPtr, MemoryAccessor, MemoryAccessorExt};
7use crate::security;
8use crate::syscalls::time::TimeSpecPtr;
9use crate::task::{CurrentTask, IpTables, Task, WaitCallback, Waiter};
10use crate::vfs::buffers::{
11    AncillaryData, ControlMsg, UserBuffersInputBuffer, UserBuffersOutputBuffer,
12};
13use crate::vfs::socket::{
14    SA_FAMILY_SIZE, SA_STORAGE_SIZE, Socket, SocketAddress, SocketDomain, SocketFile,
15    SocketMessageFlags, SocketPeer, SocketProtocol, SocketShutdownFlags, SocketType, UnixSocket,
16    resolve_unix_socket_address,
17};
18use crate::vfs::{FdFlags, FdNumber, FileHandle, FsString, LookupContext};
19use starnix_logging::{log_trace, track_stub};
20use starnix_sync::{FileOpsCore, LockEqualOrBefore, Locked, Unlocked};
21use starnix_types::augmented::Augmented;
22use starnix_types::time::duration_from_timespec;
23use starnix_types::user_buffer::{UserBuffer, UserBuffers};
24use starnix_uapi::auth::CAP_NET_BIND_SERVICE;
25use starnix_uapi::errors::{EEXIST, EINPROGRESS, Errno};
26use starnix_uapi::file_mode::FileMode;
27use starnix_uapi::math::round_up_to_increment;
28use starnix_uapi::open_flags::OpenFlags;
29use starnix_uapi::user_address::{
30    ArchSpecific, MappingMultiArchUserRef, MultiArchUserRef, UserAddress, UserRef,
31};
32use starnix_uapi::user_value::UserValue;
33use starnix_uapi::vfs::FdEvents;
34use starnix_uapi::{
35    MSG_CTRUNC, MSG_DONTWAIT, MSG_TRUNC, MSG_WAITFORONE, SHUT_RD, SHUT_RDWR, SHUT_WR, SOCK_CLOEXEC,
36    SOCK_NONBLOCK, UIO_MAXIOV, errno, error, socklen_t, uapi,
37};
38use std::ops::DerefMut;
39
// Registers `socklen_t` with the uapi arch-independent layout check (there are no fields to
// compare, hence the empty body). NOTE(review): presumably this is what allows the syscalls
// below to take a plain `UserRef<socklen_t>` for both 64-bit and arch32 callers — confirm
// against the macro definition.
uapi::check_arch_independent_layout! {
    socklen_t {}
}
43
/// A `msghdr` can be augmented with a `UserBuffer`. In that case, the `UserBuffer` is used for
/// the I/O, instead of the `iovec` fields from the `msghdr`.
pub type WithAlternateBuffer<T> = Augmented<T, UserBuffer>;
/// A user pointer to a `msghdr` that is read and written as the arch-neutral [`MsgHdr`],
/// whether user space uses the `uapi::msghdr` or the `uapi::arch32::msghdr` layout.
pub type MsgHdrPtr = MappingMultiArchUserRef<MsgHdr, uapi::msghdr, uapi::arch32::msghdr>;
48
/// Architecture-neutral representation of a `msghdr`.
///
/// Each field corresponds to the `msg_*` field of the uapi struct named in the
/// `arch_map_data!` mapping in this file.
#[derive(Debug, Clone)]
pub struct MsgHdr {
    /// `msg_name`: user address of the socket address buffer (may be null).
    pub name: UserAddress,
    /// `msg_namelen`: size of the buffer at `name`.
    pub name_len: socklen_t,
    /// `msg_iov`: user pointer to the `iovec` array.
    pub iov: IOVecPtr,
    /// `msg_iovlen`: number of entries in the `iovec` array.
    pub iovlen: UserValue<usize>,
    /// `msg_control`: user address of the ancillary data buffer.
    pub control: UserAddress,
    /// `msg_controllen`: size of the ancillary data buffer.
    pub control_len: usize,
    /// `msg_flags`: flags reported back to the caller by the receive path.
    pub flags: u32,
}
59
/// A reference to a `msghdr`.
///
/// This enum is used to abstract over whether the `msghdr` is in user memory (and needs to be
/// read) or has been constructed in the kernel. This is used by `io_uring` to provide a buffer
/// for `recvmsg`.
#[derive(Debug, Clone)]
pub enum MsgHdrRef {
    /// The header lives in user memory; it is read before and written back after the operation.
    Ptr(MsgHdrPtr),
    /// The header was built in the kernel, optionally carrying an alternate I/O buffer; results
    /// are stored back into the value in place.
    Value(WithAlternateBuffer<MsgHdr>),
}
70
71impl From<MsgHdrPtr> for MsgHdrRef {
72    fn from(ptr: MsgHdrPtr) -> Self {
73        Self::Ptr(ptr)
74    }
75}
76
77impl From<WithAlternateBuffer<MsgHdr>> for MsgHdrRef {
78    fn from(value: WithAlternateBuffer<MsgHdr>) -> Self {
79        Self::Value(value)
80    }
81}
82
/// A user pointer to an `mmsghdr` that is read and written as the arch-neutral [`MMsgHdr`].
pub type MMsgHdrPtr = MappingMultiArchUserRef<MMsgHdr, uapi::mmsghdr, uapi::arch32::mmsghdr>;

/// Architecture-neutral representation of an `mmsghdr`, as used by `recvmmsg` and `sendmmsg`.
pub struct MMsgHdr {
    /// `msg_hdr`: the per-message header.
    hdr: MsgHdr,
    /// `msg_len`: filled in by the syscall with the byte count of the individual message.
    len: usize,
}
89
// Field-by-field mapping between the arch-neutral structs of this file (left-hand side) and
// the corresponding uapi structs (right-hand side). NOTE(review): `BidiTryFrom` presumably
// generates fallible conversions in both directions for each supported architecture — confirm
// against the macro definition.
uapi::arch_map_data! {
    BidiTryFrom<MsgHdr, msghdr> {
        name = msg_name;
        name_len = msg_namelen;
        iov = msg_iov;
        iovlen = msg_iovlen;
        control = msg_control;
        control_len = msg_controllen;
        flags = msg_flags;
    }

    BidiTryFrom<MMsgHdr, mmsghdr> {
        hdr = msg_hdr;
        len = msg_len;
    }
}
106
/// A user pointer to a control message header (`cmsghdr`), in either the native or the arch32
/// layout.
pub type CMsgHdrPtr = MultiArchUserRef<uapi::cmsghdr, uapi::arch32::cmsghdr>;
108
109pub fn sys_socket(
110    locked: &mut Locked<Unlocked>,
111    current_task: &CurrentTask,
112    domain: u32,
113    socket_type: u32,
114    protocol: u32,
115) -> Result<FdNumber, Errno> {
116    let flags = socket_type & (SOCK_NONBLOCK | SOCK_CLOEXEC);
117    let domain = parse_socket_domain(domain)?;
118    let socket_type = parse_socket_type(domain, socket_type)?;
119    // Should we use parse_socket_protocol here?
120    let protocol = SocketProtocol::from_raw(protocol);
121    let open_flags = socket_flags_to_open_flags(flags);
122    let socket_file = SocketFile::new_socket(
123        locked,
124        current_task,
125        domain,
126        socket_type,
127        open_flags,
128        protocol,
129        /*kernel_private=*/ false,
130    )?;
131
132    let fd_flags = socket_flags_to_fd_flags(flags);
133    let fd = current_task.add_file(locked, socket_file, fd_flags)?;
134    Ok(fd)
135}
136
137fn socket_flags_to_open_flags(flags: u32) -> OpenFlags {
138    OpenFlags::RDWR
139        | if flags & SOCK_NONBLOCK != 0 { OpenFlags::NONBLOCK } else { OpenFlags::empty() }
140}
141
142fn socket_flags_to_fd_flags(flags: u32) -> FdFlags {
143    if flags & SOCK_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() }
144}
145
146fn parse_socket_domain(domain: u32) -> Result<SocketDomain, Errno> {
147    SocketDomain::from_raw(domain.try_into().map_err(|_| errno!(EAFNOSUPPORT))?).ok_or_else(|| {
148        track_stub!(TODO("https://fxbug.dev/322875074"), "parse socket domain", domain);
149        errno!(EAFNOSUPPORT)
150    })
151}
152
153fn parse_socket_type(domain: SocketDomain, socket_type: u32) -> Result<SocketType, Errno> {
154    let socket_type = SocketType::from_raw(socket_type & 0xf).ok_or_else(|| {
155        track_stub!(TODO("https://fxbug.dev/322875418"), "parse socket type", socket_type);
156        errno!(EINVAL)
157    })?;
158    // For AF_UNIX, SOCK_RAW sockets are treated as if they were SOCK_DGRAM.
159    Ok(if domain == SocketDomain::Unix && socket_type == SocketType::Raw {
160        SocketType::Datagram
161    } else {
162        socket_type
163    })
164}
165
166fn parse_socket_protocol(
167    domain: SocketDomain,
168    socket_type: SocketType,
169    protocol: u32,
170) -> Result<SocketProtocol, Errno> {
171    let protocol = SocketProtocol::from_raw(protocol);
172    if domain == SocketDomain::Inet {
173        match (socket_type, protocol) {
174            (SocketType::Raw, _) => {
175                // Should we have different behavior error when called by root?
176                return error!(EPROTONOSUPPORT);
177            }
178            (SocketType::Datagram, SocketProtocol::UDP) => (),
179            (SocketType::Datagram, _) => return error!(EPROTONOSUPPORT),
180            (SocketType::Stream, SocketProtocol::TCP) => (),
181            (SocketType::Stream, _) => return error!(EPROTONOSUPPORT),
182            _ => (),
183        }
184    }
185    Ok(protocol)
186}
187
188fn parse_socket_address(
189    task: &Task,
190    user_socket_address: UserAddress,
191    user_address_length: usize,
192) -> Result<SocketAddress, Errno> {
193    if user_address_length < SA_FAMILY_SIZE || user_address_length > SA_STORAGE_SIZE {
194        return error!(EINVAL);
195    }
196
197    let address = task.read_memory_to_vec(user_socket_address, user_address_length)?;
198
199    SocketAddress::from_bytes(address)
200}
201
202fn maybe_parse_socket_address(
203    task: &Task,
204    user_socket_address: UserAddress,
205    user_address_length: usize,
206) -> Result<Option<SocketAddress>, Errno> {
207    if user_address_length > i32::MAX as usize {
208        return error!(EINVAL);
209    }
210    Ok(if user_socket_address.is_null() {
211        None
212    } else {
213        Some(parse_socket_address(task, user_socket_address, user_address_length)?)
214    })
215}
216
217// See "Autobind feature" section of https://man7.org/linux/man-pages/man7/unix.7.html
218fn generate_autobind_address() -> FsString {
219    let mut bytes = [0u8; 4];
220    zx::cprng_draw(&mut bytes);
221    let value = u32::from_ne_bytes(bytes) & 0xFFFFF;
222    format!("\0{value:05x}").into()
223}
224
/// Implements `bind(2)`.
///
/// Reads the address from user memory, validates it against the domain of the socket behind
/// `fd`, performs the capability and security checks and then binds according to the address
/// kind:
///   - unix addresses that are empty get an autobind name; abstract names (leading null byte)
///     are bound in the abstract socket namespace, all others create a socket node in the
///     filesystem;
///   - vsock addresses are bound by port in the abstract vsock namespace;
///   - all remaining address kinds are forwarded to `Socket::bind`.
///
/// Errors visible here: `EINVAL` / `EAFNOSUPPORT` for an address that does not match the
/// socket domain, `EACCES` for a privileged port without `CAP_NET_BIND_SERVICE`, `EINVAL` for
/// an unspecified address and `EADDRINUSE` when the filesystem path already exists.
pub fn sys_bind(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd: FdNumber,
    user_socket_address: UserAddress,
    user_address_length: usize,
) -> Result<(), Errno> {
    let file = current_task.get_file(fd)?;
    let socket = Socket::get_from_file(&file)?;
    let address = parse_socket_address(current_task, user_socket_address, user_address_length)?;
    if !address.valid_for_domain(socket.domain) {
        // The errno for a mismatching address depends on the domain of the socket.
        return match socket.domain {
            SocketDomain::Unix
            | SocketDomain::Vsock
            | SocketDomain::Inet6
            | SocketDomain::Netlink
            | SocketDomain::Key
            | SocketDomain::Packet
            | SocketDomain::Qipcrtr => error!(EINVAL),
            SocketDomain::Inet => error!(EAFNOSUPPORT),
        };
    }
    if let Some(port) = address.maybe_inet_port() {
        // See <https://man7.org/linux/man-pages/man7/ip.7.html>:
        //
        //   The port numbers below 1024 are called privileged ports (or
        //   sometimes: reserved ports).  Only a privileged process (on Linux:
        //   a process that has the CAP_NET_BIND_SERVICE capability in the
        //   user namespace governing its network namespace) may bind(2) to
        //   these sockets.
        if port != 0 && port < 1024 {
            // Any capability check failure is reported as EACCES.
            security::check_task_capable(current_task, CAP_NET_BIND_SERVICE)
                .map_err(|_| errno!(EACCES))?;
        }
    }
    security::check_socket_bind_access(current_task, socket, &address)?;
    match address {
        SocketAddress::Unspecified => return error!(EINVAL),
        SocketAddress::Unix(mut name) => {
            if name.is_empty() {
                // If the name is empty, then we're supposed to generate an
                // autobind address, which is always abstract.
                name = generate_autobind_address();
            }
            // If there is a null byte at the start of the sun_path, then the
            // address is abstract.
            if name[0] == b'\0' {
                current_task.live().abstract_socket_namespace.bind(
                    locked,
                    current_task,
                    name,
                    socket,
                )?;
            } else {
                // The node for the bound socket takes the mode of the socket file, filtered by
                // the umask of the task and marked as a socket.
                let mode = file.node().info().mode;
                let mode = current_task.fs().apply_umask(mode).with_type(FileMode::IFSOCK);
                let (parent, basename) = current_task.lookup_parent_at(
                    locked,
                    &mut LookupContext::default(),
                    FdNumber::AT_FDCWD,
                    name.as_ref(),
                )?;

                parent
                    .bind_socket(
                        locked,
                        current_task,
                        basename,
                        socket.clone(),
                        SocketAddress::Unix(name.clone()),
                        mode,
                    )
                    // An already existing path means the address is in use.
                    .map_err(|errno| if errno == EEXIST { errno!(EADDRINUSE) } else { errno })?;
            }
        }
        SocketAddress::Vsock { port, .. } => {
            current_task.live().abstract_vsock_namespace.bind(
                locked,
                current_task,
                port,
                socket,
            )?;
        }
        SocketAddress::Inet(_)
        | SocketAddress::Inet6(_)
        | SocketAddress::Netlink(_)
        | SocketAddress::Packet(_)
        | SocketAddress::Qipcrtr(_) => socket.bind(locked, current_task, address)?,
    }

    Ok(())
}
317
318pub fn sys_listen(
319    locked: &mut Locked<Unlocked>,
320    current_task: &CurrentTask,
321    fd: FdNumber,
322    backlog: i32,
323) -> Result<(), Errno> {
324    let file = current_task.get_file(fd)?;
325    let socket = Socket::get_from_file(&file)?;
326    socket.listen(locked, current_task, backlog)?;
327    Ok(())
328}
329
330pub fn sys_accept(
331    locked: &mut Locked<Unlocked>,
332    current_task: &CurrentTask,
333    fd: FdNumber,
334    user_socket_address: UserAddress,
335    user_address_length: UserRef<socklen_t>,
336) -> Result<FdNumber, Errno> {
337    sys_accept4(locked, current_task, fd, user_socket_address, user_address_length, 0)
338}
339
340pub fn sys_accept4(
341    locked: &mut Locked<Unlocked>,
342    current_task: &CurrentTask,
343    fd: FdNumber,
344    user_socket_address: UserAddress,
345    user_address_length: UserRef<socklen_t>,
346    flags: u32,
347) -> Result<FdNumber, Errno> {
348    let file = current_task.get_file(fd)?;
349    let listening_socket = Socket::get_from_file(&file)?;
350    let accepted_socket = file.blocking_op(
351        locked,
352        current_task,
353        FdEvents::POLLIN | FdEvents::POLLHUP,
354        None,
355        |locked| listening_socket.accept(locked, current_task),
356    )?;
357
358    if !user_socket_address.is_null() {
359        let address_bytes = accepted_socket.getpeername(locked)?.to_bytes();
360        write_socket_address(
361            current_task,
362            user_socket_address,
363            user_address_length,
364            &address_bytes,
365        )?;
366    }
367
368    let open_flags = socket_flags_to_open_flags(flags);
369    let accepted_socket_file = SocketFile::from_socket(
370        locked,
371        current_task,
372        accepted_socket,
373        open_flags,
374        /* kernel_private= */ false,
375    )?;
376    let listening_socket = SocketFile::get_from_file(&file)?;
377    let accepted_socket = SocketFile::get_from_file(&accepted_socket_file)?;
378    security::socket_accept(current_task, listening_socket, accepted_socket)?;
379    let fd_flags = if flags & SOCK_CLOEXEC != 0 { FdFlags::CLOEXEC } else { FdFlags::empty() };
380    let accepted_fd = current_task.add_file(locked, accepted_socket_file, fd_flags)?;
381    Ok(accepted_fd)
382}
383
/// Implements `connect(2)`.
///
/// Reads the destination address from user memory and turns it into a `SocketPeer`: unix
/// addresses are resolved to the socket they name, all other supported address kinds are
/// passed on as addresses. On a blocking file, an `EINPROGRESS` result is followed by a wait
/// for `POLLOUT` and a second `connect` call whose result is returned.
///
/// Errors visible here: `EAFNOSUPPORT` for an unspecified address, `ECONNREFUSED` for an empty
/// unix name and `ENOSYS` for vsock addresses.
pub fn sys_connect(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd: FdNumber,
    user_socket_address: UserAddress,
    user_address_length: usize,
) -> Result<(), Errno> {
    let client = current_task.get_file(fd)?;
    let client = SocketFile::get_from_file(&client)?;
    let address = parse_socket_address(current_task, user_socket_address, user_address_length)?;
    let peer = match address {
        SocketAddress::Unspecified => return error!(EAFNOSUPPORT),
        SocketAddress::Unix(ref name) => {
            log_trace!("connect to unix socket named \"{name}\"");
            if name.is_empty() {
                return error!(ECONNREFUSED);
            }
            SocketPeer::Handle(resolve_unix_socket_address(locked, current_task, name.as_ref())?)
        }
        // TODO(https://fxbug.dev/445433238): Connect not available for AF_VSOCK
        SocketAddress::Vsock { .. } => return error!(ENOSYS),
        SocketAddress::Inet(ref addr) | SocketAddress::Inet6(ref addr) => {
            log_trace!("connect to inet socket named {:?}", addr);
            SocketPeer::Address(address)
        }
        SocketAddress::Netlink(_) => SocketPeer::Address(address),
        SocketAddress::Packet(ref addr) => {
            log_trace!("connect to packet socket named {:?}", addr);
            SocketPeer::Address(address)
        }
        SocketAddress::Qipcrtr(ref addr) => {
            log_trace!("connect to qipcrtr socket named {:?}", addr);
            SocketPeer::Address(address)
        }
    };
    // The peer is cloned because the blocking path below may need it for a second attempt.
    let result = client.connect(locked, current_task, peer.clone());

    // Non-blocking files report the first result as is, including EINPROGRESS.
    if client.file().is_non_blocking() {
        return result;
    }

    match result {
        // EINPROGRESS may be returned for inet sockets when `connect()` is completed
        // asynchronously.
        Err(errno) if errno.code == EINPROGRESS => {
            let waiter = Waiter::new();
            // Register for POLLOUT first and query the events afterwards, so that an event
            // raised between the two steps is not missed.
            client.file().wait_async(
                locked,
                current_task,
                &waiter,
                FdEvents::POLLOUT,
                WaitCallback::none(),
            );
            if !client.file().query_events(locked, current_task)?.contains(FdEvents::POLLOUT) {
                waiter.wait(locked, current_task)?;
            }
            // The second call yields the final outcome of the connection attempt.
            client.connect(locked, current_task, peer)
        }
        // TODO(tbodt): Support blocking when the UNIX domain socket queue fills up. This one's
        // weird because as far as I can tell, removing a socket from the queue does not actually
        // trigger FdEvents on anything.
        result => result,
    }
}
448
449fn write_socket_address(
450    current_task: &CurrentTask,
451    user_socket_address: UserAddress,
452    user_address_length: UserRef<socklen_t>,
453    address_bytes: &[u8],
454) -> Result<(), Errno> {
455    let capacity = current_task.read_object(user_address_length)?;
456    if capacity > i32::MAX as socklen_t {
457        return error!(EINVAL);
458    }
459    let length = address_bytes.len() as socklen_t;
460    if length > 0 {
461        let actual = std::cmp::min(length, capacity) as usize;
462        current_task.write_memory(user_socket_address, &address_bytes[..actual])?;
463    }
464    current_task.write_object(user_address_length, &length)?;
465    Ok(())
466}
467
468pub fn sys_getsockname(
469    locked: &mut Locked<Unlocked>,
470    current_task: &CurrentTask,
471    fd: FdNumber,
472    user_socket_address: UserAddress,
473    user_address_length: UserRef<socklen_t>,
474) -> Result<(), Errno> {
475    let file = current_task.get_file(fd)?;
476    let socket = Socket::get_from_file(&file)?;
477    security::check_socket_getsockname_access(current_task, socket)?;
478    let address_bytes = socket.getsockname(locked)?.to_bytes();
479
480    write_socket_address(current_task, user_socket_address, user_address_length, &address_bytes)?;
481
482    Ok(())
483}
484
485pub fn sys_getpeername(
486    locked: &mut Locked<Unlocked>,
487    current_task: &CurrentTask,
488    fd: FdNumber,
489    user_socket_address: UserAddress,
490    user_address_length: UserRef<socklen_t>,
491) -> Result<(), Errno> {
492    let file = current_task.get_file(fd)?;
493    let socket = Socket::get_from_file(&file)?;
494    security::check_socket_getpeername_access(current_task, socket)?;
495    let address_bytes = socket.getpeername(locked)?.to_bytes();
496
497    write_socket_address(current_task, user_socket_address, user_address_length, &address_bytes)?;
498
499    Ok(())
500}
501
502pub fn sys_socketpair(
503    locked: &mut Locked<Unlocked>,
504    current_task: &CurrentTask,
505    domain: u32,
506    socket_type: u32,
507    protocol: u32,
508    user_sockets: UserRef<[FdNumber; 2]>,
509) -> Result<(), Errno> {
510    let flags = socket_type & (SOCK_NONBLOCK | SOCK_CLOEXEC);
511    let domain = parse_socket_domain(domain)?;
512    if !matches!(domain, SocketDomain::Unix | SocketDomain::Inet) {
513        return error!(EAFNOSUPPORT);
514    }
515    let socket_type = parse_socket_type(domain, socket_type)?;
516    let _protocol = parse_socket_protocol(domain, socket_type, protocol)?;
517    if domain != SocketDomain::Unix {
518        return error!(EOPNOTSUPP);
519    }
520    let open_flags = socket_flags_to_open_flags(flags);
521
522    let (left, right) =
523        UnixSocket::new_pair(locked, current_task, domain, socket_type, open_flags)?;
524
525    let fd_flags = socket_flags_to_fd_flags(flags);
526    // TODO: Eventually this will need to allocate two fd numbers (each of which could
527    // potentially fail), and only populate the fd numbers (which can't fail) if both allocations
528    // succeed.
529    let left_fd = current_task.add_file(locked, left, fd_flags)?;
530    let right_fd = current_task.add_file(locked, right, fd_flags)?;
531
532    let fds = [left_fd, right_fd];
533    log_trace!("socketpair -> [{:#x}, {:#x}]", fds[0].raw(), fds[1].raw());
534    current_task.write_object(user_sockets, &fds)?;
535
536    Ok(())
537}
538
539fn read_iovec_from_msghdr(
540    current_task: &CurrentTask,
541    message_header: WithAlternateBuffer<&MsgHdr>,
542) -> Result<UserBuffers, Errno> {
543    if let WithAlternateBuffer::WithAux(_, b) = message_header {
544        return Ok(UserBuffers::from_buf([b]));
545    }
546    let iovec_count = message_header.iovlen;
547
548    // In `CurrentTask::read_iovec()` the same check fails with `EINVAL`. This works for all
549    // syscalls that use `iovec`, except `sendmsg()` and `recvmsg()`, which need to fail with
550    // EMSGSIZE.
551    if iovec_count.raw() > UIO_MAXIOV as usize {
552        return error!(EMSGSIZE);
553    }
554
555    current_task.read_iovec(message_header.iov, iovec_count)
556}
557
/// Receives one message on `file` using the header referenced by `user_message_header`.
///
/// The header is obtained either by reading it from user memory or by cloning the in-kernel
/// value. After a successful receive, the updated header is written back to wherever it came
/// from. If the receive fails, the header is left untouched.
///
/// Returns the value computed by `recvmsg_internal_with_header`.
fn recvmsg_internal<L>(
    locked: &mut Locked<L>,
    current_task: &CurrentTask,
    file: &FileHandle,
    user_message_header: &mut MsgHdrRef,
    flags: u32,
    deadline: Option<zx::MonotonicInstant>,
) -> Result<usize, Errno>
where
    L: LockEqualOrBefore<FileOpsCore>,
{
    // Work on a local copy of the header in both cases.
    let mut message_header = match *user_message_header {
        MsgHdrRef::Ptr(ptr) => current_task.read_multi_arch_object(ptr)?.into(),
        MsgHdrRef::Value(ref value) => value.clone(),
    };
    let result = recvmsg_internal_with_header(
        locked,
        current_task,
        file,
        message_header.as_mut(),
        flags,
        deadline,
    )?;
    // Publish the fields updated by the receive path back to the origin of the header.
    match *user_message_header {
        MsgHdrRef::Ptr(ptr) => {
            current_task.write_multi_arch_object(ptr, message_header.extract())?;
        }
        MsgHdrRef::Value(ref mut value) => {
            *value.deref_mut() = message_header.extract();
        }
    }
    Ok(result)
}
591
592fn recvmsg_internal_with_header<L>(
593    locked: &mut Locked<L>,
594    current_task: &CurrentTask,
595    file: &FileHandle,
596    mut message_header: WithAlternateBuffer<&mut MsgHdr>,
597    flags: u32,
598    deadline: Option<zx::MonotonicInstant>,
599) -> Result<usize, Errno>
600where
601    L: LockEqualOrBefore<FileOpsCore>,
602{
603    let iovec = read_iovec_from_msghdr(current_task, message_header.as_unmut())?;
604
605    let flags = SocketMessageFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
606    let socket_ops = file.downcast_file::<SocketFile>().unwrap();
607    let info = socket_ops.recvmsg(
608        locked,
609        current_task,
610        file,
611        &mut UserBuffersOutputBuffer::unified_new(current_task, iovec)?,
612        flags,
613        deadline,
614    )?;
615
616    message_header.flags = 0;
617
618    let cmsg_buffer_size = message_header.control_len;
619
620    let mut cmsg_bytes_written = 0;
621    let header_size = CMsgHdrPtr::size_of_object_for(current_task);
622
623    for ancillary_data in info.ancillary_data {
624        if ancillary_data.total_size(current_task) == 0 {
625            // Skip zero-byte ancillary data on the receiving end. Not doing this trips this
626            // assert:
627            // https://cs.android.com/android/platform/superproject/+/master:system/libbase/cmsg.cpp;l=144;drc=15ec2c7a23cda814351a064a345a8270ed8c83ab
628            continue;
629        }
630
631        let expected_size = header_size + ancillary_data.total_size(current_task);
632        let message_bytes = ancillary_data.into_bytes(
633            locked,
634            current_task,
635            flags,
636            cmsg_buffer_size - cmsg_bytes_written,
637        )?;
638
639        // If the message is smaller than expected, set the MSG_CTRUNC flag, so the caller can tell
640        // some of the message is missing.
641        let truncated = message_bytes.len() < expected_size;
642        if truncated {
643            message_header.flags |= MSG_CTRUNC;
644        }
645
646        if message_bytes.len() < header_size {
647            // Can't fit the header, so stop trying to write.
648            break;
649        }
650
651        if !message_bytes.is_empty() {
652            current_task
653                .write_memory((message_header.control + cmsg_bytes_written)?, &message_bytes)?;
654            cmsg_bytes_written += message_bytes.len();
655            if !truncated {
656                cmsg_bytes_written = cmsg_align(current_task, cmsg_bytes_written)?;
657            }
658        }
659    }
660
661    message_header.control_len = cmsg_bytes_written;
662
663    let msg_name = message_header.name;
664    if !msg_name.is_null() {
665        if message_header.name_len > i32::MAX as u32 {
666            return error!(EINVAL);
667        }
668        let bytes = info.address.map(|a| a.to_bytes()).unwrap_or_else(|| vec![]);
669        let num_bytes = std::cmp::min(message_header.name_len as usize, bytes.len());
670        message_header.name_len = bytes.len() as u32;
671        if num_bytes > 0 {
672            current_task.write_memory(msg_name, &bytes[..num_bytes])?;
673        }
674    }
675
676    if info.bytes_read != info.message_length {
677        message_header.flags |= MSG_TRUNC;
678    }
679
680    if flags.contains(SocketMessageFlags::TRUNC) {
681        Ok(info.message_length)
682    } else {
683        Ok(info.bytes_read)
684    }
685}
686
687pub fn sys_recvmsg(
688    locked: &mut Locked<Unlocked>,
689    current_task: &CurrentTask,
690    fd: FdNumber,
691    user_message_header: MsgHdrPtr,
692    flags: u32,
693) -> Result<usize, Errno> {
694    recvmsg_impl(locked, current_task, fd, &mut user_message_header.into(), flags)
695}
696
697/// Implementation of `recvmsg`.
698///
699/// This function is used by `sys_recvmsg`, but can also be called from other parts of the kernel
700/// that need to override the `iovec` from the `msghdr`. For example, when using `io_uring` with
701/// ring buffers.
702pub fn recvmsg_impl(
703    locked: &mut Locked<Unlocked>,
704    current_task: &CurrentTask,
705    fd: FdNumber,
706    user_message_header: &mut MsgHdrRef,
707    flags: u32,
708) -> Result<usize, Errno> {
709    let file = current_task.get_file(fd)?;
710    if !file.node().is_sock() {
711        return error!(ENOTSOCK);
712    }
713    recvmsg_internal(locked, current_task, &file, user_message_header, flags, None)
714}
715
/// Implements `recvmmsg(2)`.
///
/// Receives up to `vlen` messages into the `mmsghdr` array at `user_mmsgvec`, storing the
/// byte count of each message in its `len` field. A non-null `user_timeout` is converted into
/// a single deadline that is shared by all receive operations.
///
/// Returns the number of messages received. An error on the first message is returned to the
/// caller; an error on a later message ends the loop and the count so far is returned.
///
/// Errors visible here: `ENOTSOCK` when `fd` is not a socket node and `EINVAL` when `vlen`
/// exceeds `UIO_MAXIOV`.
pub fn sys_recvmmsg(
    locked: &mut Locked<Unlocked>,
    current_task: &CurrentTask,
    fd: FdNumber,
    user_mmsgvec: MMsgHdrPtr,
    vlen: u32,
    mut flags: u32,
    user_timeout: TimeSpecPtr,
) -> Result<usize, Errno> {
    let file = current_task.get_file(fd)?;
    if !file.node().is_sock() {
        return error!(ENOTSOCK);
    }

    if vlen > UIO_MAXIOV {
        return error!(EINVAL);
    }

    // The timeout is relative; it is turned into an absolute deadline once, up front.
    let deadline = if user_timeout.is_null() {
        None
    } else {
        let ts = current_task.read_multi_arch_object(user_timeout)?;
        Some(zx::MonotonicInstant::after(duration_from_timespec(ts)?))
    };

    let mut index = 0usize;
    while index < vlen as usize {
        let current_ptr = user_mmsgvec.at(index)?;
        let mut current_mmsghdr = current_task.read_multi_arch_object(current_ptr)?;
        match recvmsg_internal_with_header(
            locked,
            current_task,
            &file,
            (&mut current_mmsghdr.hdr).into(),
            flags,
            deadline,
        ) {
            Err(error) => {
                // Only a failure on the very first message is reported as an error.
                if index == 0 {
                    return Err(error);
                }
                break;
            }
            Ok(bytes_read) => {
                current_mmsghdr.len = bytes_read;
                current_task.write_multi_arch_object(current_ptr, current_mmsghdr)?;
            }
        }
        index += 1;
        // With MSG_WAITFORONE, every receive after the first successful one is non-blocking.
        if flags & MSG_WAITFORONE != 0 {
            flags |= MSG_DONTWAIT;
        }
    }
    Ok(index)
}
771
772pub fn sys_recvfrom(
773    locked: &mut Locked<Unlocked>,
774    current_task: &CurrentTask,
775    fd: FdNumber,
776    user_buffer: UserAddress,
777    buffer_length: usize,
778    flags: u32,
779    user_src_address: UserAddress,
780    user_src_address_length: UserRef<socklen_t>,
781) -> Result<usize, Errno> {
782    let file = current_task.get_file(fd)?;
783    if !file.node().is_sock() {
784        return error!(ENOTSOCK);
785    }
786
787    let flags = SocketMessageFlags::from_bits(flags).ok_or_else(|| errno!(EINVAL))?;
788    let socket_ops = file.downcast_file::<SocketFile>().unwrap();
789    let info = socket_ops.recvmsg(
790        locked,
791        current_task,
792        &file,
793        &mut UserBuffersOutputBuffer::unified_new_at(current_task, user_buffer, buffer_length)?,
794        flags,
795        None,
796    )?;
797
798    if !user_src_address.is_null() {
799        let bytes = info.address.map(|a| a.to_bytes()).unwrap_or_else(|| vec![]);
800        write_socket_address(current_task, user_src_address, user_src_address_length, &bytes)?;
801    }
802
803    if flags.contains(SocketMessageFlags::TRUNC) {
804        Ok(info.message_length)
805    } else {
806        Ok(info.bytes_read)
807    }
808}
809
810fn sendmsg_internal<L>(
811    locked: &mut Locked<L>,
812    current_task: &CurrentTask,
813    file: &FileHandle,
814    user_message_header: MsgHdrPtr,
815    flags: u32,
816) -> Result<usize, Errno>
817where
818    L: LockEqualOrBefore<FileOpsCore>,
819{
820    let message_header = current_task.read_multi_arch_object(user_message_header)?;
821    sendmsg_internal_with_header(locked, current_task, file, &message_header, flags)
822}
823
824fn sendmsg_internal_with_header<L>(
825    locked: &mut Locked<L>,
826    current_task: &CurrentTask,
827    file: &FileHandle,
828    message_header: &MsgHdr,
829    flags: u32,
830) -> Result<usize, Errno>
831where
832    L: LockEqualOrBefore<FileOpsCore>,
833{
834    if message_header.name_len > i32::MAX as u32 {
835        return error!(EINVAL);
836    }
837    if message_header.control_len > 20480 {
838        return error!(ENOBUFS);
839    }
840    let dest_address = maybe_parse_socket_address(
841        current_task,
842        message_header.name,
843        message_header.name_len as usize,
844    )?;
845    let iovec = read_iovec_from_msghdr(current_task, message_header.into())?;
846
847    let mut next_message_offset: usize = 0;
848    let mut ancillary_data = Vec::new();
849    let header_size = CMsgHdrPtr::size_of_object_for(current_task);
850    loop {
851        let space = message_header.control_len.saturating_sub(next_message_offset);
852        if space < header_size {
853            break;
854        }
855        let cmsg_ref =
856            CMsgHdrPtr::new(current_task, (message_header.control + next_message_offset)?);
857        let cmsg = current_task.read_multi_arch_object(cmsg_ref)?;
858        // If the message header is not long enough to fit the required fields of the
859        // control data, return EINVAL.
860        if (cmsg.cmsg_len as usize) < header_size {
861            return error!(EINVAL);
862        }
863
864        let data_size = std::cmp::min(cmsg.cmsg_len as usize - header_size, space);
865        let next_data_offset = next_message_offset + header_size;
866        let data = current_task
867            .read_memory_to_vec((message_header.control + next_data_offset)?, data_size)?;
868        next_message_offset += cmsg_align(current_task, header_size + data.len())?;
869        let data = AncillaryData::from_cmsg(
870            current_task,
871            ControlMsg::new(cmsg.cmsg_level, cmsg.cmsg_type, data),
872        )?;
873        if data.total_size(current_task) == 0 {
874            continue;
875        }
876        ancillary_data.push(data);
877    }
878
879    let flags = SocketMessageFlags::from_bits(flags).ok_or_else(|| errno!(EOPNOTSUPP))?;
880    let socket_ops = file.downcast_file::<SocketFile>().unwrap();
881    socket_ops.sendmsg(
882        locked,
883        current_task,
884        file,
885        &mut UserBuffersInputBuffer::unified_new(current_task, iovec)?,
886        dest_address,
887        ancillary_data,
888        flags,
889    )
890}
891
892pub fn sys_sendmsg(
893    locked: &mut Locked<Unlocked>,
894    current_task: &CurrentTask,
895    fd: FdNumber,
896    user_message_header: MsgHdrPtr,
897    flags: u32,
898) -> Result<usize, Errno> {
899    let file = current_task.get_file(fd)?;
900    if !file.node().is_sock() {
901        return error!(ENOTSOCK);
902    }
903    sendmsg_internal(locked, current_task, &file, user_message_header, flags)
904}
905
906pub fn sys_sendmmsg(
907    locked: &mut Locked<Unlocked>,
908    current_task: &CurrentTask,
909    fd: FdNumber,
910    user_mmsgvec: MMsgHdrPtr,
911    mut vlen: u32,
912    flags: u32,
913) -> Result<usize, Errno> {
914    let file = current_task.get_file(fd)?;
915    if !file.node().is_sock() {
916        return error!(ENOTSOCK);
917    }
918
919    // vlen is capped at UIO_MAXIOV.
920    if vlen > UIO_MAXIOV {
921        vlen = UIO_MAXIOV;
922    }
923
924    let mut index = 0usize;
925    while index < vlen as usize {
926        let current_ptr = user_mmsgvec.at(index)?;
927        let mut current_mmsghdr = current_task.read_multi_arch_object(current_ptr)?;
928        match sendmsg_internal_with_header(locked, current_task, &file, &current_mmsghdr.hdr, flags)
929        {
930            Err(error) => {
931                if index == 0 {
932                    return Err(error);
933                }
934                break;
935            }
936            Ok(bytes_read) => {
937                current_mmsghdr.len = bytes_read;
938                current_task.write_multi_arch_object(current_ptr, current_mmsghdr)?;
939            }
940        }
941        index += 1;
942    }
943    Ok(index)
944}
945
946pub fn sys_sendto(
947    locked: &mut Locked<Unlocked>,
948    current_task: &CurrentTask,
949    fd: FdNumber,
950    user_buffer: UserAddress,
951    user_buffer_length: usize,
952    flags: u32,
953    user_dest_address: UserAddress,
954    user_dest_address_length: socklen_t,
955) -> Result<usize, Errno> {
956    let file = current_task.get_file(fd)?;
957    if !file.node().is_sock() {
958        return error!(ENOTSOCK);
959    }
960
961    let dest_address = maybe_parse_socket_address(
962        current_task,
963        user_dest_address,
964        user_dest_address_length as usize,
965    )?;
966    let mut data =
967        UserBuffersInputBuffer::unified_new_at(current_task, user_buffer, user_buffer_length)?;
968
969    let flags = SocketMessageFlags::from_bits(flags).ok_or_else(|| errno!(EOPNOTSUPP))?;
970    let socket_file = file.downcast_file::<SocketFile>().unwrap();
971    socket_file.sendmsg(locked, current_task, &file, &mut data, dest_address, vec![], flags)
972}
973
974pub fn sys_getsockopt(
975    locked: &mut Locked<Unlocked>,
976    current_task: &CurrentTask,
977    fd: FdNumber,
978    level: u32,
979    optname: u32,
980    user_optval: UserAddress,
981    user_optlen: UserRef<socklen_t>,
982) -> Result<(), Errno> {
983    let file = current_task.get_file(fd)?;
984    let socket = Socket::get_from_file(&file)?;
985
986    let optlen = current_task.read_object(user_optlen)? as usize;
987    let optval_buffer_len = optlen;
988    let mut optval = current_task.read_memory_to_vec(user_optval, optlen as usize)?;
989
990    let result = if socket.domain.is_inet() && IpTables::can_handle_getsockopt(level, optname) {
991        current_task.kernel().iptables().getsockopt(
992            locked,
993            current_task,
994            socket,
995            optname,
996            optval.clone(),
997        )
998    } else {
999        socket.getsockopt(locked, current_task, level, optname, optlen as u32)
1000    };
1001
1002    // Even if `getsockopt()` above returned an error we still need to run
1003    // the eBPF program - it may handle the error.
1004    let (optlen, error) = match result {
1005        Ok(new_optval) if new_optval.len() > optval.len() => (optlen, Some(errno!(EINVAL))),
1006        Ok(new_optval) => {
1007            // Copy the returned value to the buffer, but don't truncate it yet
1008            // - this will allow to use the whole buffer in the eBPF program.
1009            optval[..new_optval.len()].copy_from_slice(&new_optval);
1010            (new_optval.len(), None)
1011        }
1012        Err(e) => (optlen, Some(e)),
1013    };
1014
1015    let root_cgroup = current_task.kernel().ebpf_state.attachments.root_cgroup();
1016    let (optval, optlen) = root_cgroup.run_getsockopt_prog(
1017        locked.cast_locked(),
1018        current_task,
1019        level,
1020        optname,
1021        optval,
1022        optlen,
1023        error,
1024    )?;
1025
1026    assert!(optlen <= optval_buffer_len);
1027    current_task.write_memory(user_optval, &optval[..optlen])?;
1028    current_task.write_object(user_optlen, &(optlen as u32))?;
1029
1030    Ok(())
1031}
1032
1033pub fn sys_setsockopt(
1034    locked: &mut Locked<Unlocked>,
1035    current_task: &CurrentTask,
1036    fd: FdNumber,
1037    level: u32,
1038    optname: u32,
1039    user_optval: UserAddress,
1040    optlen: socklen_t,
1041) -> Result<(), Errno> {
1042    let file = current_task.get_file(fd)?;
1043    let socket = Socket::get_from_file(&file)?;
1044
1045    let user_opt = UserBuffer { address: user_optval, length: optlen as usize };
1046
1047    // Run eBPF program if any.
1048    let root_cgroup = current_task.kernel().ebpf_state.attachments.root_cgroup();
1049    let optval = match root_cgroup.run_setsockopt_prog(
1050        locked.cast_locked(),
1051        current_task,
1052        level,
1053        optname,
1054        user_opt.into(),
1055    ) {
1056        SetSockOptProgramResult::Allow(value) => value,
1057        SetSockOptProgramResult::Fail(errno) => return Err(errno),
1058        SetSockOptProgramResult::Bypass => return Ok(()), // The option was handled by eBPF.
1059    };
1060
1061    if socket.domain.is_inet() && IpTables::can_handle_setsockopt(level, optname) {
1062        current_task.kernel().iptables().setsockopt(locked, current_task, socket, optname, optval)
1063    } else {
1064        socket.setsockopt(locked, current_task, level, optname, optval)
1065    }
1066}
1067
1068pub fn sys_shutdown(
1069    locked: &mut Locked<Unlocked>,
1070    current_task: &CurrentTask,
1071    fd: FdNumber,
1072    how: u32,
1073) -> Result<(), Errno> {
1074    let file = current_task.get_file(fd)?;
1075    let socket = Socket::get_from_file(&file)?;
1076    let how = match how {
1077        SHUT_RD => SocketShutdownFlags::READ,
1078        SHUT_WR => SocketShutdownFlags::WRITE,
1079        SHUT_RDWR => SocketShutdownFlags::READ | SocketShutdownFlags::WRITE,
1080        _ => return error!(EINVAL),
1081    };
1082    socket.shutdown(locked, current_task, how)?;
1083    Ok(())
1084}
1085
1086pub fn cmsg_align(current_task: &CurrentTask, value: usize) -> Result<usize, Errno> {
1087    let alignment = if current_task.is_arch32() { 4 } else { 8 };
1088    round_up_to_increment(value, alignment)
1089}
1090
// Syscall entry points used for arch32 tasks. Only built for aarch64 targets.
#[cfg(target_arch = "aarch64")]
mod arch32 {
    use crate::task::CurrentTask;
    use crate::vfs::FdNumber;
    use starnix_sync::{Locked, Unlocked};
    use starnix_uapi::errors::Errno;
    use starnix_uapi::user_address::UserAddress;

    // These syscalls reuse the generic implementations under their arch32 names.
    pub use super::{
        sys_accept as sys_arch32_accept, sys_accept4 as sys_arch32_accept4,
        sys_bind as sys_arch32_bind, sys_getpeername as sys_arch32_getpeername,
        sys_getsockname as sys_arch32_getsockname, sys_getsockopt as sys_arch32_getsockopt,
        sys_listen as sys_arch32_listen, sys_recvfrom as sys_arch32_recvfrom,
        sys_recvmmsg as sys_arch32_recvmmsg, sys_recvmsg as sys_arch32_recvmsg,
        sys_sendmsg as sys_arch32_sendmsg, sys_sendto as sys_arch32_sendto,
        sys_setsockopt as sys_arch32_setsockopt, sys_shutdown as sys_arch32_shutdown,
        sys_socketpair as sys_arch32_socketpair,
    };

    /// arch32 `send`: forwards to `sys_sendto` with default values for the
    /// destination address and its length.
    pub fn sys_arch32_send(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        fd: FdNumber,
        user_buffer: UserAddress,
        user_buffer_length: usize,
        flags: u32,
    ) -> Result<usize, Errno> {
        let no_dest_address = Default::default();
        let no_dest_address_length = Default::default();
        super::sys_sendto(
            locked,
            current_task,
            fd,
            user_buffer,
            user_buffer_length,
            flags,
            no_dest_address,
            no_dest_address_length,
        )
    }

    /// arch32 `recv`: forwards to `sys_recvfrom` with default values for its last
    /// two (source address) arguments.
    pub fn sys_arch32_recv(
        locked: &mut Locked<Unlocked>,
        current_task: &CurrentTask,
        fd: FdNumber,
        user_buffer: UserAddress,
        buffer_length: usize,
        flags: u32,
    ) -> Result<usize, Errno> {
        let no_src_address = Default::default();
        let no_src_address_length = Default::default();
        super::sys_recvfrom(
            locked,
            current_task,
            fd,
            user_buffer,
            buffer_length,
            flags,
            no_src_address,
            no_src_address_length,
        )
    }
}
1151
1152#[cfg(target_arch = "aarch64")]
1153pub use arch32::*;
1154
#[cfg(test)]
mod tests {
    use super::*;
    use crate::testing::spawn_kernel_and_run;
    use starnix_uapi::{AF_INET, AF_UNIX, SOCK_STREAM};

    /// `sys_socketpair` must reject an unsupported domain, an invalid socket type,
    /// and a null output pointer with the expected errors.
    #[::fuchsia::test]
    async fn test_socketpair_invalid_arguments() {
        spawn_kernel_and_run(async |locked, current_task| {
            // (domain, socket type, expected result); protocol is always 0 and the
            // output pointer is always the default (null) address.
            let cases = [
                (AF_INET as u32, SOCK_STREAM, error!(EPROTONOSUPPORT)),
                (AF_UNIX as u32, 7, error!(EINVAL)),
                (AF_UNIX as u32, SOCK_STREAM, error!(EFAULT)),
            ];
            for (domain, socket_type, expected) in cases {
                assert_eq!(
                    sys_socketpair(
                        locked,
                        current_task,
                        domain,
                        socket_type,
                        0,
                        UserRef::new(UserAddress::default())
                    ),
                    expected
                );
            }
        })
        .await;
    }

    /// An autobind address is a NUL byte followed by five lowercase hex digits.
    #[::fuchsia::test]
    fn test_generate_autobind_address() {
        let address = generate_autobind_address();
        assert_eq!(address.len(), 6);
        assert_eq!(address[0], 0);

        let first_invalid = address[1..]
            .iter()
            .find(|byte| !matches!(**byte, b'0'..=b'9' | b'a'..=b'f'));
        if let Some(bad) = first_invalid {
            panic!("bad byte: {bad}");
        }
    }
}