netlink_packet_sock_diag/unix/
nlas.rs

1// SPDX-License-Identifier: MIT
2
3use anyhow::Context;
4use byteorder::{ByteOrder, NativeEndian};
5use netlink_packet_utils::nla::{self, DefaultNla, NlaBuffer};
6use netlink_packet_utils::parsers::{parse_string, parse_u8, parse_u32};
7use netlink_packet_utils::traits::{Emitable, Parseable};
8use netlink_packet_utils::{DecodeError, buffer};
9
10use crate::constants::*;
11
12#[derive(Debug, Eq, PartialEq, Clone)]
13pub enum Nla {
14    /// Path to which the socket was bound. This attribute is known as
15    /// `UNIX_DIAG_NAME` in the kernel.
16    Name(String),
17    /// VFS information for this socket. This attribute is known as
18    /// `UNIX_DIAG_VFS` in the kernel.
19    Vfs(Vfs),
20    /// Inode number of the socket's peer. This attribute is reported
21    /// for connected socket only. This attribute is known as
22    /// `UNIX_DIAG_PEER` in the kernel.
23    Peer(u32),
24    /// The payload associated with this attribute is an array of
25    /// inode numbers of sockets that have passed the `connect(2)`
26    /// call, but haven't been processed with `accept(2)` yet. This
27    /// attribute is reported for listening sockets only. This
28    /// attribute is known as `UNIX_DIAG_ICONS` in the kernel.
29    PendingConnections(Vec<u32>),
30    /// This attribute corresponds to the `UNIX_DIAG_RQLEN`. It
31    /// reports the length of the socket receive queue, and the queue
32    /// size limit. Note that for **listening** sockets the receive
33    /// queue is used to store actual data sent by other sockets. It
34    /// is used to store pending connections. So the meaning of this
35    /// attribute differs for listening sockets.
36    ///
37    /// For **listening** sockets:
38    ///
39    /// - the first the number is the number of pending connections. It should
40    ///   be equal to `Nla::PendingConnections` value's length.
41    /// - the second number is the backlog queue maximum length, which equals
42    ///   to the value passed as the second argument to `listen(2)`
43    ///
44    /// For other sockets:
45    ///
46    /// - the first number is the amount of data in receive queue (**note**: I
47    ///   am not sure if it is the actual amount of data or the amount of
48    ///   memory allocated. The two might differ because of memory allocation
49    ///   strategies: more memory than strictly necessary may be allocated for
50    ///   a given `sk_buff`)
51    /// - the second number is the memory used by outgoing data. Note that
52    ///   strictly UNIX sockets don't have a send queue, since the data they
53    ///   send is directly written into the destination socket receive queue.
54    ///   But the memory allocated for this data is still counted from the
55    ///   sender point of view.
56    ReceiveQueueLength(u32, u32),
57    /// Socket memory information. See [`MemInfo`] for more details.
58    MemInfo(MemInfo),
59    /// Shutown state: one of [`SHUT_RD`], [`SHUT_WR`] or [`SHUT_RDWR`]
60    Shutdown(u8),
61    /// Unknown attribute
62    Other(DefaultNla),
63}
64
65pub const VFS_LEN: usize = 8;
66
67buffer!(VfsBuffer(8) {
68    inode: (u32, 0..4),
69    device: (u32, 4..8),
70});
71
/// Payload of the `UNIX_DIAG_VFS` attribute.
///
/// The fields are public so that a parsed value can actually be inspected
/// (and a value to emit can be constructed) by users of this type.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct Vfs {
    /// Inode number
    pub inode: u32,
    /// Device number
    pub device: u32,
}
79
80impl<T: AsRef<[u8]>> Parseable<VfsBuffer<T>> for Vfs {
81    type Error = DecodeError;
82    fn parse(buf: &VfsBuffer<T>) -> Result<Self, DecodeError> {
83        Ok(Self { inode: buf.inode(), device: buf.device() })
84    }
85}
86
87impl Emitable for Vfs {
88    fn buffer_len(&self) -> usize {
89        VFS_LEN
90    }
91
92    fn emit(&self, buf: &mut [u8]) {
93        let mut buf = VfsBuffer::new_unchecked(buf);
94        buf.set_inode(self.inode);
95        buf.set_device(self.device);
96    }
97}
98
/// Size in bytes of the `UNIX_DIAG_MEMINFO` payload: nine `u32` slots
/// (9 * 4 = 36), as laid out by `MemInfoBuffer` below.
pub const MEM_INFO_LEN: usize = 36;

// Layout of the `UNIX_DIAG_MEMINFO` payload. Only `so_rcvbuf`,
// `max_datagram_size` and `alloc` are surfaced through `MemInfo`; the
// `unused_*` slots are written as zero when emitting and ignored when
// parsing.
//
// NOTE(review): the kernel's `SK_MEMINFO_*` enum appears to place
// `SK_MEMINFO_WMEM_ALLOC` at index 2 (bytes 8..12) and
// `SK_MEMINFO_WMEM_QUEUED` at index 5 (bytes 20..24). If that is right,
// `alloc` below reads the "queued" counter rather than `sk_wmem_alloc` —
// confirm against `include/uapi/linux/sock_diag.h` before relying on it.
buffer!(MemInfoBuffer(MEM_INFO_LEN) {
    unused_sk_rmem_alloc: (u32, 0..4),
    so_rcvbuf: (u32, 4..8),
    unused_sk_wmem_queued: (u32, 8..12),
    max_datagram_size: (u32, 12..16),
    unused_sk_fwd_alloc: (u32, 16..20),
    alloc: (u32, 20..24),
    unused_sk_optmem: (u32, 24..28),
    unused_backlog: (u32, 28..32),
    unused_drops: (u32, 32..36),
});
112
/// Socket memory information carried by the `UNIX_DIAG_MEMINFO`
/// attribute.
///
/// # Warning
///
/// I don't have a good understanding of the Unix Domain Sockets, thus
/// take the following documentation with a *huge* grain of salt.
///
/// # Documentation
///
/// ## `UNIX_DIAG_MEMINFO` vs `INET_DIAG_SKMEMINFO`
///
/// `MemInfo` represents a `UNIX_DIAG_MEMINFO` NLA. This NLA has the
/// same structure as `INET_DIAG_SKMEMINFO`, but since Unix sockets
/// don't actually use the network stack, many fields are not relevant
/// and are always set to 0. According to iproute2 commit
/// [51ff9f2453d066933f24170f0106a7deeefa02d9](https://patchwork.ozlabs.org/patch/222700/),
/// only three attributes can have non-zero values.
///
/// ## Particularities of UNIX sockets
///
/// One particularity of UNIX sockets is that they don't really have a
/// send queue: when sending data, the kernel finds the destination
/// socket and enqueues the data directly in its receive queue (see
/// also [this StackOverflow
/// answer](https://stackoverflow.com/questions/9644251/how-do-unix-domain-sockets-differentiate-between-multiple-clients)).
/// For instance in `unix_dgram_sendmsg()` in `net/unix/af_unix.c` we
/// have:
///
/// ```c
/// // `other` refers to the peer socket here
/// skb_queue_tail(&other->sk_receive_queue, skb);
/// ```
///
/// Another particularity is that the kernel keeps track of the memory
/// using the sender's `sock.sk_wmem_alloc` attribute. The receiver's
/// `sock.sk_rmem_alloc` is always zero. Memory is allocated when data
/// is written to a socket, and is reclaimed when the data is read
/// from the peer's socket.
///
/// Last but not least, the way unix sockets handle incoming
/// connections differs from the TCP sockets. For TCP sockets, the
/// queue used to store pending connections is
/// `sock.sk_ack_backlog`. But UNIX sockets use the receive queue to
/// store them. They can do that because a listening socket only
/// receives connections, it does not receive actual data from other
/// sockets, so there is no ambiguity about the nature of the data
/// stored in the receive queue.
//
// Disabled documentation, kept for reference (deliberately a plain
// comment so that it does not open a code fence in the rendered docs).
// In `unix_stream_sendmsg()` for instance we have the following
// function calls:
//
//     unix_stream_sendmsg()
//         -> sock_alloc_send_pskb()
//         -> skb_set_owner_w()
//         -> refcount_add(size, &sk->sk_wmem_alloc);
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub struct MemInfo {
    /// Value of `SO_RCVBUF`, although it does not have any effect on
    /// Unix Domain Sockets. As per `man unix(7)`:
    ///
    /// > The `SO_SNDBUF` socket option does have an effect for UNIX
    /// > domain sockets, but the `SO_RCVBUF` option does not.
    ///
    /// This attribute corresponds to `sock.sk_rcvbuf` in the kernel.
    pub so_rcvbuf: u32,
    /// Maximum size in bytes of a datagram, as set by
    /// `SO_SNDBUF`. As per `man unix(7)`:
    ///
    /// > For datagram sockets, the `SO_SNDBUF` value imposes an upper
    /// > limit on the size of outgoing datagrams. This limit is
    /// > calculated as the doubled (see `socket(7)`) option value
    /// > less 32 bytes used for overhead.
    ///
    /// This attribute corresponds to `sock.sk_sndbuf` in the kernel.
    pub max_datagram_size: u32,
    // NOTE(review): the first paragraph below names `sk_wmem_queued`,
    // while the struct-level docs and the C snippet below refer to
    // `sk_wmem_alloc` — confirm which kernel counter this field reflects.
    /// Memory currently allocated for the data sent but not yet read
    /// from the receiving socket(s). The memory is tracked using the
    /// sending socket `sock.sk_wmem_queued` attribute in the kernel.
    ///
    /// Note that this quantity is a little larger than the actual
    /// data being sent because it takes into account the overhead of
    /// the `sk_buff`s used internally:
    ///
    /// ```c
    /// /* in net/core/sock.c, sk_wmem_alloc is set in
    ///    skb_set_owner_w() with: */
    /// refcount_add(skb->truesize, &sk->sk_wmem_alloc);
    ///
    /// /* truesize is set by __alloc_skb() in net/core/skbuff.c
    ///    by: */
    /// skb->truesize = SKB_TRUESIZE(size);
    ///
    /// /* and SKB_TRUESIZE is defined as: */
    /// #define SKB_TRUESIZE(X) ((X) +                        \
    ///     SKB_DATA_ALIGN(sizeof(struct sk_buff)) +          \
    ///     SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
    /// ```
    pub alloc: u32,
}
210
211impl<T: AsRef<[u8]>> Parseable<MemInfoBuffer<T>> for MemInfo {
212    type Error = DecodeError;
213    fn parse(buf: &MemInfoBuffer<T>) -> Result<Self, DecodeError> {
214        Ok(Self {
215            so_rcvbuf: buf.so_rcvbuf(),
216            max_datagram_size: buf.max_datagram_size(),
217            alloc: buf.alloc(),
218        })
219    }
220}
221
222impl Emitable for MemInfo {
223    fn buffer_len(&self) -> usize {
224        MEM_INFO_LEN
225    }
226
227    fn emit(&self, buf: &mut [u8]) {
228        let mut buf = MemInfoBuffer::new_unchecked(buf);
229        buf.set_unused_sk_rmem_alloc(0);
230        buf.set_so_rcvbuf(self.so_rcvbuf);
231        buf.set_unused_sk_wmem_queued(0);
232        buf.set_max_datagram_size(self.max_datagram_size);
233        buf.set_unused_sk_fwd_alloc(0);
234        buf.set_alloc(self.alloc);
235        buf.set_unused_sk_optmem(0);
236        buf.set_unused_backlog(0);
237        buf.set_unused_drops(0);
238    }
239}
240
241impl nla::Nla for Nla {
242    fn value_len(&self) -> usize {
243        use self::Nla::*;
244        match *self {
245            // +1 because we need to append a null byte
246            Name(ref s) => s.as_bytes().len() + 1,
247            Vfs(_) => VFS_LEN,
248            Peer(_) => 4,
249            PendingConnections(ref v) => 4 * v.len(),
250            ReceiveQueueLength(_, _) => 8,
251            MemInfo(_) => MEM_INFO_LEN,
252            Shutdown(_) => 1,
253            Other(ref attr) => attr.value_len(),
254        }
255    }
256
257    fn emit_value(&self, buffer: &mut [u8]) {
258        use self::Nla::*;
259        match *self {
260            Name(ref s) => {
261                buffer[..s.len()].copy_from_slice(s.as_bytes());
262                buffer[s.len()] = 0;
263            }
264            Vfs(ref value) => value.emit(buffer),
265            Peer(value) => NativeEndian::write_u32(buffer, value),
266            PendingConnections(ref values) => {
267                for (i, v) in values.iter().enumerate() {
268                    NativeEndian::write_u32(&mut buffer[i * 4..], *v);
269                }
270            }
271            ReceiveQueueLength(v1, v2) => {
272                NativeEndian::write_u32(buffer, v1);
273                NativeEndian::write_u32(&mut buffer[4..], v2);
274            }
275            MemInfo(ref value) => value.emit(buffer),
276            Shutdown(value) => buffer[0] = value,
277            Other(ref attr) => attr.emit_value(buffer),
278        }
279    }
280
281    fn kind(&self) -> u16 {
282        use self::Nla::*;
283        match *self {
284            Name(_) => UNIX_DIAG_NAME,
285            Vfs(_) => UNIX_DIAG_VFS,
286            Peer(_) => UNIX_DIAG_PEER,
287            PendingConnections(_) => UNIX_DIAG_ICONS,
288            ReceiveQueueLength(_, _) => UNIX_DIAG_RQLEN,
289            MemInfo(_) => UNIX_DIAG_MEMINFO,
290            Shutdown(_) => UNIX_DIAG_SHUTDOWN,
291            Other(ref attr) => attr.kind(),
292        }
293    }
294}
295
296impl<'a, T: AsRef<[u8]> + ?Sized> Parseable<NlaBuffer<&'a T>> for Nla {
297    type Error = DecodeError;
298    fn parse(buf: &NlaBuffer<&'a T>) -> Result<Self, DecodeError> {
299        let payload = buf.value();
300        Ok(match buf.kind() {
301            UNIX_DIAG_NAME => {
302                let err = "invalid UNIX_DIAG_NAME value";
303                Self::Name(parse_string(payload).context(err)?)
304            }
305            UNIX_DIAG_VFS => {
306                let err = "invalid UNIX_DIAG_VFS value";
307                let buf = VfsBuffer::new(payload).context(err)?;
308                Self::Vfs(Vfs::parse(&buf).context(err)?)
309            }
310            UNIX_DIAG_PEER => {
311                Self::Peer(parse_u32(payload).context("invalid UNIX_DIAG_PEER value")?)
312            }
313            UNIX_DIAG_ICONS => {
314                if payload.len() % 4 != 0 {
315                    return Err(DecodeError::from("invalid UNIX_DIAG_ICONS"));
316                }
317                Self::PendingConnections(payload.chunks(4).map(NativeEndian::read_u32).collect())
318            }
319            UNIX_DIAG_RQLEN => {
320                if payload.len() != 8 {
321                    return Err(DecodeError::from("invalid UNIX_DIAG_RQLEN"));
322                }
323                Self::ReceiveQueueLength(
324                    NativeEndian::read_u32(&payload[..4]),
325                    NativeEndian::read_u32(&payload[4..]),
326                )
327            }
328            UNIX_DIAG_MEMINFO => {
329                let err = "invalid UNIX_DIAG_MEMINFO value";
330                let buf = MemInfoBuffer::new(payload).context(err)?;
331                Self::MemInfo(MemInfo::parse(&buf).context(err)?)
332            }
333            UNIX_DIAG_SHUTDOWN => {
334                Self::Shutdown(parse_u8(payload).context("invalid UNIX_DIAG_SHUTDOWN value")?)
335            }
336            kind => {
337                Self::Other(DefaultNla::parse(buf).context(format!("unknown NLA type {kind}"))?)
338            }
339        })
340    }
341}