netlink_packet_sock_diag/unix/nlas.rs
1// SPDX-License-Identifier: MIT
2
3use anyhow::Context;
4use byteorder::{ByteOrder, NativeEndian};
5use netlink_packet_utils::nla::{self, DefaultNla, NlaBuffer};
6use netlink_packet_utils::parsers::{parse_string, parse_u8, parse_u32};
7use netlink_packet_utils::traits::{Emitable, Parseable};
8use netlink_packet_utils::{DecodeError, buffer};
9
10use crate::constants::*;
11
/// Netlink attributes (`UNIX_DIAG_*`) that describe a UNIX domain socket.
#[derive(Debug, Eq, PartialEq, Clone)]
pub enum Nla {
    /// Path to which the socket was bound. This attribute is known as
    /// `UNIX_DIAG_NAME` in the kernel.
    Name(String),
    /// VFS information for this socket. This attribute is known as
    /// `UNIX_DIAG_VFS` in the kernel.
    Vfs(Vfs),
    /// Inode number of the socket's peer. This attribute is reported
    /// for connected sockets only. This attribute is known as
    /// `UNIX_DIAG_PEER` in the kernel.
    Peer(u32),
    /// The payload associated with this attribute is an array of
    /// inode numbers of sockets that have passed the `connect(2)`
    /// call, but haven't been processed with `accept(2)` yet. This
    /// attribute is reported for listening sockets only. This
    /// attribute is known as `UNIX_DIAG_ICONS` in the kernel.
    PendingConnections(Vec<u32>),
    /// This attribute corresponds to the `UNIX_DIAG_RQLEN`. It
    /// reports the length of the socket receive queue, and the queue
    /// size limit. Note that for **listening** sockets the receive
    /// queue is not used to store actual data sent by other sockets.
    /// It is used to store pending connections. So the meaning of
    /// this attribute differs for listening sockets.
    ///
    /// For **listening** sockets:
    ///
    /// - the first number is the number of pending connections. It should
    ///   be equal to `Nla::PendingConnections` value's length.
    /// - the second number is the backlog queue maximum length, which equals
    ///   to the value passed as the second argument to `listen(2)`
    ///
    /// For other sockets:
    ///
    /// - the first number is the amount of data in receive queue (**note**: I
    ///   am not sure if it is the actual amount of data or the amount of
    ///   memory allocated. The two might differ because of memory allocation
    ///   strategies: more memory than strictly necessary may be allocated for
    ///   a given `sk_buff`)
    /// - the second number is the memory used by outgoing data. Note that
    ///   strictly UNIX sockets don't have a send queue, since the data they
    ///   send is directly written into the destination socket receive queue.
    ///   But the memory allocated for this data is still counted from the
    ///   sender point of view.
    ReceiveQueueLength(u32, u32),
    /// Socket memory information. See [`MemInfo`] for more details.
    MemInfo(MemInfo),
    /// Shutdown state: one of [`SHUT_RD`], [`SHUT_WR`] or [`SHUT_RDWR`]
    Shutdown(u8),
    /// Unknown attribute
    Other(DefaultNla),
}
64
65pub const VFS_LEN: usize = 8;
66
67buffer!(VfsBuffer(8) {
68 inode: (u32, 0..4),
69 device: (u32, 4..8),
70});
71
/// VFS information attached to a UNIX socket, i.e. the payload of a
/// `UNIX_DIAG_VFS` attribute.
///
/// The fields are public so that a value obtained from [`Nla::Vfs`] can be
/// inspected, and so that callers can build one to emit.
#[derive(Debug, Eq, PartialEq, Clone)]
pub struct Vfs {
    /// Inode number
    pub inode: u32,
    /// Device number
    pub device: u32,
}
79
80impl<T: AsRef<[u8]>> Parseable<VfsBuffer<T>> for Vfs {
81 type Error = DecodeError;
82 fn parse(buf: &VfsBuffer<T>) -> Result<Self, DecodeError> {
83 Ok(Self { inode: buf.inode(), device: buf.device() })
84 }
85}
86
87impl Emitable for Vfs {
88 fn buffer_len(&self) -> usize {
89 VFS_LEN
90 }
91
92 fn emit(&self, buf: &mut [u8]) {
93 let mut buf = VfsBuffer::new_unchecked(buf);
94 buf.set_inode(self.inode);
95 buf.set_device(self.device);
96 }
97}
98
99pub const MEM_INFO_LEN: usize = 36;
100
101buffer!(MemInfoBuffer(MEM_INFO_LEN) {
102 unused_sk_rmem_alloc: (u32, 0..4),
103 so_rcvbuf: (u32, 4..8),
104 unused_sk_wmem_queued: (u32, 8..12),
105 max_datagram_size: (u32, 12..16),
106 unused_sk_fwd_alloc: (u32, 16..20),
107 alloc: (u32, 20..24),
108 unused_sk_optmem: (u32, 24..28),
109 unused_backlog: (u32, 28..32),
110 unused_drops: (u32, 32..36),
111});
112
/// # Warning
///
/// I don't have a good understanding of the Unix Domain Sockets, thus
/// take the following documentation with a *huge* grain of salt.
///
/// # Documentation
///
/// ## `UNIX_DIAG_MEMINFO` vs `INET_DIAG_SK_MEMINFO`
///
/// `MemInfo` represents a `UNIX_DIAG_MEMINFO` NLA. This NLA has the
/// same structure as `INET_DIAG_SKMEMINFO`, but since Unix sockets
/// don't actually use the network stack, many fields are not relevant
/// and are always set to 0. According to iproute2 commit
/// [51ff9f2453d066933f24170f0106a7deeefa02d9](https://patchwork.ozlabs.org/patch/222700/), only three attributes can have non-zero values.
///
/// ## Particularities of UNIX sockets
///
/// One particularity of UNIX sockets is that they don't really have a
/// send queue: when sending data, the kernel finds the destination
/// socket and enqueues the data directly in its receive queue (see
/// also [this StackOverflow
/// answer](https://stackoverflow.com/questions/9644251/how-do-unix-domain-sockets-differentiate-between-multiple-clients)). For
/// instance in `unix_dgram_sendmsg()` in `net/unix/af_unix.c` we
/// have:
///
/// ```c
/// // `other` refers to the peer socket here
/// skb_queue_tail(&other->sk_receive_queue, skb);
/// ```
///
/// Another particularity is that the kernel keeps track of the memory
/// using the sender's `sock.sk_wmem_alloc` attribute. The receiver's
/// `sock.sk_rmem_alloc` is always zero. Memory is allocated when data
/// is written to a socket, and is reclaimed when the data is read
/// from the peer's socket.
///
/// Last but not least, the way unix sockets handle incoming
/// connections differs from the TCP sockets. For TCP sockets, the
/// queue used to store pending connections is
/// `sock.sk_ack_backlog`. But UNIX sockets use the receive queue to
/// store them. They can do that because a listening socket only
/// receives connections, it does not receive actual data from other
/// sockets, so there is no ambiguity about the nature of the data
/// stored in the receive queue.
// The snippet below is intentionally kept out of the rendered
// documentation. Every line of it, including the closing fence, must stay
// a plain `//` comment: a lone `/// ```` line would open a code block in
// rustdoc that is never closed.
//
// /// We can see that in `unix_stream_sendmsg()` for instance we have
// /// the following function calls:
// ///
// /// ```
// /// unix_stream_sendmsg()
// ///     -> sock_alloc_send_pskb()
// ///         -> skb_set_owner_w()
// ///             -> refcount_add(size, &sk->sk_wmem_alloc);
// /// ```
#[derive(Debug, PartialEq, Eq, Copy, Clone)]
pub struct MemInfo {
    /// Value of `SO_RCVBUF`, although it does not have any effect on
    /// Unix Domain Sockets. As per `man unix(7)`:
    ///
    /// > The `SO_SNDBUF` socket option does have an effect for UNIX
    /// > domain sockets, but the `SO_RCVBUF` option does not.
    ///
    /// This attribute corresponds to `sock.sk_rcvbuf` in the kernel.
    pub so_rcvbuf: u32,
    /// Maximum size in bytes of a datagram, as set by
    /// `SO_SNDBUF`. As per `man unix(7)`:
    ///
    /// > For datagram sockets, the `SO_SNDBUF` value imposes an upper
    /// > limit on the size of outgoing datagrams. This limit is
    /// > calculated as the doubled (see `socket(7)`) option value
    /// > less 32 bytes used for overhead.
    ///
    /// This attribute corresponds to `sock.sk_sndbuf` in the kernel.
    pub max_datagram_size: u32,
    /// Memory currently allocated for the data sent but not yet read
    /// from the receiving socket(s). The memory is tracked using the
    /// sending socket `sock.sk_wmem_alloc` attribute in the kernel.
    ///
    /// Note that this quantity is a little larger than the actual
    /// data being sent because it takes into account the overhead of
    /// the `sk_buff`s used internally:
    ///
    /// ```c
    /// /* in net/core/sock.c, sk_wmem_alloc is set in
    ///    skb_set_owner_w() with: */
    /// refcount_add(skb->truesize, &sk->sk_wmem_alloc);
    ///
    /// /* truesize is set by __alloc_skb() in net/core/skbuff.c
    ///    by: */
    /// skb->truesize = SKB_TRUESIZE(size);
    ///
    /// /* and SKB_TRUESIZE is defined as: */
    /// #define SKB_TRUESIZE(X) ((X) +                                  \
    ///                          SKB_DATA_ALIGN(sizeof(struct sk_buff)) + \
    ///                          SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
    /// ```
    pub alloc: u32,
}
210
211impl<T: AsRef<[u8]>> Parseable<MemInfoBuffer<T>> for MemInfo {
212 type Error = DecodeError;
213 fn parse(buf: &MemInfoBuffer<T>) -> Result<Self, DecodeError> {
214 Ok(Self {
215 so_rcvbuf: buf.so_rcvbuf(),
216 max_datagram_size: buf.max_datagram_size(),
217 alloc: buf.alloc(),
218 })
219 }
220}
221
222impl Emitable for MemInfo {
223 fn buffer_len(&self) -> usize {
224 MEM_INFO_LEN
225 }
226
227 fn emit(&self, buf: &mut [u8]) {
228 let mut buf = MemInfoBuffer::new_unchecked(buf);
229 buf.set_unused_sk_rmem_alloc(0);
230 buf.set_so_rcvbuf(self.so_rcvbuf);
231 buf.set_unused_sk_wmem_queued(0);
232 buf.set_max_datagram_size(self.max_datagram_size);
233 buf.set_unused_sk_fwd_alloc(0);
234 buf.set_alloc(self.alloc);
235 buf.set_unused_sk_optmem(0);
236 buf.set_unused_backlog(0);
237 buf.set_unused_drops(0);
238 }
239}
240
241impl nla::Nla for Nla {
242 fn value_len(&self) -> usize {
243 use self::Nla::*;
244 match *self {
245 // +1 because we need to append a null byte
246 Name(ref s) => s.as_bytes().len() + 1,
247 Vfs(_) => VFS_LEN,
248 Peer(_) => 4,
249 PendingConnections(ref v) => 4 * v.len(),
250 ReceiveQueueLength(_, _) => 8,
251 MemInfo(_) => MEM_INFO_LEN,
252 Shutdown(_) => 1,
253 Other(ref attr) => attr.value_len(),
254 }
255 }
256
257 fn emit_value(&self, buffer: &mut [u8]) {
258 use self::Nla::*;
259 match *self {
260 Name(ref s) => {
261 buffer[..s.len()].copy_from_slice(s.as_bytes());
262 buffer[s.len()] = 0;
263 }
264 Vfs(ref value) => value.emit(buffer),
265 Peer(value) => NativeEndian::write_u32(buffer, value),
266 PendingConnections(ref values) => {
267 for (i, v) in values.iter().enumerate() {
268 NativeEndian::write_u32(&mut buffer[i * 4..], *v);
269 }
270 }
271 ReceiveQueueLength(v1, v2) => {
272 NativeEndian::write_u32(buffer, v1);
273 NativeEndian::write_u32(&mut buffer[4..], v2);
274 }
275 MemInfo(ref value) => value.emit(buffer),
276 Shutdown(value) => buffer[0] = value,
277 Other(ref attr) => attr.emit_value(buffer),
278 }
279 }
280
281 fn kind(&self) -> u16 {
282 use self::Nla::*;
283 match *self {
284 Name(_) => UNIX_DIAG_NAME,
285 Vfs(_) => UNIX_DIAG_VFS,
286 Peer(_) => UNIX_DIAG_PEER,
287 PendingConnections(_) => UNIX_DIAG_ICONS,
288 ReceiveQueueLength(_, _) => UNIX_DIAG_RQLEN,
289 MemInfo(_) => UNIX_DIAG_MEMINFO,
290 Shutdown(_) => UNIX_DIAG_SHUTDOWN,
291 Other(ref attr) => attr.kind(),
292 }
293 }
294}
295
296impl<'a, T: AsRef<[u8]> + ?Sized> Parseable<NlaBuffer<&'a T>> for Nla {
297 type Error = DecodeError;
298 fn parse(buf: &NlaBuffer<&'a T>) -> Result<Self, DecodeError> {
299 let payload = buf.value();
300 Ok(match buf.kind() {
301 UNIX_DIAG_NAME => {
302 let err = "invalid UNIX_DIAG_NAME value";
303 Self::Name(parse_string(payload).context(err)?)
304 }
305 UNIX_DIAG_VFS => {
306 let err = "invalid UNIX_DIAG_VFS value";
307 let buf = VfsBuffer::new(payload).context(err)?;
308 Self::Vfs(Vfs::parse(&buf).context(err)?)
309 }
310 UNIX_DIAG_PEER => {
311 Self::Peer(parse_u32(payload).context("invalid UNIX_DIAG_PEER value")?)
312 }
313 UNIX_DIAG_ICONS => {
314 if payload.len() % 4 != 0 {
315 return Err(DecodeError::from("invalid UNIX_DIAG_ICONS"));
316 }
317 Self::PendingConnections(payload.chunks(4).map(NativeEndian::read_u32).collect())
318 }
319 UNIX_DIAG_RQLEN => {
320 if payload.len() != 8 {
321 return Err(DecodeError::from("invalid UNIX_DIAG_RQLEN"));
322 }
323 Self::ReceiveQueueLength(
324 NativeEndian::read_u32(&payload[..4]),
325 NativeEndian::read_u32(&payload[4..]),
326 )
327 }
328 UNIX_DIAG_MEMINFO => {
329 let err = "invalid UNIX_DIAG_MEMINFO value";
330 let buf = MemInfoBuffer::new(payload).context(err)?;
331 Self::MemInfo(MemInfo::parse(&buf).context(err)?)
332 }
333 UNIX_DIAG_SHUTDOWN => {
334 Self::Shutdown(parse_u8(payload).context("invalid UNIX_DIAG_SHUTDOWN value")?)
335 }
336 kind => {
337 Self::Other(DefaultNla::parse(buf).context(format!("unknown NLA type {kind}"))?)
338 }
339 })
340 }
341}