netstack3_ip/
socket.rs

1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! IPv4 and IPv6 sockets.
6
7use core::cmp::Ordering;
8use core::convert::Infallible;
9use core::num::NonZeroU8;
10
11use log::error;
12use net_types::ip::{Ip, IpVersionMarker, Ipv6Addr, Mtu};
13use net_types::{MulticastAddress, ScopeableAddress, SpecifiedAddr};
14use netstack3_base::socket::{SocketIpAddr, SocketIpAddrExt as _};
15use netstack3_base::{
16    AnyDevice, CounterContext, DeviceIdContext, DeviceIdentifier, EitherDeviceId, InstantContext,
17    IpDeviceAddr, IpExt, Marks, Mms, SendFrameErrorReason, StrongDeviceIdentifier, TxMetadata as _,
18    TxMetadataBindingsTypes, WeakDeviceIdentifier,
19};
20use netstack3_filter::{
21    self as filter, DynTransportSerializer, DynamicTransportSerializer, FilterBindingsContext,
22    FilterHandler as _, FilterIpExt, InterfaceProperties, RawIpBody, SocketEgressFilterResult,
23    SocketOpsFilter, SocketOpsFilterBindingContext, TransportPacketSerializer,
24};
25use netstack3_trace::trace_duration;
26use packet::{BufferMut, PacketConstraints, SerializeError};
27use packet_formats::ip::DscpAndEcn;
28use thiserror::Error;
29
30use crate::internal::base::{
31    FilterHandlerProvider, IpDeviceMtuContext, IpLayerIpExt, IpLayerPacketMetadata,
32    IpPacketDestination, IpSendFrameError, IpSendFrameErrorReason, ResolveRouteError,
33    SendIpPacketMeta,
34};
35use crate::internal::counters::IpCounters;
36use crate::internal::device::state::IpDeviceStateIpExt;
37use crate::internal::routing::PacketOrigin;
38use crate::internal::routing::rules::RuleInput;
39use crate::internal::types::{InternalForwarding, ResolvedRoute, RoutableIpAddr};
40use crate::{HopLimits, NextHop};
41
/// The arguments used for creating an [`IpSock`].
///
/// `D` is the strong device identifier type, `I` is the IP version and `O` is
/// the type of the socket options that accompany the request.
pub struct IpSocketArgs<'a, D: StrongDeviceIdentifier, I: IpExt, O> {
    /// The device the socket is bound to, if any.
    ///
    /// The device may be referenced either strongly or weakly.
    pub device: Option<EitherDeviceId<&'a D, &'a D::Weak>>,
    /// The local IP to use for the connection. One is selected if not provided
    /// based on the output route.
    pub local_ip: Option<IpDeviceAddr<I::Addr>>,
    /// The remote IP address for this connection.
    pub remote_ip: RoutableIpAddr<I::Addr>,
    /// The IP protocol in use.
    pub proto: I::Proto,
    /// Additional IP layer options.
    pub options: &'a O,
}
56/// An execution context defining a type of IP socket.
57pub trait IpSocketHandler<I: IpExt + FilterIpExt, BC: TxMetadataBindingsTypes>:
58    DeviceIdContext<AnyDevice>
59{
60    /// Constructs a new [`IpSock`].
61    ///
62    /// `new_ip_socket` constructs a new `IpSock` to the given remote IP
63    /// address from the given local IP address with the given IP protocol. If
64    /// no local IP address is given, one will be chosen automatically. If
65    /// `device` is `Some`, the socket will be bound to the given device - only
66    /// routes which egress over the device will be used. If no route is
67    /// available which egresses over the device - even if routes are available
68    /// which egress over other devices - the socket will be considered
69    /// unroutable.
70    ///
71    /// `new_ip_socket` returns an error if no route to the remote was found in
72    /// the forwarding table or if the given local IP address is not valid for
73    /// the found route.
74    fn new_ip_socket<O>(
75        &mut self,
76        bindings_ctx: &mut BC,
77        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
78    ) -> Result<IpSock<I, Self::WeakDeviceId>, IpSockCreationError>
79    where
80        O: RouteResolutionOptions<I>;
81
82    /// Sends an IP packet on a socket.
83    ///
84    /// The generated packet has its metadata initialized from `socket`,
85    /// including the source and destination addresses, the Time To Live/Hop
86    /// Limit, and the Protocol/Next Header. The outbound device is also chosen
87    /// based on information stored in the socket.
88    ///
89    /// `mtu` may be used to optionally impose an MTU on the outgoing packet.
90    /// Note that the device's MTU will still be imposed on the packet. That is,
91    /// the smaller of `mtu` and the device's MTU will be imposed on the packet.
92    ///
93    /// If the socket is currently unroutable, an error is returned.
94    fn send_ip_packet<S, O>(
95        &mut self,
96        bindings_ctx: &mut BC,
97        socket: &IpSock<I, Self::WeakDeviceId>,
98        body: S,
99        options: &O,
100        tx_metadata: BC::TxMetadata,
101    ) -> Result<(), IpSockSendError>
102    where
103        S: TransportPacketSerializer<I>,
104        S::Buffer: BufferMut,
105        O: SendOptions<I> + RouteResolutionOptions<I>;
106
107    /// Confirms the provided IP socket destination is reachable.
108    ///
109    /// Implementations must retrieve the next hop given the provided
110    /// IP socket and confirm neighbor reachability for the resolved target
111    /// device.
112    fn confirm_reachable<O>(
113        &mut self,
114        bindings_ctx: &mut BC,
115        socket: &IpSock<I, Self::WeakDeviceId>,
116        options: &O,
117    ) where
118        O: RouteResolutionOptions<I>;
119
120    /// Creates a temporary IP socket and sends a single packet on it.
121    ///
122    /// `local_ip`, `remote_ip`, `proto`, and `options` are passed directly to
123    /// [`IpSocketHandler::new_ip_socket`]. `get_body_from_src_ip` is given the
124    /// source IP address for the packet - which may have been chosen
125    /// automatically if `local_ip` is `None` - and returns the body to be
126    /// encapsulated. This is provided in case the body's contents depend on the
127    /// chosen source IP address.
128    ///
129    /// If `device` is specified, the available routes are limited to those that
130    /// egress over the device.
131    ///
132    /// `mtu` may be used to optionally impose an MTU on the outgoing packet.
133    /// Note that the device's MTU will still be imposed on the packet. That is,
134    /// the smaller of `mtu` and the device's MTU will be imposed on the packet.
135    ///
136    /// # Errors
137    ///
138    /// If an error is encountered while constructing the temporary IP socket
139    /// or sending the packet, `options` will be returned along with the
140    /// error. `get_body_from_src_ip` is fallible, and if there's an error,
141    /// it will be returned as well.
142    fn send_oneshot_ip_packet_with_fallible_serializer<S, E, F, O>(
143        &mut self,
144        bindings_ctx: &mut BC,
145        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
146        tx_metadata: BC::TxMetadata,
147        get_body_from_src_ip: F,
148    ) -> Result<(), SendOneShotIpPacketError<E>>
149    where
150        S: TransportPacketSerializer<I>,
151        S::Buffer: BufferMut,
152        F: FnOnce(IpDeviceAddr<I::Addr>) -> Result<S, E>,
153        O: SendOptions<I> + RouteResolutionOptions<I>,
154    {
155        let options = args.options;
156        let tmp = self
157            .new_ip_socket(bindings_ctx, args)
158            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })?;
159        let packet = get_body_from_src_ip(*tmp.local_ip())
160            .map_err(SendOneShotIpPacketError::SerializeError)?;
161        self.send_ip_packet(bindings_ctx, &tmp, packet, options, tx_metadata)
162            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })
163    }
164
165    /// Like `send_oneshot_ip_packet_with_fallible_serializer`, but a dynamic
166    /// transport serializer is used.
167    ///
168    /// This reduces code generation cost at the expense of some runtime
169    /// overhead.
170    fn send_oneshot_ip_packet_with_dyn_fallible_serializer<S, E, F, O>(
171        &mut self,
172        bindings_ctx: &mut BC,
173        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
174        tx_metadata: BC::TxMetadata,
175        get_body_from_src_ip: F,
176    ) -> Result<(), SendOneShotIpPacketError<E>>
177    where
178        S: DynamicTransportSerializer<I>,
179        F: FnOnce(IpDeviceAddr<I::Addr>) -> Result<S, E>,
180        O: SendOptions<I> + RouteResolutionOptions<I>,
181    {
182        let options = args.options;
183        let tmp = self
184            .new_ip_socket(bindings_ctx, args)
185            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })?;
186        let mut packet = get_body_from_src_ip(*tmp.local_ip())
187            .map_err(SendOneShotIpPacketError::SerializeError)?;
188        self.send_ip_packet(
189            bindings_ctx,
190            &tmp,
191            DynTransportSerializer::new(&mut packet),
192            options,
193            tx_metadata,
194        )
195        .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })
196    }
197
198    /// Sends a one-shot IP packet but with a non-fallible serializer.
199    fn send_oneshot_ip_packet<S, F, O>(
200        &mut self,
201        bindings_ctx: &mut BC,
202        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
203        tx_metadata: BC::TxMetadata,
204        get_body_from_src_ip: F,
205    ) -> Result<(), IpSockCreateAndSendError>
206    where
207        S: TransportPacketSerializer<I>,
208        S::Buffer: BufferMut,
209        F: FnOnce(IpDeviceAddr<I::Addr>) -> S,
210        O: SendOptions<I> + RouteResolutionOptions<I>,
211    {
212        self.send_oneshot_ip_packet_with_fallible_serializer(
213            bindings_ctx,
214            args,
215            tx_metadata,
216            |ip| Ok::<_, Infallible>(get_body_from_src_ip(ip)),
217        )
218        .map_err(|err| match err {
219            SendOneShotIpPacketError::CreateAndSendError { err } => err,
220        })
221    }
222
223    /// Like `send_oneshot_ip_packet`, but a dynamic transport serializer is
224    /// used.
225    ///
226    /// This reduces code generation cost at the expense of some runtime
227    /// overhead.
228    fn send_oneshot_ip_packet_with_dyn_serializer<S, F, O>(
229        &mut self,
230        bindings_ctx: &mut BC,
231        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
232        tx_metadata: BC::TxMetadata,
233        get_body_from_src_ip: F,
234    ) -> Result<(), IpSockCreateAndSendError>
235    where
236        S: DynamicTransportSerializer<I>,
237        F: FnOnce(IpDeviceAddr<I::Addr>) -> S,
238        O: SendOptions<I> + RouteResolutionOptions<I>,
239    {
240        self.send_oneshot_ip_packet_with_dyn_fallible_serializer(
241            bindings_ctx,
242            args,
243            tx_metadata,
244            |ip| Ok::<_, Infallible>(get_body_from_src_ip(ip)),
245        )
246        .map_err(|err| match err {
247            SendOneShotIpPacketError::CreateAndSendError { err } => err,
248        })
249    }
250}
251
/// An error in sending a packet on an IP socket.
///
/// These are the send failures that are surfaced to the caller of
/// [`IpSocketHandler::send_ip_packet`].
#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
pub enum IpSockSendError {
    /// An MTU was exceeded.
    ///
    /// This could be caused by an MTU at any layer of the stack, including both
    /// device MTUs and packet format body size limits.
    #[error("a maximum transmission unit (MTU) was exceeded")]
    Mtu,
    /// The socket is currently unroutable.
    ///
    /// Carries the route resolution failure that made the socket unroutable.
    #[error("the socket is currently unroutable: {0}")]
    Unroutable(#[from] ResolveRouteError),
    /// The socket operation would've resulted in illegal loopback addresses on
    /// a non-loopback device.
    #[error("illegal loopback address")]
    IllegalLoopbackAddress,
    /// Broadcast send is not allowed.
    #[error("broadcast send is not enabled for the socket")]
    BroadcastNotAllowed,
}
272
273impl From<SerializeError<Infallible>> for IpSockSendError {
274    fn from(err: SerializeError<Infallible>) -> IpSockSendError {
275        match err {
276            SerializeError::SizeLimitExceeded => IpSockSendError::Mtu,
277        }
278    }
279}
280
281impl IpSockSendError {
282    /// Constructs a `Result` from an [`IpSendFrameErrorReason`] with
283    /// application-visible [`IpSockSendError`]s in the `Err` variant.
284    ///
285    /// Errors that are not bubbled up to applications are dropped.
286    fn from_ip_send_frame(e: IpSendFrameErrorReason) -> Result<(), Self> {
287        match e {
288            IpSendFrameErrorReason::Device(d) => Self::from_send_frame(d),
289            IpSendFrameErrorReason::IllegalLoopbackAddress => Err(Self::IllegalLoopbackAddress),
290        }
291    }
292
293    /// Constructs a `Result` from a [`SendFrameErrorReason`] with
294    /// application-visible [`IpSockSendError`]s in the `Err` variant.
295    ///
296    /// Errors that are not bubbled up to applications are dropped.
297    fn from_send_frame(e: SendFrameErrorReason) -> Result<(), Self> {
298        match e {
299            SendFrameErrorReason::Alloc | SendFrameErrorReason::QueueFull => Ok(()),
300            SendFrameErrorReason::SizeConstraintsViolation => Err(Self::Mtu),
301        }
302    }
303}
304
/// An error in sending a packet on a temporary IP socket.
///
/// Distinguishes a failure to create the temporary socket from a failure to
/// send on it.
#[derive(Error, Copy, Clone, Debug)]
pub enum IpSockCreateAndSendError {
    /// Cannot send via temporary socket.
    #[error("cannot send via temporary socket: {0}")]
    Send(#[from] IpSockSendError),
    /// The temporary socket could not be created.
    #[error("the temporary socket could not be created: {0}")]
    Create(#[from] IpSockCreationError),
}
315
/// The error returned by
/// [`IpSocketHandler::send_oneshot_ip_packet_with_fallible_serializer`].
///
/// `E` is the error type of the caller-provided function that produces the
/// packet body.
#[derive(Debug)]
#[allow(missing_docs)]
pub enum SendOneShotIpPacketError<E> {
    // Creating the temporary socket or sending the packet on it failed.
    CreateAndSendError { err: IpSockCreateAndSendError },
    // The caller-provided function failed to produce the packet body.
    SerializeError(E),
}
324
/// Possible errors when retrieving the maximum transport message size.
///
/// Returned by [`DeviceIpSocketHandler::get_mms`].
#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
pub enum MmsError {
    /// Cannot find the device that is used for the IP socket, possibly because
    /// there is no route.
    #[error("cannot find the device: {0}")]
    NoDevice(#[from] ResolveRouteError),
    /// The MTU provided by the device is too small such that there is no room
    /// for a transport message at all.
    ///
    /// Carries the offending MTU.
    #[error("invalid MTU: {0:?}")]
    MTUTooSmall(Mtu),
}
337
/// Gets device related information of an IP socket.
pub trait DeviceIpSocketHandler<I: IpExt, BC>: DeviceIdContext<AnyDevice> {
    /// Gets the maximum message size for the transport layer, which equals the
    /// device MTU minus the IP header size.
    ///
    /// This corresponds to the GET_MAXSIZES call described in:
    /// <https://www.rfc-editor.org/rfc/rfc1122#section-3.4>
    ///
    /// # Errors
    ///
    /// See [`MmsError`] for the ways in which this can fail.
    fn get_mms<O: RouteResolutionOptions<I>>(
        &mut self,
        bindings_ctx: &mut BC,
        ip_sock: &IpSock<I, Self::WeakDeviceId>,
        options: &O,
    ) -> Result<Mms, MmsError>;
}
352
/// An error encountered when creating an IP socket.
///
/// Returned by [`IpSocketHandler::new_ip_socket`].
#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
pub enum IpSockCreationError {
    /// An error occurred while looking up a route.
    #[error("a route cannot be determined: {0}")]
    Route(#[from] ResolveRouteError),
}
360
/// An IP socket.
///
/// `I` is the IP version and `D` is the type used to reference the socket's
/// bound device, if any.
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub struct IpSock<I: IpExt, D> {
    /// The definition of the socket.
    ///
    /// This does not change for the lifetime of the socket.
    definition: IpSockDefinition<I, D>,
}
370
371impl<I: IpExt, D> IpSock<I, D> {
372    /// Returns the socket's definition.
373    #[cfg(any(test, feature = "testutils"))]
374    pub fn definition(&self) -> &IpSockDefinition<I, D> {
375        &self.definition
376    }
377}
378
/// The definition of an IP socket.
///
/// These values are part of the socket's definition, and never change.
#[derive(Clone, Debug, PartialEq)]
pub struct IpSockDefinition<I: IpExt, D> {
    /// The socket's remote address.
    pub remote_ip: SocketIpAddr<I::Addr>,
    /// The socket's local address.
    ///
    /// Guaranteed to be unicast in its subnet since it's always equal to an
    /// address assigned to the local device. We can't use the `UnicastAddr`
    /// witness type since `Ipv4Addr` doesn't implement `UnicastAddress`.
    //
    // TODO(joshlf): Support unnumbered interfaces. Once we do that, a few
    // issues arise: A) Does the unicast restriction still apply, and is that
    // even well-defined for IPv4 in the absence of a subnet? B) Presumably we
    // have to always bind to a particular interface?
    pub local_ip: IpDeviceAddr<I::Addr>,
    /// The socket's bound output device.
    ///
    /// `None` if the socket is not bound to any device.
    pub device: Option<D>,
    /// The IP protocol the socket is bound to.
    pub proto: I::Proto,
}
402
403impl<I: IpExt, D> IpSock<I, D> {
404    /// Returns the socket's local IP address.
405    pub fn local_ip(&self) -> &IpDeviceAddr<I::Addr> {
406        &self.definition.local_ip
407    }
408    /// Returns the socket's remote IP address.
409    pub fn remote_ip(&self) -> &SocketIpAddr<I::Addr> {
410        &self.definition.remote_ip
411    }
412    /// Returns the selected output interface for the socket, if any.
413    pub fn device(&self) -> Option<&D> {
414        self.definition.device.as_ref()
415    }
416    /// Returns the socket's protocol.
417    pub fn proto(&self) -> I::Proto {
418        self.definition.proto
419    }
420}
421
422// TODO(joshlf): Once we support configuring transport-layer protocols using
423// type parameters, use that to ensure that `proto` is the right protocol for
424// the caller. We will still need to have a separate enforcement mechanism for
425// raw IP sockets once we support those.
426
/// The bindings execution context for IP sockets.
///
/// This trait has no methods of its own; it only groups the bindings traits
/// that IP sockets require under a single name.
pub trait IpSocketBindingsContext<D: StrongDeviceIdentifier>:
    InstantContext + FilterBindingsContext + TxMetadataBindingsTypes + SocketOpsFilterBindingContext<D>
{
}
432impl<
433    D: StrongDeviceIdentifier,
434    BC: InstantContext
435        + FilterBindingsContext
436        + TxMetadataBindingsTypes
437        + SocketOpsFilterBindingContext<D>,
438> IpSocketBindingsContext<D> for BC
439{
440}
441
/// The context required in order to implement [`IpSocketHandler`].
///
/// Blanket impls of `IpSocketHandler` are provided in terms of
/// `IpSocketContext`.
pub trait IpSocketContext<I, BC>:
    DeviceIdContext<AnyDevice, DeviceId: InterfaceProperties<BC::DeviceClass>>
    + FilterHandlerProvider<I, BC>
where
    I: IpDeviceStateIpExt + IpExt + FilterIpExt,
    BC: IpSocketBindingsContext<Self::DeviceId>,
{
    /// Returns a route for a socket.
    ///
    /// If `device` is specified, the available routes are limited to those that
    /// egress over the device.
    ///
    /// `src_ip` is the requested source address, if any, and `dst_ip` is the
    /// destination to route to. `transparent` and `marks` carry the values of
    /// [`RouteResolutionOptions::transparent`] and
    /// [`RouteResolutionOptions::marks`] for the socket.
    fn lookup_route(
        &mut self,
        bindings_ctx: &mut BC,
        device: Option<&Self::DeviceId>,
        src_ip: Option<IpDeviceAddr<I::Addr>>,
        dst_ip: RoutableIpAddr<I::Addr>,
        transparent: bool,
        marks: &Marks,
    ) -> Result<ResolvedRoute<I, Self::DeviceId>, ResolveRouteError>;

    /// Send an IP packet to the next-hop node.
    ///
    /// `meta` describes the packet to send, `body` is the serializer for its
    /// payload and `packet_metadata` is the IP layer metadata that accompanies
    /// the packet.
    fn send_ip_packet<S>(
        &mut self,
        bindings_ctx: &mut BC,
        meta: SendIpPacketMeta<I, &Self::DeviceId, SpecifiedAddr<I::Addr>>,
        body: S,
        packet_metadata: IpLayerPacketMetadata<I, Self::WeakAddressId, BC>,
    ) -> Result<(), IpSendFrameError<S>>
    where
        S: TransportPacketSerializer<I>,
        S::Buffer: BufferMut;

    /// Returns `DeviceId` for the loopback device, if there is one.
    fn get_loopback_device(&mut self) -> Option<Self::DeviceId>;

    /// Confirms the provided destination is reachable.
    ///
    /// Implementations must retrieve the next hop for `dst` given the rule
    /// `input` and confirm neighbor reachability for the resolved target
    /// device.
    fn confirm_reachable(
        &mut self,
        bindings_ctx: &mut BC,
        dst: SpecifiedAddr<I::Addr>,
        input: RuleInput<'_, I, Self::DeviceId>,
    );
}
494
/// Enables a blanket implementation of [`IpSocketHandler`].
///
/// Implementing this marker trait for a type enables a blanket implementation
/// of `IpSocketHandler` given the other requirements are met. The trait has no
/// methods; it exists purely as an opt-in.
pub trait UseIpSocketHandlerBlanket {}
500
501impl<I, BC, CC> IpSocketHandler<I, BC> for CC
502where
503    I: IpLayerIpExt + IpDeviceStateIpExt,
504    BC: IpSocketBindingsContext<Self::DeviceId>,
505    CC: IpSocketContext<I, BC> + CounterContext<IpCounters<I>> + UseIpSocketHandlerBlanket,
506    CC::DeviceId: filter::InterfaceProperties<BC::DeviceClass>,
507{
508    fn new_ip_socket<O>(
509        &mut self,
510        bindings_ctx: &mut BC,
511        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
512    ) -> Result<IpSock<I, CC::WeakDeviceId>, IpSockCreationError>
513    where
514        O: RouteResolutionOptions<I>,
515    {
516        let IpSocketArgs { device, local_ip, remote_ip, proto, options } = args;
517        let device = device
518            .as_ref()
519            .map(|d| d.as_strong_ref().ok_or(ResolveRouteError::Unreachable))
520            .transpose()?;
521        let device = device.as_ref().map(|d| d.as_ref());
522
523        // Make sure the remote is routable with a local address before creating
524        // the socket. We do not care about the actual destination here because
525        // we will recalculate it when we send a packet so that the best route
526        // available at the time is used for each outgoing packet.
527        let resolved_route = self.lookup_route(
528            bindings_ctx,
529            device,
530            local_ip,
531            remote_ip,
532            options.transparent(),
533            options.marks(),
534        )?;
535        Ok(new_ip_socket(device, resolved_route, remote_ip, proto))
536    }
537
538    fn send_ip_packet<S, O>(
539        &mut self,
540        bindings_ctx: &mut BC,
541        ip_sock: &IpSock<I, CC::WeakDeviceId>,
542        body: S,
543        options: &O,
544        tx_metadata: BC::TxMetadata,
545    ) -> Result<(), IpSockSendError>
546    where
547        S: TransportPacketSerializer<I>,
548        S::Buffer: BufferMut,
549        O: SendOptions<I> + RouteResolutionOptions<I>,
550    {
551        send_ip_packet(self, bindings_ctx, ip_sock, body, options, tx_metadata)
552    }
553
554    fn confirm_reachable<O>(
555        &mut self,
556        bindings_ctx: &mut BC,
557        socket: &IpSock<I, CC::WeakDeviceId>,
558        options: &O,
559    ) where
560        O: RouteResolutionOptions<I>,
561    {
562        let bound_device = socket.device().and_then(|weak| weak.upgrade());
563        let bound_device = bound_device.as_ref();
564        let bound_address = Some((*socket.local_ip()).into());
565        let destination = (*socket.remote_ip()).into();
566        IpSocketContext::confirm_reachable(
567            self,
568            bindings_ctx,
569            destination,
570            RuleInput {
571                packet_origin: PacketOrigin::Local { bound_address, bound_device },
572                marks: options.marks(),
573            },
574        )
575    }
576}
577
/// Provides hooks for altering route resolution behavior of [`IpSock`].
///
/// Must be implemented by the socket option type of an `IpSock` when using it
/// to call [`IpSocketHandler::new_ip_socket`] or
/// [`IpSocketHandler::send_ip_packet`]. This is implemented as a trait instead
/// of an inherent impl on a type so that users of sockets that don't need
/// certain option types can avoid allocating space for those options.
// TODO(https://fxbug.dev/323389672): We need a mechanism to inform `IpSock` of
// changes in the route resolution options when it starts caching previously
// calculated routes. Any changes to the options here *MUST* cause the route to
// be re-calculated.
pub trait RouteResolutionOptions<I: Ip> {
    /// Whether the socket is transparent.
    ///
    /// This allows transparently proxying traffic to the socket, and allows the
    /// socket to be bound to a non-local address.
    fn transparent(&self) -> bool;

    /// Returns the marks carried by packets created on the socket.
    ///
    /// The marks are passed to route lookup when the socket is created.
    fn marks(&self) -> &Marks;
}
599
/// Provides hooks for altering sending behavior of [`IpSock`].
///
/// Must be implemented by the socket option type of an `IpSock` when using it
/// to call [`IpSocketHandler::send_ip_packet`]. This is implemented as a trait
/// instead of an inherent impl on a type so that users of sockets that don't
/// need certain option types, like TCP for anything multicast-related, can
/// avoid allocating space for those options.
pub trait SendOptions<I: IpExt> {
    /// Returns the hop limit to set on a packet going to the given destination.
    ///
    /// If `Some(u)`, `u` will be used as the hop limit (IPv6) or TTL (IPv4) for
    /// a packet going to the given destination. Otherwise the default value
    /// will be used.
    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8>;

    /// Returns true if outgoing multicast packets should be looped back and
    /// delivered to local receivers who joined the multicast group.
    fn multicast_loop(&self) -> bool;

    /// `Some` if the socket can be used to send broadcast packets.
    ///
    /// `None` means broadcast sends are not allowed on the socket.
    fn allow_broadcast(&self) -> Option<I::BroadcastMarker>;

    /// Returns the TCLASS/TOS field value that should be set in IP headers.
    fn dscp_and_ecn(&self) -> DscpAndEcn;

    /// The IP MTU to use for this transmission.
    ///
    /// Note that the minimum overall MTU is used considering the device and
    /// path. This option can be used to restrict an MTU to an upper bound.
    fn mtu(&self) -> Mtu;
}
631
/// Empty send and creation options that never override default values.
///
/// Implements both [`SendOptions`] and [`RouteResolutionOptions`].
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct DefaultIpSocketOptions;
635
// Send options that leave every setting at its default.
impl<I: IpExt> SendOptions<I> for DefaultIpSocketOptions {
    // No hop limit override for any destination.
    fn hop_limit(&self, _destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
        None
    }

    // Outgoing multicast packets are not looped back.
    fn multicast_loop(&self) -> bool {
        false
    }

    // Broadcast sends are not allowed.
    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
        None
    }

    // The default TCLASS/TOS value is used.
    fn dscp_and_ecn(&self) -> DscpAndEcn {
        DscpAndEcn::default()
    }

    // No additional MTU restriction is imposed.
    fn mtu(&self) -> Mtu {
        Mtu::no_limit()
    }
}
657
// Route resolution options that leave every setting at its default.
impl<I: Ip> RouteResolutionOptions<I> for DefaultIpSocketOptions {
    // The socket is not transparent.
    fn transparent(&self) -> bool {
        false
    }

    // Packets carry no marks.
    fn marks(&self) -> &Marks {
        &Marks::UNMARKED
    }
}
667
668/// A trait providing send options delegation to an inner type.
669///
670/// A blanket impl of [`SendOptions`] is provided to all implementers. This
671/// trait has the same shape as `SendOptions` but all the methods provide
672/// default implementations that delegate to the value returned by
673/// `DelegatedSendOptions::Delegate`. For brevity, the default `delegate` is
674/// [`DefaultIpSocketOptions`].
675#[allow(missing_docs)]
676pub trait DelegatedSendOptions<I: IpExt>: OptionDelegationMarker {
677    /// Returns the delegate providing the impl for all default methods.
678    fn delegate(&self) -> &impl SendOptions<I> {
679        &DefaultIpSocketOptions
680    }
681
682    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
683        self.delegate().hop_limit(destination)
684    }
685
686    fn multicast_loop(&self) -> bool {
687        self.delegate().multicast_loop()
688    }
689
690    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
691        self.delegate().allow_broadcast()
692    }
693
694    fn dscp_and_ecn(&self) -> DscpAndEcn {
695        self.delegate().dscp_and_ecn()
696    }
697
698    fn mtu(&self) -> Mtu {
699        self.delegate().mtu()
700    }
701}
702
703impl<O: DelegatedSendOptions<I> + OptionDelegationMarker, I: IpExt> SendOptions<I> for O {
704    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
705        self.hop_limit(destination)
706    }
707
708    fn multicast_loop(&self) -> bool {
709        self.multicast_loop()
710    }
711
712    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
713        self.allow_broadcast()
714    }
715
716    fn dscp_and_ecn(&self) -> DscpAndEcn {
717        self.dscp_and_ecn()
718    }
719
720    fn mtu(&self) -> Mtu {
721        self.mtu()
722    }
723}
724
725/// A trait providing route resolution options delegation to an inner type.
726///
727/// A blanket impl of [`RouteResolutionOptions`] is provided to all
728/// implementers. This trait has the same shape as `RouteResolutionOptions` but
729/// all the methods provide default implementations that delegate to the value
730/// returned by `DelegatedRouteResolutionOptions::Delegate`. For brevity, the
731/// default `delegate` is [`DefaultIpSocketOptions`].
732#[allow(missing_docs)]
733pub trait DelegatedRouteResolutionOptions<I: Ip>: OptionDelegationMarker {
734    /// Returns the delegate providing the impl for all default methods.
735    fn delegate(&self) -> &impl RouteResolutionOptions<I> {
736        &DefaultIpSocketOptions
737    }
738
739    fn transparent(&self) -> bool {
740        self.delegate().transparent()
741    }
742
743    fn marks(&self) -> &Marks {
744        self.delegate().marks()
745    }
746}
747
748impl<O: DelegatedRouteResolutionOptions<I> + OptionDelegationMarker, I: IpExt>
749    RouteResolutionOptions<I> for O
750{
751    fn transparent(&self) -> bool {
752        self.transparent()
753    }
754
755    fn marks(&self) -> &Marks {
756        self.marks()
757    }
758}
759
/// A marker trait to allow option delegation traits.
///
/// This trait sidesteps trait resolution rules around the delegation traits
/// because of the `Ip` parameter in them. It is a supertrait of both
/// [`DelegatedSendOptions`] and [`DelegatedRouteResolutionOptions`], so types
/// implementing either of them must implement it as well.
pub trait OptionDelegationMarker {}
765
/// The configurable hop limits for a socket.
///
/// A limit that is `None` has not been configured.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct SocketHopLimits<I: Ip> {
    /// Unicast hop limit.
    pub unicast: Option<NonZeroU8>,
    /// Multicast hop limit.
    // TODO(https://fxbug.dev/42059735): Make this an Option<u8> to allow sending
    // multicast packets destined only for the local machine.
    pub multicast: Option<NonZeroU8>,
    /// An unused marker type signifying the IP version for which these hop
    /// limits are valid. Including this helps prevent using the wrong hop limits
    /// when operating on dualstack sockets.
    pub version: IpVersionMarker<I>,
}
780
781impl<I: Ip> SocketHopLimits<I> {
782    /// Returns a function that updates the unicast hop limit.
783    pub fn set_unicast(value: Option<NonZeroU8>) -> impl FnOnce(&mut Self) {
784        move |limits| limits.unicast = value
785    }
786
787    /// Returns a function that updates the multicast hop limit.
788    pub fn set_multicast(value: Option<NonZeroU8>) -> impl FnOnce(&mut Self) {
789        move |limits| limits.multicast = value
790    }
791
792    /// Returns the hop limits, or the provided defaults if unset.
793    pub fn get_limits_with_defaults(&self, defaults: &HopLimits) -> HopLimits {
794        let Self { unicast, multicast, version: _ } = self;
795        HopLimits {
796            unicast: unicast.unwrap_or(defaults.unicast),
797            multicast: multicast.unwrap_or(defaults.multicast),
798        }
799    }
800
801    /// Returns the appropriate hop limit to use for the given destination addr.
802    pub fn hop_limit_for_dst(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
803        let Self { unicast, multicast, version: _ } = self;
804        if destination.is_multicast() { *multicast } else { *unicast }
805    }
806}
807
808fn new_ip_socket<I, D>(
809    requested_device: Option<&D>,
810    route: ResolvedRoute<I, D>,
811    remote_ip: SocketIpAddr<I::Addr>,
812    proto: I::Proto,
813) -> IpSock<I, D::Weak>
814where
815    I: IpExt,
816    D: StrongDeviceIdentifier,
817{
818    // TODO(https://fxbug.dev/323389672): Cache a reference to the route to
819    // avoid the route lookup on send as long as the routing table hasn't
820    // changed in between these operations.
821    let ResolvedRoute {
822        src_addr,
823        device: route_device,
824        local_delivery_device,
825        next_hop: _,
826        internal_forwarding: _,
827    } = route;
828
829    // If the source or destination address require a device, make sure to
830    // set that in the socket definition. Otherwise defer to what was provided.
831    let socket_device = (src_addr.as_ref().must_have_zone() || remote_ip.as_ref().must_have_zone())
832        .then(|| {
833            // NB: The route device might be loopback, and in such cases
834            // we want to bind the socket to the device the source IP is
835            // assigned to instead.
836            local_delivery_device.unwrap_or(route_device)
837        })
838        .as_ref()
839        .or(requested_device)
840        .map(|d| d.downgrade());
841
842    let definition =
843        IpSockDefinition { local_ip: src_addr, remote_ip, device: socket_device, proto };
844    IpSock { definition }
845}
846
/// Sends `body` over `socket`, applying the egress filtering stages below
/// before handing the packet to [`IpSocketContext::send_ip_packet`].
///
/// In order, this function:
///   1. Resolves a route from the socket definition (bound device, local and
///      remote addresses) and the `transparent`/`marks` route options.
///   2. Rejects broadcast next hops unless `options.allow_broadcast()` returns
///      `Some`.
///   3. Runs the LOCAL_EGRESS filter hook and, if the hook changed the
///      packet's destination address, resolves the route again.
///   4. Runs the forwarding filter hook when the route uses internal
///      forwarding.
///   5. Consults the bindings' socket ops egress filter when the TX metadata
///      carries a socket cookie.
///   6. Sends the packet on the egress device. For non-loopback egress devices
///      it additionally sends a copy through the loopback device when the next
///      hop is broadcast, or when the destination is multicast and
///      `options.multicast_loop()` is set.
///
/// Returns `Ok(())` when the packet was handed off, and also when one of the
/// filters dropped it: filter drops are not reported as errors.
///
/// # Errors
///
/// * The bound device can no longer be upgraded, or route lookup fails.
/// * [`IpSockSendError::BroadcastNotAllowed`] for a broadcast next hop without
///   the broadcast option.
/// * [`IpSockSendError::Unroutable`] if, after the LOCAL_EGRESS hook, the
///   packet's source or destination address can no longer be converted into
///   the required address type.
/// * A serialization failure while building the loopback copy.
/// * Whatever [`IpSockSendError::from_ip_send_frame`] yields for a failure of
///   the primary send.
///
/// Failures while sending the loopback copy are only logged.
///
/// # Panics
///
/// Panics if a resolved route reports a source address different from the
/// local address that was requested for the lookup.
fn send_ip_packet<I, S, BC, CC, O>(
    core_ctx: &mut CC,
    bindings_ctx: &mut BC,
    socket: &IpSock<I, CC::WeakDeviceId>,
    mut body: S,
    options: &O,
    tx_metadata: BC::TxMetadata,
) -> Result<(), IpSockSendError>
where
    I: IpExt + IpDeviceStateIpExt + FilterIpExt,
    S: TransportPacketSerializer<I>,
    S::Buffer: BufferMut,
    BC: IpSocketBindingsContext<CC::DeviceId>,
    CC: IpSocketContext<I, BC> + CounterContext<IpCounters<I>>,
    CC::DeviceId: filter::InterfaceProperties<BC::DeviceClass>,
    O: SendOptions<I> + RouteResolutionOptions<I>,
{
    trace_duration!(c"ip::send_packet");

    // Extracted to a function without the serializer parameter to ease code
    // generation.
    //
    // Upgrades the socket's weak device reference (failing if the device is
    // gone) and looks up a route with `local_ip` as the required source.
    fn resolve<
        I: IpExt + IpDeviceStateIpExt + FilterIpExt,
        CC: IpSocketContext<I, BC>,
        BC: IpSocketBindingsContext<CC::DeviceId>,
    >(
        core_ctx: &mut CC,
        bindings_ctx: &mut BC,
        device: &Option<CC::WeakDeviceId>,
        local_ip: IpDeviceAddr<I::Addr>,
        remote_ip: RoutableIpAddr<I::Addr>,
        transparent: bool,
        marks: &Marks,
    ) -> Result<ResolvedRoute<I, CC::DeviceId>, IpSockSendError> {
        let device = match device.as_ref().map(|d| d.upgrade()) {
            Some(Some(device)) => Some(device),
            Some(None) => return Err(ResolveRouteError::Unreachable.into()),
            None => None,
        };
        let route = core_ctx
            .lookup_route(
                bindings_ctx,
                device.as_ref(),
                Some(local_ip),
                remote_ip,
                transparent,
                marks,
            )
            .map_err(|e| IpSockSendError::Unroutable(e))?;
        // The lookup was constrained to `local_ip`, so the route must use it.
        assert_eq!(local_ip, route.src_addr);
        Ok(route)
    }

    let IpSock {
        definition: IpSockDefinition { remote_ip, local_ip, device: socket_device, proto },
    } = socket;
    let ResolvedRoute {
        src_addr: local_ip,
        device: mut egress_device,
        mut next_hop,
        mut local_delivery_device,
        mut internal_forwarding,
    } = resolve(
        core_ctx,
        bindings_ctx,
        socket_device,
        *local_ip,
        *remote_ip,
        options.transparent(),
        options.marks(),
    )?;

    // Broadcast next hops are only permitted when the caller opted in.
    if matches!(next_hop, NextHop::Broadcast(_)) && options.allow_broadcast().is_none() {
        return Err(IpSockSendError::BroadcastNotAllowed);
    }

    // Remember the pre-filter destination so that a rewrite performed by the
    // LOCAL_EGRESS hook can be detected below.
    let previous_dst = remote_ip.addr();
    let mut packet = filter::TxPacket::new(local_ip.addr(), remote_ip.addr(), *proto, &mut body);
    let mut packet_metadata =
        IpLayerPacketMetadata::from_tx_metadata_and_marks(tx_metadata, *options.marks());

    match core_ctx.filter_handler().local_egress_hook(
        bindings_ctx,
        &mut packet,
        &egress_device,
        &mut packet_metadata,
    ) {
        filter::Verdict::Drop => {
            // A filter drop is not surfaced as an error to the caller.
            packet_metadata.acknowledge_drop();
            return Ok(());
        }
        filter::Verdict::Accept(()) => {}
    }

    // Re-read the addresses from the packet: the hook had mutable access to it.
    let Some(mut local_ip) = IpDeviceAddr::new(packet.src_addr()) else {
        packet_metadata.acknowledge_drop();
        return Err(IpSockSendError::Unroutable(ResolveRouteError::NoSrcAddr));
    };
    let Some(remote_ip) = RoutableIpAddr::new(packet.dst_addr()) else {
        packet_metadata.acknowledge_drop();
        return Err(IpSockSendError::Unroutable(ResolveRouteError::Unreachable));
    };

    // If the LOCAL_EGRESS hook ended up rewriting the packet's destination, perform
    // re-routing based on the new destination.
    if remote_ip.addr() != previous_dst {
        let ResolvedRoute {
            src_addr: new_local_ip,
            device: new_device,
            next_hop: new_next_hop,
            local_delivery_device: new_local_delivery_device,
            internal_forwarding: new_internal_forwarding,
        } = match resolve(
            core_ctx,
            bindings_ctx,
            socket_device,
            local_ip,
            remote_ip,
            options.transparent(),
            options.marks(),
        ) {
            Ok(r) => r,
            Err(err) => {
                packet_metadata.acknowledge_drop();
                return Err(err);
            }
        };
        local_ip = new_local_ip;
        egress_device = new_device;
        next_hop = new_next_hop;
        local_delivery_device = new_local_delivery_device;
        internal_forwarding = new_internal_forwarding;
    }

    // NB: Hit the forwarding hook if the route leverages internal forwarding.
    match internal_forwarding {
        InternalForwarding::Used(ingress_device) => {
            match core_ctx.filter_handler().forwarding_hook(
                &mut packet,
                &ingress_device,
                &egress_device,
                &mut packet_metadata,
            ) {
                filter::Verdict::Drop => {
                    packet_metadata.acknowledge_drop();
                    return Ok(());
                }
                filter::Verdict::Accept(()) => {}
            }
        }
        InternalForwarding::NotUsed => {}
    }

    // The socket ops egress filter only runs for packets whose TX metadata
    // carries a socket cookie.
    if let Some(socket_cookie) = packet_metadata.tx_metadata().socket_cookie() {
        let egress_filter_result = bindings_ctx.socket_ops_filter().on_egress(
            &packet,
            &egress_device,
            socket_cookie,
            packet_metadata.marks(),
        );

        // TODO(https://fxbug.dev/412426836): Implement congestion signal handling.
        match egress_filter_result {
            SocketEgressFilterResult::Pass { congestion: _ } => (),
            SocketEgressFilterResult::Drop { congestion: _ } => {
                core_ctx.counters().socket_egress_filter_dropped.increment();
                packet_metadata.acknowledge_drop();
                return Ok(());
            }
        }
    }

    // The packet needs to be delivered locally if it's sent to a broadcast
    // or multicast address. For multicast packets this feature can be disabled
    // with IP_MULTICAST_LOOP.
    //
    // The copy is serialized up front because `body` is moved into the primary
    // send below.

    let loopback_packet = (!egress_device.is_loopback()
        && ((options.multicast_loop() && remote_ip.addr().is_multicast())
            || next_hop.is_broadcast()))
    .then(|| body.serialize_new_buf(PacketConstraints::UNCONSTRAINED, packet::new_buf_vec))
    .transpose()?
    .map(|buf| RawIpBody::new(*proto, local_ip.addr(), remote_ip.addr(), buf));

    let destination = match &local_delivery_device {
        Some(d) => IpPacketDestination::Loopback(d),
        None => IpPacketDestination::from_next_hop(next_hop, remote_ip.into()),
    };
    let ttl = options.hop_limit(&remote_ip.into());
    let meta = SendIpPacketMeta {
        device: &egress_device,
        src_ip: local_ip.into(),
        dst_ip: remote_ip.into(),
        destination,
        ttl,
        proto: *proto,
        mtu: options.mtu(),
        dscp_and_ecn: options.dscp_and_ecn(),
    };
    IpSocketContext::send_ip_packet(core_ctx, bindings_ctx, meta, body, packet_metadata).or_else(
        |IpSendFrameError { serializer: _, error }| IpSockSendError::from_ip_send_frame(error),
    )?;

    match (loopback_packet, core_ctx.get_loopback_device()) {
        (Some(loopback_packet), Some(loopback_device)) => {
            let meta = SendIpPacketMeta {
                device: &loopback_device,
                src_ip: local_ip.into(),
                dst_ip: remote_ip.into(),
                destination: IpPacketDestination::Loopback(&egress_device),
                ttl,
                proto: *proto,
                mtu: options.mtu(),
                dscp_and_ecn: options.dscp_and_ecn(),
            };
            // The original metadata was consumed by the primary send; the
            // looped-back copy starts from default metadata.
            let packet_metadata = IpLayerPacketMetadata::default();

            // The loopback packet will hit the egress hook. LOCAL_EGRESS hook
            // is not called again.
            //
            // Best effort: a failure here is logged rather than returned.
            IpSocketContext::send_ip_packet(
                core_ctx,
                bindings_ctx,
                meta,
                loopback_packet,
                packet_metadata,
            )
            .unwrap_or_else(|IpSendFrameError { serializer: _, error }| {
                error!("failed to send loopback packet: {error:?}")
            });
        }
        (Some(_loopback_packet), None) => {
            error!("can't send a loopback packet without the loopback device")
        }
        _ => (),
    }

    Ok(())
}
1084
/// Enables a blanket implementation of [`DeviceIpSocketHandler`].
///
/// Implementing this marker trait for a type enables a blanket implementation
/// of `DeviceIpSocketHandler` given the other requirements are met.
///
/// Those other requirements are the remaining bounds on the blanket impl: the
/// type must also implement [`IpDeviceMtuContext`] and [`IpSocketContext`].
pub trait UseDeviceIpSocketHandlerBlanket {}
1090
1091impl<I, BC, CC> DeviceIpSocketHandler<I, BC> for CC
1092where
1093    I: IpLayerIpExt + IpDeviceStateIpExt,
1094    BC: IpSocketBindingsContext<CC::DeviceId>,
1095    CC: IpDeviceMtuContext<I> + IpSocketContext<I, BC> + UseDeviceIpSocketHandlerBlanket,
1096{
1097    fn get_mms<O: RouteResolutionOptions<I>>(
1098        &mut self,
1099        bindings_ctx: &mut BC,
1100        ip_sock: &IpSock<I, Self::WeakDeviceId>,
1101        options: &O,
1102    ) -> Result<Mms, MmsError> {
1103        let IpSockDefinition { remote_ip, local_ip, device, proto: _ } = &ip_sock.definition;
1104        let device = device
1105            .as_ref()
1106            .map(|d| d.upgrade().ok_or(ResolveRouteError::Unreachable))
1107            .transpose()?;
1108
1109        let ResolvedRoute {
1110            src_addr: _,
1111            local_delivery_device: _,
1112            device,
1113            next_hop: _,
1114            internal_forwarding: _,
1115        } = self
1116            .lookup_route(
1117                bindings_ctx,
1118                device.as_ref(),
1119                Some(*local_ip),
1120                *remote_ip,
1121                options.transparent(),
1122                options.marks(),
1123            )
1124            .map_err(MmsError::NoDevice)?;
1125        let mtu = self.get_mtu(&device);
1126        // TODO(https://fxbug.dev/42072935): Calculate the options size when they
1127        // are supported.
1128        Mms::from_mtu::<I>(mtu, 0 /* no ip options used */).ok_or(MmsError::MTUTooSmall(mtu))
1129    }
1130}
1131
1132/// IPv6 source address selection as defined in [RFC 6724 Section 5].
1133pub(crate) mod ipv6_source_address_selection {
1134    use net_types::ip::{AddrSubnet, IpAddress as _};
1135
1136    use super::*;
1137
1138    use netstack3_base::Ipv6DeviceAddr;
1139
    /// A source address selection candidate.
    ///
    /// Bundles the address properties that the comparison rules in this module
    /// inspect when ranking one candidate against another.
    pub struct SasCandidate<D> {
        /// The candidate address and subnet.
        pub addr_sub: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
        /// True if the address is assigned (i.e. non tentative).
        ///
        /// Candidates that are not assigned are filtered out by
        /// [`select_ipv6_source_address`] before any comparison takes place.
        pub assigned: bool,
        /// True if the address is deprecated (i.e. not preferred).
        pub deprecated: bool,
        /// True if the address is temporary (i.e. not permanent).
        pub temporary: bool,
        /// The device this address belongs to.
        pub device: D,
    }
1153
1154    /// Selects the source address for an IPv6 socket using the algorithm
1155    /// defined in [RFC 6724 Section 5].
1156    ///
1157    /// This algorithm is only applicable when the user has not explicitly
1158    /// specified a source address.
1159    ///
1160    /// `remote_ip` is the remote IP address of the socket, `outbound_device` is
1161    /// the device over which outbound traffic to `remote_ip` is sent (according
1162    /// to the forwarding table), and `addresses` is an iterator of all
1163    /// addresses on all devices. The algorithm works by iterating over
1164    /// `addresses` and selecting the address which is most preferred according
1165    /// to a set of selection criteria.
1166    pub fn select_ipv6_source_address<
1167        'a,
1168        D: PartialEq,
1169        A,
1170        I: Iterator<Item = A>,
1171        F: FnMut(&A) -> SasCandidate<D>,
1172    >(
1173        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1174        outbound_device: &D,
1175        addresses: I,
1176        mut get_candidate: F,
1177    ) -> Option<A> {
1178        // Source address selection as defined in RFC 6724 Section 5.
1179        //
1180        // The algorithm operates by defining a partial ordering on available
1181        // source addresses, and choosing one of the best address as defined by
1182        // that ordering (given multiple best addresses, the choice from among
1183        // those is implementation-defined). The partial order is defined in
1184        // terms of a sequence of rules. If a given rule defines an order
1185        // between two addresses, then that is their order. Otherwise, the next
1186        // rule must be consulted, and so on until all of the rules are
1187        // exhausted.
1188
1189        addresses
1190            .map(|item| {
1191                let candidate = get_candidate(&item);
1192                (item, candidate)
1193            })
1194            // Tentative addresses are not considered available to the source
1195            // selection algorithm.
1196            .filter(|(_, candidate)| candidate.assigned)
1197            .max_by(|(_, a), (_, b)| {
1198                select_ipv6_source_address_cmp(remote_ip, outbound_device, a, b)
1199            })
1200            .map(|(item, _candidate)| item)
1201    }
1202
1203    /// Comparison operator used by `select_ipv6_source_address`.
1204    fn select_ipv6_source_address_cmp<D: PartialEq>(
1205        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1206        outbound_device: &D,
1207        a: &SasCandidate<D>,
1208        b: &SasCandidate<D>,
1209    ) -> Ordering {
1210        // TODO(https://fxbug.dev/42123500): Implement rules 4, 5.5, and 6.
1211        let SasCandidate {
1212            addr_sub: a_addr_sub,
1213            assigned: a_assigned,
1214            deprecated: a_deprecated,
1215            temporary: a_temporary,
1216            device: a_device,
1217        } = a;
1218        let SasCandidate {
1219            addr_sub: b_addr_sub,
1220            assigned: b_assigned,
1221            deprecated: b_deprecated,
1222            temporary: b_temporary,
1223            device: b_device,
1224        } = b;
1225
1226        let a_addr = a_addr_sub.addr().into_specified();
1227        let b_addr = b_addr_sub.addr().into_specified();
1228
1229        // Assertions required in order for this implementation to be valid.
1230
1231        // Required by the implementation of Rule 1.
1232        if let Some(remote_ip) = remote_ip {
1233            debug_assert!(!(a_addr == remote_ip && b_addr == remote_ip));
1234        }
1235
1236        // Addresses that are not considered assigned are not valid source
1237        // addresses.
1238        debug_assert!(a_assigned);
1239        debug_assert!(b_assigned);
1240
1241        rule_1(remote_ip, a_addr, b_addr)
1242            .then_with(|| rule_2(remote_ip, a_addr, b_addr))
1243            .then_with(|| rule_3(*a_deprecated, *b_deprecated))
1244            .then_with(|| rule_5(outbound_device, a_device, b_device))
1245            .then_with(|| rule_7(*a_temporary, *b_temporary))
1246            .then_with(|| rule_8(remote_ip, *a_addr_sub, *b_addr_sub))
1247    }
1248
1249    // Assumes that `a` and `b` are not both equal to `remote_ip`.
1250    fn rule_1(
1251        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1252        a: SpecifiedAddr<Ipv6Addr>,
1253        b: SpecifiedAddr<Ipv6Addr>,
1254    ) -> Ordering {
1255        let remote_ip = match remote_ip {
1256            Some(remote_ip) => remote_ip,
1257            None => return Ordering::Equal,
1258        };
1259        if (a == remote_ip) != (b == remote_ip) {
1260            // Rule 1: Prefer same address.
1261            //
1262            // Note that both `a` and `b` cannot be equal to `remote_ip` since
1263            // that would imply that we had added the same address twice to the
1264            // same device.
1265            //
1266            // If `(a == remote_ip) != (b == remote_ip)`, then exactly one of
1267            // them is equal. If this inequality does not hold, then they must
1268            // both be unequal to `remote_ip`. In the first case, we have a tie,
1269            // and in the second case, the rule doesn't apply. In either case,
1270            // we move onto the next rule.
1271            if a == remote_ip { Ordering::Greater } else { Ordering::Less }
1272        } else {
1273            Ordering::Equal
1274        }
1275    }
1276
1277    fn rule_2(
1278        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1279        a: SpecifiedAddr<Ipv6Addr>,
1280        b: SpecifiedAddr<Ipv6Addr>,
1281    ) -> Ordering {
1282        // Scope ordering is defined by the Multicast Scope ID, see
1283        // https://datatracker.ietf.org/doc/html/rfc6724#section-3.1 .
1284        let remote_scope = match remote_ip {
1285            Some(remote_ip) => remote_ip.scope().multicast_scope_id(),
1286            None => return Ordering::Equal,
1287        };
1288        let a_scope = a.scope().multicast_scope_id();
1289        let b_scope = b.scope().multicast_scope_id();
1290        if a_scope < b_scope {
1291            if a_scope < remote_scope { Ordering::Less } else { Ordering::Greater }
1292        } else if a_scope > b_scope {
1293            if b_scope < remote_scope { Ordering::Greater } else { Ordering::Less }
1294        } else {
1295            Ordering::Equal
1296        }
1297    }
1298
1299    fn rule_3(a_deprecated: bool, b_deprecated: bool) -> Ordering {
1300        match (a_deprecated, b_deprecated) {
1301            (true, false) => Ordering::Less,
1302            (true, true) | (false, false) => Ordering::Equal,
1303            (false, true) => Ordering::Greater,
1304        }
1305    }
1306
1307    fn rule_5<D: PartialEq>(outbound_device: &D, a_device: &D, b_device: &D) -> Ordering {
1308        if (a_device == outbound_device) != (b_device == outbound_device) {
1309            // Rule 5: Prefer outgoing interface.
1310            if a_device == outbound_device { Ordering::Greater } else { Ordering::Less }
1311        } else {
1312            Ordering::Equal
1313        }
1314    }
1315
1316    // Prefer temporary addresses following rule 7.
1317    fn rule_7(a_temporary: bool, b_temporary: bool) -> Ordering {
1318        match (a_temporary, b_temporary) {
1319            (true, false) => Ordering::Greater,
1320            (true, true) | (false, false) => Ordering::Equal,
1321            (false, true) => Ordering::Less,
1322        }
1323    }
1324
1325    fn rule_8(
1326        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1327        a: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1328        b: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1329    ) -> Ordering {
1330        let remote_ip = match remote_ip {
1331            Some(remote_ip) => remote_ip,
1332            None => return Ordering::Equal,
1333        };
1334        // Per RFC 6724 Section 2.2:
1335        //
1336        //   We define the common prefix length CommonPrefixLen(S, D) of a
1337        //   source address S and a destination address D as the length of the
1338        //   longest prefix (looking at the most significant, or leftmost, bits)
1339        //   that the two addresses have in common, up to the length of S's
1340        //   prefix (i.e., the portion of the address not including the
1341        //   interface ID).  For example, CommonPrefixLen(fe80::1, fe80::2) is
1342        //   64.
1343        fn common_prefix_len(
1344            src: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1345            dst: SpecifiedAddr<Ipv6Addr>,
1346        ) -> u8 {
1347            core::cmp::min(src.addr().common_prefix_len(&dst), src.subnet().prefix())
1348        }
1349
1350        // Rule 8: Use longest matching prefix.
1351        //
1352        // Note that, per RFC 6724 Section 5:
1353        //
1354        //   Rule 8 MAY be superseded if the implementation has other means of
1355        //   choosing among source addresses.  For example, if the
1356        //   implementation somehow knows which source address will result in
1357        //   the "best" communications performance.
1358        //
1359        // We don't currently make use of this option, but it's an option for
1360        // the future.
1361        common_prefix_len(a, remote_ip).cmp(&common_prefix_len(b, remote_ip))
1362    }
1363
1364    #[cfg(test)]
1365    mod tests {
1366        use net_declare::net_ip_v6;
1367
1368        use super::*;
1369
1370        #[test]
1371        fn test_select_ipv6_source_address() {
1372            // Test the comparison operator used by `select_ipv6_source_address`
1373            // by separately testing each comparison condition.
1374
1375            let remote = SpecifiedAddr::new(net_ip_v6!("2001:0db8:1::")).unwrap();
1376            let local0 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:2::")).unwrap();
1377            let local1 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:3::")).unwrap();
1378            let link_local_remote = SpecifiedAddr::new(net_ip_v6!("fe80::1:2:42")).unwrap();
1379            let link_local = SpecifiedAddr::new(net_ip_v6!("fe80::1:2:4")).unwrap();
1380            let dev0 = &0;
1381            let dev1 = &1;
1382            let dev2 = &2;
1383
1384            // Rule 1: Prefer same address
1385            assert_eq!(rule_1(Some(remote), remote, local0), Ordering::Greater);
1386            assert_eq!(rule_1(Some(remote), local0, remote), Ordering::Less);
1387            assert_eq!(rule_1(Some(remote), local0, local1), Ordering::Equal);
1388            assert_eq!(rule_1(None, local0, local1), Ordering::Equal);
1389
1390            // Rule 2: Prefer appropriate scope
1391            assert_eq!(rule_2(Some(remote), local0, local1), Ordering::Equal);
1392            assert_eq!(rule_2(Some(remote), local1, local0), Ordering::Equal);
1393            assert_eq!(rule_2(Some(remote), local0, link_local), Ordering::Greater);
1394            assert_eq!(rule_2(Some(remote), link_local, local0), Ordering::Less);
1395            assert_eq!(rule_2(Some(link_local_remote), local0, link_local), Ordering::Less);
1396            assert_eq!(rule_2(Some(link_local_remote), link_local, local0), Ordering::Greater);
1397            assert_eq!(rule_1(None, local0, link_local), Ordering::Equal);
1398
1399            // Rule 3: Avoid deprecated states
1400            assert_eq!(rule_3(false, true), Ordering::Greater);
1401            assert_eq!(rule_3(true, false), Ordering::Less);
1402            assert_eq!(rule_3(true, true), Ordering::Equal);
1403            assert_eq!(rule_3(false, false), Ordering::Equal);
1404
1405            // Rule 5: Prefer outgoing interface
1406            assert_eq!(rule_5(dev0, dev0, dev2), Ordering::Greater);
1407            assert_eq!(rule_5(dev0, dev2, dev0), Ordering::Less);
1408            assert_eq!(rule_5(dev0, dev0, dev0), Ordering::Equal);
1409            assert_eq!(rule_5(dev0, dev2, dev2), Ordering::Equal);
1410
1411            // Rule 7: Prefer temporary address.
1412            assert_eq!(rule_7(true, false), Ordering::Greater);
1413            assert_eq!(rule_7(false, true), Ordering::Less);
1414            assert_eq!(rule_7(true, true), Ordering::Equal);
1415            assert_eq!(rule_7(false, false), Ordering::Equal);
1416
1417            // Rule 8: Use longest matching prefix.
1418            {
1419                let new_addr_entry = |addr, prefix_len| AddrSubnet::new(addr, prefix_len).unwrap();
1420
1421                // First, test that the longest prefix match is preferred when
1422                // using addresses whose common prefix length is shorter than
1423                // the subnet prefix length.
1424
1425                // 4 leading 0x01 bytes.
1426                let remote = SpecifiedAddr::new(net_ip_v6!("1111::")).unwrap();
1427                // 3 leading 0x01 bytes.
1428                let local0 = new_addr_entry(net_ip_v6!("1110::"), 64);
1429                // 2 leading 0x01 bytes.
1430                let local1 = new_addr_entry(net_ip_v6!("1100::"), 64);
1431
1432                assert_eq!(rule_8(Some(remote), local0, local1), Ordering::Greater);
1433                assert_eq!(rule_8(Some(remote), local1, local0), Ordering::Less);
1434                assert_eq!(rule_8(Some(remote), local0, local0), Ordering::Equal);
1435                assert_eq!(rule_8(Some(remote), local1, local1), Ordering::Equal);
1436                assert_eq!(rule_8(None, local0, local1), Ordering::Equal);
1437
1438                // Second, test that the common prefix length is capped at the
1439                // subnet prefix length.
1440
1441                // 3 leading 0x01 bytes, but a subnet prefix length of 8 (1 byte).
1442                let local0 = new_addr_entry(net_ip_v6!("1110::"), 8);
1443                // 2 leading 0x01 bytes, but a subnet prefix length of 8 (1 byte).
1444                let local1 = new_addr_entry(net_ip_v6!("1100::"), 8);
1445
1446                assert_eq!(rule_8(Some(remote), local0, local1), Ordering::Equal);
1447                assert_eq!(rule_8(Some(remote), local1, local0), Ordering::Equal);
1448                assert_eq!(rule_8(Some(remote), local0, local0), Ordering::Equal);
1449                assert_eq!(rule_8(Some(remote), local1, local1), Ordering::Equal);
1450                assert_eq!(rule_8(None, local0, local1), Ordering::Equal);
1451            }
1452
1453            {
1454                let new_addr_entry = |addr, device| SasCandidate {
1455                    addr_sub: AddrSubnet::new(addr, 128).unwrap(),
1456                    deprecated: false,
1457                    assigned: true,
1458                    temporary: false,
1459                    device,
1460                };
1461
1462                // If no rules apply, then the two address entries are equal.
1463                assert_eq!(
1464                    select_ipv6_source_address_cmp(
1465                        Some(remote),
1466                        dev0,
1467                        &new_addr_entry(*local0, *dev1),
1468                        &new_addr_entry(*local1, *dev2),
1469                    ),
1470                    Ordering::Equal
1471                );
1472            }
1473        }
1474
1475        #[test]
1476        fn test_select_ipv6_source_address_no_remote() {
1477            // Verify that source address selection correctly applies all
1478            // applicable rules when the remote is `None`.
1479            let dev0 = &0;
1480            let dev1 = &1;
1481            let dev2 = &2;
1482
1483            let local0 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:2::")).unwrap();
1484            let local1 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:3::")).unwrap();
1485
1486            let new_addr_entry = |addr, deprecated, device| SasCandidate {
1487                addr_sub: AddrSubnet::new(addr, 128).unwrap(),
1488                deprecated,
1489                assigned: true,
1490                temporary: false,
1491                device,
1492            };
1493
1494            // Verify that Rule 3 still applies (avoid deprecated states).
1495            assert_eq!(
1496                select_ipv6_source_address_cmp(
1497                    None,
1498                    dev0,
1499                    &new_addr_entry(*local0, false, *dev1),
1500                    &new_addr_entry(*local1, true, *dev2),
1501                ),
1502                Ordering::Greater
1503            );
1504
1505            // Verify that Rule 5 still applies (Prefer outgoing interface).
1506            assert_eq!(
1507                select_ipv6_source_address_cmp(
1508                    None,
1509                    dev0,
1510                    &new_addr_entry(*local0, false, *dev0),
1511                    &new_addr_entry(*local1, false, *dev1),
1512                ),
1513                Ordering::Greater
1514            );
1515        }
1516    }
1517}
1518
1519/// Test fake implementations of the traits defined in the `socket` module.
1520#[cfg(any(test, feature = "testutils"))]
1521pub(crate) mod testutil {
1522    use alloc::boxed::Box;
1523    use alloc::vec::Vec;
1524    use core::num::NonZeroUsize;
1525
1526    use derivative::Derivative;
1527    use net_types::ip::{GenericOverIp, IpAddr, IpAddress, Ipv4, Ipv4Addr, Ipv6, Subnet};
1528    use net_types::{MulticastAddr, Witness as _};
1529    use netstack3_base::testutil::{FakeCoreCtx, FakeStrongDeviceId, FakeWeakDeviceId};
1530    use netstack3_base::{SendFrameContext, SendFrameError};
1531    use netstack3_filter::Tuple;
1532    use netstack3_hashmap::HashMap;
1533
1534    use super::*;
1535    use crate::internal::base::{
1536        BaseTransportIpContext, DEFAULT_HOP_LIMITS, HopLimits, MulticastMembershipHandler,
1537    };
1538    use crate::internal::routing::testutil::FakeIpRoutingCtx;
1539    use crate::internal::routing::{self, RoutingTable};
1540    use crate::internal::types::{Destination, Entry, Metric, RawMetric};
1541
    /// A fake implementation of the traits required by the transport layer from
    /// the IP layer.
    #[derive(Derivative, GenericOverIp)]
    #[generic_over_ip(I, Ip)]
    #[derivative(Default(bound = ""))]
    pub struct FakeIpSocketCtx<I: Ip, D> {
        /// The fake routing table.
        pub(crate) table: RoutingTable<I, D>,
        /// Fake routing context.
        // NOTE(review): how this relates to `table` is not visible in this
        // part of the file - confirm against `FakeIpRoutingCtx`.
        forwarding: FakeIpRoutingCtx<D>,
        /// Per-device fake state (e.g. assigned addresses and the default hop
        /// limit), keyed by device ID.
        devices: HashMap<D, FakeDeviceState<I>>,
    }
1552
    /// A trait enabling [`FakeIpSocketCtx`]'s implementations for
    /// [`FakeCoreCtx`] with types that hold a [`FakeIpSocketCtx`] internally.
    pub trait InnerFakeIpSocketCtx<I: Ip, D> {
        /// Gets a mutable reference to the inner fake context.
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D>;
    }
1559
    // A `FakeIpSocketCtx` trivially holds itself, so it can be used directly
    // wherever an `InnerFakeIpSocketCtx` is expected.
    impl<I: Ip, D> InnerFakeIpSocketCtx<I, D> for FakeIpSocketCtx<I, D> {
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
            self
        }
    }
1565
1566    impl<I: IpExt, D: FakeStrongDeviceId, BC> BaseTransportIpContext<I, BC> for FakeIpSocketCtx<I, D> {
1567        fn get_default_hop_limits(&mut self, device: Option<&D>) -> HopLimits {
1568            device.map_or(DEFAULT_HOP_LIMITS, |device| {
1569                let hop_limit = self.get_device_state(device).default_hop_limit;
1570                HopLimits { unicast: hop_limit, multicast: DEFAULT_HOP_LIMITS.multicast }
1571            })
1572        }
1573
1574        type DevicesWithAddrIter<'a> = Box<dyn Iterator<Item = D> + 'a>;
1575
1576        fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
1577            &mut self,
1578            addr: SpecifiedAddr<I::Addr>,
1579            cb: F,
1580        ) -> O {
1581            cb(Box::new(self.devices.iter().filter_map(move |(device, state)| {
1582                state.addresses.contains(&addr).then(|| device.clone())
1583            })))
1584        }
1585
1586        fn get_original_destination(&mut self, _tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
1587            unimplemented!()
1588        }
1589    }
1590
    // The fake context identifies devices with the fake strong ID `D` and its
    // associated weak ID type.
    impl<I: IpExt, D: FakeStrongDeviceId> DeviceIdContext<AnyDevice> for FakeIpSocketCtx<I, D> {
        type DeviceId = D;
        type WeakDeviceId = D::Weak;
    }
1595
1596    impl<I, State, D, Meta, BC> IpSocketHandler<I, BC> for FakeCoreCtx<State, Meta, D>
1597    where
1598        I: IpExt + FilterIpExt,
1599        State: InnerFakeIpSocketCtx<I, D>,
1600        D: FakeStrongDeviceId,
1601        BC: TxMetadataBindingsTypes,
1602        FakeCoreCtx<State, Meta, D>:
1603            SendFrameContext<BC, SendIpPacketMeta<I, Self::DeviceId, SpecifiedAddr<I::Addr>>>,
1604    {
1605        fn new_ip_socket<O>(
1606            &mut self,
1607            _bindings_ctx: &mut BC,
1608            args: IpSocketArgs<'_, Self::DeviceId, I, O>,
1609        ) -> Result<IpSock<I, Self::WeakDeviceId>, IpSockCreationError>
1610        where
1611            O: RouteResolutionOptions<I>,
1612        {
1613            self.state.fake_ip_socket_ctx_mut().new_ip_socket(args)
1614        }
1615
1616        fn send_ip_packet<S, O>(
1617            &mut self,
1618            bindings_ctx: &mut BC,
1619            socket: &IpSock<I, Self::WeakDeviceId>,
1620            body: S,
1621            options: &O,
1622            // NB: Tx metadata plumbing is not supported for fake socket
1623            // contexts. Drop at the end of the scope.
1624            _tx_meta: BC::TxMetadata,
1625        ) -> Result<(), IpSockSendError>
1626        where
1627            S: TransportPacketSerializer<I>,
1628            S::Buffer: BufferMut,
1629            O: SendOptions<I> + RouteResolutionOptions<I>,
1630        {
1631            let meta = self.state.fake_ip_socket_ctx_mut().resolve_send_meta(socket, options)?;
1632            self.send_frame(bindings_ctx, meta, body).or_else(
1633                |SendFrameError { serializer: _, error }| IpSockSendError::from_send_frame(error),
1634            )
1635        }
1636
1637        fn confirm_reachable<O>(
1638            &mut self,
1639            _bindings_ctx: &mut BC,
1640            _socket: &IpSock<I, Self::WeakDeviceId>,
1641            _options: &O,
1642        ) {
1643        }
1644    }
1645
1646    impl<I: IpExt, D: FakeStrongDeviceId, BC> MulticastMembershipHandler<I, BC>
1647        for FakeIpSocketCtx<I, D>
1648    {
1649        fn join_multicast_group(
1650            &mut self,
1651            _bindings_ctx: &mut BC,
1652            device: &Self::DeviceId,
1653            addr: MulticastAddr<<I as Ip>::Addr>,
1654        ) {
1655            let value = self.get_device_state_mut(device).multicast_groups.entry(addr).or_insert(0);
1656            *value = value.checked_add(1).unwrap();
1657        }
1658
1659        fn leave_multicast_group(
1660            &mut self,
1661            _bindings_ctx: &mut BC,
1662            device: &Self::DeviceId,
1663            addr: MulticastAddr<<I as Ip>::Addr>,
1664        ) {
1665            let value = self
1666                .get_device_state_mut(device)
1667                .multicast_groups
1668                .get_mut(&addr)
1669                .unwrap_or_else(|| panic!("no entry for {addr} on {device:?}"));
1670            *value = value.checked_sub(1).unwrap();
1671        }
1672
1673        fn select_device_for_multicast_group(
1674            &mut self,
1675            addr: MulticastAddr<<I as Ip>::Addr>,
1676            _marks: &Marks,
1677        ) -> Result<Self::DeviceId, ResolveRouteError> {
1678            let remote_ip = SocketIpAddr::new_from_multicast(addr);
1679            self.lookup_route(None, None, remote_ip, /* transparent */ false)
1680                .map(|ResolvedRoute { device, .. }| device)
1681        }
1682    }
1683
1684    impl<I, BC, D, State, Meta> BaseTransportIpContext<I, BC> for FakeCoreCtx<State, Meta, D>
1685    where
1686        I: IpExt + FilterIpExt,
1687        D: FakeStrongDeviceId,
1688        State: InnerFakeIpSocketCtx<I, D>,
1689        BC: TxMetadataBindingsTypes,
1690        Self: IpSocketHandler<I, BC, DeviceId = D, WeakDeviceId = FakeWeakDeviceId<D>>,
1691    {
1692        type DevicesWithAddrIter<'a> = Box<dyn Iterator<Item = D> + 'a>;
1693
1694        fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
1695            &mut self,
1696            addr: SpecifiedAddr<I::Addr>,
1697            cb: F,
1698        ) -> O {
1699            BaseTransportIpContext::<I, BC>::with_devices_with_assigned_addr(
1700                self.state.fake_ip_socket_ctx_mut(),
1701                addr,
1702                cb,
1703            )
1704        }
1705
1706        fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits {
1707            BaseTransportIpContext::<I, BC>::get_default_hop_limits(
1708                self.state.fake_ip_socket_ctx_mut(),
1709                device,
1710            )
1711        }
1712
1713        fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
1714            BaseTransportIpContext::<I, BC>::get_original_destination(
1715                self.state.fake_ip_socket_ctx_mut(),
1716                tuple,
1717            )
1718        }
1719    }
1720
    /// A fake context providing [`IpSocketHandler`] for tests.
    ///
    /// Bundles one [`FakeIpSocketCtx`] per IP version so that a single value
    /// can serve both IPv4 and IPv6.
    #[derive(Derivative)]
    #[derivative(Default(bound = ""))]
    pub struct FakeDualStackIpSocketCtx<D> {
        /// The IPv4 fake socket context.
        v4: FakeIpSocketCtx<Ipv4, D>,
        /// The IPv6 fake socket context.
        v6: FakeIpSocketCtx<Ipv6, D>,
    }
1728
1729    impl<D: FakeStrongDeviceId> FakeDualStackIpSocketCtx<D> {
1730        /// Creates a new [`FakeDualStackIpSocketCtx`] with `devices`.
1731        pub fn new<A: Into<SpecifiedAddr<IpAddr>>>(
1732            devices: impl IntoIterator<Item = FakeDeviceConfig<D, A>>,
1733        ) -> Self {
1734            let partition =
1735                |v: Vec<A>| -> (Vec<SpecifiedAddr<Ipv4Addr>>, Vec<SpecifiedAddr<Ipv6Addr>>) {
1736                    v.into_iter().fold((Vec::new(), Vec::new()), |(mut v4, mut v6), i| {
1737                        match IpAddr::from(i.into()) {
1738                            IpAddr::V4(a) => v4.push(a),
1739                            IpAddr::V6(a) => v6.push(a),
1740                        }
1741                        (v4, v6)
1742                    })
1743                };
1744
1745            let (v4, v6): (Vec<_>, Vec<_>) = devices
1746                .into_iter()
1747                .map(|FakeDeviceConfig { device, local_ips, remote_ips }| {
1748                    let (local_v4, local_v6) = partition(local_ips);
1749                    let (remote_v4, remote_v6) = partition(remote_ips);
1750                    (
1751                        FakeDeviceConfig {
1752                            device: device.clone(),
1753                            local_ips: local_v4,
1754                            remote_ips: remote_v4,
1755                        },
1756                        FakeDeviceConfig { device, local_ips: local_v6, remote_ips: remote_v6 },
1757                    )
1758                })
1759                .unzip();
1760            Self { v4: FakeIpSocketCtx::new(v4), v6: FakeIpSocketCtx::new(v6) }
1761        }
1762
1763        /// Returns the [`FakeIpSocketCtx`] for IP version `I`.
1764        pub fn inner_mut<I: Ip>(&mut self) -> &mut FakeIpSocketCtx<I, D> {
1765            I::map_ip_out(self, |s| &mut s.v4, |s| &mut s.v6)
1766        }
1767
1768        fn inner<I: Ip>(&self) -> &FakeIpSocketCtx<I, D> {
1769            I::map_ip_out(self, |s| &s.v4, |s| &s.v6)
1770        }
1771
1772        /// Adds a fake direct route to `ip` through `device`.
1773        pub fn add_route(&mut self, device: D, ip: SpecifiedAddr<IpAddr>) {
1774            match IpAddr::from(ip) {
1775                IpAddr::V4(ip) => {
1776                    routing::testutil::add_on_link_routing_entry(&mut self.v4.table, ip, device)
1777                }
1778                IpAddr::V6(ip) => {
1779                    routing::testutil::add_on_link_routing_entry(&mut self.v6.table, ip, device)
1780                }
1781            }
1782        }
1783
1784        /// Adds a fake route to `subnet` through `device`.
1785        pub fn add_subnet_route<A: IpAddress>(&mut self, device: D, subnet: Subnet<A>) {
1786            let entry = Entry {
1787                subnet,
1788                device,
1789                gateway: None,
1790                metric: Metric::ExplicitMetric(RawMetric(0)),
1791            };
1792            A::Version::map_ip::<_, ()>(
1793                entry,
1794                |entry_v4| {
1795                    let _ = routing::testutil::add_entry(&mut self.v4.table, entry_v4)
1796                        .expect("Failed to add route");
1797                },
1798                |entry_v6| {
1799                    let _ = routing::testutil::add_entry(&mut self.v6.table, entry_v6)
1800                        .expect("Failed to add route");
1801                },
1802            );
1803        }
1804
1805        /// Returns a mutable reference to fake device state.
1806        pub fn get_device_state_mut<I: IpExt>(&mut self, device: &D) -> &mut FakeDeviceState<I> {
1807            self.inner_mut::<I>().get_device_state_mut(device)
1808        }
1809
1810        /// Returns the fake multicast memberships.
1811        pub fn multicast_memberships<I: IpExt>(
1812            &self,
1813        ) -> HashMap<(D, MulticastAddr<I::Addr>), NonZeroUsize> {
1814            self.inner::<I>().multicast_memberships()
1815        }
1816    }
1817
1818    impl<I: IpExt, S: InnerFakeIpSocketCtx<I, D>, Meta, D: FakeStrongDeviceId, BC>
1819        MulticastMembershipHandler<I, BC> for FakeCoreCtx<S, Meta, D>
1820    {
1821        fn join_multicast_group(
1822            &mut self,
1823            bindings_ctx: &mut BC,
1824            device: &Self::DeviceId,
1825            addr: MulticastAddr<<I as Ip>::Addr>,
1826        ) {
1827            MulticastMembershipHandler::<I, BC>::join_multicast_group(
1828                self.state.fake_ip_socket_ctx_mut(),
1829                bindings_ctx,
1830                device,
1831                addr,
1832            )
1833        }
1834
1835        fn leave_multicast_group(
1836            &mut self,
1837            bindings_ctx: &mut BC,
1838            device: &Self::DeviceId,
1839            addr: MulticastAddr<<I as Ip>::Addr>,
1840        ) {
1841            MulticastMembershipHandler::<I, BC>::leave_multicast_group(
1842                self.state.fake_ip_socket_ctx_mut(),
1843                bindings_ctx,
1844                device,
1845                addr,
1846            )
1847        }
1848
1849        fn select_device_for_multicast_group(
1850            &mut self,
1851            addr: MulticastAddr<<I as Ip>::Addr>,
1852            marks: &Marks,
1853        ) -> Result<Self::DeviceId, ResolveRouteError> {
1854            MulticastMembershipHandler::<I, BC>::select_device_for_multicast_group(
1855                self.state.fake_ip_socket_ctx_mut(),
1856                addr,
1857                marks,
1858            )
1859        }
1860    }
1861
1862    impl<I: Ip, D, State: InnerFakeIpSocketCtx<I, D>, Meta> InnerFakeIpSocketCtx<I, D>
1863        for FakeCoreCtx<State, Meta, D>
1864    {
1865        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
1866            self.state.fake_ip_socket_ctx_mut()
1867        }
1868    }
1869
1870    impl<I: Ip, D: FakeStrongDeviceId> InnerFakeIpSocketCtx<I, D> for FakeDualStackIpSocketCtx<D> {
1871        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
1872            self.inner_mut::<I>()
1873        }
1874    }
1875
    /// A device configuration for fake socket contexts.
    ///
    /// `A` is the address type used for both the local and the remote IPs.
    #[derive(Clone, GenericOverIp)]
    #[generic_over_ip()]
    pub struct FakeDeviceConfig<D, A> {
        /// The device.
        pub device: D,
        /// The device's local IPs.
        pub local_ips: Vec<A>,
        /// The remote IPs reachable from this device.
        pub remote_ips: Vec<A>,
    }
1887
    /// State associated with a fake device in [`FakeIpSocketCtx`].
    pub struct FakeDeviceState<I: Ip> {
        /// The default hop limit used by the device.
        pub default_hop_limit: NonZeroU8,
        /// The assigned device addresses.
        pub addresses: Vec<SpecifiedAddr<I::Addr>>,
        /// The joined multicast groups.
        ///
        /// Maps each group to its join count. A count of zero means the group
        /// is not currently joined.
        pub multicast_groups: HashMap<MulticastAddr<I::Addr>, usize>,
    }
1897
1898    impl<I: Ip> FakeDeviceState<I> {
1899        /// Returns whether this fake device has joined multicast group `addr`.
1900        pub fn is_in_multicast_group(&self, addr: &MulticastAddr<I::Addr>) -> bool {
1901            self.multicast_groups.get(addr).is_some_and(|v| *v != 0)
1902        }
1903    }
1904
1905    impl<I: IpExt, D: FakeStrongDeviceId> FakeIpSocketCtx<I, D> {
1906        /// Creates a new `FakeIpSocketCtx` with the given device
1907        /// configs.
1908        pub fn new(
1909            device_configs: impl IntoIterator<Item = FakeDeviceConfig<D, SpecifiedAddr<I::Addr>>>,
1910        ) -> Self {
1911            let mut table = RoutingTable::default();
1912            let mut devices = HashMap::default();
1913            for FakeDeviceConfig { device, local_ips, remote_ips } in device_configs {
1914                for addr in remote_ips {
1915                    routing::testutil::add_on_link_routing_entry(&mut table, addr, device.clone())
1916                }
1917                let state = FakeDeviceState {
1918                    default_hop_limit: DEFAULT_HOP_LIMITS.unicast,
1919                    addresses: local_ips,
1920                    multicast_groups: Default::default(),
1921                };
1922                assert!(
1923                    devices.insert(device.clone(), state).is_none(),
1924                    "duplicate entries for {device:?}",
1925                );
1926            }
1927
1928            Self { table, devices, forwarding: Default::default() }
1929        }
1930
1931        /// Returns an immutable reference to the fake device state.
1932        pub fn get_device_state(&self, device: &D) -> &FakeDeviceState<I> {
1933            self.devices.get(device).unwrap_or_else(|| panic!("no device {device:?}"))
1934        }
1935
1936        /// Returns a mutable reference to the fake device state.
1937        pub fn get_device_state_mut(&mut self, device: &D) -> &mut FakeDeviceState<I> {
1938            self.devices.get_mut(device).unwrap_or_else(|| panic!("no device {device:?}"))
1939        }
1940
1941        pub(crate) fn multicast_memberships(
1942            &self,
1943        ) -> HashMap<(D, MulticastAddr<I::Addr>), NonZeroUsize> {
1944            self.devices
1945                .iter()
1946                .map(|(device, state)| {
1947                    state.multicast_groups.iter().filter_map(|(group, count)| {
1948                        NonZeroUsize::new(*count).map(|count| ((device.clone(), *group), count))
1949                    })
1950                })
1951                .flatten()
1952                .collect()
1953        }
1954
1955        fn new_ip_socket<O>(
1956            &mut self,
1957            args: IpSocketArgs<'_, D, I, O>,
1958        ) -> Result<IpSock<I, D::Weak>, IpSockCreationError>
1959        where
1960            O: RouteResolutionOptions<I>,
1961        {
1962            let IpSocketArgs { device, local_ip, remote_ip, proto, options } = args;
1963            let device = device
1964                .as_ref()
1965                .map(|d| d.as_strong_ref().ok_or(ResolveRouteError::Unreachable))
1966                .transpose()?;
1967            let device = device.as_ref().map(|d| d.as_ref());
1968            let resolved_route =
1969                self.lookup_route(device, local_ip, remote_ip, options.transparent())?;
1970            Ok(new_ip_socket(device, resolved_route, remote_ip, proto))
1971        }
1972
1973        fn lookup_route(
1974            &mut self,
1975            device: Option<&D>,
1976            local_ip: Option<IpDeviceAddr<I::Addr>>,
1977            addr: RoutableIpAddr<I::Addr>,
1978            transparent: bool,
1979        ) -> Result<ResolvedRoute<I, D>, ResolveRouteError> {
1980            let Self { table, devices, forwarding } = self;
1981            let (destination, ()) = table
1982                .lookup_filter_map(forwarding, device, addr.addr(), |_, d| match &local_ip {
1983                    None => Some(()),
1984                    Some(local_ip) => {
1985                        if transparent {
1986                            return Some(());
1987                        }
1988                        devices.get(d).and_then(|state| {
1989                            state.addresses.contains(local_ip.as_ref()).then_some(())
1990                        })
1991                    }
1992                })
1993                .next()
1994                .ok_or(ResolveRouteError::Unreachable)?;
1995
1996            let Destination { device, next_hop } = destination;
1997            let mut addrs = devices.get(device).unwrap().addresses.iter();
1998            let local_ip = match local_ip {
1999                None => {
2000                    let addr = addrs.next().ok_or(ResolveRouteError::NoSrcAddr)?;
2001                    IpDeviceAddr::new(addr.get()).expect("not valid device addr")
2002                }
2003                Some(local_ip) => {
2004                    if !transparent {
2005                        // We already constrained the set of devices so this
2006                        // should be a given.
2007                        assert!(
2008                            addrs.any(|a| a.get() == local_ip.addr()),
2009                            "didn't find IP {:?} in {:?}",
2010                            local_ip,
2011                            addrs.collect::<Vec<_>>()
2012                        );
2013                    }
2014                    local_ip
2015                }
2016            };
2017
2018            Ok(ResolvedRoute {
2019                src_addr: local_ip,
2020                device: device.clone(),
2021                local_delivery_device: None,
2022                next_hop,
2023                // NB: Keep unit tests simple and skip internal forwarding
2024                // logic. Instead, this is verified by integration tests.
2025                internal_forwarding: InternalForwarding::NotUsed,
2026            })
2027        }
2028
2029        fn resolve_send_meta<O>(
2030            &mut self,
2031            socket: &IpSock<I, D::Weak>,
2032            options: &O,
2033        ) -> Result<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>, IpSockSendError>
2034        where
2035            O: SendOptions<I> + RouteResolutionOptions<I>,
2036        {
2037            let IpSockDefinition { remote_ip, local_ip, device, proto } = &socket.definition;
2038            let device = device
2039                .as_ref()
2040                .map(|d| d.upgrade().ok_or(ResolveRouteError::Unreachable))
2041                .transpose()?;
2042            let ResolvedRoute {
2043                src_addr,
2044                device,
2045                next_hop,
2046                local_delivery_device: _,
2047                internal_forwarding: _,
2048            } = self.lookup_route(
2049                device.as_ref(),
2050                Some(*local_ip),
2051                *remote_ip,
2052                options.transparent(),
2053            )?;
2054
2055            let remote_ip: &SpecifiedAddr<_> = remote_ip.as_ref();
2056
2057            let destination = IpPacketDestination::from_next_hop(next_hop, *remote_ip);
2058            Ok(SendIpPacketMeta {
2059                device,
2060                src_ip: src_addr.into(),
2061                dst_ip: *remote_ip,
2062                destination,
2063                proto: *proto,
2064                ttl: options.hop_limit(remote_ip),
2065                mtu: options.mtu(),
2066                dscp_and_ecn: DscpAndEcn::default(),
2067            })
2068        }
2069    }
2070}