netstack3_ip/
socket.rs

1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! IPv4 and IPv6 sockets.
6
7use core::cmp::Ordering;
8use core::convert::Infallible;
9use core::num::NonZeroU8;
10
11use log::error;
12use net_types::ip::{Ip, IpVersionMarker, Ipv6Addr, Mtu};
13use net_types::{MulticastAddress, ScopeableAddress, SpecifiedAddr};
14use netstack3_base::socket::{SocketIpAddr, SocketIpAddrExt as _};
15use netstack3_base::{
16    AnyDevice, CounterContext, DeviceIdContext, DeviceIdentifier, EitherDeviceId, InstantContext,
17    InterfaceProperties, IpDeviceAddr, IpExt, Marks, Mms, SendFrameErrorReason,
18    StrongDeviceIdentifier, TxMetadata as _, TxMetadataBindingsTypes, WeakDeviceIdentifier,
19};
20use netstack3_filter::{
21    self as filter, DynTransportSerializer, DynamicTransportSerializer, FilterBindingsContext,
22    FilterHandler as _, FilterIpExt, RawIpBody, SocketEgressFilterResult, SocketOpsFilter,
23    SocketOpsFilterBindingContext, TransportPacketSerializer,
24};
25use netstack3_trace::trace_duration;
26use packet::{BufferMut, PacketConstraints, SerializeError};
27use packet_formats::ip::DscpAndEcn;
28use thiserror::Error;
29
30use crate::internal::base::{
31    FilterHandlerProvider, IpDeviceMtuContext, IpLayerIpExt, IpLayerPacketMetadata,
32    IpPacketDestination, IpSendFrameError, IpSendFrameErrorReason, ResolveRouteError,
33    SendIpPacketMeta,
34};
35use crate::internal::counters::IpCounters;
36use crate::internal::device::state::IpDeviceStateIpExt;
37use crate::internal::routing::PacketOrigin;
38use crate::internal::routing::rules::RuleInput;
39use crate::internal::types::{InternalForwarding, ResolvedRoute, RoutableIpAddr};
40use crate::{HopLimits, NextHop};
41
42/// The arguments used for creating an [`IpSock`]
43pub struct IpSocketArgs<'a, D: StrongDeviceIdentifier, I: IpExt, O> {
44    /// The device the socket is bound to.
45    pub device: Option<EitherDeviceId<&'a D, &'a D::Weak>>,
46    /// The local IP to use for the connection. One is selected if not provided
47    /// based on the output route.
48    pub local_ip: Option<IpDeviceAddr<I::Addr>>,
49    /// The remote IP address for this connection.
50    pub remote_ip: RoutableIpAddr<I::Addr>,
51    /// The IP protocol in use.
52    pub proto: I::Proto,
53    /// Additional IP layer options.
54    pub options: &'a O,
55}
56/// An execution context defining a type of IP socket.
57pub trait IpSocketHandler<I: IpExt + FilterIpExt, BC: TxMetadataBindingsTypes>:
58    DeviceIdContext<AnyDevice>
59{
60    /// Constructs a new [`IpSock`].
61    ///
62    /// `new_ip_socket` constructs a new `IpSock` to the given remote IP
63    /// address from the given local IP address with the given IP protocol. If
64    /// no local IP address is given, one will be chosen automatically. If
65    /// `device` is `Some`, the socket will be bound to the given device - only
66    /// routes which egress over the device will be used. If no route is
67    /// available which egresses over the device - even if routes are available
68    /// which egress over other devices - the socket will be considered
69    /// unroutable.
70    ///
71    /// `new_ip_socket` returns an error if no route to the remote was found in
72    /// the forwarding table or if the given local IP address is not valid for
73    /// the found route.
74    fn new_ip_socket<O>(
75        &mut self,
76        bindings_ctx: &mut BC,
77        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
78    ) -> Result<IpSock<I, Self::WeakDeviceId>, IpSockCreationError>
79    where
80        O: RouteResolutionOptions<I>;
81
82    /// Sends an IP packet on a socket.
83    ///
84    /// The generated packet has its metadata initialized from `socket`,
85    /// including the source and destination addresses, the Time To Live/Hop
86    /// Limit, and the Protocol/Next Header. The outbound device is also chosen
87    /// based on information stored in the socket.
88    ///
89    /// `mtu` may be used to optionally impose an MTU on the outgoing packet.
90    /// Note that the device's MTU will still be imposed on the packet. That is,
91    /// the smaller of `mtu` and the device's MTU will be imposed on the packet.
92    ///
93    /// If the socket is currently unroutable, an error is returned.
94    fn send_ip_packet<S, O>(
95        &mut self,
96        bindings_ctx: &mut BC,
97        socket: &IpSock<I, Self::WeakDeviceId>,
98        body: S,
99        options: &O,
100        tx_metadata: BC::TxMetadata,
101    ) -> Result<(), IpSockSendError>
102    where
103        S: TransportPacketSerializer<I>,
104        S::Buffer: BufferMut,
105        O: SendOptions<I> + RouteResolutionOptions<I>;
106
107    /// Confirms the provided IP socket destination is reachable.
108    ///
109    /// Implementations must retrieve the next hop given the provided
110    /// IP socket and confirm neighbor reachability for the resolved target
111    /// device.
112    fn confirm_reachable<O>(
113        &mut self,
114        bindings_ctx: &mut BC,
115        socket: &IpSock<I, Self::WeakDeviceId>,
116        options: &O,
117    ) where
118        O: RouteResolutionOptions<I>;
119
120    /// Creates a temporary IP socket and sends a single packet on it.
121    ///
122    /// `local_ip`, `remote_ip`, `proto`, and `options` are passed directly to
123    /// [`IpSocketHandler::new_ip_socket`]. `get_body_from_src_ip` is given the
124    /// source IP address for the packet - which may have been chosen
125    /// automatically if `local_ip` is `None` - and returns the body to be
126    /// encapsulated. This is provided in case the body's contents depend on the
127    /// chosen source IP address.
128    ///
129    /// If `device` is specified, the available routes are limited to those that
130    /// egress over the device.
131    ///
132    /// `mtu` may be used to optionally impose an MTU on the outgoing packet.
133    /// Note that the device's MTU will still be imposed on the packet. That is,
134    /// the smaller of `mtu` and the device's MTU will be imposed on the packet.
135    ///
136    /// # Errors
137    ///
138    /// If an error is encountered while constructing the temporary IP socket
139    /// or sending the packet, `options` will be returned along with the
140    /// error. `get_body_from_src_ip` is fallible, and if there's an error,
141    /// it will be returned as well.
142    fn send_oneshot_ip_packet_with_fallible_serializer<S, E, F, O>(
143        &mut self,
144        bindings_ctx: &mut BC,
145        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
146        tx_metadata: BC::TxMetadata,
147        get_body_from_src_ip: F,
148    ) -> Result<(), SendOneShotIpPacketError<E>>
149    where
150        S: TransportPacketSerializer<I>,
151        S::Buffer: BufferMut,
152        F: FnOnce(IpDeviceAddr<I::Addr>) -> Result<S, E>,
153        O: SendOptions<I> + RouteResolutionOptions<I>,
154    {
155        let options = args.options;
156        let tmp = self
157            .new_ip_socket(bindings_ctx, args)
158            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })?;
159        let packet = get_body_from_src_ip(*tmp.local_ip())
160            .map_err(SendOneShotIpPacketError::SerializeError)?;
161        self.send_ip_packet(bindings_ctx, &tmp, packet, options, tx_metadata)
162            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })
163    }
164
165    /// Like `send_oneshot_ip_packet_with_fallible_serializer`, but a dynamic
166    /// transport serializer is used.
167    ///
168    /// This reduces code generation cost at the expense of some runtime
169    /// overhead.
170    fn send_oneshot_ip_packet_with_dyn_fallible_serializer<S, E, F, O>(
171        &mut self,
172        bindings_ctx: &mut BC,
173        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
174        tx_metadata: BC::TxMetadata,
175        get_body_from_src_ip: F,
176    ) -> Result<(), SendOneShotIpPacketError<E>>
177    where
178        S: DynamicTransportSerializer<I>,
179        F: FnOnce(IpDeviceAddr<I::Addr>) -> Result<S, E>,
180        O: SendOptions<I> + RouteResolutionOptions<I>,
181    {
182        let options = args.options;
183        let tmp = self
184            .new_ip_socket(bindings_ctx, args)
185            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })?;
186        let mut packet = get_body_from_src_ip(*tmp.local_ip())
187            .map_err(SendOneShotIpPacketError::SerializeError)?;
188        self.send_ip_packet(
189            bindings_ctx,
190            &tmp,
191            DynTransportSerializer::new(&mut packet),
192            options,
193            tx_metadata,
194        )
195        .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })
196    }
197
198    /// Sends a one-shot IP packet but with a non-fallible serializer.
199    fn send_oneshot_ip_packet<S, F, O>(
200        &mut self,
201        bindings_ctx: &mut BC,
202        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
203        tx_metadata: BC::TxMetadata,
204        get_body_from_src_ip: F,
205    ) -> Result<(), IpSockCreateAndSendError>
206    where
207        S: TransportPacketSerializer<I>,
208        S::Buffer: BufferMut,
209        F: FnOnce(IpDeviceAddr<I::Addr>) -> S,
210        O: SendOptions<I> + RouteResolutionOptions<I>,
211    {
212        self.send_oneshot_ip_packet_with_fallible_serializer(
213            bindings_ctx,
214            args,
215            tx_metadata,
216            |ip| Ok::<_, Infallible>(get_body_from_src_ip(ip)),
217        )
218        .map_err(|err| match err {
219            SendOneShotIpPacketError::CreateAndSendError { err } => err,
220        })
221    }
222
223    /// Like `send_oneshot_ip_packet`, but a dynamic transport serializer is
224    /// used.
225    ///
226    /// This reduces code generation cost at the expense of some runtime
227    /// overhead.
228    fn send_oneshot_ip_packet_with_dyn_serializer<S, F, O>(
229        &mut self,
230        bindings_ctx: &mut BC,
231        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
232        tx_metadata: BC::TxMetadata,
233        get_body_from_src_ip: F,
234    ) -> Result<(), IpSockCreateAndSendError>
235    where
236        S: DynamicTransportSerializer<I>,
237        F: FnOnce(IpDeviceAddr<I::Addr>) -> S,
238        O: SendOptions<I> + RouteResolutionOptions<I>,
239    {
240        self.send_oneshot_ip_packet_with_dyn_fallible_serializer(
241            bindings_ctx,
242            args,
243            tx_metadata,
244            |ip| Ok::<_, Infallible>(get_body_from_src_ip(ip)),
245        )
246        .map_err(|err| match err {
247            SendOneShotIpPacketError::CreateAndSendError { err } => err,
248        })
249    }
250}
251
252/// An error in sending a packet on an IP socket.
253#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
254pub enum IpSockSendError {
255    /// An MTU was exceeded.
256    ///
257    /// This could be caused by an MTU at any layer of the stack, including both
258    /// device MTUs and packet format body size limits.
259    #[error("a maximum transmission unit (MTU) was exceeded")]
260    Mtu,
261    /// The socket is currently unroutable.
262    #[error("the socket is currently unroutable: {0}")]
263    Unroutable(#[from] ResolveRouteError),
264    /// The socket operation would've resulted in illegal loopback addresses on
265    /// a non-loopback device.
266    #[error("illegal loopback address")]
267    IllegalLoopbackAddress,
268    /// Broadcast send is not allowed.
269    #[error("broadcast send is not enabled for the socket")]
270    BroadcastNotAllowed,
271}
272
273impl From<SerializeError<Infallible>> for IpSockSendError {
274    fn from(err: SerializeError<Infallible>) -> IpSockSendError {
275        match err {
276            SerializeError::SizeLimitExceeded => IpSockSendError::Mtu,
277        }
278    }
279}
280
281impl IpSockSendError {
282    /// Constructs a `Result` from an [`IpSendFrameErrorReason`] with
283    /// application-visible [`IpSockSendError`]s in the `Err` variant.
284    ///
285    /// Errors that are not bubbled up to applications are dropped.
286    fn from_ip_send_frame(e: IpSendFrameErrorReason) -> Result<(), Self> {
287        match e {
288            IpSendFrameErrorReason::Device(d) => Self::from_send_frame(d),
289            IpSendFrameErrorReason::IllegalLoopbackAddress => Err(Self::IllegalLoopbackAddress),
290        }
291    }
292
293    /// Constructs a `Result` from a [`SendFrameErrorReason`] with
294    /// application-visible [`IpSockSendError`]s in the `Err` variant.
295    ///
296    /// Errors that are not bubbled up to applications are dropped.
297    fn from_send_frame(e: SendFrameErrorReason) -> Result<(), Self> {
298        match e {
299            SendFrameErrorReason::Alloc | SendFrameErrorReason::QueueFull => Ok(()),
300            SendFrameErrorReason::SizeConstraintsViolation => Err(Self::Mtu),
301        }
302    }
303}
304
305/// An error in sending a packet on a temporary IP socket.
306#[derive(Error, Copy, Clone, Debug)]
307pub enum IpSockCreateAndSendError {
308    /// Cannot send via temporary socket.
309    #[error("cannot send via temporary socket: {0}")]
310    Send(#[from] IpSockSendError),
311    /// The temporary socket could not be created.
312    #[error("the temporary socket could not be created: {0}")]
313    Create(#[from] IpSockCreationError),
314}
315
316/// The error returned by
317/// [`IpSocketHandler::send_oneshot_ip_packet_with_fallible_serializer`].
318#[derive(Debug)]
319#[allow(missing_docs)]
320pub enum SendOneShotIpPacketError<E> {
321    CreateAndSendError { err: IpSockCreateAndSendError },
322    SerializeError(E),
323}
324
325/// Possible errors when retrieving the maximum transport message size.
326#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
327pub enum MmsError {
328    /// Cannot find the device that is used for the ip socket, possibly because
329    /// there is no route.
330    #[error("cannot find the device: {0}")]
331    NoDevice(#[from] ResolveRouteError),
332    /// The MTU provided by the device is too small such that there is no room
333    /// for a transport message at all.
334    #[error("invalid MTU: {0:?}")]
335    MTUTooSmall(Mtu),
336}
337
338/// Gets device related information of an IP socket.
339pub trait DeviceIpSocketHandler<I: IpExt, BC>: DeviceIdContext<AnyDevice> {
340    /// Gets the maximum message size for the transport layer, it equals the
341    /// device MTU minus the IP header size.
342    ///
343    /// This corresponds to the GET_MAXSIZES call described in:
344    /// https://www.rfc-editor.org/rfc/rfc1122#section-3.4
345    fn get_mms<O: RouteResolutionOptions<I>>(
346        &mut self,
347        bindings_ctx: &mut BC,
348        ip_sock: &IpSock<I, Self::WeakDeviceId>,
349        options: &O,
350    ) -> Result<Mms, MmsError>;
351}
352
353/// An error encountered when creating an IP socket.
354#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
355pub enum IpSockCreationError {
356    /// An error occurred while looking up a route.
357    #[error("a route cannot be determined: {0}")]
358    Route(#[from] ResolveRouteError),
359}
360
361/// An IP socket.
362#[derive(Clone, Debug)]
363#[cfg_attr(test, derive(PartialEq))]
364pub struct IpSock<I: IpExt, D> {
365    /// The definition of the socket.
366    ///
367    /// This does not change for the lifetime of the socket.
368    definition: IpSockDefinition<I, D>,
369}
370
371impl<I: IpExt, D> IpSock<I, D> {
372    /// Returns the socket's definition.
373    #[cfg(any(test, feature = "testutils"))]
374    pub fn definition(&self) -> &IpSockDefinition<I, D> {
375        &self.definition
376    }
377}
378
379/// The definition of an IP socket.
380///
381/// These values are part of the socket's definition, and never change.
382#[derive(Clone, Debug, PartialEq)]
383pub struct IpSockDefinition<I: IpExt, D> {
384    /// The socket's remote address.
385    pub remote_ip: SocketIpAddr<I::Addr>,
386    /// The socket's local address.
387    ///
388    /// Guaranteed to be unicast in its subnet since it's always equal to an
389    /// address assigned to the local device. We can't use the `UnicastAddr`
390    /// witness type since `Ipv4Addr` doesn't implement `UnicastAddress`.
391    //
392    // TODO(joshlf): Support unnumbered interfaces. Once we do that, a few
393    // issues arise: A) Does the unicast restriction still apply, and is that
394    // even well-defined for IPv4 in the absence of a subnet? B) Presumably we
395    // have to always bind to a particular interface?
396    pub local_ip: IpDeviceAddr<I::Addr>,
397    /// The socket's bound output device.
398    pub device: Option<D>,
399    /// The IP protocol the socket is bound to.
400    pub proto: I::Proto,
401}
402
403impl<I: IpExt, D> IpSock<I, D> {
404    /// Returns the socket's local IP address.
405    pub fn local_ip(&self) -> &IpDeviceAddr<I::Addr> {
406        &self.definition.local_ip
407    }
408    /// Returns the socket's remote IP address.
409    pub fn remote_ip(&self) -> &SocketIpAddr<I::Addr> {
410        &self.definition.remote_ip
411    }
412    /// Returns the selected output interface for the socket, if any.
413    pub fn device(&self) -> Option<&D> {
414        self.definition.device.as_ref()
415    }
416    /// Returns the socket's protocol.
417    pub fn proto(&self) -> I::Proto {
418        self.definition.proto
419    }
420}
421
422// TODO(joshlf): Once we support configuring transport-layer protocols using
423// type parameters, use that to ensure that `proto` is the right protocol for
424// the caller. We will still need to have a separate enforcement mechanism for
425// raw IP sockets once we support those.
426
427/// The bindings execution context for IP sockets.
428pub trait IpSocketBindingsContext<D>:
429    InstantContext
430    + FilterBindingsContext<D>
431    + TxMetadataBindingsTypes
432    + SocketOpsFilterBindingContext<D>
433{
434}
435impl<
436    D,
437    BC: InstantContext
438        + FilterBindingsContext<D>
439        + TxMetadataBindingsTypes
440        + SocketOpsFilterBindingContext<D>,
441> IpSocketBindingsContext<D> for BC
442{
443}
444
445/// The context required in order to implement [`IpSocketHandler`].
446///
447/// Blanket impls of `IpSocketHandler` are provided in terms of
448/// `IpSocketContext`.
449pub trait IpSocketContext<I, BC>:
450    DeviceIdContext<AnyDevice, DeviceId: InterfaceProperties<BC::DeviceClass>>
451    + FilterHandlerProvider<I, BC>
452where
453    I: IpDeviceStateIpExt + IpExt + FilterIpExt,
454    BC: IpSocketBindingsContext<Self::DeviceId>,
455{
456    /// Returns a route for a socket.
457    ///
458    /// If `device` is specified, the available routes are limited to those that
459    /// egress over the device.
460    fn lookup_route(
461        &mut self,
462        bindings_ctx: &mut BC,
463        device: Option<&Self::DeviceId>,
464        src_ip: Option<IpDeviceAddr<I::Addr>>,
465        dst_ip: RoutableIpAddr<I::Addr>,
466        transparent: bool,
467        marks: &Marks,
468    ) -> Result<ResolvedRoute<I, Self::DeviceId>, ResolveRouteError>;
469
470    /// Send an IP packet to the next-hop node.
471    fn send_ip_packet<S>(
472        &mut self,
473        bindings_ctx: &mut BC,
474        meta: SendIpPacketMeta<I, &Self::DeviceId, SpecifiedAddr<I::Addr>>,
475        body: S,
476        packet_metadata: IpLayerPacketMetadata<I, Self::WeakAddressId, BC>,
477    ) -> Result<(), IpSendFrameError<S>>
478    where
479        S: TransportPacketSerializer<I>,
480        S::Buffer: BufferMut;
481
482    /// Returns `DeviceId` for the loopback device.
483    fn get_loopback_device(&mut self) -> Option<Self::DeviceId>;
484
485    /// Confirms the provided IP socket destination is reachable.
486    ///
487    /// Implementations must retrieve the next hop given the provided
488    /// IP socket and confirm neighbor reachability for the resolved target
489    /// device.
490    fn confirm_reachable(
491        &mut self,
492        bindings_ctx: &mut BC,
493        dst: SpecifiedAddr<I::Addr>,
494        input: RuleInput<'_, I, Self::DeviceId>,
495    );
496}
497
/// Opts a type in to the blanket implementation of [`IpSocketHandler`].
///
/// A type implementing this marker trait receives the blanket
/// `IpSocketHandler` impl, provided the impl's other requirements are met.
pub trait UseIpSocketHandlerBlanket {}
503
504impl<I, BC, CC> IpSocketHandler<I, BC> for CC
505where
506    I: IpLayerIpExt + IpDeviceStateIpExt,
507    BC: IpSocketBindingsContext<Self::DeviceId>,
508    CC: IpSocketContext<I, BC> + CounterContext<IpCounters<I>> + UseIpSocketHandlerBlanket,
509    CC::DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
510{
511    fn new_ip_socket<O>(
512        &mut self,
513        bindings_ctx: &mut BC,
514        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
515    ) -> Result<IpSock<I, CC::WeakDeviceId>, IpSockCreationError>
516    where
517        O: RouteResolutionOptions<I>,
518    {
519        let IpSocketArgs { device, local_ip, remote_ip, proto, options } = args;
520        let device = device
521            .as_ref()
522            .map(|d| d.as_strong_ref().ok_or(ResolveRouteError::Unreachable))
523            .transpose()?;
524        let device = device.as_ref().map(|d| d.as_ref());
525
526        // Make sure the remote is routable with a local address before creating
527        // the socket. We do not care about the actual destination here because
528        // we will recalculate it when we send a packet so that the best route
529        // available at the time is used for each outgoing packet.
530        let resolved_route = self.lookup_route(
531            bindings_ctx,
532            device,
533            local_ip,
534            remote_ip,
535            options.transparent(),
536            options.marks(),
537        )?;
538        Ok(new_ip_socket(device, resolved_route, remote_ip, proto))
539    }
540
541    fn send_ip_packet<S, O>(
542        &mut self,
543        bindings_ctx: &mut BC,
544        ip_sock: &IpSock<I, CC::WeakDeviceId>,
545        body: S,
546        options: &O,
547        tx_metadata: BC::TxMetadata,
548    ) -> Result<(), IpSockSendError>
549    where
550        S: TransportPacketSerializer<I>,
551        S::Buffer: BufferMut,
552        O: SendOptions<I> + RouteResolutionOptions<I>,
553    {
554        send_ip_packet(self, bindings_ctx, ip_sock, body, options, tx_metadata)
555    }
556
557    fn confirm_reachable<O>(
558        &mut self,
559        bindings_ctx: &mut BC,
560        socket: &IpSock<I, CC::WeakDeviceId>,
561        options: &O,
562    ) where
563        O: RouteResolutionOptions<I>,
564    {
565        let bound_device = socket.device().and_then(|weak| weak.upgrade());
566        let bound_device = bound_device.as_ref();
567        let bound_address = Some((*socket.local_ip()).into());
568        let destination = (*socket.remote_ip()).into();
569        IpSocketContext::confirm_reachable(
570            self,
571            bindings_ctx,
572            destination,
573            RuleInput {
574                packet_origin: PacketOrigin::Local { bound_address, bound_device },
575                marks: options.marks(),
576            },
577        )
578    }
579}
580
581/// Provides hooks for altering route resolution behavior of [`IpSock`].
582///
583/// Must be implemented by the socket option type of an `IpSock` when using it
584/// to call [`IpSocketHandler::new_ip_socket`] or
585/// [`IpSocketHandler::send_ip_packet`]. This is implemented as a trait instead
586/// of an inherent impl on a type so that users of sockets that don't need
587/// certain option types can avoid allocating space for those options.
588// TODO(https://fxbug.dev/323389672): We need a mechanism to inform `IpSock` of
589// changes in the route resolution options when it starts caching previously
590// calculated routes. Any changes to the options here *MUST* cause the route to
591// be re-calculated.
592pub trait RouteResolutionOptions<I: Ip> {
593    /// Whether the socket is transparent.
594    ///
595    /// This allows transparently proxying traffic to the socket, and allows the
596    /// socket to be bound to a non-local address.
597    fn transparent(&self) -> bool;
598
599    /// Returns the marks carried by packets created on the socket.
600    fn marks(&self) -> &Marks;
601}
602
603/// Provides hooks for altering sending behavior of [`IpSock`].
604///
605/// Must be implemented by the socket option type of an `IpSock` when using it
606/// to call [`IpSocketHandler::send_ip_packet`]. This is implemented as a trait
607/// instead of an inherent impl on a type so that users of sockets that don't
608/// need certain option types, like TCP for anything multicast-related, can
609/// avoid allocating space for those options.
610pub trait SendOptions<I: IpExt> {
611    /// Returns the hop limit to set on a packet going to the given destination.
612    ///
613    /// If `Some(u)`, `u` will be used as the hop limit (IPv6) or TTL (IPv4) for
614    /// a packet going to the given destination. Otherwise the default value
615    /// will be used.
616    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8>;
617
618    /// Returns true if outgoing multicast packets should be looped back and
619    /// delivered to local receivers who joined the multicast group.
620    fn multicast_loop(&self) -> bool;
621
622    /// `Some` if the socket can be used to send broadcast packets.
623    fn allow_broadcast(&self) -> Option<I::BroadcastMarker>;
624
625    /// Returns TCLASS/TOS field value that should be set in IP headers.
626    fn dscp_and_ecn(&self) -> DscpAndEcn;
627
628    /// The IP MTU to use for this transmission.
629    ///
630    /// Note that the minimum overall MTU is used considering the device and
631    /// path. This option can be used to restrict an MTU to an upper bound.
632    fn mtu(&self) -> Mtu;
633}
634
/// Empty send and creation options that never override default values.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct DefaultIpSocketOptions;
638
639impl<I: IpExt> SendOptions<I> for DefaultIpSocketOptions {
640    fn hop_limit(&self, _destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
641        None
642    }
643
644    fn multicast_loop(&self) -> bool {
645        false
646    }
647
648    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
649        None
650    }
651
652    fn dscp_and_ecn(&self) -> DscpAndEcn {
653        DscpAndEcn::default()
654    }
655
656    fn mtu(&self) -> Mtu {
657        Mtu::no_limit()
658    }
659}
660
661impl<I: Ip> RouteResolutionOptions<I> for DefaultIpSocketOptions {
662    fn transparent(&self) -> bool {
663        false
664    }
665
666    fn marks(&self) -> &Marks {
667        &Marks::UNMARKED
668    }
669}
670
671/// A trait providing send options delegation to an inner type.
672///
673/// A blanket impl of [`SendOptions`] is provided to all implementers. This
674/// trait has the same shape as `SendOptions` but all the methods provide
675/// default implementations that delegate to the value returned by
676/// `DelegatedSendOptions::Delegate`. For brevity, the default `delegate` is
677/// [`DefaultIpSocketOptions`].
678#[allow(missing_docs)]
679pub trait DelegatedSendOptions<I: IpExt>: OptionDelegationMarker {
680    /// Returns the delegate providing the impl for all default methods.
681    fn delegate(&self) -> &impl SendOptions<I> {
682        &DefaultIpSocketOptions
683    }
684
685    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
686        self.delegate().hop_limit(destination)
687    }
688
689    fn multicast_loop(&self) -> bool {
690        self.delegate().multicast_loop()
691    }
692
693    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
694        self.delegate().allow_broadcast()
695    }
696
697    fn dscp_and_ecn(&self) -> DscpAndEcn {
698        self.delegate().dscp_and_ecn()
699    }
700
701    fn mtu(&self) -> Mtu {
702        self.delegate().mtu()
703    }
704}
705
706impl<O: DelegatedSendOptions<I> + OptionDelegationMarker, I: IpExt> SendOptions<I> for O {
707    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
708        self.hop_limit(destination)
709    }
710
711    fn multicast_loop(&self) -> bool {
712        self.multicast_loop()
713    }
714
715    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
716        self.allow_broadcast()
717    }
718
719    fn dscp_and_ecn(&self) -> DscpAndEcn {
720        self.dscp_and_ecn()
721    }
722
723    fn mtu(&self) -> Mtu {
724        self.mtu()
725    }
726}
727
728/// A trait providing route resolution options delegation to an inner type.
729///
730/// A blanket impl of [`RouteResolutionOptions`] is provided to all
731/// implementers. This trait has the same shape as `RouteResolutionOptions` but
732/// all the methods provide default implementations that delegate to the value
733/// returned by `DelegatedRouteResolutionOptions::Delegate`. For brevity, the
734/// default `delegate` is [`DefaultIpSocketOptions`].
735#[allow(missing_docs)]
736pub trait DelegatedRouteResolutionOptions<I: Ip>: OptionDelegationMarker {
737    /// Returns the delegate providing the impl for all default methods.
738    fn delegate(&self) -> &impl RouteResolutionOptions<I> {
739        &DefaultIpSocketOptions
740    }
741
742    fn transparent(&self) -> bool {
743        self.delegate().transparent()
744    }
745
746    fn marks(&self) -> &Marks {
747        self.delegate().marks()
748    }
749}
750
751impl<O: DelegatedRouteResolutionOptions<I> + OptionDelegationMarker, I: IpExt>
752    RouteResolutionOptions<I> for O
753{
754    fn transparent(&self) -> bool {
755        self.transparent()
756    }
757
758    fn marks(&self) -> &Marks {
759        self.marks()
760    }
761}
762
/// A marker trait enabling the option delegation traits.
///
/// The delegation traits carry an `Ip` parameter, which runs into trait
/// resolution rules; this marker sidesteps them.
pub trait OptionDelegationMarker {}
768
769/// The configurable hop limits for a socket.
770#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
771pub struct SocketHopLimits<I: Ip> {
772    /// Unicast hop limit.
773    pub unicast: Option<NonZeroU8>,
774    /// Multicast hop limit.
775    // TODO(https://fxbug.dev/42059735): Make this an Option<u8> to allow sending
776    // multicast packets destined only for the local machine.
777    pub multicast: Option<NonZeroU8>,
778    /// An unused marker type signifying the IP version for which these hop
779    /// limits are valid. Including this helps prevent using the wrong hop limits
780    /// when operating on dualstack sockets.
781    pub version: IpVersionMarker<I>,
782}
783
784impl<I: Ip> SocketHopLimits<I> {
785    /// Returns a function that updates the unicast hop limit.
786    pub fn set_unicast(value: Option<NonZeroU8>) -> impl FnOnce(&mut Self) {
787        move |limits| limits.unicast = value
788    }
789
790    /// Returns a function that updates the multicast hop limit.
791    pub fn set_multicast(value: Option<NonZeroU8>) -> impl FnOnce(&mut Self) {
792        move |limits| limits.multicast = value
793    }
794
795    /// Returns the hop limits, or the provided defaults if unset.
796    pub fn get_limits_with_defaults(&self, defaults: &HopLimits) -> HopLimits {
797        let Self { unicast, multicast, version: _ } = self;
798        HopLimits {
799            unicast: unicast.unwrap_or(defaults.unicast),
800            multicast: multicast.unwrap_or(defaults.multicast),
801        }
802    }
803
804    /// Returns the appropriate hop limit to use for the given destination addr.
805    pub fn hop_limit_for_dst(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
806        let Self { unicast, multicast, version: _ } = self;
807        if destination.is_multicast() { *multicast } else { *unicast }
808    }
809}
810
811fn new_ip_socket<I, D>(
812    requested_device: Option<&D>,
813    route: ResolvedRoute<I, D>,
814    remote_ip: SocketIpAddr<I::Addr>,
815    proto: I::Proto,
816) -> IpSock<I, D::Weak>
817where
818    I: IpExt,
819    D: StrongDeviceIdentifier,
820{
821    // TODO(https://fxbug.dev/323389672): Cache a reference to the route to
822    // avoid the route lookup on send as long as the routing table hasn't
823    // changed in between these operations.
824    let ResolvedRoute {
825        src_addr,
826        device: route_device,
827        local_delivery_device,
828        next_hop: _,
829        internal_forwarding: _,
830    } = route;
831
832    // If the source or destination address require a device, make sure to
833    // set that in the socket definition. Otherwise defer to what was provided.
834    let socket_device = (src_addr.as_ref().must_have_zone() || remote_ip.as_ref().must_have_zone())
835        .then(|| {
836            // NB: The route device might be loopback, and in such cases
837            // we want to bind the socket to the device the source IP is
838            // assigned to instead.
839            local_delivery_device.unwrap_or(route_device)
840        })
841        .as_ref()
842        .or(requested_device)
843        .map(|d| d.downgrade());
844
845    let definition =
846        IpSockDefinition { local_ip: src_addr, remote_ip, device: socket_device, proto };
847    IpSock { definition }
848}
849
/// Sends `body` on `socket`, resolving the route anew for this packet.
///
/// The steps, in order:
///
/// 1. Resolve a route for the socket's definition using `options`.
/// 2. Reject the send if the next hop is broadcast and `options` does not
///    allow broadcast.
/// 3. Run the filter's LOCAL_EGRESS hook; if the hook rewrote the destination
///    address, resolve the route again for the new destination.
/// 4. Run the filter's forwarding hook when the route uses internal
///    forwarding.
/// 5. Consult the bindings' socket ops egress filter when the TX metadata
///    carries a socket cookie.
/// 6. Hand the packet to [`IpSocketContext::send_ip_packet`] and, for
///    multicast (when looping is enabled) or broadcast traffic leaving a
///    non-loopback device, also deliver a copy through the loopback device.
///
/// A drop decided by one of the filters is reported as `Ok(())`.
///
/// # Errors
///
/// Returns an [`IpSockSendError`] if no route can be resolved, if broadcast is
/// not allowed for a broadcast next hop, if the filtered packet ends up with
/// an unusable source or destination address, if serializing the loopback copy
/// fails, or if sending the primary packet fails. A failure to deliver the
/// loopback copy is only logged.
fn send_ip_packet<I, S, BC, CC, O>(
    core_ctx: &mut CC,
    bindings_ctx: &mut BC,
    socket: &IpSock<I, CC::WeakDeviceId>,
    mut body: S,
    options: &O,
    tx_metadata: BC::TxMetadata,
) -> Result<(), IpSockSendError>
where
    I: IpExt + IpDeviceStateIpExt + FilterIpExt,
    S: TransportPacketSerializer<I>,
    S::Buffer: BufferMut,
    BC: IpSocketBindingsContext<CC::DeviceId>,
    CC: IpSocketContext<I, BC> + CounterContext<IpCounters<I>>,
    CC::DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
    O: SendOptions<I> + RouteResolutionOptions<I>,
{
    trace_duration!("ip::send_packet");

    // Extracted to a function without the serializer parameter to ease code
    // generation.
    //
    // Upgrades the socket's weak device (if any) and looks up a route from
    // `local_ip` to `remote_ip`. A bound device that can no longer be upgraded
    // makes the destination unreachable.
    fn resolve<
        I: IpExt + IpDeviceStateIpExt + FilterIpExt,
        CC: IpSocketContext<I, BC>,
        BC: IpSocketBindingsContext<CC::DeviceId>,
    >(
        core_ctx: &mut CC,
        bindings_ctx: &mut BC,
        device: &Option<CC::WeakDeviceId>,
        local_ip: IpDeviceAddr<I::Addr>,
        remote_ip: RoutableIpAddr<I::Addr>,
        transparent: bool,
        marks: &Marks,
    ) -> Result<ResolvedRoute<I, CC::DeviceId>, IpSockSendError> {
        let device = match device.as_ref().map(|d| d.upgrade()) {
            Some(Some(device)) => Some(device),
            Some(None) => return Err(ResolveRouteError::Unreachable.into()),
            None => None,
        };
        let route = core_ctx
            .lookup_route(
                bindings_ctx,
                device.as_ref(),
                Some(local_ip),
                remote_ip,
                transparent,
                marks,
            )
            .map_err(|e| IpSockSendError::Unroutable(e))?;
        // The lookup was given an explicit source address, so the resolved
        // route is expected to use exactly that address.
        assert_eq!(local_ip, route.src_addr);
        Ok(route)
    }

    let IpSock {
        definition: IpSockDefinition { remote_ip, local_ip, device: socket_device, proto },
    } = socket;
    // The route components are mutable because they are replaced below if the
    // LOCAL_EGRESS hook rewrites the destination.
    let ResolvedRoute {
        src_addr: local_ip,
        device: mut egress_device,
        mut next_hop,
        mut local_delivery_device,
        mut internal_forwarding,
    } = resolve(
        core_ctx,
        bindings_ctx,
        socket_device,
        *local_ip,
        *remote_ip,
        options.transparent(),
        options.marks(),
    )?;

    if matches!(next_hop, NextHop::Broadcast(_)) && options.allow_broadcast().is_none() {
        return Err(IpSockSendError::BroadcastNotAllowed);
    }

    // Remember the destination before filtering so a rewrite by the
    // LOCAL_EGRESS hook can be detected afterwards.
    let previous_dst = remote_ip.addr();
    let mut packet = filter::TxPacket::new(local_ip.addr(), remote_ip.addr(), *proto, &mut body);
    let mut packet_metadata =
        IpLayerPacketMetadata::from_tx_metadata_and_marks(tx_metadata, *options.marks());

    match core_ctx.filter_handler().local_egress_hook(
        bindings_ctx,
        &mut packet,
        &egress_device,
        &mut packet_metadata,
    ) {
        filter::Verdict::Drop => {
            // A filter drop is not reported to the caller as an error.
            packet_metadata.acknowledge_drop();
            return Ok(());
        }
        filter::Verdict::Accept(()) => {}
    }

    // Re-read the addresses from the packet since the hook may have changed
    // them; they must still be usable as a device address and a routable
    // address respectively.
    let Some(mut local_ip) = IpDeviceAddr::new(packet.src_addr()) else {
        packet_metadata.acknowledge_drop();
        return Err(IpSockSendError::Unroutable(ResolveRouteError::NoSrcAddr));
    };
    let Some(remote_ip) = RoutableIpAddr::new(packet.dst_addr()) else {
        packet_metadata.acknowledge_drop();
        return Err(IpSockSendError::Unroutable(ResolveRouteError::Unreachable));
    };

    // If the LOCAL_EGRESS hook ended up rewriting the packet's destination, perform
    // re-routing based on the new destination.
    if remote_ip.addr() != previous_dst {
        let ResolvedRoute {
            src_addr: new_local_ip,
            device: new_device,
            next_hop: new_next_hop,
            local_delivery_device: new_local_delivery_device,
            internal_forwarding: new_internal_forwarding,
        } = match resolve(
            core_ctx,
            bindings_ctx,
            socket_device,
            local_ip,
            remote_ip,
            options.transparent(),
            options.marks(),
        ) {
            Ok(r) => r,
            Err(err) => {
                packet_metadata.acknowledge_drop();
                return Err(err);
            }
        };
        local_ip = new_local_ip;
        egress_device = new_device;
        next_hop = new_next_hop;
        local_delivery_device = new_local_delivery_device;
        internal_forwarding = new_internal_forwarding;
    }

    // NB: Hit the forwarding hook if the route leverages internal forwarding.
    match internal_forwarding {
        InternalForwarding::Used(ingress_device) => {
            match core_ctx.filter_handler().forwarding_hook(
                &mut packet,
                &ingress_device,
                &egress_device,
                &mut packet_metadata,
            ) {
                filter::Verdict::Drop => {
                    packet_metadata.acknowledge_drop();
                    return Ok(());
                }
                filter::Verdict::Accept(()) => {}
            }
        }
        InternalForwarding::NotUsed => {}
    }

    // The socket ops filter is only consulted when the TX metadata identifies
    // the originating socket through a cookie.
    if let Some(socket_cookie) = packet_metadata.tx_metadata().socket_cookie() {
        let egress_filter_result = bindings_ctx.socket_ops_filter().on_egress(
            &packet,
            &egress_device,
            socket_cookie,
            packet_metadata.marks(),
        );

        // TODO(https://fxbug.dev/412426836): Implement congestion signal handling.
        match egress_filter_result {
            SocketEgressFilterResult::Pass { congestion: _ } => (),
            SocketEgressFilterResult::Drop { congestion: _ } => {
                core_ctx.counters().socket_egress_filter_dropped.increment();
                packet_metadata.acknowledge_drop();
                return Ok(());
            }
        }
    }

    // The packet needs to be delivered locally if it's sent to a broadcast
    // or multicast address. For multicast packets this feature can be disabled
    // with IP_MULTICAST_LOOP.
    //
    // NOTE(review): a serialization failure returns through `?` here without
    // calling `packet_metadata.acknowledge_drop()`, unlike the other early
    // exits taken after the metadata was created. Confirm this is intended.

    let loopback_packet = (!egress_device.is_loopback()
        && ((options.multicast_loop() && remote_ip.addr().is_multicast())
            || next_hop.is_broadcast()))
    .then(|| body.serialize_new_buf(PacketConstraints::UNCONSTRAINED, packet::new_buf_vec))
    .transpose()?
    .map(|buf| RawIpBody::new(*proto, local_ip.addr(), remote_ip.addr(), buf));

    // A route with a local delivery device is delivered through loopback;
    // otherwise the destination is derived from the next hop.
    let destination = match &local_delivery_device {
        Some(d) => IpPacketDestination::Loopback(d),
        None => IpPacketDestination::from_next_hop(next_hop, remote_ip.into()),
    };
    let ttl = options.hop_limit(&remote_ip.into());
    let meta = SendIpPacketMeta {
        device: &egress_device,
        src_ip: local_ip.into(),
        dst_ip: remote_ip.into(),
        destination,
        ttl,
        proto: *proto,
        mtu: options.mtu(),
        dscp_and_ecn: options.dscp_and_ecn(),
    };
    IpSocketContext::send_ip_packet(core_ctx, bindings_ctx, meta, body, packet_metadata).or_else(
        |IpSendFrameError { serializer: _, error }| IpSockSendError::from_ip_send_frame(error),
    )?;

    // Deliver the locally looped copy, if one was prepared above. Failures
    // here are logged and do not affect the result of the send.
    match (loopback_packet, core_ctx.get_loopback_device()) {
        (Some(loopback_packet), Some(loopback_device)) => {
            let meta = SendIpPacketMeta {
                device: &loopback_device,
                src_ip: local_ip.into(),
                dst_ip: remote_ip.into(),
                destination: IpPacketDestination::Loopback(&egress_device),
                ttl,
                proto: *proto,
                mtu: options.mtu(),
                dscp_and_ecn: options.dscp_and_ecn(),
            };
            // The looped copy uses default metadata rather than the metadata
            // of the original packet, which was consumed by the send above.
            let packet_metadata = IpLayerPacketMetadata::default();

            // The loopback packet will hit the egress hook. LOCAL_EGRESS hook
            // is not called again.
            IpSocketContext::send_ip_packet(
                core_ctx,
                bindings_ctx,
                meta,
                loopback_packet,
                packet_metadata,
            )
            .unwrap_or_else(|IpSendFrameError { serializer: _, error }| {
                error!("failed to send loopback packet: {error:?}")
            });
        }
        (Some(_loopback_packet), None) => {
            error!("can't send a loopback packet without the loopback device")
        }
        _ => (),
    }

    Ok(())
}
1087
/// Enables a blanket implementation of [`DeviceIpSocketHandler`].
///
/// Implementing this marker trait for a type enables a blanket implementation
/// of `DeviceIpSocketHandler` given the other requirements are met, namely
/// that the type also implements [`IpDeviceMtuContext`] and
/// [`IpSocketContext`].
pub trait UseDeviceIpSocketHandlerBlanket {}
1093
1094impl<I, BC, CC> DeviceIpSocketHandler<I, BC> for CC
1095where
1096    I: IpLayerIpExt + IpDeviceStateIpExt,
1097    BC: IpSocketBindingsContext<CC::DeviceId>,
1098    CC: IpDeviceMtuContext<I> + IpSocketContext<I, BC> + UseDeviceIpSocketHandlerBlanket,
1099{
1100    fn get_mms<O: RouteResolutionOptions<I>>(
1101        &mut self,
1102        bindings_ctx: &mut BC,
1103        ip_sock: &IpSock<I, Self::WeakDeviceId>,
1104        options: &O,
1105    ) -> Result<Mms, MmsError> {
1106        let IpSockDefinition { remote_ip, local_ip, device, proto: _ } = &ip_sock.definition;
1107        let device = device
1108            .as_ref()
1109            .map(|d| d.upgrade().ok_or(ResolveRouteError::Unreachable))
1110            .transpose()?;
1111
1112        let ResolvedRoute {
1113            src_addr: _,
1114            local_delivery_device: _,
1115            device,
1116            next_hop: _,
1117            internal_forwarding: _,
1118        } = self
1119            .lookup_route(
1120                bindings_ctx,
1121                device.as_ref(),
1122                Some(*local_ip),
1123                *remote_ip,
1124                options.transparent(),
1125                options.marks(),
1126            )
1127            .map_err(MmsError::NoDevice)?;
1128        let mtu = self.get_mtu(&device);
1129        // TODO(https://fxbug.dev/42072935): Calculate the options size when they
1130        // are supported.
1131        Mms::from_mtu::<I>(mtu, 0 /* no ip options used */).ok_or(MmsError::MTUTooSmall(mtu))
1132    }
1133}
1134
1135/// IPv6 source address selection as defined in [RFC 6724 Section 5].
1136pub(crate) mod ipv6_source_address_selection {
1137    use net_types::ip::{AddrSubnet, IpAddress as _};
1138
1139    use super::*;
1140
1141    use netstack3_base::Ipv6DeviceAddr;
1142
    /// A source address selection candidate.
    ///
    /// Describes one address together with the properties that the selection
    /// rules in this module compare.
    pub struct SasCandidate<D> {
        /// The candidate address and subnet.
        pub addr_sub: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
        /// True if the address is assigned (i.e. non tentative).
        ///
        /// Candidates that are not assigned are filtered out by
        /// [`select_ipv6_source_address`] before any comparison happens.
        pub assigned: bool,
        /// True if the address is deprecated (i.e. not preferred).
        pub deprecated: bool,
        /// True if the address is temporary (i.e. not permanent).
        pub temporary: bool,
        /// The device this address belongs to.
        pub device: D,
    }
1156
1157    /// Selects the source address for an IPv6 socket using the algorithm
1158    /// defined in [RFC 6724 Section 5].
1159    ///
1160    /// This algorithm is only applicable when the user has not explicitly
1161    /// specified a source address.
1162    ///
1163    /// `remote_ip` is the remote IP address of the socket, `outbound_device` is
1164    /// the device over which outbound traffic to `remote_ip` is sent (according
1165    /// to the forwarding table), and `addresses` is an iterator of all
1166    /// addresses on all devices. The algorithm works by iterating over
1167    /// `addresses` and selecting the address which is most preferred according
1168    /// to a set of selection criteria.
1169    pub fn select_ipv6_source_address<
1170        'a,
1171        D: PartialEq,
1172        A,
1173        I: Iterator<Item = A>,
1174        F: FnMut(&A) -> SasCandidate<D>,
1175    >(
1176        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1177        outbound_device: &D,
1178        addresses: I,
1179        mut get_candidate: F,
1180    ) -> Option<A> {
1181        // Source address selection as defined in RFC 6724 Section 5.
1182        //
1183        // The algorithm operates by defining a partial ordering on available
1184        // source addresses, and choosing one of the best address as defined by
1185        // that ordering (given multiple best addresses, the choice from among
1186        // those is implementation-defined). The partial order is defined in
1187        // terms of a sequence of rules. If a given rule defines an order
1188        // between two addresses, then that is their order. Otherwise, the next
1189        // rule must be consulted, and so on until all of the rules are
1190        // exhausted.
1191
1192        addresses
1193            .map(|item| {
1194                let candidate = get_candidate(&item);
1195                (item, candidate)
1196            })
1197            // Tentative addresses are not considered available to the source
1198            // selection algorithm.
1199            .filter(|(_, candidate)| candidate.assigned)
1200            .max_by(|(_, a), (_, b)| {
1201                select_ipv6_source_address_cmp(remote_ip, outbound_device, a, b)
1202            })
1203            .map(|(item, _candidate)| item)
1204    }
1205
1206    /// Comparison operator used by `select_ipv6_source_address`.
1207    fn select_ipv6_source_address_cmp<D: PartialEq>(
1208        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1209        outbound_device: &D,
1210        a: &SasCandidate<D>,
1211        b: &SasCandidate<D>,
1212    ) -> Ordering {
1213        // TODO(https://fxbug.dev/42123500): Implement rules 4, 5.5, and 6.
1214        let SasCandidate {
1215            addr_sub: a_addr_sub,
1216            assigned: a_assigned,
1217            deprecated: a_deprecated,
1218            temporary: a_temporary,
1219            device: a_device,
1220        } = a;
1221        let SasCandidate {
1222            addr_sub: b_addr_sub,
1223            assigned: b_assigned,
1224            deprecated: b_deprecated,
1225            temporary: b_temporary,
1226            device: b_device,
1227        } = b;
1228
1229        let a_addr = a_addr_sub.addr().into_specified();
1230        let b_addr = b_addr_sub.addr().into_specified();
1231
1232        // Assertions required in order for this implementation to be valid.
1233
1234        // Required by the implementation of Rule 1.
1235        if let Some(remote_ip) = remote_ip {
1236            debug_assert!(!(a_addr == remote_ip && b_addr == remote_ip));
1237        }
1238
1239        // Addresses that are not considered assigned are not valid source
1240        // addresses.
1241        debug_assert!(a_assigned);
1242        debug_assert!(b_assigned);
1243
1244        rule_1(remote_ip, a_addr, b_addr)
1245            .then_with(|| rule_2(remote_ip, a_addr, b_addr))
1246            .then_with(|| rule_3(*a_deprecated, *b_deprecated))
1247            .then_with(|| rule_5(outbound_device, a_device, b_device))
1248            .then_with(|| rule_7(*a_temporary, *b_temporary))
1249            .then_with(|| rule_8(remote_ip, *a_addr_sub, *b_addr_sub))
1250    }
1251
1252    // Assumes that `a` and `b` are not both equal to `remote_ip`.
1253    fn rule_1(
1254        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1255        a: SpecifiedAddr<Ipv6Addr>,
1256        b: SpecifiedAddr<Ipv6Addr>,
1257    ) -> Ordering {
1258        let remote_ip = match remote_ip {
1259            Some(remote_ip) => remote_ip,
1260            None => return Ordering::Equal,
1261        };
1262        if (a == remote_ip) != (b == remote_ip) {
1263            // Rule 1: Prefer same address.
1264            //
1265            // Note that both `a` and `b` cannot be equal to `remote_ip` since
1266            // that would imply that we had added the same address twice to the
1267            // same device.
1268            //
1269            // If `(a == remote_ip) != (b == remote_ip)`, then exactly one of
1270            // them is equal. If this inequality does not hold, then they must
1271            // both be unequal to `remote_ip`. In the first case, we have a tie,
1272            // and in the second case, the rule doesn't apply. In either case,
1273            // we move onto the next rule.
1274            if a == remote_ip { Ordering::Greater } else { Ordering::Less }
1275        } else {
1276            Ordering::Equal
1277        }
1278    }
1279
1280    fn rule_2(
1281        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1282        a: SpecifiedAddr<Ipv6Addr>,
1283        b: SpecifiedAddr<Ipv6Addr>,
1284    ) -> Ordering {
1285        // Scope ordering is defined by the Multicast Scope ID, see
1286        // https://datatracker.ietf.org/doc/html/rfc6724#section-3.1 .
1287        let remote_scope = match remote_ip {
1288            Some(remote_ip) => remote_ip.scope().multicast_scope_id(),
1289            None => return Ordering::Equal,
1290        };
1291        let a_scope = a.scope().multicast_scope_id();
1292        let b_scope = b.scope().multicast_scope_id();
1293        if a_scope < b_scope {
1294            if a_scope < remote_scope { Ordering::Less } else { Ordering::Greater }
1295        } else if a_scope > b_scope {
1296            if b_scope < remote_scope { Ordering::Greater } else { Ordering::Less }
1297        } else {
1298            Ordering::Equal
1299        }
1300    }
1301
1302    fn rule_3(a_deprecated: bool, b_deprecated: bool) -> Ordering {
1303        match (a_deprecated, b_deprecated) {
1304            (true, false) => Ordering::Less,
1305            (true, true) | (false, false) => Ordering::Equal,
1306            (false, true) => Ordering::Greater,
1307        }
1308    }
1309
1310    fn rule_5<D: PartialEq>(outbound_device: &D, a_device: &D, b_device: &D) -> Ordering {
1311        if (a_device == outbound_device) != (b_device == outbound_device) {
1312            // Rule 5: Prefer outgoing interface.
1313            if a_device == outbound_device { Ordering::Greater } else { Ordering::Less }
1314        } else {
1315            Ordering::Equal
1316        }
1317    }
1318
1319    // Prefer temporary addresses following rule 7.
1320    fn rule_7(a_temporary: bool, b_temporary: bool) -> Ordering {
1321        match (a_temporary, b_temporary) {
1322            (true, false) => Ordering::Greater,
1323            (true, true) | (false, false) => Ordering::Equal,
1324            (false, true) => Ordering::Less,
1325        }
1326    }
1327
1328    fn rule_8(
1329        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1330        a: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1331        b: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1332    ) -> Ordering {
1333        let remote_ip = match remote_ip {
1334            Some(remote_ip) => remote_ip,
1335            None => return Ordering::Equal,
1336        };
1337        // Per RFC 6724 Section 2.2:
1338        //
1339        //   We define the common prefix length CommonPrefixLen(S, D) of a
1340        //   source address S and a destination address D as the length of the
1341        //   longest prefix (looking at the most significant, or leftmost, bits)
1342        //   that the two addresses have in common, up to the length of S's
1343        //   prefix (i.e., the portion of the address not including the
1344        //   interface ID).  For example, CommonPrefixLen(fe80::1, fe80::2) is
1345        //   64.
1346        fn common_prefix_len(
1347            src: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1348            dst: SpecifiedAddr<Ipv6Addr>,
1349        ) -> u8 {
1350            core::cmp::min(src.addr().common_prefix_len(&dst), src.subnet().prefix())
1351        }
1352
1353        // Rule 8: Use longest matching prefix.
1354        //
1355        // Note that, per RFC 6724 Section 5:
1356        //
1357        //   Rule 8 MAY be superseded if the implementation has other means of
1358        //   choosing among source addresses.  For example, if the
1359        //   implementation somehow knows which source address will result in
1360        //   the "best" communications performance.
1361        //
1362        // We don't currently make use of this option, but it's an option for
1363        // the future.
1364        common_prefix_len(a, remote_ip).cmp(&common_prefix_len(b, remote_ip))
1365    }
1366
1367    #[cfg(test)]
1368    mod tests {
1369        use net_declare::net_ip_v6;
1370
1371        use super::*;
1372
1373        #[test]
1374        fn test_select_ipv6_source_address() {
1375            // Test the comparison operator used by `select_ipv6_source_address`
1376            // by separately testing each comparison condition.
1377
1378            let remote = SpecifiedAddr::new(net_ip_v6!("2001:0db8:1::")).unwrap();
1379            let local0 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:2::")).unwrap();
1380            let local1 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:3::")).unwrap();
1381            let link_local_remote = SpecifiedAddr::new(net_ip_v6!("fe80::1:2:42")).unwrap();
1382            let link_local = SpecifiedAddr::new(net_ip_v6!("fe80::1:2:4")).unwrap();
1383            let dev0 = &0;
1384            let dev1 = &1;
1385            let dev2 = &2;
1386
1387            // Rule 1: Prefer same address
1388            assert_eq!(rule_1(Some(remote), remote, local0), Ordering::Greater);
1389            assert_eq!(rule_1(Some(remote), local0, remote), Ordering::Less);
1390            assert_eq!(rule_1(Some(remote), local0, local1), Ordering::Equal);
1391            assert_eq!(rule_1(None, local0, local1), Ordering::Equal);
1392
1393            // Rule 2: Prefer appropriate scope
1394            assert_eq!(rule_2(Some(remote), local0, local1), Ordering::Equal);
1395            assert_eq!(rule_2(Some(remote), local1, local0), Ordering::Equal);
1396            assert_eq!(rule_2(Some(remote), local0, link_local), Ordering::Greater);
1397            assert_eq!(rule_2(Some(remote), link_local, local0), Ordering::Less);
1398            assert_eq!(rule_2(Some(link_local_remote), local0, link_local), Ordering::Less);
1399            assert_eq!(rule_2(Some(link_local_remote), link_local, local0), Ordering::Greater);
1400            assert_eq!(rule_1(None, local0, link_local), Ordering::Equal);
1401
1402            // Rule 3: Avoid deprecated states
1403            assert_eq!(rule_3(false, true), Ordering::Greater);
1404            assert_eq!(rule_3(true, false), Ordering::Less);
1405            assert_eq!(rule_3(true, true), Ordering::Equal);
1406            assert_eq!(rule_3(false, false), Ordering::Equal);
1407
1408            // Rule 5: Prefer outgoing interface
1409            assert_eq!(rule_5(dev0, dev0, dev2), Ordering::Greater);
1410            assert_eq!(rule_5(dev0, dev2, dev0), Ordering::Less);
1411            assert_eq!(rule_5(dev0, dev0, dev0), Ordering::Equal);
1412            assert_eq!(rule_5(dev0, dev2, dev2), Ordering::Equal);
1413
1414            // Rule 7: Prefer temporary address.
1415            assert_eq!(rule_7(true, false), Ordering::Greater);
1416            assert_eq!(rule_7(false, true), Ordering::Less);
1417            assert_eq!(rule_7(true, true), Ordering::Equal);
1418            assert_eq!(rule_7(false, false), Ordering::Equal);
1419
1420            // Rule 8: Use longest matching prefix.
1421            {
1422                let new_addr_entry = |addr, prefix_len| AddrSubnet::new(addr, prefix_len).unwrap();
1423
1424                // First, test that the longest prefix match is preferred when
1425                // using addresses whose common prefix length is shorter than
1426                // the subnet prefix length.
1427
1428                // 4 leading 0x01 bytes.
1429                let remote = SpecifiedAddr::new(net_ip_v6!("1111::")).unwrap();
1430                // 3 leading 0x01 bytes.
1431                let local0 = new_addr_entry(net_ip_v6!("1110::"), 64);
1432                // 2 leading 0x01 bytes.
1433                let local1 = new_addr_entry(net_ip_v6!("1100::"), 64);
1434
1435                assert_eq!(rule_8(Some(remote), local0, local1), Ordering::Greater);
1436                assert_eq!(rule_8(Some(remote), local1, local0), Ordering::Less);
1437                assert_eq!(rule_8(Some(remote), local0, local0), Ordering::Equal);
1438                assert_eq!(rule_8(Some(remote), local1, local1), Ordering::Equal);
1439                assert_eq!(rule_8(None, local0, local1), Ordering::Equal);
1440
1441                // Second, test that the common prefix length is capped at the
1442                // subnet prefix length.
1443
1444                // 3 leading 0x01 bytes, but a subnet prefix length of 8 (1 byte).
1445                let local0 = new_addr_entry(net_ip_v6!("1110::"), 8);
1446                // 2 leading 0x01 bytes, but a subnet prefix length of 8 (1 byte).
1447                let local1 = new_addr_entry(net_ip_v6!("1100::"), 8);
1448
1449                assert_eq!(rule_8(Some(remote), local0, local1), Ordering::Equal);
1450                assert_eq!(rule_8(Some(remote), local1, local0), Ordering::Equal);
1451                assert_eq!(rule_8(Some(remote), local0, local0), Ordering::Equal);
1452                assert_eq!(rule_8(Some(remote), local1, local1), Ordering::Equal);
1453                assert_eq!(rule_8(None, local0, local1), Ordering::Equal);
1454            }
1455
1456            {
1457                let new_addr_entry = |addr, device| SasCandidate {
1458                    addr_sub: AddrSubnet::new(addr, 128).unwrap(),
1459                    deprecated: false,
1460                    assigned: true,
1461                    temporary: false,
1462                    device,
1463                };
1464
1465                // If no rules apply, then the two address entries are equal.
1466                assert_eq!(
1467                    select_ipv6_source_address_cmp(
1468                        Some(remote),
1469                        dev0,
1470                        &new_addr_entry(*local0, *dev1),
1471                        &new_addr_entry(*local1, *dev2),
1472                    ),
1473                    Ordering::Equal
1474                );
1475            }
1476        }
1477
1478        #[test]
1479        fn test_select_ipv6_source_address_no_remote() {
1480            // Verify that source address selection correctly applies all
1481            // applicable rules when the remote is `None`.
1482            let dev0 = &0;
1483            let dev1 = &1;
1484            let dev2 = &2;
1485
1486            let local0 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:2::")).unwrap();
1487            let local1 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:3::")).unwrap();
1488
1489            let new_addr_entry = |addr, deprecated, device| SasCandidate {
1490                addr_sub: AddrSubnet::new(addr, 128).unwrap(),
1491                deprecated,
1492                assigned: true,
1493                temporary: false,
1494                device,
1495            };
1496
1497            // Verify that Rule 3 still applies (avoid deprecated states).
1498            assert_eq!(
1499                select_ipv6_source_address_cmp(
1500                    None,
1501                    dev0,
1502                    &new_addr_entry(*local0, false, *dev1),
1503                    &new_addr_entry(*local1, true, *dev2),
1504                ),
1505                Ordering::Greater
1506            );
1507
1508            // Verify that Rule 5 still applies (Prefer outgoing interface).
1509            assert_eq!(
1510                select_ipv6_source_address_cmp(
1511                    None,
1512                    dev0,
1513                    &new_addr_entry(*local0, false, *dev0),
1514                    &new_addr_entry(*local1, false, *dev1),
1515                ),
1516                Ordering::Greater
1517            );
1518        }
1519    }
1520}
1521
1522/// Test fake implementations of the traits defined in the `socket` module.
1523#[cfg(any(test, feature = "testutils"))]
1524pub(crate) mod testutil {
1525    use alloc::boxed::Box;
1526    use alloc::vec::Vec;
1527    use core::num::NonZeroUsize;
1528
1529    use derivative::Derivative;
1530    use net_types::ip::{GenericOverIp, IpAddr, IpAddress, Ipv4, Ipv4Addr, Ipv6, Subnet};
1531    use net_types::{MulticastAddr, Witness as _};
1532    use netstack3_base::testutil::{FakeCoreCtx, FakeStrongDeviceId, FakeWeakDeviceId};
1533    use netstack3_base::{SendFrameContext, SendFrameError};
1534    use netstack3_filter::Tuple;
1535    use netstack3_hashmap::HashMap;
1536
1537    use super::*;
1538    use crate::internal::base::{
1539        BaseTransportIpContext, DEFAULT_HOP_LIMITS, HopLimits, MulticastMembershipHandler,
1540    };
1541    use crate::internal::routing::testutil::FakeIpRoutingCtx;
1542    use crate::internal::routing::{self, RoutingTable};
1543    use crate::internal::types::{Destination, Entry, Metric, RawMetric};
1544
    /// A fake implementation of the traits required by the transport layer from
    /// the IP layer.
    #[derive(Derivative, GenericOverIp)]
    #[generic_over_ip(I, Ip)]
    #[derivative(Default(bound = ""))]
    pub struct FakeIpSocketCtx<I: Ip, D> {
        // The routing table backing this fake.
        pub(crate) table: RoutingTable<I, D>,
        // NOTE(review): presumably the fake routing context holding
        // forwarding state; confirm against `FakeIpRoutingCtx`.
        forwarding: FakeIpRoutingCtx<D>,
        // Per-device fake state, keyed by device ID.
        devices: HashMap<D, FakeDeviceState<I>>,
    }
1555
    /// A trait enabling [`FakeIpSocketCtx`]'s implementations for
    /// [`FakeCoreCtx`] with types that hold a [`FakeIpSocketCtx`] internally.
    pub trait InnerFakeIpSocketCtx<I: Ip, D> {
        /// Gets a mutable reference to the inner fake context.
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D>;
    }
1562
    // A `FakeIpSocketCtx` trivially "holds" itself, so it can be used directly
    // wherever an `InnerFakeIpSocketCtx` is expected.
    impl<I: Ip, D> InnerFakeIpSocketCtx<I, D> for FakeIpSocketCtx<I, D> {
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
            // The context is its own inner context.
            self
        }
    }
1568
1569    impl<I: IpExt, D: FakeStrongDeviceId, BC> BaseTransportIpContext<I, BC> for FakeIpSocketCtx<I, D> {
1570        fn get_default_hop_limits(&mut self, device: Option<&D>) -> HopLimits {
1571            device.map_or(DEFAULT_HOP_LIMITS, |device| {
1572                let hop_limit = self.get_device_state(device).default_hop_limit;
1573                HopLimits { unicast: hop_limit, multicast: DEFAULT_HOP_LIMITS.multicast }
1574            })
1575        }
1576
1577        type DevicesWithAddrIter<'a> = Box<dyn Iterator<Item = D> + 'a>;
1578
1579        fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
1580            &mut self,
1581            addr: SpecifiedAddr<I::Addr>,
1582            cb: F,
1583        ) -> O {
1584            cb(Box::new(self.devices.iter().filter_map(move |(device, state)| {
1585                state.addresses.contains(&addr).then(|| device.clone())
1586            })))
1587        }
1588
1589        fn get_original_destination(&mut self, _tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
1590            unimplemented!()
1591        }
1592    }
1593
    // Device identifiers for the fake context: the strong identifier is the
    // fake device type itself and the weak identifier is its associated weak
    // type.
    impl<I: IpExt, D: FakeStrongDeviceId> DeviceIdContext<AnyDevice> for FakeIpSocketCtx<I, D> {
        type DeviceId = D;
        type WeakDeviceId = D::Weak;
    }
1598
1599    impl<I, State, D, Meta, BC> IpSocketHandler<I, BC> for FakeCoreCtx<State, Meta, D>
1600    where
1601        I: IpExt + FilterIpExt,
1602        State: InnerFakeIpSocketCtx<I, D>,
1603        D: FakeStrongDeviceId,
1604        BC: TxMetadataBindingsTypes,
1605        FakeCoreCtx<State, Meta, D>:
1606            SendFrameContext<BC, SendIpPacketMeta<I, Self::DeviceId, SpecifiedAddr<I::Addr>>>,
1607    {
1608        fn new_ip_socket<O>(
1609            &mut self,
1610            _bindings_ctx: &mut BC,
1611            args: IpSocketArgs<'_, Self::DeviceId, I, O>,
1612        ) -> Result<IpSock<I, Self::WeakDeviceId>, IpSockCreationError>
1613        where
1614            O: RouteResolutionOptions<I>,
1615        {
1616            self.state.fake_ip_socket_ctx_mut().new_ip_socket(args)
1617        }
1618
1619        fn send_ip_packet<S, O>(
1620            &mut self,
1621            bindings_ctx: &mut BC,
1622            socket: &IpSock<I, Self::WeakDeviceId>,
1623            body: S,
1624            options: &O,
1625            // NB: Tx metadata plumbing is not supported for fake socket
1626            // contexts. Drop at the end of the scope.
1627            _tx_meta: BC::TxMetadata,
1628        ) -> Result<(), IpSockSendError>
1629        where
1630            S: TransportPacketSerializer<I>,
1631            S::Buffer: BufferMut,
1632            O: SendOptions<I> + RouteResolutionOptions<I>,
1633        {
1634            let meta = self.state.fake_ip_socket_ctx_mut().resolve_send_meta(socket, options)?;
1635            self.send_frame(bindings_ctx, meta, body).or_else(
1636                |SendFrameError { serializer: _, error }| IpSockSendError::from_send_frame(error),
1637            )
1638        }
1639
1640        fn confirm_reachable<O>(
1641            &mut self,
1642            _bindings_ctx: &mut BC,
1643            _socket: &IpSock<I, Self::WeakDeviceId>,
1644            _options: &O,
1645        ) {
1646        }
1647    }
1648
1649    impl<I: IpExt, D: FakeStrongDeviceId, BC> MulticastMembershipHandler<I, BC>
1650        for FakeIpSocketCtx<I, D>
1651    {
1652        fn join_multicast_group(
1653            &mut self,
1654            _bindings_ctx: &mut BC,
1655            device: &Self::DeviceId,
1656            addr: MulticastAddr<<I as Ip>::Addr>,
1657        ) {
1658            let value = self.get_device_state_mut(device).multicast_groups.entry(addr).or_insert(0);
1659            *value = value.checked_add(1).unwrap();
1660        }
1661
1662        fn leave_multicast_group(
1663            &mut self,
1664            _bindings_ctx: &mut BC,
1665            device: &Self::DeviceId,
1666            addr: MulticastAddr<<I as Ip>::Addr>,
1667        ) {
1668            let value = self
1669                .get_device_state_mut(device)
1670                .multicast_groups
1671                .get_mut(&addr)
1672                .unwrap_or_else(|| panic!("no entry for {addr} on {device:?}"));
1673            *value = value.checked_sub(1).unwrap();
1674        }
1675
1676        fn select_device_for_multicast_group(
1677            &mut self,
1678            addr: MulticastAddr<<I as Ip>::Addr>,
1679            _marks: &Marks,
1680        ) -> Result<Self::DeviceId, ResolveRouteError> {
1681            let remote_ip = SocketIpAddr::new_from_multicast(addr);
1682            self.lookup_route(None, None, remote_ip, /* transparent */ false)
1683                .map(|ResolvedRoute { device, .. }| device)
1684        }
1685    }
1686
1687    impl<I, BC, D, State, Meta> BaseTransportIpContext<I, BC> for FakeCoreCtx<State, Meta, D>
1688    where
1689        I: IpExt + FilterIpExt,
1690        D: FakeStrongDeviceId,
1691        State: InnerFakeIpSocketCtx<I, D>,
1692        BC: TxMetadataBindingsTypes,
1693        Self: IpSocketHandler<I, BC, DeviceId = D, WeakDeviceId = FakeWeakDeviceId<D>>,
1694    {
1695        type DevicesWithAddrIter<'a> = Box<dyn Iterator<Item = D> + 'a>;
1696
1697        fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
1698            &mut self,
1699            addr: SpecifiedAddr<I::Addr>,
1700            cb: F,
1701        ) -> O {
1702            BaseTransportIpContext::<I, BC>::with_devices_with_assigned_addr(
1703                self.state.fake_ip_socket_ctx_mut(),
1704                addr,
1705                cb,
1706            )
1707        }
1708
1709        fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits {
1710            BaseTransportIpContext::<I, BC>::get_default_hop_limits(
1711                self.state.fake_ip_socket_ctx_mut(),
1712                device,
1713            )
1714        }
1715
1716        fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
1717            BaseTransportIpContext::<I, BC>::get_original_destination(
1718                self.state.fake_ip_socket_ctx_mut(),
1719                tuple,
1720            )
1721        }
1722    }
1723
    /// A fake context providing [`IpSocketHandler`] for tests.
    ///
    /// Bundles one [`FakeIpSocketCtx`] per IP version so a single value can
    /// serve both IPv4 and IPv6.
    #[derive(Derivative)]
    #[derivative(Default(bound = ""))]
    pub struct FakeDualStackIpSocketCtx<D> {
        /// The IPv4 fake socket context.
        v4: FakeIpSocketCtx<Ipv4, D>,
        /// The IPv6 fake socket context.
        v6: FakeIpSocketCtx<Ipv6, D>,
    }
1731
1732    impl<D: FakeStrongDeviceId> FakeDualStackIpSocketCtx<D> {
1733        /// Creates a new [`FakeDualStackIpSocketCtx`] with `devices`.
1734        pub fn new<A: Into<SpecifiedAddr<IpAddr>>>(
1735            devices: impl IntoIterator<Item = FakeDeviceConfig<D, A>>,
1736        ) -> Self {
1737            let partition =
1738                |v: Vec<A>| -> (Vec<SpecifiedAddr<Ipv4Addr>>, Vec<SpecifiedAddr<Ipv6Addr>>) {
1739                    v.into_iter().fold((Vec::new(), Vec::new()), |(mut v4, mut v6), i| {
1740                        match IpAddr::from(i.into()) {
1741                            IpAddr::V4(a) => v4.push(a),
1742                            IpAddr::V6(a) => v6.push(a),
1743                        }
1744                        (v4, v6)
1745                    })
1746                };
1747
1748            let (v4, v6): (Vec<_>, Vec<_>) = devices
1749                .into_iter()
1750                .map(|FakeDeviceConfig { device, local_ips, remote_ips }| {
1751                    let (local_v4, local_v6) = partition(local_ips);
1752                    let (remote_v4, remote_v6) = partition(remote_ips);
1753                    (
1754                        FakeDeviceConfig {
1755                            device: device.clone(),
1756                            local_ips: local_v4,
1757                            remote_ips: remote_v4,
1758                        },
1759                        FakeDeviceConfig { device, local_ips: local_v6, remote_ips: remote_v6 },
1760                    )
1761                })
1762                .unzip();
1763            Self { v4: FakeIpSocketCtx::new(v4), v6: FakeIpSocketCtx::new(v6) }
1764        }
1765
1766        /// Returns the [`FakeIpSocketCtx`] for IP version `I`.
1767        pub fn inner_mut<I: Ip>(&mut self) -> &mut FakeIpSocketCtx<I, D> {
1768            I::map_ip_out(self, |s| &mut s.v4, |s| &mut s.v6)
1769        }
1770
1771        fn inner<I: Ip>(&self) -> &FakeIpSocketCtx<I, D> {
1772            I::map_ip_out(self, |s| &s.v4, |s| &s.v6)
1773        }
1774
1775        /// Adds a fake direct route to `ip` through `device`.
1776        pub fn add_route(&mut self, device: D, ip: SpecifiedAddr<IpAddr>) {
1777            match IpAddr::from(ip) {
1778                IpAddr::V4(ip) => {
1779                    routing::testutil::add_on_link_routing_entry(&mut self.v4.table, ip, device)
1780                }
1781                IpAddr::V6(ip) => {
1782                    routing::testutil::add_on_link_routing_entry(&mut self.v6.table, ip, device)
1783                }
1784            }
1785        }
1786
1787        /// Adds a fake route to `subnet` through `device`.
1788        pub fn add_subnet_route<A: IpAddress>(&mut self, device: D, subnet: Subnet<A>) {
1789            let entry = Entry {
1790                subnet,
1791                device,
1792                gateway: None,
1793                metric: Metric::ExplicitMetric(RawMetric(0)),
1794            };
1795            A::Version::map_ip::<_, ()>(
1796                entry,
1797                |entry_v4| {
1798                    let _ = routing::testutil::add_entry(&mut self.v4.table, entry_v4)
1799                        .expect("Failed to add route");
1800                },
1801                |entry_v6| {
1802                    let _ = routing::testutil::add_entry(&mut self.v6.table, entry_v6)
1803                        .expect("Failed to add route");
1804                },
1805            );
1806        }
1807
1808        /// Returns a mutable reference to fake device state.
1809        pub fn get_device_state_mut<I: IpExt>(&mut self, device: &D) -> &mut FakeDeviceState<I> {
1810            self.inner_mut::<I>().get_device_state_mut(device)
1811        }
1812
1813        /// Returns the fake multicast memberships.
1814        pub fn multicast_memberships<I: IpExt>(
1815            &self,
1816        ) -> HashMap<(D, MulticastAddr<I::Addr>), NonZeroUsize> {
1817            self.inner::<I>().multicast_memberships()
1818        }
1819    }
1820
1821    impl<I: IpExt, S: InnerFakeIpSocketCtx<I, D>, Meta, D: FakeStrongDeviceId, BC>
1822        MulticastMembershipHandler<I, BC> for FakeCoreCtx<S, Meta, D>
1823    {
1824        fn join_multicast_group(
1825            &mut self,
1826            bindings_ctx: &mut BC,
1827            device: &Self::DeviceId,
1828            addr: MulticastAddr<<I as Ip>::Addr>,
1829        ) {
1830            MulticastMembershipHandler::<I, BC>::join_multicast_group(
1831                self.state.fake_ip_socket_ctx_mut(),
1832                bindings_ctx,
1833                device,
1834                addr,
1835            )
1836        }
1837
1838        fn leave_multicast_group(
1839            &mut self,
1840            bindings_ctx: &mut BC,
1841            device: &Self::DeviceId,
1842            addr: MulticastAddr<<I as Ip>::Addr>,
1843        ) {
1844            MulticastMembershipHandler::<I, BC>::leave_multicast_group(
1845                self.state.fake_ip_socket_ctx_mut(),
1846                bindings_ctx,
1847                device,
1848                addr,
1849            )
1850        }
1851
1852        fn select_device_for_multicast_group(
1853            &mut self,
1854            addr: MulticastAddr<<I as Ip>::Addr>,
1855            marks: &Marks,
1856        ) -> Result<Self::DeviceId, ResolveRouteError> {
1857            MulticastMembershipHandler::<I, BC>::select_device_for_multicast_group(
1858                self.state.fake_ip_socket_ctx_mut(),
1859                addr,
1860                marks,
1861            )
1862        }
1863    }
1864
    // A `FakeCoreCtx` exposes the fake socket context held by its state, which
    // lets the core context itself be used where an inner context is required.
    impl<I: Ip, D, State: InnerFakeIpSocketCtx<I, D>, Meta> InnerFakeIpSocketCtx<I, D>
        for FakeCoreCtx<State, Meta, D>
    {
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
            // Delegate to the wrapped state.
            self.state.fake_ip_socket_ctx_mut()
        }
    }
1872
    // The dual-stack context exposes whichever per-version context matches `I`.
    impl<I: Ip, D: FakeStrongDeviceId> InnerFakeIpSocketCtx<I, D> for FakeDualStackIpSocketCtx<D> {
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
            // Select the IPv4 or IPv6 context based on `I`.
            self.inner_mut::<I>()
        }
    }
1878
    /// A device configuration for fake socket contexts.
    ///
    /// `A` is the address type; it is a version-specific address when building
    /// a [`FakeIpSocketCtx`] and may be a version-agnostic address when
    /// building a [`FakeDualStackIpSocketCtx`].
    #[derive(Clone, GenericOverIp)]
    #[generic_over_ip()]
    pub struct FakeDeviceConfig<D, A> {
        /// The device.
        pub device: D,
        /// The device's local IPs.
        ///
        /// These become the addresses assigned to the fake device.
        pub local_ips: Vec<A>,
        /// The remote IPs reachable from this device.
        ///
        /// An on-link route through `device` is installed for each of them
        /// when the context is created.
        pub remote_ips: Vec<A>,
    }
1890
    /// State associated with a fake device in [`FakeIpSocketCtx`].
    pub struct FakeDeviceState<I: Ip> {
        /// The default hop limit used by the device.
        pub default_hop_limit: NonZeroU8,
        /// The assigned device addresses.
        pub addresses: Vec<SpecifiedAddr<I::Addr>>,
        /// The joined multicast groups.
        ///
        /// Maps each group to the number of outstanding joins. Leaving a group
        /// decrements the count without removing the entry, so a count of zero
        /// means the group is no longer joined.
        pub multicast_groups: HashMap<MulticastAddr<I::Addr>, usize>,
    }
1900
1901    impl<I: Ip> FakeDeviceState<I> {
1902        /// Returns whether this fake device has joined multicast group `addr`.
1903        pub fn is_in_multicast_group(&self, addr: &MulticastAddr<I::Addr>) -> bool {
1904            self.multicast_groups.get(addr).is_some_and(|v| *v != 0)
1905        }
1906    }
1907
1908    impl<I: IpExt, D: FakeStrongDeviceId> FakeIpSocketCtx<I, D> {
1909        /// Creates a new `FakeIpSocketCtx` with the given device
1910        /// configs.
1911        pub fn new(
1912            device_configs: impl IntoIterator<Item = FakeDeviceConfig<D, SpecifiedAddr<I::Addr>>>,
1913        ) -> Self {
1914            let mut table = RoutingTable::default();
1915            let mut devices = HashMap::default();
1916            for FakeDeviceConfig { device, local_ips, remote_ips } in device_configs {
1917                for addr in remote_ips {
1918                    routing::testutil::add_on_link_routing_entry(&mut table, addr, device.clone())
1919                }
1920                let state = FakeDeviceState {
1921                    default_hop_limit: DEFAULT_HOP_LIMITS.unicast,
1922                    addresses: local_ips,
1923                    multicast_groups: Default::default(),
1924                };
1925                assert!(
1926                    devices.insert(device.clone(), state).is_none(),
1927                    "duplicate entries for {device:?}",
1928                );
1929            }
1930
1931            Self { table, devices, forwarding: Default::default() }
1932        }
1933
1934        /// Returns an immutable reference to the fake device state.
1935        pub fn get_device_state(&self, device: &D) -> &FakeDeviceState<I> {
1936            self.devices.get(device).unwrap_or_else(|| panic!("no device {device:?}"))
1937        }
1938
1939        /// Returns a mutable reference to the fake device state.
1940        pub fn get_device_state_mut(&mut self, device: &D) -> &mut FakeDeviceState<I> {
1941            self.devices.get_mut(device).unwrap_or_else(|| panic!("no device {device:?}"))
1942        }
1943
1944        pub(crate) fn multicast_memberships(
1945            &self,
1946        ) -> HashMap<(D, MulticastAddr<I::Addr>), NonZeroUsize> {
1947            self.devices
1948                .iter()
1949                .map(|(device, state)| {
1950                    state.multicast_groups.iter().filter_map(|(group, count)| {
1951                        NonZeroUsize::new(*count).map(|count| ((device.clone(), *group), count))
1952                    })
1953                })
1954                .flatten()
1955                .collect()
1956        }
1957
1958        fn new_ip_socket<O>(
1959            &mut self,
1960            args: IpSocketArgs<'_, D, I, O>,
1961        ) -> Result<IpSock<I, D::Weak>, IpSockCreationError>
1962        where
1963            O: RouteResolutionOptions<I>,
1964        {
1965            let IpSocketArgs { device, local_ip, remote_ip, proto, options } = args;
1966            let device = device
1967                .as_ref()
1968                .map(|d| d.as_strong_ref().ok_or(ResolveRouteError::Unreachable))
1969                .transpose()?;
1970            let device = device.as_ref().map(|d| d.as_ref());
1971            let resolved_route =
1972                self.lookup_route(device, local_ip, remote_ip, options.transparent())?;
1973            Ok(new_ip_socket(device, resolved_route, remote_ip, proto))
1974        }
1975
1976        fn lookup_route(
1977            &mut self,
1978            device: Option<&D>,
1979            local_ip: Option<IpDeviceAddr<I::Addr>>,
1980            addr: RoutableIpAddr<I::Addr>,
1981            transparent: bool,
1982        ) -> Result<ResolvedRoute<I, D>, ResolveRouteError> {
1983            let Self { table, devices, forwarding } = self;
1984            let (destination, ()) = table
1985                .lookup_filter_map(forwarding, device, addr.addr(), |_, d| match &local_ip {
1986                    None => Some(()),
1987                    Some(local_ip) => {
1988                        if transparent {
1989                            return Some(());
1990                        }
1991                        devices.get(d).and_then(|state| {
1992                            state.addresses.contains(local_ip.as_ref()).then_some(())
1993                        })
1994                    }
1995                })
1996                .next()
1997                .ok_or(ResolveRouteError::Unreachable)?;
1998
1999            let Destination { device, next_hop } = destination;
2000            let mut addrs = devices.get(device).unwrap().addresses.iter();
2001            let local_ip = match local_ip {
2002                None => {
2003                    let addr = addrs.next().ok_or(ResolveRouteError::NoSrcAddr)?;
2004                    IpDeviceAddr::new(addr.get()).expect("not valid device addr")
2005                }
2006                Some(local_ip) => {
2007                    if !transparent {
2008                        // We already constrained the set of devices so this
2009                        // should be a given.
2010                        assert!(
2011                            addrs.any(|a| a.get() == local_ip.addr()),
2012                            "didn't find IP {:?} in {:?}",
2013                            local_ip,
2014                            addrs.collect::<Vec<_>>()
2015                        );
2016                    }
2017                    local_ip
2018                }
2019            };
2020
2021            Ok(ResolvedRoute {
2022                src_addr: local_ip,
2023                device: device.clone(),
2024                local_delivery_device: None,
2025                next_hop,
2026                // NB: Keep unit tests simple and skip internal forwarding
2027                // logic. Instead, this is verified by integration tests.
2028                internal_forwarding: InternalForwarding::NotUsed,
2029            })
2030        }
2031
2032        fn resolve_send_meta<O>(
2033            &mut self,
2034            socket: &IpSock<I, D::Weak>,
2035            options: &O,
2036        ) -> Result<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>, IpSockSendError>
2037        where
2038            O: SendOptions<I> + RouteResolutionOptions<I>,
2039        {
2040            let IpSockDefinition { remote_ip, local_ip, device, proto } = &socket.definition;
2041            let device = device
2042                .as_ref()
2043                .map(|d| d.upgrade().ok_or(ResolveRouteError::Unreachable))
2044                .transpose()?;
2045            let ResolvedRoute {
2046                src_addr,
2047                device,
2048                next_hop,
2049                local_delivery_device: _,
2050                internal_forwarding: _,
2051            } = self.lookup_route(
2052                device.as_ref(),
2053                Some(*local_ip),
2054                *remote_ip,
2055                options.transparent(),
2056            )?;
2057
2058            let remote_ip: &SpecifiedAddr<_> = remote_ip.as_ref();
2059
2060            let destination = IpPacketDestination::from_next_hop(next_hop, *remote_ip);
2061            Ok(SendIpPacketMeta {
2062                device,
2063                src_ip: src_addr.into(),
2064                dst_ip: *remote_ip,
2065                destination,
2066                proto: *proto,
2067                ttl: options.hop_limit(remote_ip),
2068                mtu: options.mtu(),
2069                dscp_and_ecn: DscpAndEcn::default(),
2070            })
2071        }
2072    }
2073}