Skip to main content

netstack3_ip/
socket.rs

1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! IPv4 and IPv6 sockets.
6
7use core::cmp::Ordering;
8use core::convert::Infallible;
9use core::num::NonZeroU8;
10
11use log::{debug, error};
12use net_types::ip::{Ip, IpVersionMarker, Ipv6Addr, Mtu};
13use net_types::{MulticastAddress, ScopeableAddress, SpecifiedAddr, Witness as _};
14use netstack3_base::socket::{SocketIpAddr, SocketIpAddrExt as _};
15use netstack3_base::{
16    AnyDevice, CounterContext, DeviceIdContext, DeviceIdentifier, EitherDeviceId, InstantContext,
17    InterfaceProperties, IpDeviceAddr, IpExt, Marks, Mms, NetworkSerializationContext,
18    SendFrameErrorReason, StrongDeviceIdentifier, TxMetadata as _, TxMetadataBindingsTypes,
19    WeakDeviceIdentifier,
20};
21use netstack3_filter::{
22    self as filter, DynTransportSerializer, DynamicTransportSerializer, FilterBindingsContext,
23    FilterHandler as _, FilterIpExt, RawIpBody, SocketEgressFilterResult, SocketOpsFilter,
24    SocketOpsFilterBindingContext, TransportPacketSerializer,
25};
26use netstack3_trace::trace_duration;
27use packet::{
28    BufferMut, NestablePacketBuilder as _, PacketConstraints, SerializeError, Serializer,
29};
30use packet_formats::ip::{DscpAndEcn, IpPacketBuilder as _};
31use thiserror::Error;
32
33use crate::icmp::IcmpErrorHandler;
34use crate::internal::base::{
35    FilterHandlerProvider, IpDeviceMtuContext, IpLayerIpExt, IpLayerPacketMetadata,
36    IpPacketDestination, IpSendFrameError, IpSendFrameErrorReason, ResolveRouteError,
37    SendIpPacketMeta, reject_type_to_icmpv4_error, reject_type_to_icmpv6_error,
38};
39use crate::internal::counters::IpCounters;
40use crate::internal::device::state::IpDeviceStateIpExt;
41use crate::internal::routing::PacketOrigin;
42use crate::internal::routing::rules::RuleInput;
43use crate::internal::types::{InternalForwarding, ResolvedRoute, RoutableIpAddr};
44use crate::{HopLimits, NextHop};
45
46/// The arguments used for creating an [`IpSock`]
47pub struct IpSocketArgs<'a, D: StrongDeviceIdentifier, I: IpExt, O> {
48    /// The device the socket is bound to.
49    pub device: Option<EitherDeviceId<&'a D, &'a D::Weak>>,
50    /// The local IP to use for the connection. One is selected if not provided
51    /// based on the output route.
52    pub local_ip: Option<IpDeviceAddr<I::Addr>>,
53    /// The remote IP address for this connection.
54    pub remote_ip: RoutableIpAddr<I::Addr>,
55    /// The IP protocol in use.
56    pub proto: I::Proto,
57    /// Additional IP layer options.
58    pub options: &'a O,
59}
60/// An execution context defining a type of IP socket.
61pub trait IpSocketHandler<I: IpExt + FilterIpExt, BC: TxMetadataBindingsTypes>:
62    DeviceIdContext<AnyDevice>
63{
64    /// Constructs a new [`IpSock`].
65    ///
66    /// `new_ip_socket` constructs a new `IpSock` to the given remote IP
67    /// address from the given local IP address with the given IP protocol. If
68    /// no local IP address is given, one will be chosen automatically. If
69    /// `device` is `Some`, the socket will be bound to the given device - only
70    /// routes which egress over the device will be used. If no route is
71    /// available which egresses over the device - even if routes are available
72    /// which egress over other devices - the socket will be considered
73    /// unroutable.
74    ///
75    /// `new_ip_socket` returns an error if no route to the remote was found in
76    /// the forwarding table or if the given local IP address is not valid for
77    /// the found route.
78    fn new_ip_socket<O>(
79        &mut self,
80        bindings_ctx: &mut BC,
81        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
82    ) -> Result<IpSock<I, Self::WeakDeviceId>, IpSockCreationError>
83    where
84        O: RouteResolutionOptions<I>;
85
86    /// Sends an IP packet on a socket.
87    ///
88    /// The generated packet has its metadata initialized from `socket`,
89    /// including the source and destination addresses, the Time To Live/Hop
90    /// Limit, and the Protocol/Next Header. The outbound device is also chosen
91    /// based on information stored in the socket.
92    ///
93    /// `mtu` may be used to optionally impose an MTU on the outgoing packet.
94    /// Note that the device's MTU will still be imposed on the packet. That is,
95    /// the smaller of `mtu` and the device's MTU will be imposed on the packet.
96    ///
97    /// If the socket is currently unroutable, an error is returned.
98    fn send_ip_packet<S, O>(
99        &mut self,
100        bindings_ctx: &mut BC,
101        socket: &IpSock<I, Self::WeakDeviceId>,
102        body: S,
103        options: &O,
104        tx_metadata: BC::TxMetadata,
105    ) -> Result<(), IpSockSendError>
106    where
107        S: TransportPacketSerializer<I>,
108        S::Buffer: BufferMut,
109        O: SendOptions<I> + RouteResolutionOptions<I>;
110
111    /// Confirms the provided IP socket destination is reachable.
112    ///
113    /// Implementations must retrieve the next hop given the provided
114    /// IP socket and confirm neighbor reachability for the resolved target
115    /// device.
116    fn confirm_reachable<O>(
117        &mut self,
118        bindings_ctx: &mut BC,
119        socket: &IpSock<I, Self::WeakDeviceId>,
120        options: &O,
121    ) where
122        O: RouteResolutionOptions<I>;
123
124    /// Creates a temporary IP socket and sends a single packet on it.
125    ///
126    /// `local_ip`, `remote_ip`, `proto`, and `options` are passed directly to
127    /// [`IpSocketHandler::new_ip_socket`]. `get_body_from_src_ip` is given the
128    /// source IP address for the packet - which may have been chosen
129    /// automatically if `local_ip` is `None` - and returns the body to be
130    /// encapsulated. This is provided in case the body's contents depend on the
131    /// chosen source IP address.
132    ///
133    /// If `device` is specified, the available routes are limited to those that
134    /// egress over the device.
135    ///
136    /// `mtu` may be used to optionally impose an MTU on the outgoing packet.
137    /// Note that the device's MTU will still be imposed on the packet. That is,
138    /// the smaller of `mtu` and the device's MTU will be imposed on the packet.
139    ///
140    /// # Errors
141    ///
142    /// If an error is encountered while constructing the temporary IP socket
143    /// or sending the packet, `options` will be returned along with the
144    /// error. `get_body_from_src_ip` is fallible, and if there's an error,
145    /// it will be returned as well.
146    fn send_oneshot_ip_packet_with_fallible_serializer<S, E, F, O>(
147        &mut self,
148        bindings_ctx: &mut BC,
149        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
150        tx_metadata: BC::TxMetadata,
151        get_body_from_src_ip: F,
152    ) -> Result<(), SendOneShotIpPacketError<E>>
153    where
154        S: TransportPacketSerializer<I>,
155        S::Buffer: BufferMut,
156        F: FnOnce(IpDeviceAddr<I::Addr>) -> Result<S, E>,
157        O: SendOptions<I> + RouteResolutionOptions<I>,
158    {
159        let options = args.options;
160        let tmp = self
161            .new_ip_socket(bindings_ctx, args)
162            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })?;
163        let packet = get_body_from_src_ip(*tmp.local_ip())
164            .map_err(SendOneShotIpPacketError::SerializeError)?;
165        self.send_ip_packet(bindings_ctx, &tmp, packet, options, tx_metadata)
166            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })
167    }
168
169    /// Like `send_oneshot_ip_packet_with_fallible_serializer`, but a dynamic
170    /// transport serializer is used.
171    ///
172    /// This reduces code generation cost at the expense of some runtime
173    /// overhead.
174    fn send_oneshot_ip_packet_with_dyn_fallible_serializer<S, E, F, O>(
175        &mut self,
176        bindings_ctx: &mut BC,
177        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
178        tx_metadata: BC::TxMetadata,
179        get_body_from_src_ip: F,
180    ) -> Result<(), SendOneShotIpPacketError<E>>
181    where
182        S: DynamicTransportSerializer<I>,
183        F: FnOnce(IpDeviceAddr<I::Addr>) -> Result<S, E>,
184        O: SendOptions<I> + RouteResolutionOptions<I>,
185    {
186        let options = args.options;
187        let tmp = self
188            .new_ip_socket(bindings_ctx, args)
189            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })?;
190        let mut packet = get_body_from_src_ip(*tmp.local_ip())
191            .map_err(SendOneShotIpPacketError::SerializeError)?;
192        self.send_ip_packet(
193            bindings_ctx,
194            &tmp,
195            DynTransportSerializer::new(&mut packet),
196            options,
197            tx_metadata,
198        )
199        .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })
200    }
201
202    /// Sends a one-shot IP packet but with a non-fallible serializer.
203    fn send_oneshot_ip_packet<S, F, O>(
204        &mut self,
205        bindings_ctx: &mut BC,
206        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
207        tx_metadata: BC::TxMetadata,
208        get_body_from_src_ip: F,
209    ) -> Result<(), IpSockCreateAndSendError>
210    where
211        S: TransportPacketSerializer<I>,
212        S::Buffer: BufferMut,
213        F: FnOnce(IpDeviceAddr<I::Addr>) -> S,
214        O: SendOptions<I> + RouteResolutionOptions<I>,
215    {
216        self.send_oneshot_ip_packet_with_fallible_serializer(
217            bindings_ctx,
218            args,
219            tx_metadata,
220            |ip| Ok::<_, Infallible>(get_body_from_src_ip(ip)),
221        )
222        .map_err(|err| match err {
223            SendOneShotIpPacketError::CreateAndSendError { err } => err,
224        })
225    }
226
227    /// Like `send_oneshot_ip_packet`, but a dynamic transport serializer is
228    /// used.
229    ///
230    /// This reduces code generation cost at the expense of some runtime
231    /// overhead.
232    fn send_oneshot_ip_packet_with_dyn_serializer<S, F, O>(
233        &mut self,
234        bindings_ctx: &mut BC,
235        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
236        tx_metadata: BC::TxMetadata,
237        get_body_from_src_ip: F,
238    ) -> Result<(), IpSockCreateAndSendError>
239    where
240        S: DynamicTransportSerializer<I>,
241        F: FnOnce(IpDeviceAddr<I::Addr>) -> S,
242        O: SendOptions<I> + RouteResolutionOptions<I>,
243    {
244        self.send_oneshot_ip_packet_with_dyn_fallible_serializer(
245            bindings_ctx,
246            args,
247            tx_metadata,
248            |ip| Ok::<_, Infallible>(get_body_from_src_ip(ip)),
249        )
250        .map_err(|err| match err {
251            SendOneShotIpPacketError::CreateAndSendError { err } => err,
252        })
253    }
254}
255
256/// An error in sending a packet on an IP socket.
257#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
258pub enum IpSockSendError {
259    /// An MTU was exceeded.
260    ///
261    /// This could be caused by an MTU at any layer of the stack, including both
262    /// device MTUs and packet format body size limits.
263    #[error("a maximum transmission unit (MTU) was exceeded")]
264    Mtu,
265    /// The socket is currently unroutable.
266    #[error("the socket is currently unroutable: {0}")]
267    Unroutable(#[from] ResolveRouteError),
268    /// The socket operation would've resulted in illegal loopback addresses on
269    /// a non-loopback device.
270    #[error("illegal loopback address")]
271    IllegalLoopbackAddress,
272    /// Broadcast send is not allowed.
273    #[error("broadcast send is not enabled for the socket")]
274    BroadcastNotAllowed,
275}
276
277impl From<SerializeError<Infallible>> for IpSockSendError {
278    fn from(err: SerializeError<Infallible>) -> IpSockSendError {
279        match err {
280            SerializeError::SizeLimitExceeded => IpSockSendError::Mtu,
281        }
282    }
283}
284
285impl IpSockSendError {
286    /// Constructs a `Result` from an [`IpSendFrameErrorReason`] with
287    /// application-visible [`IpSockSendError`]s in the `Err` variant.
288    ///
289    /// Errors that are not bubbled up to applications are dropped.
290    fn from_ip_send_frame(e: IpSendFrameErrorReason) -> Result<(), Self> {
291        match e {
292            IpSendFrameErrorReason::Device(d) => Self::from_send_frame(d),
293            IpSendFrameErrorReason::IllegalLoopbackAddress => Err(Self::IllegalLoopbackAddress),
294        }
295    }
296
297    /// Constructs a `Result` from a [`SendFrameErrorReason`] with
298    /// application-visible [`IpSockSendError`]s in the `Err` variant.
299    ///
300    /// Errors that are not bubbled up to applications are dropped.
301    fn from_send_frame(e: SendFrameErrorReason) -> Result<(), Self> {
302        match e {
303            SendFrameErrorReason::Alloc | SendFrameErrorReason::QueueFull => Ok(()),
304            SendFrameErrorReason::SizeConstraintsViolation => Err(Self::Mtu),
305        }
306    }
307}
308
309/// An error in sending a packet on a temporary IP socket.
310#[derive(Error, Copy, Clone, Debug)]
311pub enum IpSockCreateAndSendError {
312    /// Cannot send via temporary socket.
313    #[error("cannot send via temporary socket: {0}")]
314    Send(#[from] IpSockSendError),
315    /// The temporary socket could not be created.
316    #[error("the temporary socket could not be created: {0}")]
317    Create(#[from] IpSockCreationError),
318}
319
320/// The error returned by
321/// [`IpSocketHandler::send_oneshot_ip_packet_with_fallible_serializer`].
322#[derive(Debug)]
323#[allow(missing_docs)]
324pub enum SendOneShotIpPacketError<E> {
325    CreateAndSendError { err: IpSockCreateAndSendError },
326    SerializeError(E),
327}
328
329/// Possible errors when retrieving the maximum transport message size.
330#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
331pub enum MmsError {
332    /// Cannot find the device that is used for the ip socket, possibly because
333    /// there is no route.
334    #[error("cannot find the device: {0}")]
335    NoDevice(#[from] ResolveRouteError),
336    /// The MTU provided by the device is too small such that there is no room
337    /// for a transport message at all.
338    #[error("invalid MTU: {0:?}")]
339    MTUTooSmall(Mtu),
340}
341
342/// Gets device related information of an IP socket.
343pub trait DeviceIpSocketHandler<I: IpExt, BC>: DeviceIdContext<AnyDevice> {
344    /// Gets the maximum message size for the transport layer, it equals the
345    /// device MTU minus the IP header size.
346    ///
347    /// This corresponds to the GET_MAXSIZES call described in:
348    /// https://www.rfc-editor.org/rfc/rfc1122#section-3.4
349    fn get_mms<O: RouteResolutionOptions<I>>(
350        &mut self,
351        bindings_ctx: &mut BC,
352        ip_sock: &IpSock<I, Self::WeakDeviceId>,
353        options: &O,
354    ) -> Result<Mms, MmsError>;
355}
356
357/// An error encountered when creating an IP socket.
358#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
359pub enum IpSockCreationError {
360    /// An error occurred while looking up a route.
361    #[error("a route cannot be determined: {0}")]
362    Route(#[from] ResolveRouteError),
363}
364
365/// An IP socket.
366#[derive(Clone, Debug)]
367#[cfg_attr(test, derive(PartialEq))]
368pub struct IpSock<I: IpExt, D> {
369    /// The definition of the socket.
370    ///
371    /// This does not change for the lifetime of the socket.
372    definition: IpSockDefinition<I, D>,
373}
374
375impl<I: IpExt, D> IpSock<I, D> {
376    /// Returns the socket's definition.
377    #[cfg(any(test, feature = "testutils"))]
378    pub fn definition(&self) -> &IpSockDefinition<I, D> {
379        &self.definition
380    }
381}
382
383/// The definition of an IP socket.
384///
385/// These values are part of the socket's definition, and never change.
386#[derive(Clone, Debug, PartialEq)]
387pub struct IpSockDefinition<I: IpExt, D> {
388    /// The socket's remote address.
389    pub remote_ip: SocketIpAddr<I::Addr>,
390    /// The socket's local address.
391    ///
392    /// Guaranteed to be unicast in its subnet since it's always equal to an
393    /// address assigned to the local device. We can't use the `UnicastAddr`
394    /// witness type since `Ipv4Addr` doesn't implement `UnicastAddress`.
395    //
396    // TODO(joshlf): Support unnumbered interfaces. Once we do that, a few
397    // issues arise: A) Does the unicast restriction still apply, and is that
398    // even well-defined for IPv4 in the absence of a subnet? B) Presumably we
399    // have to always bind to a particular interface?
400    pub local_ip: IpDeviceAddr<I::Addr>,
401    /// The socket's bound output device.
402    pub device: Option<D>,
403    /// The IP protocol the socket is bound to.
404    pub proto: I::Proto,
405}
406
407impl<I: IpExt, D> IpSock<I, D> {
408    /// Returns the socket's local IP address.
409    pub fn local_ip(&self) -> &IpDeviceAddr<I::Addr> {
410        &self.definition.local_ip
411    }
412    /// Returns the socket's remote IP address.
413    pub fn remote_ip(&self) -> &SocketIpAddr<I::Addr> {
414        &self.definition.remote_ip
415    }
416    /// Returns the selected output interface for the socket, if any.
417    pub fn device(&self) -> Option<&D> {
418        self.definition.device.as_ref()
419    }
420    /// Returns the socket's protocol.
421    pub fn proto(&self) -> I::Proto {
422        self.definition.proto
423    }
424}
425
426// TODO(joshlf): Once we support configuring transport-layer protocols using
427// type parameters, use that to ensure that `proto` is the right protocol for
428// the caller. We will still need to have a separate enforcement mechanism for
429// raw IP sockets once we support those.
430
431/// The bindings execution context for IP sockets.
432pub trait IpSocketBindingsContext<D>:
433    InstantContext
434    + FilterBindingsContext<D>
435    + TxMetadataBindingsTypes
436    + SocketOpsFilterBindingContext<D>
437{
438}
439impl<
440    D,
441    BC: InstantContext
442        + FilterBindingsContext<D>
443        + TxMetadataBindingsTypes
444        + SocketOpsFilterBindingContext<D>,
445> IpSocketBindingsContext<D> for BC
446{
447}
448
449/// The context required in order to implement [`IpSocketHandler`].
450///
451/// Blanket impls of `IpSocketHandler` are provided in terms of
452/// `IpSocketContext`.
453pub trait IpSocketContext<I, BC>:
454    DeviceIdContext<AnyDevice, DeviceId: InterfaceProperties<BC::DeviceClass>>
455    + FilterHandlerProvider<I, BC>
456    + IcmpErrorHandler<I, BC>
457where
458    I: IpLayerIpExt,
459    BC: IpSocketBindingsContext<Self::DeviceId>,
460{
461    /// Returns a route for a socket.
462    ///
463    /// If `device` is specified, the available routes are limited to those that
464    /// egress over the device.
465    fn lookup_route(
466        &mut self,
467        bindings_ctx: &mut BC,
468        device: Option<&Self::DeviceId>,
469        src_ip: Option<IpDeviceAddr<I::Addr>>,
470        dst_ip: RoutableIpAddr<I::Addr>,
471        transparent: bool,
472        marks: &Marks,
473    ) -> Result<ResolvedRoute<I, Self::DeviceId>, ResolveRouteError>;
474
475    /// Send an IP packet to the next-hop node.
476    fn send_ip_packet<S>(
477        &mut self,
478        bindings_ctx: &mut BC,
479        meta: SendIpPacketMeta<I, &Self::DeviceId, SpecifiedAddr<I::Addr>>,
480        body: S,
481        packet_metadata: IpLayerPacketMetadata<I, Self::WeakAddressId, BC>,
482    ) -> Result<(), IpSendFrameError<S>>
483    where
484        S: TransportPacketSerializer<I>,
485        S::Buffer: BufferMut;
486
487    /// Returns `DeviceId` for the loopback device.
488    fn get_loopback_device(&mut self) -> Option<Self::DeviceId>;
489
490    /// Confirms the provided IP socket destination is reachable.
491    ///
492    /// Implementations must retrieve the next hop given the provided
493    /// IP socket and confirm neighbor reachability for the resolved target
494    /// device.
495    fn confirm_reachable(
496        &mut self,
497        bindings_ctx: &mut BC,
498        dst: SpecifiedAddr<I::Addr>,
499        input: RuleInput<'_, I, Self::DeviceId>,
500    );
501}
502
/// Opts a type into the blanket implementation of [`IpSocketHandler`].
///
/// A type implementing this marker trait receives the blanket
/// `IpSocketHandler` implementation, provided the blanket impl's other
/// requirements are also satisfied.
pub trait UseIpSocketHandlerBlanket {}
508
509impl<I, BC, CC> IpSocketHandler<I, BC> for CC
510where
511    I: IpLayerIpExt + IpDeviceStateIpExt,
512    BC: IpSocketBindingsContext<Self::DeviceId>,
513    CC: IpSocketContext<I, BC> + CounterContext<IpCounters<I>> + UseIpSocketHandlerBlanket,
514    CC::DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
515{
516    fn new_ip_socket<O>(
517        &mut self,
518        bindings_ctx: &mut BC,
519        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
520    ) -> Result<IpSock<I, CC::WeakDeviceId>, IpSockCreationError>
521    where
522        O: RouteResolutionOptions<I>,
523    {
524        let IpSocketArgs { device, local_ip, remote_ip, proto, options } = args;
525        let device = device
526            .as_ref()
527            .map(|d| d.as_strong_ref().ok_or(ResolveRouteError::Unreachable))
528            .transpose()?;
529        let device = device.as_ref().map(|d| d.as_ref());
530
531        // Make sure the remote is routable with a local address before creating
532        // the socket. We do not care about the actual destination here because
533        // we will recalculate it when we send a packet so that the best route
534        // available at the time is used for each outgoing packet.
535        let resolved_route = self.lookup_route(
536            bindings_ctx,
537            device,
538            local_ip,
539            remote_ip,
540            options.transparent(),
541            options.marks(),
542        )?;
543        Ok(new_ip_socket(device, resolved_route, remote_ip, proto))
544    }
545
546    fn send_ip_packet<S, O>(
547        &mut self,
548        bindings_ctx: &mut BC,
549        ip_sock: &IpSock<I, CC::WeakDeviceId>,
550        body: S,
551        options: &O,
552        tx_metadata: BC::TxMetadata,
553    ) -> Result<(), IpSockSendError>
554    where
555        S: TransportPacketSerializer<I>,
556        S::Buffer: BufferMut,
557        O: SendOptions<I> + RouteResolutionOptions<I>,
558    {
559        send_ip_packet(self, bindings_ctx, ip_sock, body, options, tx_metadata)
560    }
561
562    fn confirm_reachable<O>(
563        &mut self,
564        bindings_ctx: &mut BC,
565        socket: &IpSock<I, CC::WeakDeviceId>,
566        options: &O,
567    ) where
568        O: RouteResolutionOptions<I>,
569    {
570        let bound_device = socket.device().and_then(|weak| weak.upgrade());
571        let bound_device = bound_device.as_ref();
572        let bound_address = Some((*socket.local_ip()).into());
573        let destination = (*socket.remote_ip()).into();
574        IpSocketContext::confirm_reachable(
575            self,
576            bindings_ctx,
577            destination,
578            RuleInput {
579                packet_origin: PacketOrigin::Local { bound_address, bound_device },
580                marks: options.marks(),
581            },
582        )
583    }
584}
585
586/// Provides hooks for altering route resolution behavior of [`IpSock`].
587///
588/// Must be implemented by the socket option type of an `IpSock` when using it
589/// to call [`IpSocketHandler::new_ip_socket`] or
590/// [`IpSocketHandler::send_ip_packet`]. This is implemented as a trait instead
591/// of an inherent impl on a type so that users of sockets that don't need
592/// certain option types can avoid allocating space for those options.
593// TODO(https://fxbug.dev/323389672): We need a mechanism to inform `IpSock` of
594// changes in the route resolution options when it starts caching previously
595// calculated routes. Any changes to the options here *MUST* cause the route to
596// be re-calculated.
597pub trait RouteResolutionOptions<I: Ip> {
598    /// Whether the socket is transparent.
599    ///
600    /// This allows transparently proxying traffic to the socket, and allows the
601    /// socket to be bound to a non-local address.
602    fn transparent(&self) -> bool;
603
604    /// Returns the marks carried by packets created on the socket.
605    fn marks(&self) -> &Marks;
606}
607
608/// Provides hooks for altering sending behavior of [`IpSock`].
609///
610/// Must be implemented by the socket option type of an `IpSock` when using it
611/// to call [`IpSocketHandler::send_ip_packet`]. This is implemented as a trait
612/// instead of an inherent impl on a type so that users of sockets that don't
613/// need certain option types, like TCP for anything multicast-related, can
614/// avoid allocating space for those options.
615pub trait SendOptions<I: IpExt> {
616    /// Returns the hop limit to set on a packet going to the given destination.
617    ///
618    /// If `Some(u)`, `u` will be used as the hop limit (IPv6) or TTL (IPv4) for
619    /// a packet going to the given destination. Otherwise the default value
620    /// will be used.
621    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8>;
622
623    /// Returns true if outgoing multicast packets should be looped back and
624    /// delivered to local receivers who joined the multicast group.
625    fn multicast_loop(&self) -> bool;
626
627    /// `Some` if the socket can be used to send broadcast packets.
628    fn allow_broadcast(&self) -> Option<I::BroadcastMarker>;
629
630    /// Returns TCLASS/TOS field value that should be set in IP headers.
631    fn dscp_and_ecn(&self) -> DscpAndEcn;
632
633    /// The IP MTU to use for this transmission.
634    ///
635    /// Note that the minimum overall MTU is used considering the device and
636    /// path. This option can be used to restrict an MTU to an upper bound.
637    fn mtu(&self) -> Mtu;
638}
639
/// Empty send and creation options that never override default values.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub struct DefaultIpSocketOptions;
643
644impl<I: IpExt> SendOptions<I> for DefaultIpSocketOptions {
645    fn hop_limit(&self, _destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
646        None
647    }
648
649    fn multicast_loop(&self) -> bool {
650        false
651    }
652
653    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
654        None
655    }
656
657    fn dscp_and_ecn(&self) -> DscpAndEcn {
658        DscpAndEcn::default()
659    }
660
661    fn mtu(&self) -> Mtu {
662        Mtu::no_limit()
663    }
664}
665
666impl<I: Ip> RouteResolutionOptions<I> for DefaultIpSocketOptions {
667    fn transparent(&self) -> bool {
668        false
669    }
670
671    fn marks(&self) -> &Marks {
672        &Marks::UNMARKED
673    }
674}
675
676/// A trait providing send options delegation to an inner type.
677///
678/// A blanket impl of [`SendOptions`] is provided to all implementers. This
679/// trait has the same shape as `SendOptions` but all the methods provide
680/// default implementations that delegate to the value returned by
681/// `DelegatedSendOptions::Delegate`. For brevity, the default `delegate` is
682/// [`DefaultIpSocketOptions`].
683#[allow(missing_docs)]
684pub trait DelegatedSendOptions<I: IpExt>: OptionDelegationMarker {
685    /// Returns the delegate providing the impl for all default methods.
686    fn delegate(&self) -> &impl SendOptions<I> {
687        &DefaultIpSocketOptions
688    }
689
690    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
691        self.delegate().hop_limit(destination)
692    }
693
694    fn multicast_loop(&self) -> bool {
695        self.delegate().multicast_loop()
696    }
697
698    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
699        self.delegate().allow_broadcast()
700    }
701
702    fn dscp_and_ecn(&self) -> DscpAndEcn {
703        self.delegate().dscp_and_ecn()
704    }
705
706    fn mtu(&self) -> Mtu {
707        self.delegate().mtu()
708    }
709}
710
711impl<O: DelegatedSendOptions<I> + OptionDelegationMarker, I: IpExt> SendOptions<I> for O {
712    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
713        self.hop_limit(destination)
714    }
715
716    fn multicast_loop(&self) -> bool {
717        self.multicast_loop()
718    }
719
720    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
721        self.allow_broadcast()
722    }
723
724    fn dscp_and_ecn(&self) -> DscpAndEcn {
725        self.dscp_and_ecn()
726    }
727
728    fn mtu(&self) -> Mtu {
729        self.mtu()
730    }
731}
732
733/// A trait providing route resolution options delegation to an inner type.
734///
735/// A blanket impl of [`RouteResolutionOptions`] is provided to all
736/// implementers. This trait has the same shape as `RouteResolutionOptions` but
737/// all the methods provide default implementations that delegate to the value
738/// returned by `DelegatedRouteResolutionOptions::Delegate`. For brevity, the
739/// default `delegate` is [`DefaultIpSocketOptions`].
740#[allow(missing_docs)]
741pub trait DelegatedRouteResolutionOptions<I: Ip>: OptionDelegationMarker {
742    /// Returns the delegate providing the impl for all default methods.
743    fn delegate(&self) -> &impl RouteResolutionOptions<I> {
744        &DefaultIpSocketOptions
745    }
746
747    fn transparent(&self) -> bool {
748        self.delegate().transparent()
749    }
750
751    fn marks(&self) -> &Marks {
752        self.delegate().marks()
753    }
754}
755
756impl<O: DelegatedRouteResolutionOptions<I> + OptionDelegationMarker, I: IpExt>
757    RouteResolutionOptions<I> for O
758{
759    fn transparent(&self) -> bool {
760        self.transparent()
761    }
762
763    fn marks(&self) -> &Marks {
764        self.marks()
765    }
766}
767
/// A marker trait that enables the option delegation traits.
///
/// The delegation traits carry an `Ip` parameter, which runs into trait
/// resolution rules; this marker sidesteps them.
pub trait OptionDelegationMarker {}
773
774/// The configurable hop limits for a socket.
775#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
776pub struct SocketHopLimits<I: Ip> {
777    /// Unicast hop limit.
778    pub unicast: Option<NonZeroU8>,
779    /// Multicast hop limit.
780    // TODO(https://fxbug.dev/42059735): Make this an Option<u8> to allow sending
781    // multicast packets destined only for the local machine.
782    pub multicast: Option<NonZeroU8>,
783    /// An unused marker type signifying the IP version for which these hop
784    /// limits are valid. Including this helps prevent using the wrong hop limits
785    /// when operating on dualstack sockets.
786    pub version: IpVersionMarker<I>,
787}
788
789impl<I: Ip> SocketHopLimits<I> {
790    /// Returns a function that updates the unicast hop limit.
791    pub fn set_unicast(value: Option<NonZeroU8>) -> impl FnOnce(&mut Self) {
792        move |limits| limits.unicast = value
793    }
794
795    /// Returns a function that updates the multicast hop limit.
796    pub fn set_multicast(value: Option<NonZeroU8>) -> impl FnOnce(&mut Self) {
797        move |limits| limits.multicast = value
798    }
799
800    /// Returns the hop limits, or the provided defaults if unset.
801    pub fn get_limits_with_defaults(&self, defaults: &HopLimits) -> HopLimits {
802        let Self { unicast, multicast, version: _ } = self;
803        HopLimits {
804            unicast: unicast.unwrap_or(defaults.unicast),
805            multicast: multicast.unwrap_or(defaults.multicast),
806        }
807    }
808
809    /// Returns the appropriate hop limit to use for the given destination addr.
810    pub fn hop_limit_for_dst(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
811        let Self { unicast, multicast, version: _ } = self;
812        if destination.is_multicast() { *multicast } else { *unicast }
813    }
814}
815
816fn new_ip_socket<I, D>(
817    requested_device: Option<&D>,
818    route: ResolvedRoute<I, D>,
819    remote_ip: SocketIpAddr<I::Addr>,
820    proto: I::Proto,
821) -> IpSock<I, D::Weak>
822where
823    I: IpExt,
824    D: StrongDeviceIdentifier,
825{
826    // TODO(https://fxbug.dev/323389672): Cache a reference to the route to
827    // avoid the route lookup on send as long as the routing table hasn't
828    // changed in between these operations.
829    let ResolvedRoute {
830        src_addr,
831        device: route_device,
832        local_delivery_device,
833        next_hop: _,
834        internal_forwarding: _,
835    } = route;
836
837    // If the source or destination address require a device, make sure to
838    // set that in the socket definition. Otherwise defer to what was provided.
839    let socket_device = (src_addr.as_ref().must_have_zone() || remote_ip.as_ref().must_have_zone())
840        .then(|| {
841            // NB: The route device might be loopback, and in such cases
842            // we want to bind the socket to the device the source IP is
843            // assigned to instead.
844            local_delivery_device.unwrap_or(route_device)
845        })
846        .as_ref()
847        .or(requested_device)
848        .map(|d| d.downgrade());
849
850    let definition =
851        IpSockDefinition { local_ip: src_addr, remote_ip, device: socket_device, proto };
852    IpSock { definition }
853}
854
855fn send_ip_packet<I, S, BC, CC, O>(
856    core_ctx: &mut CC,
857    bindings_ctx: &mut BC,
858    socket: &IpSock<I, CC::WeakDeviceId>,
859    mut body: S,
860    options: &O,
861    tx_metadata: BC::TxMetadata,
862) -> Result<(), IpSockSendError>
863where
864    I: IpLayerIpExt,
865    S: TransportPacketSerializer<I>,
866    S::Buffer: BufferMut,
867    BC: IpSocketBindingsContext<CC::DeviceId>,
868    CC: IpSocketContext<I, BC> + CounterContext<IpCounters<I>>,
869    CC::DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
870    O: SendOptions<I> + RouteResolutionOptions<I>,
871{
872    trace_duration!("ip::send_packet");
873
874    // Extracted to a function without the serializer parameter to ease code
875    // generation.
876    fn resolve<
877        I: IpLayerIpExt,
878        CC: IpSocketContext<I, BC>,
879        BC: IpSocketBindingsContext<CC::DeviceId>,
880    >(
881        core_ctx: &mut CC,
882        bindings_ctx: &mut BC,
883        device: &Option<CC::WeakDeviceId>,
884        local_ip: IpDeviceAddr<I::Addr>,
885        remote_ip: RoutableIpAddr<I::Addr>,
886        transparent: bool,
887        marks: &Marks,
888    ) -> Result<ResolvedRoute<I, CC::DeviceId>, IpSockSendError> {
889        let device = match device.as_ref().map(|d| d.upgrade()) {
890            Some(Some(device)) => Some(device),
891            Some(None) => return Err(ResolveRouteError::Unreachable.into()),
892            None => None,
893        };
894        let route = core_ctx
895            .lookup_route(
896                bindings_ctx,
897                device.as_ref(),
898                Some(local_ip),
899                remote_ip,
900                transparent,
901                marks,
902            )
903            .map_err(|e| IpSockSendError::Unroutable(e))?;
904        assert_eq!(local_ip, route.src_addr);
905        Ok(route)
906    }
907
908    let IpSock {
909        definition: IpSockDefinition { remote_ip, local_ip, device: socket_device, proto },
910    } = socket;
911    let ResolvedRoute {
912        src_addr: local_ip,
913        device: mut egress_device,
914        mut next_hop,
915        mut local_delivery_device,
916        mut internal_forwarding,
917    } = resolve(
918        core_ctx,
919        bindings_ctx,
920        socket_device,
921        *local_ip,
922        *remote_ip,
923        options.transparent(),
924        options.marks(),
925    )?;
926
927    if matches!(next_hop, NextHop::Broadcast(_)) && options.allow_broadcast().is_none() {
928        return Err(IpSockSendError::BroadcastNotAllowed);
929    }
930
931    let previous_dst = remote_ip.addr();
932    let mut packet = filter::TxPacket::new(local_ip.addr(), remote_ip.addr(), *proto, &mut body);
933    let mut packet_metadata =
934        IpLayerPacketMetadata::from_tx_metadata_and_marks(tx_metadata, *options.marks());
935
936    let filter_result = core_ctx.filter_handler().local_egress_hook(
937        bindings_ctx,
938        &mut packet,
939        &egress_device,
940        &mut packet_metadata,
941    );
942    match filter_result {
943        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
944            packet_metadata.acknowledge_drop();
945            return Ok(());
946        }
947        filter::Verdict::Stop(filter::DropOrReject::Reject(reject_type)) => {
948            packet_metadata.acknowledge_drop();
949
950            let Some(icmp_error): Option<I::IcmpError> = I::map_ip_out(
951                reject_type,
952                |reject_type| reject_type_to_icmpv4_error(reject_type),
953                |reject_type| reject_type_to_icmpv6_error(reject_type),
954            ) else {
955                debug!("Unsupported reject type: {:?}", reject_type);
956                return Ok(());
957            };
958
959            let src_ip = SocketIpAddr::new_from_witness(local_ip.into_inner().get());
960            let dst_ip = *remote_ip;
961            let ttl = options.hop_limit(&dst_ip.into()).map(|v| v.into()).unwrap_or(1);
962            let packet_builder = I::PacketBuilder::new(
963                src_ip.into_inner().get(),
964                dst_ip.into_inner().get(),
965                ttl,
966                *proto,
967            );
968            let header_len = packet_builder.constraints().header_len();
969            let ip_frame = packet_builder.wrap_body(body);
970            let packet = match ip_frame.serialize_outer(
971                &mut NetworkSerializationContext::default(),
972                packet::NoReuseBufferProvider(packet::new_buf_vec),
973            ) {
974                Ok(packet) => packet,
975                Err((error, _frame)) => {
976                    debug!("Failed to serialize packet {:?}", error);
977                    return Ok(());
978                }
979            };
980
981            // Invoke `send_icmp_error_message` with the `local_ip` as the
982            // `original_source_ip`, which will result in the ICMP error
983            // message getting sent back to the `socket`.
984            core_ctx.send_icmp_error_message(
985                bindings_ctx,
986                /*device=*/ None,
987                /*frame_dst=*/ None,
988                src_ip,
989                dst_ip,
990                packet,
991                icmp_error,
992                header_len,
993                *proto,
994                &options.marks(),
995            );
996
997            return Ok(());
998        }
999        filter::Verdict::Proceed(filter::Accept) => {}
1000    }
1001
1002    let Some(mut local_ip) = IpDeviceAddr::new(packet.src_addr()) else {
1003        packet_metadata.acknowledge_drop();
1004        return Err(IpSockSendError::Unroutable(ResolveRouteError::NoSrcAddr));
1005    };
1006    let Some(remote_ip) = RoutableIpAddr::new(packet.dst_addr()) else {
1007        packet_metadata.acknowledge_drop();
1008        return Err(IpSockSendError::Unroutable(ResolveRouteError::Unreachable));
1009    };
1010
1011    // If the LOCAL_EGRESS hook ended up rewriting the packet's destination, perform
1012    // re-routing based on the new destination.
1013    if remote_ip.addr() != previous_dst {
1014        let ResolvedRoute {
1015            src_addr: new_local_ip,
1016            device: new_device,
1017            next_hop: new_next_hop,
1018            local_delivery_device: new_local_delivery_device,
1019            internal_forwarding: new_internal_forwarding,
1020        } = match resolve(
1021            core_ctx,
1022            bindings_ctx,
1023            socket_device,
1024            local_ip,
1025            remote_ip,
1026            options.transparent(),
1027            options.marks(),
1028        ) {
1029            Ok(r) => r,
1030            Err(err) => {
1031                packet_metadata.acknowledge_drop();
1032                return Err(err);
1033            }
1034        };
1035        local_ip = new_local_ip;
1036        egress_device = new_device;
1037        next_hop = new_next_hop;
1038        local_delivery_device = new_local_delivery_device;
1039        internal_forwarding = new_internal_forwarding;
1040    }
1041
1042    // NB: Hit the forwarding hook if the route leverages internal forwarding.
1043    match internal_forwarding {
1044        InternalForwarding::Used(ingress_device) => {
1045            match core_ctx.filter_handler().forwarding_hook(
1046                &mut packet,
1047                &ingress_device,
1048                &egress_device,
1049                &mut packet_metadata,
1050            ) {
1051                filter::Verdict::Stop(filter::DropOrReject::Drop) => {
1052                    packet_metadata.acknowledge_drop();
1053                    return Ok(());
1054                }
1055                filter::Verdict::Stop(filter::DropOrReject::Reject(_reject_type)) => {
1056                    // TODO(https://fxbug.dev/466098884): Send reject packet.
1057                    packet_metadata.acknowledge_drop();
1058                    return Ok(());
1059                }
1060                filter::Verdict::Proceed(filter::Accept) => {}
1061            }
1062        }
1063        InternalForwarding::NotUsed => {}
1064    }
1065
1066    if let Some(socket_cookie) = packet_metadata.tx_metadata().socket_cookie() {
1067        let egress_filter_result = bindings_ctx.socket_ops_filter().on_egress(
1068            &packet,
1069            &egress_device,
1070            socket_cookie,
1071            packet_metadata.marks(),
1072        );
1073
1074        // TODO(https://fxbug.dev/412426836): Implement congestion signal handling.
1075        match egress_filter_result {
1076            SocketEgressFilterResult::Pass { congestion: _ } => (),
1077            SocketEgressFilterResult::Drop { congestion: _ } => {
1078                core_ctx.counters().socket_egress_filter_dropped.increment();
1079                packet_metadata.acknowledge_drop();
1080                return Ok(());
1081            }
1082        }
1083    }
1084
1085    // The packet needs to be delivered locally if it's sent to a broadcast
1086    // or multicast address. For multicast packets this feature can be disabled
1087    // with IP_MULTICAST_LOOP.
1088
1089    let loopback_packet = (!egress_device.is_loopback()
1090        && ((options.multicast_loop() && remote_ip.addr().is_multicast())
1091            || next_hop.is_broadcast()))
1092    .then(|| {
1093        body.serialize_new_buf(
1094            &mut NetworkSerializationContext::default(),
1095            PacketConstraints::UNCONSTRAINED,
1096            packet::new_buf_vec,
1097        )
1098    })
1099    .transpose()?
1100    .map(|buf| RawIpBody::new(*proto, local_ip.addr(), remote_ip.addr(), buf));
1101
1102    let destination = match &local_delivery_device {
1103        Some(d) => IpPacketDestination::Loopback(d),
1104        None => IpPacketDestination::from_next_hop(next_hop, remote_ip.into()),
1105    };
1106    let ttl = options.hop_limit(&remote_ip.into());
1107    let meta = SendIpPacketMeta {
1108        device: &egress_device,
1109        src_ip: local_ip.into(),
1110        dst_ip: remote_ip.into(),
1111        destination,
1112        ttl,
1113        proto: *proto,
1114        mtu: options.mtu(),
1115        dscp_and_ecn: options.dscp_and_ecn(),
1116    };
1117    IpSocketContext::send_ip_packet(core_ctx, bindings_ctx, meta, body, packet_metadata).or_else(
1118        |IpSendFrameError { serializer: _, error }| IpSockSendError::from_ip_send_frame(error),
1119    )?;
1120
1121    match (loopback_packet, core_ctx.get_loopback_device()) {
1122        (Some(loopback_packet), Some(loopback_device)) => {
1123            let meta = SendIpPacketMeta {
1124                device: &loopback_device,
1125                src_ip: local_ip.into(),
1126                dst_ip: remote_ip.into(),
1127                destination: IpPacketDestination::Loopback(&egress_device),
1128                ttl,
1129                proto: *proto,
1130                mtu: options.mtu(),
1131                dscp_and_ecn: options.dscp_and_ecn(),
1132            };
1133            let packet_metadata = IpLayerPacketMetadata::default();
1134
1135            // The loopback packet will hit the egress hook. LOCAL_EGRESS hook
1136            // is not called again.
1137            IpSocketContext::send_ip_packet(
1138                core_ctx,
1139                bindings_ctx,
1140                meta,
1141                loopback_packet,
1142                packet_metadata,
1143            )
1144            .unwrap_or_else(|IpSendFrameError { serializer: _, error }| {
1145                error!("failed to send loopback packet: {error:?}")
1146            });
1147        }
1148        (Some(_loopback_packet), None) => {
1149            error!("can't send a loopback packet without the loopback device")
1150        }
1151        _ => (),
1152    }
1153
1154    Ok(())
1155}
1156
/// Enables a blanket implementation of [`DeviceIpSocketHandler`].
///
/// Implementing this marker trait for a type enables a blanket implementation
/// of `DeviceIpSocketHandler` given the other requirements are met, i.e. the
/// type also implements `IpDeviceMtuContext` and `IpSocketContext`. The trait
/// itself has no methods.
pub trait UseDeviceIpSocketHandlerBlanket {}
1162
1163impl<I, BC, CC> DeviceIpSocketHandler<I, BC> for CC
1164where
1165    I: IpLayerIpExt + IpDeviceStateIpExt,
1166    BC: IpSocketBindingsContext<CC::DeviceId>,
1167    CC: IpDeviceMtuContext<I> + IpSocketContext<I, BC> + UseDeviceIpSocketHandlerBlanket,
1168{
1169    fn get_mms<O: RouteResolutionOptions<I>>(
1170        &mut self,
1171        bindings_ctx: &mut BC,
1172        ip_sock: &IpSock<I, Self::WeakDeviceId>,
1173        options: &O,
1174    ) -> Result<Mms, MmsError> {
1175        let IpSockDefinition { remote_ip, local_ip, device, proto: _ } = &ip_sock.definition;
1176        let device = device
1177            .as_ref()
1178            .map(|d| d.upgrade().ok_or(ResolveRouteError::Unreachable))
1179            .transpose()?;
1180
1181        let ResolvedRoute {
1182            src_addr: _,
1183            local_delivery_device: _,
1184            device,
1185            next_hop: _,
1186            internal_forwarding: _,
1187        } = self
1188            .lookup_route(
1189                bindings_ctx,
1190                device.as_ref(),
1191                Some(*local_ip),
1192                *remote_ip,
1193                options.transparent(),
1194                options.marks(),
1195            )
1196            .map_err(MmsError::NoDevice)?;
1197        let mtu = self.get_mtu(&device);
1198        // TODO(https://fxbug.dev/42072935): Calculate the options size when they
1199        // are supported.
1200        Mms::from_mtu::<I>(mtu, 0 /* no ip options used */).ok_or(MmsError::MTUTooSmall(mtu))
1201    }
1202}
1203
1204/// IPv6 source address selection as defined in [RFC 6724 Section 5].
1205pub(crate) mod ipv6_source_address_selection {
1206    use net_types::ip::{AddrSubnet, IpAddress as _};
1207
1208    use super::*;
1209
1210    use netstack3_base::Ipv6DeviceAddr;
1211
    /// A source address selection candidate.
    ///
    /// Describes one address together with the properties that the selection
    /// rules in this module compare.
    pub struct SasCandidate<D> {
        /// The candidate address and subnet.
        pub addr_sub: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
        /// True if the address is assigned (i.e. non tentative).
        ///
        /// Candidates that are not assigned are filtered out before any
        /// comparison takes place.
        pub assigned: bool,
        /// True if the address is deprecated (i.e. not preferred).
        pub deprecated: bool,
        /// True if the address is temporary (i.e. not permanent).
        pub temporary: bool,
        /// The device this address belongs to.
        pub device: D,
    }
1225
1226    /// Selects the source address for an IPv6 socket using the algorithm
1227    /// defined in [RFC 6724 Section 5].
1228    ///
1229    /// This algorithm is only applicable when the user has not explicitly
1230    /// specified a source address.
1231    ///
1232    /// `remote_ip` is the remote IP address of the socket, `outbound_device` is
1233    /// the device over which outbound traffic to `remote_ip` is sent (according
1234    /// to the forwarding table), and `addresses` is an iterator of all
1235    /// addresses on all devices. The algorithm works by iterating over
1236    /// `addresses` and selecting the address which is most preferred according
1237    /// to a set of selection criteria.
1238    pub fn select_ipv6_source_address<
1239        'a,
1240        D: PartialEq,
1241        A,
1242        I: Iterator<Item = A>,
1243        F: FnMut(&A) -> SasCandidate<D>,
1244    >(
1245        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1246        outbound_device: &D,
1247        addresses: I,
1248        mut get_candidate: F,
1249    ) -> Option<A> {
1250        // Source address selection as defined in RFC 6724 Section 5.
1251        //
1252        // The algorithm operates by defining a partial ordering on available
1253        // source addresses, and choosing one of the best address as defined by
1254        // that ordering (given multiple best addresses, the choice from among
1255        // those is implementation-defined). The partial order is defined in
1256        // terms of a sequence of rules. If a given rule defines an order
1257        // between two addresses, then that is their order. Otherwise, the next
1258        // rule must be consulted, and so on until all of the rules are
1259        // exhausted.
1260
1261        addresses
1262            .map(|item| {
1263                let candidate = get_candidate(&item);
1264                (item, candidate)
1265            })
1266            // Tentative addresses are not considered available to the source
1267            // selection algorithm.
1268            .filter(|(_, candidate)| candidate.assigned)
1269            .max_by(|(_, a), (_, b)| {
1270                select_ipv6_source_address_cmp(remote_ip, outbound_device, a, b)
1271            })
1272            .map(|(item, _candidate)| item)
1273    }
1274
1275    /// Comparison operator used by `select_ipv6_source_address`.
1276    fn select_ipv6_source_address_cmp<D: PartialEq>(
1277        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1278        outbound_device: &D,
1279        a: &SasCandidate<D>,
1280        b: &SasCandidate<D>,
1281    ) -> Ordering {
1282        // TODO(https://fxbug.dev/42123500): Implement rules 4, 5.5, and 6.
1283        let SasCandidate {
1284            addr_sub: a_addr_sub,
1285            assigned: a_assigned,
1286            deprecated: a_deprecated,
1287            temporary: a_temporary,
1288            device: a_device,
1289        } = a;
1290        let SasCandidate {
1291            addr_sub: b_addr_sub,
1292            assigned: b_assigned,
1293            deprecated: b_deprecated,
1294            temporary: b_temporary,
1295            device: b_device,
1296        } = b;
1297
1298        let a_addr = a_addr_sub.addr().into_specified();
1299        let b_addr = b_addr_sub.addr().into_specified();
1300
1301        // Assertions required in order for this implementation to be valid.
1302
1303        // Required by the implementation of Rule 1.
1304        if let Some(remote_ip) = remote_ip {
1305            debug_assert!(!(a_addr == remote_ip && b_addr == remote_ip));
1306        }
1307
1308        // Addresses that are not considered assigned are not valid source
1309        // addresses.
1310        debug_assert!(a_assigned);
1311        debug_assert!(b_assigned);
1312
1313        rule_1(remote_ip, a_addr, b_addr)
1314            .then_with(|| rule_2(remote_ip, a_addr, b_addr))
1315            .then_with(|| rule_3(*a_deprecated, *b_deprecated))
1316            .then_with(|| rule_5(outbound_device, a_device, b_device))
1317            .then_with(|| rule_7(*a_temporary, *b_temporary))
1318            .then_with(|| rule_8(remote_ip, *a_addr_sub, *b_addr_sub))
1319    }
1320
1321    // Assumes that `a` and `b` are not both equal to `remote_ip`.
1322    fn rule_1(
1323        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1324        a: SpecifiedAddr<Ipv6Addr>,
1325        b: SpecifiedAddr<Ipv6Addr>,
1326    ) -> Ordering {
1327        let remote_ip = match remote_ip {
1328            Some(remote_ip) => remote_ip,
1329            None => return Ordering::Equal,
1330        };
1331        if (a == remote_ip) != (b == remote_ip) {
1332            // Rule 1: Prefer same address.
1333            //
1334            // Note that both `a` and `b` cannot be equal to `remote_ip` since
1335            // that would imply that we had added the same address twice to the
1336            // same device.
1337            //
1338            // If `(a == remote_ip) != (b == remote_ip)`, then exactly one of
1339            // them is equal. If this inequality does not hold, then they must
1340            // both be unequal to `remote_ip`. In the first case, we have a tie,
1341            // and in the second case, the rule doesn't apply. In either case,
1342            // we move onto the next rule.
1343            if a == remote_ip { Ordering::Greater } else { Ordering::Less }
1344        } else {
1345            Ordering::Equal
1346        }
1347    }
1348
1349    fn rule_2(
1350        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1351        a: SpecifiedAddr<Ipv6Addr>,
1352        b: SpecifiedAddr<Ipv6Addr>,
1353    ) -> Ordering {
1354        // Scope ordering is defined by the Multicast Scope ID, see
1355        // https://datatracker.ietf.org/doc/html/rfc6724#section-3.1 .
1356        let remote_scope = match remote_ip {
1357            Some(remote_ip) => remote_ip.scope().multicast_scope_id(),
1358            None => return Ordering::Equal,
1359        };
1360        let a_scope = a.scope().multicast_scope_id();
1361        let b_scope = b.scope().multicast_scope_id();
1362        if a_scope < b_scope {
1363            if a_scope < remote_scope { Ordering::Less } else { Ordering::Greater }
1364        } else if a_scope > b_scope {
1365            if b_scope < remote_scope { Ordering::Greater } else { Ordering::Less }
1366        } else {
1367            Ordering::Equal
1368        }
1369    }
1370
1371    fn rule_3(a_deprecated: bool, b_deprecated: bool) -> Ordering {
1372        match (a_deprecated, b_deprecated) {
1373            (true, false) => Ordering::Less,
1374            (true, true) | (false, false) => Ordering::Equal,
1375            (false, true) => Ordering::Greater,
1376        }
1377    }
1378
1379    fn rule_5<D: PartialEq>(outbound_device: &D, a_device: &D, b_device: &D) -> Ordering {
1380        if (a_device == outbound_device) != (b_device == outbound_device) {
1381            // Rule 5: Prefer outgoing interface.
1382            if a_device == outbound_device { Ordering::Greater } else { Ordering::Less }
1383        } else {
1384            Ordering::Equal
1385        }
1386    }
1387
1388    // Prefer temporary addresses following rule 7.
1389    fn rule_7(a_temporary: bool, b_temporary: bool) -> Ordering {
1390        match (a_temporary, b_temporary) {
1391            (true, false) => Ordering::Greater,
1392            (true, true) | (false, false) => Ordering::Equal,
1393            (false, true) => Ordering::Less,
1394        }
1395    }
1396
1397    fn rule_8(
1398        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1399        a: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1400        b: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1401    ) -> Ordering {
1402        let remote_ip = match remote_ip {
1403            Some(remote_ip) => remote_ip,
1404            None => return Ordering::Equal,
1405        };
1406        // Per RFC 6724 Section 2.2:
1407        //
1408        //   We define the common prefix length CommonPrefixLen(S, D) of a
1409        //   source address S and a destination address D as the length of the
1410        //   longest prefix (looking at the most significant, or leftmost, bits)
1411        //   that the two addresses have in common, up to the length of S's
1412        //   prefix (i.e., the portion of the address not including the
1413        //   interface ID).  For example, CommonPrefixLen(fe80::1, fe80::2) is
1414        //   64.
1415        fn common_prefix_len(
1416            src: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1417            dst: SpecifiedAddr<Ipv6Addr>,
1418        ) -> u8 {
1419            core::cmp::min(src.addr().common_prefix_len(&dst), src.subnet().prefix())
1420        }
1421
1422        // Rule 8: Use longest matching prefix.
1423        //
1424        // Note that, per RFC 6724 Section 5:
1425        //
1426        //   Rule 8 MAY be superseded if the implementation has other means of
1427        //   choosing among source addresses.  For example, if the
1428        //   implementation somehow knows which source address will result in
1429        //   the "best" communications performance.
1430        //
1431        // We don't currently make use of this option, but it's an option for
1432        // the future.
1433        common_prefix_len(a, remote_ip).cmp(&common_prefix_len(b, remote_ip))
1434    }
1435
1436    #[cfg(test)]
1437    mod tests {
1438        use net_declare::net_ip_v6;
1439
1440        use super::*;
1441
1442        #[test]
1443        fn test_select_ipv6_source_address() {
1444            // Test the comparison operator used by `select_ipv6_source_address`
1445            // by separately testing each comparison condition.
1446
1447            let remote = SpecifiedAddr::new(net_ip_v6!("2001:0db8:1::")).unwrap();
1448            let local0 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:2::")).unwrap();
1449            let local1 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:3::")).unwrap();
1450            let link_local_remote = SpecifiedAddr::new(net_ip_v6!("fe80::1:2:42")).unwrap();
1451            let link_local = SpecifiedAddr::new(net_ip_v6!("fe80::1:2:4")).unwrap();
1452            let dev0 = &0;
1453            let dev1 = &1;
1454            let dev2 = &2;
1455
1456            // Rule 1: Prefer same address
1457            assert_eq!(rule_1(Some(remote), remote, local0), Ordering::Greater);
1458            assert_eq!(rule_1(Some(remote), local0, remote), Ordering::Less);
1459            assert_eq!(rule_1(Some(remote), local0, local1), Ordering::Equal);
1460            assert_eq!(rule_1(None, local0, local1), Ordering::Equal);
1461
1462            // Rule 2: Prefer appropriate scope
1463            assert_eq!(rule_2(Some(remote), local0, local1), Ordering::Equal);
1464            assert_eq!(rule_2(Some(remote), local1, local0), Ordering::Equal);
1465            assert_eq!(rule_2(Some(remote), local0, link_local), Ordering::Greater);
1466            assert_eq!(rule_2(Some(remote), link_local, local0), Ordering::Less);
1467            assert_eq!(rule_2(Some(link_local_remote), local0, link_local), Ordering::Less);
1468            assert_eq!(rule_2(Some(link_local_remote), link_local, local0), Ordering::Greater);
1469            assert_eq!(rule_1(None, local0, link_local), Ordering::Equal);
1470
1471            // Rule 3: Avoid deprecated states
1472            assert_eq!(rule_3(false, true), Ordering::Greater);
1473            assert_eq!(rule_3(true, false), Ordering::Less);
1474            assert_eq!(rule_3(true, true), Ordering::Equal);
1475            assert_eq!(rule_3(false, false), Ordering::Equal);
1476
1477            // Rule 5: Prefer outgoing interface
1478            assert_eq!(rule_5(dev0, dev0, dev2), Ordering::Greater);
1479            assert_eq!(rule_5(dev0, dev2, dev0), Ordering::Less);
1480            assert_eq!(rule_5(dev0, dev0, dev0), Ordering::Equal);
1481            assert_eq!(rule_5(dev0, dev2, dev2), Ordering::Equal);
1482
1483            // Rule 7: Prefer temporary address.
1484            assert_eq!(rule_7(true, false), Ordering::Greater);
1485            assert_eq!(rule_7(false, true), Ordering::Less);
1486            assert_eq!(rule_7(true, true), Ordering::Equal);
1487            assert_eq!(rule_7(false, false), Ordering::Equal);
1488
1489            // Rule 8: Use longest matching prefix.
1490            {
1491                let new_addr_entry = |addr, prefix_len| AddrSubnet::new(addr, prefix_len).unwrap();
1492
1493                // First, test that the longest prefix match is preferred when
1494                // using addresses whose common prefix length is shorter than
1495                // the subnet prefix length.
1496
1497                // 4 leading 0x01 bytes.
1498                let remote = SpecifiedAddr::new(net_ip_v6!("1111::")).unwrap();
1499                // 3 leading 0x01 bytes.
1500                let local0 = new_addr_entry(net_ip_v6!("1110::"), 64);
1501                // 2 leading 0x01 bytes.
1502                let local1 = new_addr_entry(net_ip_v6!("1100::"), 64);
1503
1504                assert_eq!(rule_8(Some(remote), local0, local1), Ordering::Greater);
1505                assert_eq!(rule_8(Some(remote), local1, local0), Ordering::Less);
1506                assert_eq!(rule_8(Some(remote), local0, local0), Ordering::Equal);
1507                assert_eq!(rule_8(Some(remote), local1, local1), Ordering::Equal);
1508                assert_eq!(rule_8(None, local0, local1), Ordering::Equal);
1509
1510                // Second, test that the common prefix length is capped at the
1511                // subnet prefix length.
1512
1513                // 3 leading 0x01 bytes, but a subnet prefix length of 8 (1 byte).
1514                let local0 = new_addr_entry(net_ip_v6!("1110::"), 8);
1515                // 2 leading 0x01 bytes, but a subnet prefix length of 8 (1 byte).
1516                let local1 = new_addr_entry(net_ip_v6!("1100::"), 8);
1517
1518                assert_eq!(rule_8(Some(remote), local0, local1), Ordering::Equal);
1519                assert_eq!(rule_8(Some(remote), local1, local0), Ordering::Equal);
1520                assert_eq!(rule_8(Some(remote), local0, local0), Ordering::Equal);
1521                assert_eq!(rule_8(Some(remote), local1, local1), Ordering::Equal);
1522                assert_eq!(rule_8(None, local0, local1), Ordering::Equal);
1523            }
1524
1525            {
1526                let new_addr_entry = |addr, device| SasCandidate {
1527                    addr_sub: AddrSubnet::new(addr, 128).unwrap(),
1528                    deprecated: false,
1529                    assigned: true,
1530                    temporary: false,
1531                    device,
1532                };
1533
1534                // If no rules apply, then the two address entries are equal.
1535                assert_eq!(
1536                    select_ipv6_source_address_cmp(
1537                        Some(remote),
1538                        dev0,
1539                        &new_addr_entry(*local0, *dev1),
1540                        &new_addr_entry(*local1, *dev2),
1541                    ),
1542                    Ordering::Equal
1543                );
1544            }
1545        }
1546
1547        #[test]
1548        fn test_select_ipv6_source_address_no_remote() {
1549            // Verify that source address selection correctly applies all
1550            // applicable rules when the remote is `None`.
1551            let dev0 = &0;
1552            let dev1 = &1;
1553            let dev2 = &2;
1554
1555            let local0 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:2::")).unwrap();
1556            let local1 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:3::")).unwrap();
1557
1558            let new_addr_entry = |addr, deprecated, device| SasCandidate {
1559                addr_sub: AddrSubnet::new(addr, 128).unwrap(),
1560                deprecated,
1561                assigned: true,
1562                temporary: false,
1563                device,
1564            };
1565
1566            // Verify that Rule 3 still applies (avoid deprecated states).
1567            assert_eq!(
1568                select_ipv6_source_address_cmp(
1569                    None,
1570                    dev0,
1571                    &new_addr_entry(*local0, false, *dev1),
1572                    &new_addr_entry(*local1, true, *dev2),
1573                ),
1574                Ordering::Greater
1575            );
1576
1577            // Verify that Rule 5 still applies (Prefer outgoing interface).
1578            assert_eq!(
1579                select_ipv6_source_address_cmp(
1580                    None,
1581                    dev0,
1582                    &new_addr_entry(*local0, false, *dev0),
1583                    &new_addr_entry(*local1, false, *dev1),
1584                ),
1585                Ordering::Greater
1586            );
1587        }
1588    }
1589}
1590
1591/// Test fake implementations of the traits defined in the `socket` module.
1592#[cfg(any(test, feature = "testutils"))]
1593pub(crate) mod testutil {
1594    use alloc::boxed::Box;
1595    use alloc::vec::Vec;
1596    use core::num::NonZeroUsize;
1597
1598    use crate::internal::types::RoutePreference;
1599    use derivative::Derivative;
1600    use net_types::ip::{GenericOverIp, IpAddr, IpAddress, Ipv4, Ipv4Addr, Ipv6, Subnet};
1601    use net_types::{MulticastAddr, Witness as _};
1602    use netstack3_base::testutil::{FakeCoreCtx, FakeStrongDeviceId, FakeWeakDeviceId};
1603    use netstack3_base::{SendFrameContext, SendFrameError};
1604    use netstack3_filter::Tuple;
1605    use netstack3_hashmap::HashMap;
1606
1607    use super::*;
1608    use crate::internal::base::{
1609        BaseTransportIpContext, DEFAULT_HOP_LIMITS, HopLimits, MulticastMembershipHandler,
1610    };
1611    use crate::internal::routing::testutil::FakeIpRoutingCtx;
1612    use crate::internal::routing::{self, RoutingTable};
1613    use crate::internal::types::{Destination, Entry, Metric, RawMetric};
1614
    /// A fake implementation of the traits required by the transport layer from
    /// the IP layer.
    ///
    /// Holds a routing table together with per-device fake state.
    #[derive(Derivative, GenericOverIp)]
    #[generic_over_ip(I, Ip)]
    #[derivative(Default(bound = ""))]
    pub struct FakeIpSocketCtx<I: Ip, D> {
        /// The routing table consulted when resolving routes.
        pub(crate) table: RoutingTable<I, D>,
        /// The fake routing context passed to the routing table on lookups.
        forwarding: FakeIpRoutingCtx<D>,
        /// The fake state of each known device, keyed by device.
        devices: HashMap<D, FakeDeviceState<I>>,
    }
1625
    /// A trait enabling [`FakeIpSocketCtx`]'s implementations for
    /// [`FakeCoreCtx`] with types that hold a [`FakeIpSocketCtx`] internally.
    pub trait InnerFakeIpSocketCtx<I: Ip, D> {
        /// Gets a mutable reference to the inner fake context.
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D>;
    }
1632
    // A `FakeIpSocketCtx` is its own inner fake context.
    impl<I: Ip, D> InnerFakeIpSocketCtx<I, D> for FakeIpSocketCtx<I, D> {
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
            self
        }
    }
1638
1639    impl<I: IpExt, D: FakeStrongDeviceId, BC> BaseTransportIpContext<I, BC> for FakeIpSocketCtx<I, D> {
1640        fn get_default_hop_limits(&mut self, device: Option<&D>) -> HopLimits {
1641            device.map_or(DEFAULT_HOP_LIMITS, |device| {
1642                let hop_limit = self.get_device_state(device).default_hop_limit;
1643                HopLimits { unicast: hop_limit, multicast: DEFAULT_HOP_LIMITS.multicast }
1644            })
1645        }
1646
1647        type DevicesWithAddrIter<'a> = Box<dyn Iterator<Item = D> + 'a>;
1648
1649        fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
1650            &mut self,
1651            addr: SpecifiedAddr<I::Addr>,
1652            cb: F,
1653        ) -> O {
1654            cb(Box::new(self.devices.iter().filter_map(move |(device, state)| {
1655                state.addresses.contains(&addr).then(|| device.clone())
1656            })))
1657        }
1658
1659        fn get_original_destination(&mut self, _tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
1660            unimplemented!()
1661        }
1662    }
1663
    // The fake context uses `D` as its strong device ID and `D`'s associated
    // weak type as its weak device ID.
    impl<I: IpExt, D: FakeStrongDeviceId> DeviceIdContext<AnyDevice> for FakeIpSocketCtx<I, D> {
        type DeviceId = D;
        type WeakDeviceId = D::Weak;
    }
1668
1669    impl<I, State, D, Meta, BC> IpSocketHandler<I, BC> for FakeCoreCtx<State, Meta, D>
1670    where
1671        I: IpExt + FilterIpExt,
1672        State: InnerFakeIpSocketCtx<I, D>,
1673        D: FakeStrongDeviceId,
1674        BC: TxMetadataBindingsTypes,
1675        FakeCoreCtx<State, Meta, D>:
1676            SendFrameContext<BC, SendIpPacketMeta<I, Self::DeviceId, SpecifiedAddr<I::Addr>>>,
1677    {
1678        fn new_ip_socket<O>(
1679            &mut self,
1680            _bindings_ctx: &mut BC,
1681            args: IpSocketArgs<'_, Self::DeviceId, I, O>,
1682        ) -> Result<IpSock<I, Self::WeakDeviceId>, IpSockCreationError>
1683        where
1684            O: RouteResolutionOptions<I>,
1685        {
1686            self.state.fake_ip_socket_ctx_mut().new_ip_socket(args)
1687        }
1688
1689        fn send_ip_packet<S, O>(
1690            &mut self,
1691            bindings_ctx: &mut BC,
1692            socket: &IpSock<I, Self::WeakDeviceId>,
1693            body: S,
1694            options: &O,
1695            // NB: Tx metadata plumbing is not supported for fake socket
1696            // contexts. Drop at the end of the scope.
1697            _tx_meta: BC::TxMetadata,
1698        ) -> Result<(), IpSockSendError>
1699        where
1700            S: TransportPacketSerializer<I>,
1701            S::Buffer: BufferMut,
1702            O: SendOptions<I> + RouteResolutionOptions<I>,
1703        {
1704            let meta = self.state.fake_ip_socket_ctx_mut().resolve_send_meta(socket, options)?;
1705            self.send_frame(bindings_ctx, meta, body).or_else(
1706                |SendFrameError { serializer: _, error }| IpSockSendError::from_send_frame(error),
1707            )
1708        }
1709
1710        fn confirm_reachable<O>(
1711            &mut self,
1712            _bindings_ctx: &mut BC,
1713            _socket: &IpSock<I, Self::WeakDeviceId>,
1714            _options: &O,
1715        ) {
1716        }
1717    }
1718
1719    impl<I: IpExt, D: FakeStrongDeviceId, BC> MulticastMembershipHandler<I, BC>
1720        for FakeIpSocketCtx<I, D>
1721    {
1722        fn join_multicast_group(
1723            &mut self,
1724            _bindings_ctx: &mut BC,
1725            device: &Self::DeviceId,
1726            addr: MulticastAddr<<I as Ip>::Addr>,
1727        ) {
1728            let value = self.get_device_state_mut(device).multicast_groups.entry(addr).or_insert(0);
1729            *value = value.checked_add(1).unwrap();
1730        }
1731
1732        fn leave_multicast_group(
1733            &mut self,
1734            _bindings_ctx: &mut BC,
1735            device: &Self::DeviceId,
1736            addr: MulticastAddr<<I as Ip>::Addr>,
1737        ) {
1738            let value = self
1739                .get_device_state_mut(device)
1740                .multicast_groups
1741                .get_mut(&addr)
1742                .unwrap_or_else(|| panic!("no entry for {addr} on {device:?}"));
1743            *value = value.checked_sub(1).unwrap();
1744        }
1745
1746        fn select_device_for_multicast_group(
1747            &mut self,
1748            addr: MulticastAddr<<I as Ip>::Addr>,
1749            _marks: &Marks,
1750        ) -> Result<Self::DeviceId, ResolveRouteError> {
1751            let remote_ip = SocketIpAddr::new_from_multicast(addr);
1752            self.lookup_route(None, None, remote_ip, /* transparent */ false)
1753                .map(|ResolvedRoute { device, .. }| device)
1754        }
1755    }
1756
1757    impl<I, BC, D, State, Meta> BaseTransportIpContext<I, BC> for FakeCoreCtx<State, Meta, D>
1758    where
1759        I: IpExt + FilterIpExt,
1760        D: FakeStrongDeviceId,
1761        State: InnerFakeIpSocketCtx<I, D>,
1762        BC: TxMetadataBindingsTypes,
1763        Self: IpSocketHandler<I, BC, DeviceId = D, WeakDeviceId = FakeWeakDeviceId<D>>,
1764    {
1765        type DevicesWithAddrIter<'a> = Box<dyn Iterator<Item = D> + 'a>;
1766
1767        fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
1768            &mut self,
1769            addr: SpecifiedAddr<I::Addr>,
1770            cb: F,
1771        ) -> O {
1772            BaseTransportIpContext::<I, BC>::with_devices_with_assigned_addr(
1773                self.state.fake_ip_socket_ctx_mut(),
1774                addr,
1775                cb,
1776            )
1777        }
1778
1779        fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits {
1780            BaseTransportIpContext::<I, BC>::get_default_hop_limits(
1781                self.state.fake_ip_socket_ctx_mut(),
1782                device,
1783            )
1784        }
1785
1786        fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
1787            BaseTransportIpContext::<I, BC>::get_original_destination(
1788                self.state.fake_ip_socket_ctx_mut(),
1789                tuple,
1790            )
1791        }
1792    }
1793
    /// A fake context providing [`IpSocketHandler`] for tests.
    ///
    /// Bundles one [`FakeIpSocketCtx`] per IP version.
    #[derive(Derivative)]
    #[derivative(Default(bound = ""))]
    pub struct FakeDualStackIpSocketCtx<D> {
        /// The IPv4 fake socket context.
        v4: FakeIpSocketCtx<Ipv4, D>,
        /// The IPv6 fake socket context.
        v6: FakeIpSocketCtx<Ipv6, D>,
    }
1801
1802    impl<D: FakeStrongDeviceId> FakeDualStackIpSocketCtx<D> {
1803        /// Creates a new [`FakeDualStackIpSocketCtx`] with `devices`.
1804        pub fn new<A: Into<SpecifiedAddr<IpAddr>>>(
1805            devices: impl IntoIterator<Item = FakeDeviceConfig<D, A>>,
1806        ) -> Self {
1807            let partition =
1808                |v: Vec<A>| -> (Vec<SpecifiedAddr<Ipv4Addr>>, Vec<SpecifiedAddr<Ipv6Addr>>) {
1809                    v.into_iter().fold((Vec::new(), Vec::new()), |(mut v4, mut v6), i| {
1810                        match IpAddr::from(i.into()) {
1811                            IpAddr::V4(a) => v4.push(a),
1812                            IpAddr::V6(a) => v6.push(a),
1813                        }
1814                        (v4, v6)
1815                    })
1816                };
1817
1818            let (v4, v6): (Vec<_>, Vec<_>) = devices
1819                .into_iter()
1820                .map(|FakeDeviceConfig { device, local_ips, remote_ips }| {
1821                    let (local_v4, local_v6) = partition(local_ips);
1822                    let (remote_v4, remote_v6) = partition(remote_ips);
1823                    (
1824                        FakeDeviceConfig {
1825                            device: device.clone(),
1826                            local_ips: local_v4,
1827                            remote_ips: remote_v4,
1828                        },
1829                        FakeDeviceConfig { device, local_ips: local_v6, remote_ips: remote_v6 },
1830                    )
1831                })
1832                .unzip();
1833            Self { v4: FakeIpSocketCtx::new(v4), v6: FakeIpSocketCtx::new(v6) }
1834        }
1835
1836        /// Returns the [`FakeIpSocketCtx`] for IP version `I`.
1837        pub fn inner_mut<I: Ip>(&mut self) -> &mut FakeIpSocketCtx<I, D> {
1838            I::map_ip_out(self, |s| &mut s.v4, |s| &mut s.v6)
1839        }
1840
1841        fn inner<I: Ip>(&self) -> &FakeIpSocketCtx<I, D> {
1842            I::map_ip_out(self, |s| &s.v4, |s| &s.v6)
1843        }
1844
1845        /// Adds a fake direct route to `ip` through `device`.
1846        pub fn add_route(&mut self, device: D, ip: SpecifiedAddr<IpAddr>) {
1847            match IpAddr::from(ip) {
1848                IpAddr::V4(ip) => {
1849                    routing::testutil::add_on_link_routing_entry(&mut self.v4.table, ip, device)
1850                }
1851                IpAddr::V6(ip) => {
1852                    routing::testutil::add_on_link_routing_entry(&mut self.v6.table, ip, device)
1853                }
1854            }
1855        }
1856
1857        /// Adds a fake route to `subnet` through `device`.
1858        pub fn add_subnet_route<A: IpAddress>(&mut self, device: D, subnet: Subnet<A>) {
1859            let entry = Entry {
1860                subnet,
1861                device,
1862                gateway: None,
1863                metric: Metric::ExplicitMetric(RawMetric(0)),
1864                route_preference: RoutePreference::Medium,
1865            };
1866            A::Version::map_ip::<_, ()>(
1867                entry,
1868                |entry_v4| {
1869                    let _ = routing::testutil::add_entry(&mut self.v4.table, entry_v4)
1870                        .expect("Failed to add route");
1871                },
1872                |entry_v6| {
1873                    let _ = routing::testutil::add_entry(&mut self.v6.table, entry_v6)
1874                        .expect("Failed to add route");
1875                },
1876            );
1877        }
1878
1879        /// Returns a mutable reference to fake device state.
1880        pub fn get_device_state_mut<I: IpExt>(&mut self, device: &D) -> &mut FakeDeviceState<I> {
1881            self.inner_mut::<I>().get_device_state_mut(device)
1882        }
1883
1884        /// Returns the fake multicast memberships.
1885        pub fn multicast_memberships<I: IpExt>(
1886            &self,
1887        ) -> HashMap<(D, MulticastAddr<I::Addr>), NonZeroUsize> {
1888            self.inner::<I>().multicast_memberships()
1889        }
1890    }
1891
1892    impl<I: IpExt, S: InnerFakeIpSocketCtx<I, D>, Meta, D: FakeStrongDeviceId, BC>
1893        MulticastMembershipHandler<I, BC> for FakeCoreCtx<S, Meta, D>
1894    {
1895        fn join_multicast_group(
1896            &mut self,
1897            bindings_ctx: &mut BC,
1898            device: &Self::DeviceId,
1899            addr: MulticastAddr<<I as Ip>::Addr>,
1900        ) {
1901            MulticastMembershipHandler::<I, BC>::join_multicast_group(
1902                self.state.fake_ip_socket_ctx_mut(),
1903                bindings_ctx,
1904                device,
1905                addr,
1906            )
1907        }
1908
1909        fn leave_multicast_group(
1910            &mut self,
1911            bindings_ctx: &mut BC,
1912            device: &Self::DeviceId,
1913            addr: MulticastAddr<<I as Ip>::Addr>,
1914        ) {
1915            MulticastMembershipHandler::<I, BC>::leave_multicast_group(
1916                self.state.fake_ip_socket_ctx_mut(),
1917                bindings_ctx,
1918                device,
1919                addr,
1920            )
1921        }
1922
1923        fn select_device_for_multicast_group(
1924            &mut self,
1925            addr: MulticastAddr<<I as Ip>::Addr>,
1926            marks: &Marks,
1927        ) -> Result<Self::DeviceId, ResolveRouteError> {
1928            MulticastMembershipHandler::<I, BC>::select_device_for_multicast_group(
1929                self.state.fake_ip_socket_ctx_mut(),
1930                addr,
1931                marks,
1932            )
1933        }
1934    }
1935
    // A `FakeCoreCtx` exposes the inner fake socket context of its `state`.
    impl<I: Ip, D, State: InnerFakeIpSocketCtx<I, D>, Meta> InnerFakeIpSocketCtx<I, D>
        for FakeCoreCtx<State, Meta, D>
    {
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
            self.state.fake_ip_socket_ctx_mut()
        }
    }
1943
    // The dual-stack context exposes the per-version context matching `I`.
    impl<I: Ip, D: FakeStrongDeviceId> InnerFakeIpSocketCtx<I, D> for FakeDualStackIpSocketCtx<D> {
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
            self.inner_mut::<I>()
        }
    }
1949
    /// A device configuration for fake socket contexts.
    ///
    /// `A` is the address type used for both the local and the remote IPs.
    #[derive(Clone, GenericOverIp)]
    #[generic_over_ip()]
    pub struct FakeDeviceConfig<D, A> {
        /// The device.
        pub device: D,
        /// The device's local IPs.
        pub local_ips: Vec<A>,
        /// The remote IPs reachable from this device.
        pub remote_ips: Vec<A>,
    }
1961
    /// State associated with a fake device in [`FakeIpSocketCtx`].
    pub struct FakeDeviceState<I: Ip> {
        /// The default hop limit used by the device.
        pub default_hop_limit: NonZeroU8,
        /// The assigned device addresses.
        pub addresses: Vec<SpecifiedAddr<I::Addr>>,
        /// The joined multicast groups, each mapped to a join count.
        ///
        /// A group whose count is zero is not considered joined.
        pub multicast_groups: HashMap<MulticastAddr<I::Addr>, usize>,
    }
1971
1972    impl<I: Ip> FakeDeviceState<I> {
1973        /// Returns whether this fake device has joined multicast group `addr`.
1974        pub fn is_in_multicast_group(&self, addr: &MulticastAddr<I::Addr>) -> bool {
1975            self.multicast_groups.get(addr).is_some_and(|v| *v != 0)
1976        }
1977    }
1978
1979    impl<I: IpExt, D: FakeStrongDeviceId> FakeIpSocketCtx<I, D> {
1980        /// Creates a new `FakeIpSocketCtx` with the given device
1981        /// configs.
1982        pub fn new(
1983            device_configs: impl IntoIterator<Item = FakeDeviceConfig<D, SpecifiedAddr<I::Addr>>>,
1984        ) -> Self {
1985            let mut table = RoutingTable::default();
1986            let mut devices = HashMap::default();
1987            for FakeDeviceConfig { device, local_ips, remote_ips } in device_configs {
1988                for addr in remote_ips {
1989                    routing::testutil::add_on_link_routing_entry(&mut table, addr, device.clone())
1990                }
1991                let state = FakeDeviceState {
1992                    default_hop_limit: DEFAULT_HOP_LIMITS.unicast,
1993                    addresses: local_ips,
1994                    multicast_groups: Default::default(),
1995                };
1996                assert!(
1997                    devices.insert(device.clone(), state).is_none(),
1998                    "duplicate entries for {device:?}",
1999                );
2000            }
2001
2002            Self { table, devices, forwarding: Default::default() }
2003        }
2004
2005        /// Returns an immutable reference to the fake device state.
2006        pub fn get_device_state(&self, device: &D) -> &FakeDeviceState<I> {
2007            self.devices.get(device).unwrap_or_else(|| panic!("no device {device:?}"))
2008        }
2009
2010        /// Returns a mutable reference to the fake device state.
2011        pub fn get_device_state_mut(&mut self, device: &D) -> &mut FakeDeviceState<I> {
2012            self.devices.get_mut(device).unwrap_or_else(|| panic!("no device {device:?}"))
2013        }
2014
2015        pub(crate) fn multicast_memberships(
2016            &self,
2017        ) -> HashMap<(D, MulticastAddr<I::Addr>), NonZeroUsize> {
2018            self.devices
2019                .iter()
2020                .map(|(device, state)| {
2021                    state.multicast_groups.iter().filter_map(|(group, count)| {
2022                        NonZeroUsize::new(*count).map(|count| ((device.clone(), *group), count))
2023                    })
2024                })
2025                .flatten()
2026                .collect()
2027        }
2028
2029        fn new_ip_socket<O>(
2030            &mut self,
2031            args: IpSocketArgs<'_, D, I, O>,
2032        ) -> Result<IpSock<I, D::Weak>, IpSockCreationError>
2033        where
2034            O: RouteResolutionOptions<I>,
2035        {
2036            let IpSocketArgs { device, local_ip, remote_ip, proto, options } = args;
2037            let device = device
2038                .as_ref()
2039                .map(|d| d.as_strong_ref().ok_or(ResolveRouteError::Unreachable))
2040                .transpose()?;
2041            let device = device.as_ref().map(|d| d.as_ref());
2042            let resolved_route =
2043                self.lookup_route(device, local_ip, remote_ip, options.transparent())?;
2044            Ok(new_ip_socket(device, resolved_route, remote_ip, proto))
2045        }
2046
2047        fn lookup_route(
2048            &mut self,
2049            device: Option<&D>,
2050            local_ip: Option<IpDeviceAddr<I::Addr>>,
2051            addr: RoutableIpAddr<I::Addr>,
2052            transparent: bool,
2053        ) -> Result<ResolvedRoute<I, D>, ResolveRouteError> {
2054            let Self { table, devices, forwarding } = self;
2055            let (destination, ()) = table
2056                .lookup_filter_map(forwarding, device, addr.addr(), |_, d| match &local_ip {
2057                    None => Some(()),
2058                    Some(local_ip) => {
2059                        if transparent {
2060                            return Some(());
2061                        }
2062                        devices.get(d).and_then(|state| {
2063                            state.addresses.contains(local_ip.as_ref()).then_some(())
2064                        })
2065                    }
2066                })
2067                .next()
2068                .ok_or(ResolveRouteError::Unreachable)?;
2069
2070            let Destination { device, next_hop } = destination;
2071            let mut addrs = devices.get(device).unwrap().addresses.iter();
2072            let local_ip = match local_ip {
2073                None => {
2074                    let addr = addrs.next().ok_or(ResolveRouteError::NoSrcAddr)?;
2075                    IpDeviceAddr::new(addr.get()).expect("not valid device addr")
2076                }
2077                Some(local_ip) => {
2078                    if !transparent {
2079                        // We already constrained the set of devices so this
2080                        // should be a given.
2081                        assert!(
2082                            addrs.any(|a| a.get() == local_ip.addr()),
2083                            "didn't find IP {:?} in {:?}",
2084                            local_ip,
2085                            addrs.collect::<Vec<_>>()
2086                        );
2087                    }
2088                    local_ip
2089                }
2090            };
2091
2092            Ok(ResolvedRoute {
2093                src_addr: local_ip,
2094                device: device.clone(),
2095                local_delivery_device: None,
2096                next_hop,
2097                // NB: Keep unit tests simple and skip internal forwarding
2098                // logic. Instead, this is verified by integration tests.
2099                internal_forwarding: InternalForwarding::NotUsed,
2100            })
2101        }
2102
2103        fn resolve_send_meta<O>(
2104            &mut self,
2105            socket: &IpSock<I, D::Weak>,
2106            options: &O,
2107        ) -> Result<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>, IpSockSendError>
2108        where
2109            O: SendOptions<I> + RouteResolutionOptions<I>,
2110        {
2111            let IpSockDefinition { remote_ip, local_ip, device, proto } = &socket.definition;
2112            let device = device
2113                .as_ref()
2114                .map(|d| d.upgrade().ok_or(ResolveRouteError::Unreachable))
2115                .transpose()?;
2116            let ResolvedRoute {
2117                src_addr,
2118                device,
2119                next_hop,
2120                local_delivery_device: _,
2121                internal_forwarding: _,
2122            } = self.lookup_route(
2123                device.as_ref(),
2124                Some(*local_ip),
2125                *remote_ip,
2126                options.transparent(),
2127            )?;
2128
2129            let remote_ip: &SpecifiedAddr<_> = remote_ip.as_ref();
2130
2131            let destination = IpPacketDestination::from_next_hop(next_hop, *remote_ip);
2132            Ok(SendIpPacketMeta {
2133                device,
2134                src_ip: src_addr.into(),
2135                dst_ip: *remote_ip,
2136                destination,
2137                proto: *proto,
2138                ttl: options.hop_limit(remote_ip),
2139                mtu: options.mtu(),
2140                dscp_and_ecn: DscpAndEcn::default(),
2141            })
2142        }
2143    }
2144}