Skip to main content

netstack3_ip/
socket.rs

1// Copyright 2019 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! IPv4 and IPv6 sockets.
6
7use core::cmp::Ordering;
8use core::convert::Infallible;
9use core::num::NonZeroU8;
10
11use log::{debug, error};
12use net_types::ip::{Ip, IpVersionMarker, Ipv6Addr, Mtu};
13use net_types::{MulticastAddress, ScopeableAddress, SpecifiedAddr, Witness as _};
14use netstack3_base::socket::{SocketIpAddr, SocketIpAddrExt as _};
15use netstack3_base::{
16    AnyDevice, CounterContext, DeviceIdContext, DeviceIdentifier, EitherDeviceId, InstantContext,
17    InterfaceProperties, IpDeviceAddr, IpExt, Marks, Mms, SendFrameErrorReason,
18    StrongDeviceIdentifier, TxMetadata as _, TxMetadataBindingsTypes, WeakDeviceIdentifier,
19};
20use netstack3_filter::{
21    self as filter, DynTransportSerializer, DynamicTransportSerializer, FilterBindingsContext,
22    FilterHandler as _, FilterIpExt, RawIpBody, SocketEgressFilterResult, SocketOpsFilter,
23    SocketOpsFilterBindingContext, TransportPacketSerializer,
24};
25use netstack3_trace::trace_duration;
26use packet::{BufferMut, PacketBuilder as _, PacketConstraints, SerializeError, Serializer};
27use packet_formats::ip::{DscpAndEcn, IpPacketBuilder as _};
28use thiserror::Error;
29
30use crate::icmp::IcmpErrorHandler;
31use crate::internal::base::{
32    FilterHandlerProvider, IpDeviceMtuContext, IpLayerIpExt, IpLayerPacketMetadata,
33    IpPacketDestination, IpSendFrameError, IpSendFrameErrorReason, ResolveRouteError,
34    SendIpPacketMeta, reject_type_to_icmpv4_error, reject_type_to_icmpv6_error,
35};
36use crate::internal::counters::IpCounters;
37use crate::internal::device::state::IpDeviceStateIpExt;
38use crate::internal::routing::PacketOrigin;
39use crate::internal::routing::rules::RuleInput;
40use crate::internal::types::{InternalForwarding, ResolvedRoute, RoutableIpAddr};
41use crate::{HopLimits, NextHop};
42
/// The arguments used for creating an [`IpSock`].
///
/// Passed by value to [`IpSocketHandler::new_ip_socket`] and to the one-shot
/// send helpers on [`IpSocketHandler`].
pub struct IpSocketArgs<'a, D: StrongDeviceIdentifier, I: IpExt, O> {
    /// The device the socket is bound to.
    ///
    /// May be either a strong or a weak device reference; `None` leaves the
    /// socket unbound.
    pub device: Option<EitherDeviceId<&'a D, &'a D::Weak>>,
    /// The local IP to use for the connection. One is selected if not provided
    /// based on the output route.
    pub local_ip: Option<IpDeviceAddr<I::Addr>>,
    /// The remote IP address for this connection.
    pub remote_ip: RoutableIpAddr<I::Addr>,
    /// The IP protocol in use.
    pub proto: I::Proto,
    /// Additional IP layer options.
    ///
    /// Borrowed rather than owned so that the same options can be reused for
    /// the send that follows socket creation in the one-shot helpers.
    pub options: &'a O,
}
57/// An execution context defining a type of IP socket.
58pub trait IpSocketHandler<I: IpExt + FilterIpExt, BC: TxMetadataBindingsTypes>:
59    DeviceIdContext<AnyDevice>
60{
61    /// Constructs a new [`IpSock`].
62    ///
63    /// `new_ip_socket` constructs a new `IpSock` to the given remote IP
64    /// address from the given local IP address with the given IP protocol. If
65    /// no local IP address is given, one will be chosen automatically. If
66    /// `device` is `Some`, the socket will be bound to the given device - only
67    /// routes which egress over the device will be used. If no route is
68    /// available which egresses over the device - even if routes are available
69    /// which egress over other devices - the socket will be considered
70    /// unroutable.
71    ///
72    /// `new_ip_socket` returns an error if no route to the remote was found in
73    /// the forwarding table or if the given local IP address is not valid for
74    /// the found route.
75    fn new_ip_socket<O>(
76        &mut self,
77        bindings_ctx: &mut BC,
78        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
79    ) -> Result<IpSock<I, Self::WeakDeviceId>, IpSockCreationError>
80    where
81        O: RouteResolutionOptions<I>;
82
83    /// Sends an IP packet on a socket.
84    ///
85    /// The generated packet has its metadata initialized from `socket`,
86    /// including the source and destination addresses, the Time To Live/Hop
87    /// Limit, and the Protocol/Next Header. The outbound device is also chosen
88    /// based on information stored in the socket.
89    ///
90    /// `mtu` may be used to optionally impose an MTU on the outgoing packet.
91    /// Note that the device's MTU will still be imposed on the packet. That is,
92    /// the smaller of `mtu` and the device's MTU will be imposed on the packet.
93    ///
94    /// If the socket is currently unroutable, an error is returned.
95    fn send_ip_packet<S, O>(
96        &mut self,
97        bindings_ctx: &mut BC,
98        socket: &IpSock<I, Self::WeakDeviceId>,
99        body: S,
100        options: &O,
101        tx_metadata: BC::TxMetadata,
102    ) -> Result<(), IpSockSendError>
103    where
104        S: TransportPacketSerializer<I>,
105        S::Buffer: BufferMut,
106        O: SendOptions<I> + RouteResolutionOptions<I>;
107
108    /// Confirms the provided IP socket destination is reachable.
109    ///
110    /// Implementations must retrieve the next hop given the provided
111    /// IP socket and confirm neighbor reachability for the resolved target
112    /// device.
113    fn confirm_reachable<O>(
114        &mut self,
115        bindings_ctx: &mut BC,
116        socket: &IpSock<I, Self::WeakDeviceId>,
117        options: &O,
118    ) where
119        O: RouteResolutionOptions<I>;
120
121    /// Creates a temporary IP socket and sends a single packet on it.
122    ///
123    /// `local_ip`, `remote_ip`, `proto`, and `options` are passed directly to
124    /// [`IpSocketHandler::new_ip_socket`]. `get_body_from_src_ip` is given the
125    /// source IP address for the packet - which may have been chosen
126    /// automatically if `local_ip` is `None` - and returns the body to be
127    /// encapsulated. This is provided in case the body's contents depend on the
128    /// chosen source IP address.
129    ///
130    /// If `device` is specified, the available routes are limited to those that
131    /// egress over the device.
132    ///
133    /// `mtu` may be used to optionally impose an MTU on the outgoing packet.
134    /// Note that the device's MTU will still be imposed on the packet. That is,
135    /// the smaller of `mtu` and the device's MTU will be imposed on the packet.
136    ///
137    /// # Errors
138    ///
139    /// If an error is encountered while constructing the temporary IP socket
140    /// or sending the packet, `options` will be returned along with the
141    /// error. `get_body_from_src_ip` is fallible, and if there's an error,
142    /// it will be returned as well.
143    fn send_oneshot_ip_packet_with_fallible_serializer<S, E, F, O>(
144        &mut self,
145        bindings_ctx: &mut BC,
146        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
147        tx_metadata: BC::TxMetadata,
148        get_body_from_src_ip: F,
149    ) -> Result<(), SendOneShotIpPacketError<E>>
150    where
151        S: TransportPacketSerializer<I>,
152        S::Buffer: BufferMut,
153        F: FnOnce(IpDeviceAddr<I::Addr>) -> Result<S, E>,
154        O: SendOptions<I> + RouteResolutionOptions<I>,
155    {
156        let options = args.options;
157        let tmp = self
158            .new_ip_socket(bindings_ctx, args)
159            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })?;
160        let packet = get_body_from_src_ip(*tmp.local_ip())
161            .map_err(SendOneShotIpPacketError::SerializeError)?;
162        self.send_ip_packet(bindings_ctx, &tmp, packet, options, tx_metadata)
163            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })
164    }
165
166    /// Like `send_oneshot_ip_packet_with_fallible_serializer`, but a dynamic
167    /// transport serializer is used.
168    ///
169    /// This reduces code generation cost at the expense of some runtime
170    /// overhead.
171    fn send_oneshot_ip_packet_with_dyn_fallible_serializer<S, E, F, O>(
172        &mut self,
173        bindings_ctx: &mut BC,
174        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
175        tx_metadata: BC::TxMetadata,
176        get_body_from_src_ip: F,
177    ) -> Result<(), SendOneShotIpPacketError<E>>
178    where
179        S: DynamicTransportSerializer<I>,
180        F: FnOnce(IpDeviceAddr<I::Addr>) -> Result<S, E>,
181        O: SendOptions<I> + RouteResolutionOptions<I>,
182    {
183        let options = args.options;
184        let tmp = self
185            .new_ip_socket(bindings_ctx, args)
186            .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })?;
187        let mut packet = get_body_from_src_ip(*tmp.local_ip())
188            .map_err(SendOneShotIpPacketError::SerializeError)?;
189        self.send_ip_packet(
190            bindings_ctx,
191            &tmp,
192            DynTransportSerializer::new(&mut packet),
193            options,
194            tx_metadata,
195        )
196        .map_err(|err| SendOneShotIpPacketError::CreateAndSendError { err: err.into() })
197    }
198
199    /// Sends a one-shot IP packet but with a non-fallible serializer.
200    fn send_oneshot_ip_packet<S, F, O>(
201        &mut self,
202        bindings_ctx: &mut BC,
203        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
204        tx_metadata: BC::TxMetadata,
205        get_body_from_src_ip: F,
206    ) -> Result<(), IpSockCreateAndSendError>
207    where
208        S: TransportPacketSerializer<I>,
209        S::Buffer: BufferMut,
210        F: FnOnce(IpDeviceAddr<I::Addr>) -> S,
211        O: SendOptions<I> + RouteResolutionOptions<I>,
212    {
213        self.send_oneshot_ip_packet_with_fallible_serializer(
214            bindings_ctx,
215            args,
216            tx_metadata,
217            |ip| Ok::<_, Infallible>(get_body_from_src_ip(ip)),
218        )
219        .map_err(|err| match err {
220            SendOneShotIpPacketError::CreateAndSendError { err } => err,
221        })
222    }
223
224    /// Like `send_oneshot_ip_packet`, but a dynamic transport serializer is
225    /// used.
226    ///
227    /// This reduces code generation cost at the expense of some runtime
228    /// overhead.
229    fn send_oneshot_ip_packet_with_dyn_serializer<S, F, O>(
230        &mut self,
231        bindings_ctx: &mut BC,
232        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
233        tx_metadata: BC::TxMetadata,
234        get_body_from_src_ip: F,
235    ) -> Result<(), IpSockCreateAndSendError>
236    where
237        S: DynamicTransportSerializer<I>,
238        F: FnOnce(IpDeviceAddr<I::Addr>) -> S,
239        O: SendOptions<I> + RouteResolutionOptions<I>,
240    {
241        self.send_oneshot_ip_packet_with_dyn_fallible_serializer(
242            bindings_ctx,
243            args,
244            tx_metadata,
245            |ip| Ok::<_, Infallible>(get_body_from_src_ip(ip)),
246        )
247        .map_err(|err| match err {
248            SendOneShotIpPacketError::CreateAndSendError { err } => err,
249        })
250    }
251}
252
/// An error in sending a packet on an IP socket.
///
/// This is the error type of [`IpSocketHandler::send_ip_packet`].
#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
pub enum IpSockSendError {
    /// An MTU was exceeded.
    ///
    /// This could be caused by an MTU at any layer of the stack, including both
    /// device MTUs and packet format body size limits.
    #[error("a maximum transmission unit (MTU) was exceeded")]
    Mtu,
    /// The socket is currently unroutable.
    ///
    /// Carries the route resolution failure that made the socket unroutable.
    #[error("the socket is currently unroutable: {0}")]
    Unroutable(#[from] ResolveRouteError),
    /// The socket operation would've resulted in illegal loopback addresses on
    /// a non-loopback device.
    #[error("illegal loopback address")]
    IllegalLoopbackAddress,
    /// Broadcast send is not allowed.
    #[error("broadcast send is not enabled for the socket")]
    BroadcastNotAllowed,
}
273
274impl From<SerializeError<Infallible>> for IpSockSendError {
275    fn from(err: SerializeError<Infallible>) -> IpSockSendError {
276        match err {
277            SerializeError::SizeLimitExceeded => IpSockSendError::Mtu,
278        }
279    }
280}
281
282impl IpSockSendError {
283    /// Constructs a `Result` from an [`IpSendFrameErrorReason`] with
284    /// application-visible [`IpSockSendError`]s in the `Err` variant.
285    ///
286    /// Errors that are not bubbled up to applications are dropped.
287    fn from_ip_send_frame(e: IpSendFrameErrorReason) -> Result<(), Self> {
288        match e {
289            IpSendFrameErrorReason::Device(d) => Self::from_send_frame(d),
290            IpSendFrameErrorReason::IllegalLoopbackAddress => Err(Self::IllegalLoopbackAddress),
291        }
292    }
293
294    /// Constructs a `Result` from a [`SendFrameErrorReason`] with
295    /// application-visible [`IpSockSendError`]s in the `Err` variant.
296    ///
297    /// Errors that are not bubbled up to applications are dropped.
298    fn from_send_frame(e: SendFrameErrorReason) -> Result<(), Self> {
299        match e {
300            SendFrameErrorReason::Alloc | SendFrameErrorReason::QueueFull => Ok(()),
301            SendFrameErrorReason::SizeConstraintsViolation => Err(Self::Mtu),
302        }
303    }
304}
305
/// An error in sending a packet on a temporary IP socket.
///
/// Covers both phases of a one-shot send: creating the temporary socket and
/// sending the packet on it.
#[derive(Error, Copy, Clone, Debug)]
pub enum IpSockCreateAndSendError {
    /// Cannot send via temporary socket.
    #[error("cannot send via temporary socket: {0}")]
    Send(#[from] IpSockSendError),
    /// The temporary socket could not be created.
    #[error("the temporary socket could not be created: {0}")]
    Create(#[from] IpSockCreationError),
}
316
/// The error returned by
/// [`IpSocketHandler::send_oneshot_ip_packet_with_fallible_serializer`].
///
/// `E` is the error type of the caller-provided body constructor.
#[derive(Debug)]
#[allow(missing_docs)]
pub enum SendOneShotIpPacketError<E> {
    /// Creating the temporary socket or sending the packet on it failed.
    CreateAndSendError { err: IpSockCreateAndSendError },
    /// The caller-provided body constructor returned an error.
    SerializeError(E),
}
325
/// Possible errors when retrieving the maximum transport message size.
///
/// This is the error type of [`DeviceIpSocketHandler::get_mms`].
#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
pub enum MmsError {
    /// Cannot find the device that is used for the ip socket, possibly because
    /// there is no route.
    #[error("cannot find the device: {0}")]
    NoDevice(#[from] ResolveRouteError),
    /// The MTU provided by the device is too small such that there is no room
    /// for a transport message at all.
    ///
    /// Carries the offending MTU.
    #[error("invalid MTU: {0:?}")]
    MTUTooSmall(Mtu),
}
338
/// Gets device related information of an IP socket.
pub trait DeviceIpSocketHandler<I: IpExt, BC>: DeviceIdContext<AnyDevice> {
    /// Gets the maximum message size for the transport layer, it equals the
    /// device MTU minus the IP header size.
    ///
    /// This corresponds to the GET_MAXSIZES call described in:
    /// https://www.rfc-editor.org/rfc/rfc1122#section-3.4
    ///
    /// # Errors
    ///
    /// See [`MmsError`] for the failure cases.
    fn get_mms<O: RouteResolutionOptions<I>>(
        &mut self,
        bindings_ctx: &mut BC,
        ip_sock: &IpSock<I, Self::WeakDeviceId>,
        options: &O,
    ) -> Result<Mms, MmsError>;
}
353
/// An error encountered when creating an IP socket.
///
/// This is the error type of [`IpSocketHandler::new_ip_socket`].
#[derive(Error, Copy, Clone, Debug, Eq, PartialEq)]
pub enum IpSockCreationError {
    /// An error occurred while looking up a route.
    #[error("a route cannot be determined: {0}")]
    Route(#[from] ResolveRouteError),
}
361
/// An IP socket.
///
/// `I` is the IP version and `D` the device identifier type stored for the
/// socket's bound device. Instances are created through
/// [`IpSocketHandler::new_ip_socket`].
#[derive(Clone, Debug)]
#[cfg_attr(test, derive(PartialEq))]
pub struct IpSock<I: IpExt, D> {
    /// The definition of the socket.
    ///
    /// This does not change for the lifetime of the socket.
    definition: IpSockDefinition<I, D>,
}
371
372impl<I: IpExt, D> IpSock<I, D> {
373    /// Returns the socket's definition.
374    #[cfg(any(test, feature = "testutils"))]
375    pub fn definition(&self) -> &IpSockDefinition<I, D> {
376        &self.definition
377    }
378}
379
/// The definition of an IP socket.
///
/// These values are part of the socket's definition, and never change.
#[derive(Clone, Debug, PartialEq)]
pub struct IpSockDefinition<I: IpExt, D> {
    /// The socket's remote address.
    pub remote_ip: SocketIpAddr<I::Addr>,
    /// The socket's local address.
    ///
    /// Guaranteed to be unicast in its subnet since it's always equal to an
    /// address assigned to the local device. We can't use the `UnicastAddr`
    /// witness type since `Ipv4Addr` doesn't implement `UnicastAddress`.
    //
    // TODO(joshlf): Support unnumbered interfaces. Once we do that, a few
    // issues arise: A) Does the unicast restriction still apply, and is that
    // even well-defined for IPv4 in the absence of a subnet? B) Presumably we
    // have to always bind to a particular interface?
    pub local_ip: IpDeviceAddr<I::Addr>,
    /// The socket's bound output device.
    ///
    /// `None` when the socket is not bound to any device.
    pub device: Option<D>,
    /// The IP protocol the socket is bound to.
    pub proto: I::Proto,
}
403
404impl<I: IpExt, D> IpSock<I, D> {
405    /// Returns the socket's local IP address.
406    pub fn local_ip(&self) -> &IpDeviceAddr<I::Addr> {
407        &self.definition.local_ip
408    }
409    /// Returns the socket's remote IP address.
410    pub fn remote_ip(&self) -> &SocketIpAddr<I::Addr> {
411        &self.definition.remote_ip
412    }
413    /// Returns the selected output interface for the socket, if any.
414    pub fn device(&self) -> Option<&D> {
415        self.definition.device.as_ref()
416    }
417    /// Returns the socket's protocol.
418    pub fn proto(&self) -> I::Proto {
419        self.definition.proto
420    }
421}
422
423// TODO(joshlf): Once we support configuring transport-layer protocols using
424// type parameters, use that to ensure that `proto` is the right protocol for
425// the caller. We will still need to have a separate enforcement mechanism for
426// raw IP sockets once we support those.
427
/// The bindings execution context for IP sockets.
///
/// This trait has no items of its own; it only bundles the listed
/// supertraits under a single name.
pub trait IpSocketBindingsContext<D>:
    InstantContext
    + FilterBindingsContext<D>
    + TxMetadataBindingsTypes
    + SocketOpsFilterBindingContext<D>
{
}
436impl<
437    D,
438    BC: InstantContext
439        + FilterBindingsContext<D>
440        + TxMetadataBindingsTypes
441        + SocketOpsFilterBindingContext<D>,
442> IpSocketBindingsContext<D> for BC
443{
444}
445
/// The context required in order to implement [`IpSocketHandler`].
///
/// Blanket impls of `IpSocketHandler` are provided in terms of
/// `IpSocketContext`.
pub trait IpSocketContext<I, BC>:
    DeviceIdContext<AnyDevice, DeviceId: InterfaceProperties<BC::DeviceClass>>
    + FilterHandlerProvider<I, BC>
    + IcmpErrorHandler<I, BC>
where
    I: IpLayerIpExt,
    BC: IpSocketBindingsContext<Self::DeviceId>,
{
    /// Returns a route for a socket.
    ///
    /// If `device` is specified, the available routes are limited to those that
    /// egress over the device.
    ///
    /// `src_ip` is the local address to route from, if one has already been
    /// chosen. `transparent` and `marks` carry the values reported by the
    /// socket's [`RouteResolutionOptions`].
    fn lookup_route(
        &mut self,
        bindings_ctx: &mut BC,
        device: Option<&Self::DeviceId>,
        src_ip: Option<IpDeviceAddr<I::Addr>>,
        dst_ip: RoutableIpAddr<I::Addr>,
        transparent: bool,
        marks: &Marks,
    ) -> Result<ResolvedRoute<I, Self::DeviceId>, ResolveRouteError>;

    /// Send an IP packet to the next-hop node.
    ///
    /// `meta` describes the packet to send, `body` is the serializer for the
    /// packet's payload and `packet_metadata` is the IP layer metadata
    /// associated with the packet.
    fn send_ip_packet<S>(
        &mut self,
        bindings_ctx: &mut BC,
        meta: SendIpPacketMeta<I, &Self::DeviceId, SpecifiedAddr<I::Addr>>,
        body: S,
        packet_metadata: IpLayerPacketMetadata<I, Self::WeakAddressId, BC>,
    ) -> Result<(), IpSendFrameError<S>>
    where
        S: TransportPacketSerializer<I>,
        S::Buffer: BufferMut;

    /// Returns `DeviceId` for the loopback device.
    ///
    /// Returns `None` when no loopback device is available.
    fn get_loopback_device(&mut self) -> Option<Self::DeviceId>;

    /// Confirms the provided IP socket destination is reachable.
    ///
    /// Implementations must retrieve the next hop given the provided
    /// IP socket and confirm neighbor reachability for the resolved target
    /// device.
    fn confirm_reachable(
        &mut self,
        bindings_ctx: &mut BC,
        dst: SpecifiedAddr<I::Addr>,
        input: RuleInput<'_, I, Self::DeviceId>,
    );
}
499
/// Enables a blanket implementation of [`IpSocketHandler`].
///
/// Implementing this marker trait for a type enables a blanket implementation
/// of `IpSocketHandler` given the other requirements are met. The trait has
/// no items; it acts purely as an opt-in.
pub trait UseIpSocketHandlerBlanket {}
505
506impl<I, BC, CC> IpSocketHandler<I, BC> for CC
507where
508    I: IpLayerIpExt + IpDeviceStateIpExt,
509    BC: IpSocketBindingsContext<Self::DeviceId>,
510    CC: IpSocketContext<I, BC> + CounterContext<IpCounters<I>> + UseIpSocketHandlerBlanket,
511    CC::DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
512{
513    fn new_ip_socket<O>(
514        &mut self,
515        bindings_ctx: &mut BC,
516        args: IpSocketArgs<'_, Self::DeviceId, I, O>,
517    ) -> Result<IpSock<I, CC::WeakDeviceId>, IpSockCreationError>
518    where
519        O: RouteResolutionOptions<I>,
520    {
521        let IpSocketArgs { device, local_ip, remote_ip, proto, options } = args;
522        let device = device
523            .as_ref()
524            .map(|d| d.as_strong_ref().ok_or(ResolveRouteError::Unreachable))
525            .transpose()?;
526        let device = device.as_ref().map(|d| d.as_ref());
527
528        // Make sure the remote is routable with a local address before creating
529        // the socket. We do not care about the actual destination here because
530        // we will recalculate it when we send a packet so that the best route
531        // available at the time is used for each outgoing packet.
532        let resolved_route = self.lookup_route(
533            bindings_ctx,
534            device,
535            local_ip,
536            remote_ip,
537            options.transparent(),
538            options.marks(),
539        )?;
540        Ok(new_ip_socket(device, resolved_route, remote_ip, proto))
541    }
542
543    fn send_ip_packet<S, O>(
544        &mut self,
545        bindings_ctx: &mut BC,
546        ip_sock: &IpSock<I, CC::WeakDeviceId>,
547        body: S,
548        options: &O,
549        tx_metadata: BC::TxMetadata,
550    ) -> Result<(), IpSockSendError>
551    where
552        S: TransportPacketSerializer<I>,
553        S::Buffer: BufferMut,
554        O: SendOptions<I> + RouteResolutionOptions<I>,
555    {
556        send_ip_packet(self, bindings_ctx, ip_sock, body, options, tx_metadata)
557    }
558
559    fn confirm_reachable<O>(
560        &mut self,
561        bindings_ctx: &mut BC,
562        socket: &IpSock<I, CC::WeakDeviceId>,
563        options: &O,
564    ) where
565        O: RouteResolutionOptions<I>,
566    {
567        let bound_device = socket.device().and_then(|weak| weak.upgrade());
568        let bound_device = bound_device.as_ref();
569        let bound_address = Some((*socket.local_ip()).into());
570        let destination = (*socket.remote_ip()).into();
571        IpSocketContext::confirm_reachable(
572            self,
573            bindings_ctx,
574            destination,
575            RuleInput {
576                packet_origin: PacketOrigin::Local { bound_address, bound_device },
577                marks: options.marks(),
578            },
579        )
580    }
581}
582
/// Provides hooks for altering route resolution behavior of [`IpSock`].
///
/// Must be implemented by the socket option type of an `IpSock` when using it
/// to call [`IpSocketHandler::new_ip_socket`] or
/// [`IpSocketHandler::send_ip_packet`]. This is implemented as a trait instead
/// of an inherent impl on a type so that users of sockets that don't need
/// certain option types can avoid allocating space for those options.
// TODO(https://fxbug.dev/323389672): We need a mechanism to inform `IpSock` of
// changes in the route resolution options when it starts caching previously
// calculated routes. Any changes to the options here *MUST* cause the route to
// be re-calculated.
pub trait RouteResolutionOptions<I: Ip> {
    /// Whether the socket is transparent.
    ///
    /// This allows transparently proxying traffic to the socket, and allows the
    /// socket to be bound to a non-local address.
    fn transparent(&self) -> bool;

    /// Returns the marks carried by packets created on the socket.
    ///
    /// The marks are passed to route lookup.
    fn marks(&self) -> &Marks;
}
604
/// Provides hooks for altering sending behavior of [`IpSock`].
///
/// Must be implemented by the socket option type of an `IpSock` when using it
/// to call [`IpSocketHandler::send_ip_packet`]. This is implemented as a trait
/// instead of an inherent impl on a type so that users of sockets that don't
/// need certain option types, like TCP for anything multicast-related, can
/// avoid allocating space for those options.
pub trait SendOptions<I: IpExt> {
    /// Returns the hop limit to set on a packet going to the given destination.
    ///
    /// If `Some(u)`, `u` will be used as the hop limit (IPv6) or TTL (IPv4) for
    /// a packet going to the given destination. Otherwise the default value
    /// will be used.
    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8>;

    /// Returns true if outgoing multicast packets should be looped back and
    /// delivered to local receivers who joined the multicast group.
    fn multicast_loop(&self) -> bool;

    /// `Some` if the socket can be used to send broadcast packets.
    ///
    /// `None` means broadcast sends are not allowed on the socket.
    fn allow_broadcast(&self) -> Option<I::BroadcastMarker>;

    /// Returns TCLASS/TOS field value that should be set in IP headers.
    fn dscp_and_ecn(&self) -> DscpAndEcn;

    /// The IP MTU to use for this transmission.
    ///
    /// Note that the minimum overall MTU is used considering the device and
    /// path. This option can be used to restrict an MTU to an upper bound.
    fn mtu(&self) -> Mtu;
}
636
/// Empty send and creation options that never override default values.
///
/// A zero-sized type implementing both [`SendOptions`] and
/// [`RouteResolutionOptions`].
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct DefaultIpSocketOptions;
640
641impl<I: IpExt> SendOptions<I> for DefaultIpSocketOptions {
642    fn hop_limit(&self, _destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
643        None
644    }
645
646    fn multicast_loop(&self) -> bool {
647        false
648    }
649
650    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
651        None
652    }
653
654    fn dscp_and_ecn(&self) -> DscpAndEcn {
655        DscpAndEcn::default()
656    }
657
658    fn mtu(&self) -> Mtu {
659        Mtu::no_limit()
660    }
661}
662
663impl<I: Ip> RouteResolutionOptions<I> for DefaultIpSocketOptions {
664    fn transparent(&self) -> bool {
665        false
666    }
667
668    fn marks(&self) -> &Marks {
669        &Marks::UNMARKED
670    }
671}
672
673/// A trait providing send options delegation to an inner type.
674///
675/// A blanket impl of [`SendOptions`] is provided to all implementers. This
676/// trait has the same shape as `SendOptions` but all the methods provide
677/// default implementations that delegate to the value returned by
678/// `DelegatedSendOptions::Delegate`. For brevity, the default `delegate` is
679/// [`DefaultIpSocketOptions`].
680#[allow(missing_docs)]
681pub trait DelegatedSendOptions<I: IpExt>: OptionDelegationMarker {
682    /// Returns the delegate providing the impl for all default methods.
683    fn delegate(&self) -> &impl SendOptions<I> {
684        &DefaultIpSocketOptions
685    }
686
687    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
688        self.delegate().hop_limit(destination)
689    }
690
691    fn multicast_loop(&self) -> bool {
692        self.delegate().multicast_loop()
693    }
694
695    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
696        self.delegate().allow_broadcast()
697    }
698
699    fn dscp_and_ecn(&self) -> DscpAndEcn {
700        self.delegate().dscp_and_ecn()
701    }
702
703    fn mtu(&self) -> Mtu {
704        self.delegate().mtu()
705    }
706}
707
708impl<O: DelegatedSendOptions<I> + OptionDelegationMarker, I: IpExt> SendOptions<I> for O {
709    fn hop_limit(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
710        self.hop_limit(destination)
711    }
712
713    fn multicast_loop(&self) -> bool {
714        self.multicast_loop()
715    }
716
717    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
718        self.allow_broadcast()
719    }
720
721    fn dscp_and_ecn(&self) -> DscpAndEcn {
722        self.dscp_and_ecn()
723    }
724
725    fn mtu(&self) -> Mtu {
726        self.mtu()
727    }
728}
729
730/// A trait providing route resolution options delegation to an inner type.
731///
732/// A blanket impl of [`RouteResolutionOptions`] is provided to all
733/// implementers. This trait has the same shape as `RouteResolutionOptions` but
734/// all the methods provide default implementations that delegate to the value
735/// returned by `DelegatedRouteResolutionOptions::Delegate`. For brevity, the
736/// default `delegate` is [`DefaultIpSocketOptions`].
737#[allow(missing_docs)]
738pub trait DelegatedRouteResolutionOptions<I: Ip>: OptionDelegationMarker {
739    /// Returns the delegate providing the impl for all default methods.
740    fn delegate(&self) -> &impl RouteResolutionOptions<I> {
741        &DefaultIpSocketOptions
742    }
743
744    fn transparent(&self) -> bool {
745        self.delegate().transparent()
746    }
747
748    fn marks(&self) -> &Marks {
749        self.delegate().marks()
750    }
751}
752
753impl<O: DelegatedRouteResolutionOptions<I> + OptionDelegationMarker, I: IpExt>
754    RouteResolutionOptions<I> for O
755{
756    fn transparent(&self) -> bool {
757        self.transparent()
758    }
759
760    fn marks(&self) -> &Marks {
761        self.marks()
762    }
763}
764
/// A marker trait to allow option delegation traits.
///
/// This trait sidesteps trait resolution rules around the delegation traits
/// because of the `Ip` parameter in them. It is a supertrait of both
/// [`DelegatedSendOptions`] and [`DelegatedRouteResolutionOptions`], and a
/// bound on their blanket impls.
pub trait OptionDelegationMarker {}
770
/// The configurable hop limits for a socket.
///
/// A `None` limit means the limit is unset for the socket.
#[derive(Copy, Clone, Debug, Default, Eq, PartialEq)]
pub struct SocketHopLimits<I: Ip> {
    /// Unicast hop limit.
    pub unicast: Option<NonZeroU8>,
    /// Multicast hop limit.
    // TODO(https://fxbug.dev/42059735): Make this an Option<u8> to allow sending
    // multicast packets destined only for the local machine.
    pub multicast: Option<NonZeroU8>,
    /// An unused marker type signifying the IP version for which these hop
    /// limits are valid. Including this helps prevent using the wrong hop limits
    /// when operating on dualstack sockets.
    pub version: IpVersionMarker<I>,
}
785
786impl<I: Ip> SocketHopLimits<I> {
787    /// Returns a function that updates the unicast hop limit.
788    pub fn set_unicast(value: Option<NonZeroU8>) -> impl FnOnce(&mut Self) {
789        move |limits| limits.unicast = value
790    }
791
792    /// Returns a function that updates the multicast hop limit.
793    pub fn set_multicast(value: Option<NonZeroU8>) -> impl FnOnce(&mut Self) {
794        move |limits| limits.multicast = value
795    }
796
797    /// Returns the hop limits, or the provided defaults if unset.
798    pub fn get_limits_with_defaults(&self, defaults: &HopLimits) -> HopLimits {
799        let Self { unicast, multicast, version: _ } = self;
800        HopLimits {
801            unicast: unicast.unwrap_or(defaults.unicast),
802            multicast: multicast.unwrap_or(defaults.multicast),
803        }
804    }
805
806    /// Returns the appropriate hop limit to use for the given destination addr.
807    pub fn hop_limit_for_dst(&self, destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
808        let Self { unicast, multicast, version: _ } = self;
809        if destination.is_multicast() { *multicast } else { *unicast }
810    }
811}
812
813fn new_ip_socket<I, D>(
814    requested_device: Option<&D>,
815    route: ResolvedRoute<I, D>,
816    remote_ip: SocketIpAddr<I::Addr>,
817    proto: I::Proto,
818) -> IpSock<I, D::Weak>
819where
820    I: IpExt,
821    D: StrongDeviceIdentifier,
822{
823    // TODO(https://fxbug.dev/323389672): Cache a reference to the route to
824    // avoid the route lookup on send as long as the routing table hasn't
825    // changed in between these operations.
826    let ResolvedRoute {
827        src_addr,
828        device: route_device,
829        local_delivery_device,
830        next_hop: _,
831        internal_forwarding: _,
832    } = route;
833
834    // If the source or destination address require a device, make sure to
835    // set that in the socket definition. Otherwise defer to what was provided.
836    let socket_device = (src_addr.as_ref().must_have_zone() || remote_ip.as_ref().must_have_zone())
837        .then(|| {
838            // NB: The route device might be loopback, and in such cases
839            // we want to bind the socket to the device the source IP is
840            // assigned to instead.
841            local_delivery_device.unwrap_or(route_device)
842        })
843        .as_ref()
844        .or(requested_device)
845        .map(|d| d.downgrade());
846
847    let definition =
848        IpSockDefinition { local_ip: src_addr, remote_ip, device: socket_device, proto };
849    IpSock { definition }
850}
851
/// Sends `body` as an IP packet on `socket`.
///
/// The steps performed, in order, are:
///
/// 1. Resolve a route using the socket's local address, remote address and
///    bound device (if any).
/// 2. Fail with [`IpSockSendError::BroadcastNotAllowed`] if the next hop is a
///    broadcast and `options.allow_broadcast()` returns `None`.
/// 3. Run the LOCAL_EGRESS filter hook. A drop verdict ends the send with
///    `Ok(())`. A reject verdict additionally attempts to send an ICMP error
///    for the packet before returning `Ok(())`.
/// 4. If the hook rewrote the destination address, resolve the route again
///    for the new destination.
/// 5. If the route uses internal forwarding, run the forwarding filter hook.
/// 6. If the TX metadata carries a socket cookie, run the socket egress
///    filter.
/// 7. Hand the packet to [`IpSocketContext::send_ip_packet`]. When the egress
///    device is not loopback and the packet is either multicast with
///    `options.multicast_loop()` set or sent to a broadcast next hop, a copy
///    of the serialized body is also sent through the loopback device.
///
/// # Errors
///
/// Returns an error if route resolution fails, if broadcast is not allowed,
/// if the addresses left in the packet after the LOCAL_EGRESS hook cannot be
/// used as source/destination, if serializing the loopback copy fails, or if
/// sending the primary packet fails. Packets dropped or rejected by a filter
/// are reported as `Ok(())`, and a failure to send the loopback copy is only
/// logged.
fn send_ip_packet<I, S, BC, CC, O>(
    core_ctx: &mut CC,
    bindings_ctx: &mut BC,
    socket: &IpSock<I, CC::WeakDeviceId>,
    mut body: S,
    options: &O,
    tx_metadata: BC::TxMetadata,
) -> Result<(), IpSockSendError>
where
    I: IpLayerIpExt,
    S: TransportPacketSerializer<I>,
    S::Buffer: BufferMut,
    BC: IpSocketBindingsContext<CC::DeviceId>,
    CC: IpSocketContext<I, BC> + CounterContext<IpCounters<I>>,
    CC::DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
    O: SendOptions<I> + RouteResolutionOptions<I>,
{
    trace_duration!("ip::send_packet");

    // Extracted to a function without the serializer parameter to ease code
    // generation.
    //
    // Upgrades the socket's weak device reference (if any) and looks up a
    // route that uses `local_ip` as the source address. A bound device that
    // can no longer be upgraded is reported as unreachable.
    fn resolve<
        I: IpLayerIpExt,
        CC: IpSocketContext<I, BC>,
        BC: IpSocketBindingsContext<CC::DeviceId>,
    >(
        core_ctx: &mut CC,
        bindings_ctx: &mut BC,
        device: &Option<CC::WeakDeviceId>,
        local_ip: IpDeviceAddr<I::Addr>,
        remote_ip: RoutableIpAddr<I::Addr>,
        transparent: bool,
        marks: &Marks,
    ) -> Result<ResolvedRoute<I, CC::DeviceId>, IpSockSendError> {
        let device = match device.as_ref().map(|d| d.upgrade()) {
            Some(Some(device)) => Some(device),
            // The socket is bound to a device that failed to upgrade.
            Some(None) => return Err(ResolveRouteError::Unreachable.into()),
            None => None,
        };
        let route = core_ctx
            .lookup_route(
                bindings_ctx,
                device.as_ref(),
                Some(local_ip),
                remote_ip,
                transparent,
                marks,
            )
            .map_err(|e| IpSockSendError::Unroutable(e))?;
        // The lookup was given an explicit source address, so the resolved
        // route is expected to echo it back.
        assert_eq!(local_ip, route.src_addr);
        Ok(route)
    }

    let IpSock {
        definition: IpSockDefinition { remote_ip, local_ip, device: socket_device, proto },
    } = socket;
    // The route pieces are mutable because they are replaced below if the
    // LOCAL_EGRESS hook rewrites the destination address.
    let ResolvedRoute {
        src_addr: local_ip,
        device: mut egress_device,
        mut next_hop,
        mut local_delivery_device,
        mut internal_forwarding,
    } = resolve(
        core_ctx,
        bindings_ctx,
        socket_device,
        *local_ip,
        *remote_ip,
        options.transparent(),
        options.marks(),
    )?;

    if matches!(next_hop, NextHop::Broadcast(_)) && options.allow_broadcast().is_none() {
        return Err(IpSockSendError::BroadcastNotAllowed);
    }

    // Remember the destination before filtering so that a rewrite by the
    // LOCAL_EGRESS hook can be detected afterwards.
    let previous_dst = remote_ip.addr();
    let mut packet = filter::TxPacket::new(local_ip.addr(), remote_ip.addr(), *proto, &mut body);
    let mut packet_metadata =
        IpLayerPacketMetadata::from_tx_metadata_and_marks(tx_metadata, *options.marks());

    let filter_result = core_ctx.filter_handler().local_egress_hook(
        bindings_ctx,
        &mut packet,
        &egress_device,
        &mut packet_metadata,
    );
    match filter_result {
        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
            packet_metadata.acknowledge_drop();
            return Ok(());
        }
        filter::Verdict::Stop(filter::DropOrReject::Reject(reject_type)) => {
            packet_metadata.acknowledge_drop();

            // Translate the reject type into the ICMP error for this IP
            // version; reject types with no ICMP equivalent are only logged.
            let Some(icmp_error): Option<I::IcmpError> = I::map_ip_out(
                reject_type,
                |reject_type| reject_type_to_icmpv4_error(reject_type),
                |reject_type| reject_type_to_icmpv6_error(reject_type),
            ) else {
                debug!("Unsupported reject type: {:?}", reject_type);
                return Ok(());
            };

            // Serialize the rejected packet (IP header plus body) so that it
            // can be handed to the ICMP error path below.
            let src_ip = SocketIpAddr::new_from_witness(local_ip.into_inner().get());
            let dst_ip = *remote_ip;
            let ttl = options.hop_limit(&dst_ip.into()).map(|v| v.into()).unwrap_or(1);
            let packet_builder = I::PacketBuilder::new(
                src_ip.into_inner().get(),
                dst_ip.into_inner().get(),
                ttl,
                *proto,
            );
            let header_len = packet_builder.constraints().header_len();
            let ip_frame = packet_builder.wrap_body(body);
            let packet = match ip_frame
                .serialize_outer(packet::NoReuseBufferProvider(packet::new_buf_vec))
            {
                Ok(packet) => packet,
                Err((error, _frame)) => {
                    // Best effort: without a serialized packet no ICMP error
                    // is sent, and the send is still reported as `Ok`.
                    debug!("Failed to serialize packet {:?}", error);
                    return Ok(());
                }
            };

            // Invoke `send_icmp_error_message` with the `local_ip` as the
            // `original_source_ip`, which will result in the ICMP error
            // message getting sent back to the `socket`.
            core_ctx.send_icmp_error_message(
                bindings_ctx,
                /*device=*/ None,
                /*frame_dst=*/ None,
                src_ip,
                dst_ip,
                packet,
                icmp_error,
                header_len,
                *proto,
                &options.marks(),
            );

            return Ok(());
        }
        filter::Verdict::Proceed(filter::Accept) => {}
    }

    // Re-read the addresses from the packet, since the hook may have changed
    // them. Addresses that are no longer valid as a device source address or
    // a routable destination make the packet unroutable.
    let Some(mut local_ip) = IpDeviceAddr::new(packet.src_addr()) else {
        packet_metadata.acknowledge_drop();
        return Err(IpSockSendError::Unroutable(ResolveRouteError::NoSrcAddr));
    };
    let Some(remote_ip) = RoutableIpAddr::new(packet.dst_addr()) else {
        packet_metadata.acknowledge_drop();
        return Err(IpSockSendError::Unroutable(ResolveRouteError::Unreachable));
    };

    // If the LOCAL_EGRESS hook ended up rewriting the packet's destination, perform
    // re-routing based on the new destination.
    if remote_ip.addr() != previous_dst {
        let ResolvedRoute {
            src_addr: new_local_ip,
            device: new_device,
            next_hop: new_next_hop,
            local_delivery_device: new_local_delivery_device,
            internal_forwarding: new_internal_forwarding,
        } = match resolve(
            core_ctx,
            bindings_ctx,
            socket_device,
            local_ip,
            remote_ip,
            options.transparent(),
            options.marks(),
        ) {
            Ok(r) => r,
            Err(err) => {
                packet_metadata.acknowledge_drop();
                return Err(err);
            }
        };
        local_ip = new_local_ip;
        egress_device = new_device;
        next_hop = new_next_hop;
        local_delivery_device = new_local_delivery_device;
        internal_forwarding = new_internal_forwarding;
    }

    // NB: Hit the forwarding hook if the route leverages internal forwarding.
    match internal_forwarding {
        InternalForwarding::Used(ingress_device) => {
            match core_ctx.filter_handler().forwarding_hook(
                &mut packet,
                &ingress_device,
                &egress_device,
                &mut packet_metadata,
            ) {
                filter::Verdict::Stop(filter::DropOrReject::Drop) => {
                    packet_metadata.acknowledge_drop();
                    return Ok(());
                }
                filter::Verdict::Stop(filter::DropOrReject::Reject(_reject_type)) => {
                    // TODO(https://fxbug.dev/466098884): Send reject packet.
                    packet_metadata.acknowledge_drop();
                    return Ok(());
                }
                filter::Verdict::Proceed(filter::Accept) => {}
            }
        }
        InternalForwarding::NotUsed => {}
    }

    // The socket egress filter only runs for packets whose TX metadata
    // carries a socket cookie.
    if let Some(socket_cookie) = packet_metadata.tx_metadata().socket_cookie() {
        let egress_filter_result = bindings_ctx.socket_ops_filter().on_egress(
            &packet,
            &egress_device,
            socket_cookie,
            packet_metadata.marks(),
        );

        // TODO(https://fxbug.dev/412426836): Implement congestion signal handling.
        match egress_filter_result {
            SocketEgressFilterResult::Pass { congestion: _ } => (),
            SocketEgressFilterResult::Drop { congestion: _ } => {
                core_ctx.counters().socket_egress_filter_dropped.increment();
                packet_metadata.acknowledge_drop();
                return Ok(());
            }
        }
    }

    // The packet needs to be delivered locally if it's sent to a broadcast
    // or multicast address. For multicast packets this feature can be disabled
    // with IP_MULTICAST_LOOP.
    //
    // The copy is serialized here, before `body` is moved into the primary
    // send below.
    //
    // NOTE(review): the `?` below returns on a serialization error without
    // calling `packet_metadata.acknowledge_drop()`, unlike the other early
    // exits after `packet_metadata` is created - confirm this is intended.

    let loopback_packet = (!egress_device.is_loopback()
        && ((options.multicast_loop() && remote_ip.addr().is_multicast())
            || next_hop.is_broadcast()))
    .then(|| body.serialize_new_buf(PacketConstraints::UNCONSTRAINED, packet::new_buf_vec))
    .transpose()?
    .map(|buf| RawIpBody::new(*proto, local_ip.addr(), remote_ip.addr(), buf));

    // A route with a local delivery device is delivered via loopback;
    // otherwise the destination is derived from the next hop.
    let destination = match &local_delivery_device {
        Some(d) => IpPacketDestination::Loopback(d),
        None => IpPacketDestination::from_next_hop(next_hop, remote_ip.into()),
    };
    let ttl = options.hop_limit(&remote_ip.into());
    let meta = SendIpPacketMeta {
        device: &egress_device,
        src_ip: local_ip.into(),
        dst_ip: remote_ip.into(),
        destination,
        ttl,
        proto: *proto,
        mtu: options.mtu(),
        dscp_and_ecn: options.dscp_and_ecn(),
    };
    // The serializer returned alongside the error is discarded; only the
    // error reason is converted into an `IpSockSendError`.
    IpSocketContext::send_ip_packet(core_ctx, bindings_ctx, meta, body, packet_metadata).or_else(
        |IpSendFrameError { serializer: _, error }| IpSockSendError::from_ip_send_frame(error),
    )?;

    // The primary send succeeded; deliver the loopback copy, if one was
    // prepared. Failures here are logged and do not fail the send.
    match (loopback_packet, core_ctx.get_loopback_device()) {
        (Some(loopback_packet), Some(loopback_device)) => {
            let meta = SendIpPacketMeta {
                device: &loopback_device,
                src_ip: local_ip.into(),
                dst_ip: remote_ip.into(),
                destination: IpPacketDestination::Loopback(&egress_device),
                ttl,
                proto: *proto,
                mtu: options.mtu(),
                dscp_and_ecn: options.dscp_and_ecn(),
            };
            // The copy is sent with fresh, default packet metadata rather
            // than the metadata of the primary packet.
            let packet_metadata = IpLayerPacketMetadata::default();

            // The loopback packet will hit the egress hook. LOCAL_EGRESS hook
            // is not called again.
            IpSocketContext::send_ip_packet(
                core_ctx,
                bindings_ctx,
                meta,
                loopback_packet,
                packet_metadata,
            )
            .unwrap_or_else(|IpSendFrameError { serializer: _, error }| {
                error!("failed to send loopback packet: {error:?}")
            });
        }
        (Some(_loopback_packet), None) => {
            error!("can't send a loopback packet without the loopback device")
        }
        _ => (),
    }

    Ok(())
}
1146
/// Enables a blanket implementation of [`DeviceIpSocketHandler`].
///
/// Implementing this marker trait for a type enables a blanket implementation
/// of `DeviceIpSocketHandler` given the other requirements are met, namely
/// that the type also implements [`IpDeviceMtuContext`] and
/// [`IpSocketContext`].
pub trait UseDeviceIpSocketHandlerBlanket {}
1152
1153impl<I, BC, CC> DeviceIpSocketHandler<I, BC> for CC
1154where
1155    I: IpLayerIpExt + IpDeviceStateIpExt,
1156    BC: IpSocketBindingsContext<CC::DeviceId>,
1157    CC: IpDeviceMtuContext<I> + IpSocketContext<I, BC> + UseDeviceIpSocketHandlerBlanket,
1158{
1159    fn get_mms<O: RouteResolutionOptions<I>>(
1160        &mut self,
1161        bindings_ctx: &mut BC,
1162        ip_sock: &IpSock<I, Self::WeakDeviceId>,
1163        options: &O,
1164    ) -> Result<Mms, MmsError> {
1165        let IpSockDefinition { remote_ip, local_ip, device, proto: _ } = &ip_sock.definition;
1166        let device = device
1167            .as_ref()
1168            .map(|d| d.upgrade().ok_or(ResolveRouteError::Unreachable))
1169            .transpose()?;
1170
1171        let ResolvedRoute {
1172            src_addr: _,
1173            local_delivery_device: _,
1174            device,
1175            next_hop: _,
1176            internal_forwarding: _,
1177        } = self
1178            .lookup_route(
1179                bindings_ctx,
1180                device.as_ref(),
1181                Some(*local_ip),
1182                *remote_ip,
1183                options.transparent(),
1184                options.marks(),
1185            )
1186            .map_err(MmsError::NoDevice)?;
1187        let mtu = self.get_mtu(&device);
1188        // TODO(https://fxbug.dev/42072935): Calculate the options size when they
1189        // are supported.
1190        Mms::from_mtu::<I>(mtu, 0 /* no ip options used */).ok_or(MmsError::MTUTooSmall(mtu))
1191    }
1192}
1193
/// IPv6 source address selection as defined in [RFC 6724 Section 5].
///
/// [RFC 6724 Section 5]: https://datatracker.ietf.org/doc/html/rfc6724#section-5
1195pub(crate) mod ipv6_source_address_selection {
1196    use net_types::ip::{AddrSubnet, IpAddress as _};
1197
1198    use super::*;
1199
1200    use netstack3_base::Ipv6DeviceAddr;
1201
    /// A source address selection candidate.
    ///
    /// Describes the properties of one address that the selection rules in
    /// this module look at. `D` is the type used to identify devices.
    pub struct SasCandidate<D> {
        /// The candidate address and subnet.
        pub addr_sub: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
        /// True if the address is assigned (i.e. non tentative).
        ///
        /// Candidates that are not assigned are filtered out by
        /// `select_ipv6_source_address` before any comparison takes place.
        pub assigned: bool,
        /// True if the address is deprecated (i.e. not preferred).
        pub deprecated: bool,
        /// True if the address is temporary (i.e. not permanent).
        pub temporary: bool,
        /// The device this address belongs to.
        pub device: D,
    }
1215
1216    /// Selects the source address for an IPv6 socket using the algorithm
1217    /// defined in [RFC 6724 Section 5].
1218    ///
1219    /// This algorithm is only applicable when the user has not explicitly
1220    /// specified a source address.
1221    ///
1222    /// `remote_ip` is the remote IP address of the socket, `outbound_device` is
1223    /// the device over which outbound traffic to `remote_ip` is sent (according
1224    /// to the forwarding table), and `addresses` is an iterator of all
1225    /// addresses on all devices. The algorithm works by iterating over
1226    /// `addresses` and selecting the address which is most preferred according
1227    /// to a set of selection criteria.
1228    pub fn select_ipv6_source_address<
1229        'a,
1230        D: PartialEq,
1231        A,
1232        I: Iterator<Item = A>,
1233        F: FnMut(&A) -> SasCandidate<D>,
1234    >(
1235        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1236        outbound_device: &D,
1237        addresses: I,
1238        mut get_candidate: F,
1239    ) -> Option<A> {
1240        // Source address selection as defined in RFC 6724 Section 5.
1241        //
1242        // The algorithm operates by defining a partial ordering on available
1243        // source addresses, and choosing one of the best address as defined by
1244        // that ordering (given multiple best addresses, the choice from among
1245        // those is implementation-defined). The partial order is defined in
1246        // terms of a sequence of rules. If a given rule defines an order
1247        // between two addresses, then that is their order. Otherwise, the next
1248        // rule must be consulted, and so on until all of the rules are
1249        // exhausted.
1250
1251        addresses
1252            .map(|item| {
1253                let candidate = get_candidate(&item);
1254                (item, candidate)
1255            })
1256            // Tentative addresses are not considered available to the source
1257            // selection algorithm.
1258            .filter(|(_, candidate)| candidate.assigned)
1259            .max_by(|(_, a), (_, b)| {
1260                select_ipv6_source_address_cmp(remote_ip, outbound_device, a, b)
1261            })
1262            .map(|(item, _candidate)| item)
1263    }
1264
1265    /// Comparison operator used by `select_ipv6_source_address`.
1266    fn select_ipv6_source_address_cmp<D: PartialEq>(
1267        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1268        outbound_device: &D,
1269        a: &SasCandidate<D>,
1270        b: &SasCandidate<D>,
1271    ) -> Ordering {
1272        // TODO(https://fxbug.dev/42123500): Implement rules 4, 5.5, and 6.
1273        let SasCandidate {
1274            addr_sub: a_addr_sub,
1275            assigned: a_assigned,
1276            deprecated: a_deprecated,
1277            temporary: a_temporary,
1278            device: a_device,
1279        } = a;
1280        let SasCandidate {
1281            addr_sub: b_addr_sub,
1282            assigned: b_assigned,
1283            deprecated: b_deprecated,
1284            temporary: b_temporary,
1285            device: b_device,
1286        } = b;
1287
1288        let a_addr = a_addr_sub.addr().into_specified();
1289        let b_addr = b_addr_sub.addr().into_specified();
1290
1291        // Assertions required in order for this implementation to be valid.
1292
1293        // Required by the implementation of Rule 1.
1294        if let Some(remote_ip) = remote_ip {
1295            debug_assert!(!(a_addr == remote_ip && b_addr == remote_ip));
1296        }
1297
1298        // Addresses that are not considered assigned are not valid source
1299        // addresses.
1300        debug_assert!(a_assigned);
1301        debug_assert!(b_assigned);
1302
1303        rule_1(remote_ip, a_addr, b_addr)
1304            .then_with(|| rule_2(remote_ip, a_addr, b_addr))
1305            .then_with(|| rule_3(*a_deprecated, *b_deprecated))
1306            .then_with(|| rule_5(outbound_device, a_device, b_device))
1307            .then_with(|| rule_7(*a_temporary, *b_temporary))
1308            .then_with(|| rule_8(remote_ip, *a_addr_sub, *b_addr_sub))
1309    }
1310
1311    // Assumes that `a` and `b` are not both equal to `remote_ip`.
1312    fn rule_1(
1313        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1314        a: SpecifiedAddr<Ipv6Addr>,
1315        b: SpecifiedAddr<Ipv6Addr>,
1316    ) -> Ordering {
1317        let remote_ip = match remote_ip {
1318            Some(remote_ip) => remote_ip,
1319            None => return Ordering::Equal,
1320        };
1321        if (a == remote_ip) != (b == remote_ip) {
1322            // Rule 1: Prefer same address.
1323            //
1324            // Note that both `a` and `b` cannot be equal to `remote_ip` since
1325            // that would imply that we had added the same address twice to the
1326            // same device.
1327            //
1328            // If `(a == remote_ip) != (b == remote_ip)`, then exactly one of
1329            // them is equal. If this inequality does not hold, then they must
1330            // both be unequal to `remote_ip`. In the first case, we have a tie,
1331            // and in the second case, the rule doesn't apply. In either case,
1332            // we move onto the next rule.
1333            if a == remote_ip { Ordering::Greater } else { Ordering::Less }
1334        } else {
1335            Ordering::Equal
1336        }
1337    }
1338
1339    fn rule_2(
1340        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1341        a: SpecifiedAddr<Ipv6Addr>,
1342        b: SpecifiedAddr<Ipv6Addr>,
1343    ) -> Ordering {
1344        // Scope ordering is defined by the Multicast Scope ID, see
1345        // https://datatracker.ietf.org/doc/html/rfc6724#section-3.1 .
1346        let remote_scope = match remote_ip {
1347            Some(remote_ip) => remote_ip.scope().multicast_scope_id(),
1348            None => return Ordering::Equal,
1349        };
1350        let a_scope = a.scope().multicast_scope_id();
1351        let b_scope = b.scope().multicast_scope_id();
1352        if a_scope < b_scope {
1353            if a_scope < remote_scope { Ordering::Less } else { Ordering::Greater }
1354        } else if a_scope > b_scope {
1355            if b_scope < remote_scope { Ordering::Greater } else { Ordering::Less }
1356        } else {
1357            Ordering::Equal
1358        }
1359    }
1360
1361    fn rule_3(a_deprecated: bool, b_deprecated: bool) -> Ordering {
1362        match (a_deprecated, b_deprecated) {
1363            (true, false) => Ordering::Less,
1364            (true, true) | (false, false) => Ordering::Equal,
1365            (false, true) => Ordering::Greater,
1366        }
1367    }
1368
1369    fn rule_5<D: PartialEq>(outbound_device: &D, a_device: &D, b_device: &D) -> Ordering {
1370        if (a_device == outbound_device) != (b_device == outbound_device) {
1371            // Rule 5: Prefer outgoing interface.
1372            if a_device == outbound_device { Ordering::Greater } else { Ordering::Less }
1373        } else {
1374            Ordering::Equal
1375        }
1376    }
1377
1378    // Prefer temporary addresses following rule 7.
1379    fn rule_7(a_temporary: bool, b_temporary: bool) -> Ordering {
1380        match (a_temporary, b_temporary) {
1381            (true, false) => Ordering::Greater,
1382            (true, true) | (false, false) => Ordering::Equal,
1383            (false, true) => Ordering::Less,
1384        }
1385    }
1386
1387    fn rule_8(
1388        remote_ip: Option<SpecifiedAddr<Ipv6Addr>>,
1389        a: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1390        b: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1391    ) -> Ordering {
1392        let remote_ip = match remote_ip {
1393            Some(remote_ip) => remote_ip,
1394            None => return Ordering::Equal,
1395        };
1396        // Per RFC 6724 Section 2.2:
1397        //
1398        //   We define the common prefix length CommonPrefixLen(S, D) of a
1399        //   source address S and a destination address D as the length of the
1400        //   longest prefix (looking at the most significant, or leftmost, bits)
1401        //   that the two addresses have in common, up to the length of S's
1402        //   prefix (i.e., the portion of the address not including the
1403        //   interface ID).  For example, CommonPrefixLen(fe80::1, fe80::2) is
1404        //   64.
1405        fn common_prefix_len(
1406            src: AddrSubnet<Ipv6Addr, Ipv6DeviceAddr>,
1407            dst: SpecifiedAddr<Ipv6Addr>,
1408        ) -> u8 {
1409            core::cmp::min(src.addr().common_prefix_len(&dst), src.subnet().prefix())
1410        }
1411
1412        // Rule 8: Use longest matching prefix.
1413        //
1414        // Note that, per RFC 6724 Section 5:
1415        //
1416        //   Rule 8 MAY be superseded if the implementation has other means of
1417        //   choosing among source addresses.  For example, if the
1418        //   implementation somehow knows which source address will result in
1419        //   the "best" communications performance.
1420        //
1421        // We don't currently make use of this option, but it's an option for
1422        // the future.
1423        common_prefix_len(a, remote_ip).cmp(&common_prefix_len(b, remote_ip))
1424    }
1425
1426    #[cfg(test)]
1427    mod tests {
1428        use net_declare::net_ip_v6;
1429
1430        use super::*;
1431
1432        #[test]
1433        fn test_select_ipv6_source_address() {
1434            // Test the comparison operator used by `select_ipv6_source_address`
1435            // by separately testing each comparison condition.
1436
1437            let remote = SpecifiedAddr::new(net_ip_v6!("2001:0db8:1::")).unwrap();
1438            let local0 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:2::")).unwrap();
1439            let local1 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:3::")).unwrap();
1440            let link_local_remote = SpecifiedAddr::new(net_ip_v6!("fe80::1:2:42")).unwrap();
1441            let link_local = SpecifiedAddr::new(net_ip_v6!("fe80::1:2:4")).unwrap();
1442            let dev0 = &0;
1443            let dev1 = &1;
1444            let dev2 = &2;
1445
1446            // Rule 1: Prefer same address
1447            assert_eq!(rule_1(Some(remote), remote, local0), Ordering::Greater);
1448            assert_eq!(rule_1(Some(remote), local0, remote), Ordering::Less);
1449            assert_eq!(rule_1(Some(remote), local0, local1), Ordering::Equal);
1450            assert_eq!(rule_1(None, local0, local1), Ordering::Equal);
1451
1452            // Rule 2: Prefer appropriate scope
1453            assert_eq!(rule_2(Some(remote), local0, local1), Ordering::Equal);
1454            assert_eq!(rule_2(Some(remote), local1, local0), Ordering::Equal);
1455            assert_eq!(rule_2(Some(remote), local0, link_local), Ordering::Greater);
1456            assert_eq!(rule_2(Some(remote), link_local, local0), Ordering::Less);
1457            assert_eq!(rule_2(Some(link_local_remote), local0, link_local), Ordering::Less);
1458            assert_eq!(rule_2(Some(link_local_remote), link_local, local0), Ordering::Greater);
1459            assert_eq!(rule_1(None, local0, link_local), Ordering::Equal);
1460
1461            // Rule 3: Avoid deprecated states
1462            assert_eq!(rule_3(false, true), Ordering::Greater);
1463            assert_eq!(rule_3(true, false), Ordering::Less);
1464            assert_eq!(rule_3(true, true), Ordering::Equal);
1465            assert_eq!(rule_3(false, false), Ordering::Equal);
1466
1467            // Rule 5: Prefer outgoing interface
1468            assert_eq!(rule_5(dev0, dev0, dev2), Ordering::Greater);
1469            assert_eq!(rule_5(dev0, dev2, dev0), Ordering::Less);
1470            assert_eq!(rule_5(dev0, dev0, dev0), Ordering::Equal);
1471            assert_eq!(rule_5(dev0, dev2, dev2), Ordering::Equal);
1472
1473            // Rule 7: Prefer temporary address.
1474            assert_eq!(rule_7(true, false), Ordering::Greater);
1475            assert_eq!(rule_7(false, true), Ordering::Less);
1476            assert_eq!(rule_7(true, true), Ordering::Equal);
1477            assert_eq!(rule_7(false, false), Ordering::Equal);
1478
1479            // Rule 8: Use longest matching prefix.
1480            {
1481                let new_addr_entry = |addr, prefix_len| AddrSubnet::new(addr, prefix_len).unwrap();
1482
1483                // First, test that the longest prefix match is preferred when
1484                // using addresses whose common prefix length is shorter than
1485                // the subnet prefix length.
1486
1487                // 4 leading 0x01 bytes.
1488                let remote = SpecifiedAddr::new(net_ip_v6!("1111::")).unwrap();
1489                // 3 leading 0x01 bytes.
1490                let local0 = new_addr_entry(net_ip_v6!("1110::"), 64);
1491                // 2 leading 0x01 bytes.
1492                let local1 = new_addr_entry(net_ip_v6!("1100::"), 64);
1493
1494                assert_eq!(rule_8(Some(remote), local0, local1), Ordering::Greater);
1495                assert_eq!(rule_8(Some(remote), local1, local0), Ordering::Less);
1496                assert_eq!(rule_8(Some(remote), local0, local0), Ordering::Equal);
1497                assert_eq!(rule_8(Some(remote), local1, local1), Ordering::Equal);
1498                assert_eq!(rule_8(None, local0, local1), Ordering::Equal);
1499
1500                // Second, test that the common prefix length is capped at the
1501                // subnet prefix length.
1502
1503                // 3 leading 0x01 bytes, but a subnet prefix length of 8 (1 byte).
1504                let local0 = new_addr_entry(net_ip_v6!("1110::"), 8);
1505                // 2 leading 0x01 bytes, but a subnet prefix length of 8 (1 byte).
1506                let local1 = new_addr_entry(net_ip_v6!("1100::"), 8);
1507
1508                assert_eq!(rule_8(Some(remote), local0, local1), Ordering::Equal);
1509                assert_eq!(rule_8(Some(remote), local1, local0), Ordering::Equal);
1510                assert_eq!(rule_8(Some(remote), local0, local0), Ordering::Equal);
1511                assert_eq!(rule_8(Some(remote), local1, local1), Ordering::Equal);
1512                assert_eq!(rule_8(None, local0, local1), Ordering::Equal);
1513            }
1514
1515            {
1516                let new_addr_entry = |addr, device| SasCandidate {
1517                    addr_sub: AddrSubnet::new(addr, 128).unwrap(),
1518                    deprecated: false,
1519                    assigned: true,
1520                    temporary: false,
1521                    device,
1522                };
1523
1524                // If no rules apply, then the two address entries are equal.
1525                assert_eq!(
1526                    select_ipv6_source_address_cmp(
1527                        Some(remote),
1528                        dev0,
1529                        &new_addr_entry(*local0, *dev1),
1530                        &new_addr_entry(*local1, *dev2),
1531                    ),
1532                    Ordering::Equal
1533                );
1534            }
1535        }
1536
1537        #[test]
1538        fn test_select_ipv6_source_address_no_remote() {
1539            // Verify that source address selection correctly applies all
1540            // applicable rules when the remote is `None`.
1541            let dev0 = &0;
1542            let dev1 = &1;
1543            let dev2 = &2;
1544
1545            let local0 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:2::")).unwrap();
1546            let local1 = SpecifiedAddr::new(net_ip_v6!("2001:0db8:3::")).unwrap();
1547
1548            let new_addr_entry = |addr, deprecated, device| SasCandidate {
1549                addr_sub: AddrSubnet::new(addr, 128).unwrap(),
1550                deprecated,
1551                assigned: true,
1552                temporary: false,
1553                device,
1554            };
1555
1556            // Verify that Rule 3 still applies (avoid deprecated states).
1557            assert_eq!(
1558                select_ipv6_source_address_cmp(
1559                    None,
1560                    dev0,
1561                    &new_addr_entry(*local0, false, *dev1),
1562                    &new_addr_entry(*local1, true, *dev2),
1563                ),
1564                Ordering::Greater
1565            );
1566
1567            // Verify that Rule 5 still applies (Prefer outgoing interface).
1568            assert_eq!(
1569                select_ipv6_source_address_cmp(
1570                    None,
1571                    dev0,
1572                    &new_addr_entry(*local0, false, *dev0),
1573                    &new_addr_entry(*local1, false, *dev1),
1574                ),
1575                Ordering::Greater
1576            );
1577        }
1578    }
1579}
1580
1581/// Test fake implementations of the traits defined in the `socket` module.
1582#[cfg(any(test, feature = "testutils"))]
1583pub(crate) mod testutil {
1584    use alloc::boxed::Box;
1585    use alloc::vec::Vec;
1586    use core::num::NonZeroUsize;
1587
1588    use crate::internal::types::RoutePreference;
1589    use derivative::Derivative;
1590    use net_types::ip::{GenericOverIp, IpAddr, IpAddress, Ipv4, Ipv4Addr, Ipv6, Subnet};
1591    use net_types::{MulticastAddr, Witness as _};
1592    use netstack3_base::testutil::{FakeCoreCtx, FakeStrongDeviceId, FakeWeakDeviceId};
1593    use netstack3_base::{SendFrameContext, SendFrameError};
1594    use netstack3_filter::Tuple;
1595    use netstack3_hashmap::HashMap;
1596
1597    use super::*;
1598    use crate::internal::base::{
1599        BaseTransportIpContext, DEFAULT_HOP_LIMITS, HopLimits, MulticastMembershipHandler,
1600    };
1601    use crate::internal::routing::testutil::FakeIpRoutingCtx;
1602    use crate::internal::routing::{self, RoutingTable};
1603    use crate::internal::types::{Destination, Entry, Metric, RawMetric};
1604
    /// A fake implementation of the traits required by the transport layer from
    /// the IP layer.
    ///
    /// Route resolution is backed by a real [`RoutingTable`] while device state
    /// (assigned addresses, hop limits, multicast memberships) is kept in
    /// [`FakeDeviceState`] entries.
    #[derive(Derivative, GenericOverIp)]
    #[generic_over_ip(I, Ip)]
    #[derivative(Default(bound = ""))]
    pub struct FakeIpSocketCtx<I: Ip, D> {
        /// The routing table consulted when looking up routes.
        pub(crate) table: RoutingTable<I, D>,
        /// The fake routing context handed to `table` on every route lookup.
        forwarding: FakeIpRoutingCtx<D>,
        /// Per-device fake state, keyed by device identifier.
        devices: HashMap<D, FakeDeviceState<I>>,
    }
1615
    /// A trait enabling [`FakeIpSocketCtx`]'s implementations for
    /// [`FakeCoreCtx`] with types that hold a [`FakeIpSocketCtx`] internally.
    pub trait InnerFakeIpSocketCtx<I: Ip, D> {
        /// Gets a mutable reference to the inner fake context.
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D>;
    }
1622
    /// A [`FakeIpSocketCtx`] is trivially its own inner context.
    impl<I: Ip, D> InnerFakeIpSocketCtx<I, D> for FakeIpSocketCtx<I, D> {
        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
            self
        }
    }
1628
1629    impl<I: IpExt, D: FakeStrongDeviceId, BC> BaseTransportIpContext<I, BC> for FakeIpSocketCtx<I, D> {
1630        fn get_default_hop_limits(&mut self, device: Option<&D>) -> HopLimits {
1631            device.map_or(DEFAULT_HOP_LIMITS, |device| {
1632                let hop_limit = self.get_device_state(device).default_hop_limit;
1633                HopLimits { unicast: hop_limit, multicast: DEFAULT_HOP_LIMITS.multicast }
1634            })
1635        }
1636
1637        type DevicesWithAddrIter<'a> = Box<dyn Iterator<Item = D> + 'a>;
1638
1639        fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
1640            &mut self,
1641            addr: SpecifiedAddr<I::Addr>,
1642            cb: F,
1643        ) -> O {
1644            cb(Box::new(self.devices.iter().filter_map(move |(device, state)| {
1645                state.addresses.contains(&addr).then(|| device.clone())
1646            })))
1647        }
1648
1649        fn get_original_destination(&mut self, _tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
1650            unimplemented!()
1651        }
1652    }
1653
    /// Device identifiers of the fake context are the fake strong ID `D` and
    /// its associated weak counterpart.
    impl<I: IpExt, D: FakeStrongDeviceId> DeviceIdContext<AnyDevice> for FakeIpSocketCtx<I, D> {
        type DeviceId = D;
        type WeakDeviceId = D::Weak;
    }
1658
1659    impl<I, State, D, Meta, BC> IpSocketHandler<I, BC> for FakeCoreCtx<State, Meta, D>
1660    where
1661        I: IpExt + FilterIpExt,
1662        State: InnerFakeIpSocketCtx<I, D>,
1663        D: FakeStrongDeviceId,
1664        BC: TxMetadataBindingsTypes,
1665        FakeCoreCtx<State, Meta, D>:
1666            SendFrameContext<BC, SendIpPacketMeta<I, Self::DeviceId, SpecifiedAddr<I::Addr>>>,
1667    {
1668        fn new_ip_socket<O>(
1669            &mut self,
1670            _bindings_ctx: &mut BC,
1671            args: IpSocketArgs<'_, Self::DeviceId, I, O>,
1672        ) -> Result<IpSock<I, Self::WeakDeviceId>, IpSockCreationError>
1673        where
1674            O: RouteResolutionOptions<I>,
1675        {
1676            self.state.fake_ip_socket_ctx_mut().new_ip_socket(args)
1677        }
1678
1679        fn send_ip_packet<S, O>(
1680            &mut self,
1681            bindings_ctx: &mut BC,
1682            socket: &IpSock<I, Self::WeakDeviceId>,
1683            body: S,
1684            options: &O,
1685            // NB: Tx metadata plumbing is not supported for fake socket
1686            // contexts. Drop at the end of the scope.
1687            _tx_meta: BC::TxMetadata,
1688        ) -> Result<(), IpSockSendError>
1689        where
1690            S: TransportPacketSerializer<I>,
1691            S::Buffer: BufferMut,
1692            O: SendOptions<I> + RouteResolutionOptions<I>,
1693        {
1694            let meta = self.state.fake_ip_socket_ctx_mut().resolve_send_meta(socket, options)?;
1695            self.send_frame(bindings_ctx, meta, body).or_else(
1696                |SendFrameError { serializer: _, error }| IpSockSendError::from_send_frame(error),
1697            )
1698        }
1699
1700        fn confirm_reachable<O>(
1701            &mut self,
1702            _bindings_ctx: &mut BC,
1703            _socket: &IpSock<I, Self::WeakDeviceId>,
1704            _options: &O,
1705        ) {
1706        }
1707    }
1708
1709    impl<I: IpExt, D: FakeStrongDeviceId, BC> MulticastMembershipHandler<I, BC>
1710        for FakeIpSocketCtx<I, D>
1711    {
1712        fn join_multicast_group(
1713            &mut self,
1714            _bindings_ctx: &mut BC,
1715            device: &Self::DeviceId,
1716            addr: MulticastAddr<<I as Ip>::Addr>,
1717        ) {
1718            let value = self.get_device_state_mut(device).multicast_groups.entry(addr).or_insert(0);
1719            *value = value.checked_add(1).unwrap();
1720        }
1721
1722        fn leave_multicast_group(
1723            &mut self,
1724            _bindings_ctx: &mut BC,
1725            device: &Self::DeviceId,
1726            addr: MulticastAddr<<I as Ip>::Addr>,
1727        ) {
1728            let value = self
1729                .get_device_state_mut(device)
1730                .multicast_groups
1731                .get_mut(&addr)
1732                .unwrap_or_else(|| panic!("no entry for {addr} on {device:?}"));
1733            *value = value.checked_sub(1).unwrap();
1734        }
1735
1736        fn select_device_for_multicast_group(
1737            &mut self,
1738            addr: MulticastAddr<<I as Ip>::Addr>,
1739            _marks: &Marks,
1740        ) -> Result<Self::DeviceId, ResolveRouteError> {
1741            let remote_ip = SocketIpAddr::new_from_multicast(addr);
1742            self.lookup_route(None, None, remote_ip, /* transparent */ false)
1743                .map(|ResolvedRoute { device, .. }| device)
1744        }
1745    }
1746
1747    impl<I, BC, D, State, Meta> BaseTransportIpContext<I, BC> for FakeCoreCtx<State, Meta, D>
1748    where
1749        I: IpExt + FilterIpExt,
1750        D: FakeStrongDeviceId,
1751        State: InnerFakeIpSocketCtx<I, D>,
1752        BC: TxMetadataBindingsTypes,
1753        Self: IpSocketHandler<I, BC, DeviceId = D, WeakDeviceId = FakeWeakDeviceId<D>>,
1754    {
1755        type DevicesWithAddrIter<'a> = Box<dyn Iterator<Item = D> + 'a>;
1756
1757        fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
1758            &mut self,
1759            addr: SpecifiedAddr<I::Addr>,
1760            cb: F,
1761        ) -> O {
1762            BaseTransportIpContext::<I, BC>::with_devices_with_assigned_addr(
1763                self.state.fake_ip_socket_ctx_mut(),
1764                addr,
1765                cb,
1766            )
1767        }
1768
1769        fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits {
1770            BaseTransportIpContext::<I, BC>::get_default_hop_limits(
1771                self.state.fake_ip_socket_ctx_mut(),
1772                device,
1773            )
1774        }
1775
1776        fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
1777            BaseTransportIpContext::<I, BC>::get_original_destination(
1778                self.state.fake_ip_socket_ctx_mut(),
1779                tuple,
1780            )
1781        }
1782    }
1783
    /// A fake context providing [`IpSocketHandler`] for tests.
    ///
    /// Holds one [`FakeIpSocketCtx`] per IP version over the same device ID
    /// type `D`.
    #[derive(Derivative)]
    #[derivative(Default(bound = ""))]
    pub struct FakeDualStackIpSocketCtx<D> {
        /// The IPv4 fake socket context.
        v4: FakeIpSocketCtx<Ipv4, D>,
        /// The IPv6 fake socket context.
        v6: FakeIpSocketCtx<Ipv6, D>,
    }
1791
1792    impl<D: FakeStrongDeviceId> FakeDualStackIpSocketCtx<D> {
1793        /// Creates a new [`FakeDualStackIpSocketCtx`] with `devices`.
1794        pub fn new<A: Into<SpecifiedAddr<IpAddr>>>(
1795            devices: impl IntoIterator<Item = FakeDeviceConfig<D, A>>,
1796        ) -> Self {
1797            let partition =
1798                |v: Vec<A>| -> (Vec<SpecifiedAddr<Ipv4Addr>>, Vec<SpecifiedAddr<Ipv6Addr>>) {
1799                    v.into_iter().fold((Vec::new(), Vec::new()), |(mut v4, mut v6), i| {
1800                        match IpAddr::from(i.into()) {
1801                            IpAddr::V4(a) => v4.push(a),
1802                            IpAddr::V6(a) => v6.push(a),
1803                        }
1804                        (v4, v6)
1805                    })
1806                };
1807
1808            let (v4, v6): (Vec<_>, Vec<_>) = devices
1809                .into_iter()
1810                .map(|FakeDeviceConfig { device, local_ips, remote_ips }| {
1811                    let (local_v4, local_v6) = partition(local_ips);
1812                    let (remote_v4, remote_v6) = partition(remote_ips);
1813                    (
1814                        FakeDeviceConfig {
1815                            device: device.clone(),
1816                            local_ips: local_v4,
1817                            remote_ips: remote_v4,
1818                        },
1819                        FakeDeviceConfig { device, local_ips: local_v6, remote_ips: remote_v6 },
1820                    )
1821                })
1822                .unzip();
1823            Self { v4: FakeIpSocketCtx::new(v4), v6: FakeIpSocketCtx::new(v6) }
1824        }
1825
1826        /// Returns the [`FakeIpSocketCtx`] for IP version `I`.
1827        pub fn inner_mut<I: Ip>(&mut self) -> &mut FakeIpSocketCtx<I, D> {
1828            I::map_ip_out(self, |s| &mut s.v4, |s| &mut s.v6)
1829        }
1830
1831        fn inner<I: Ip>(&self) -> &FakeIpSocketCtx<I, D> {
1832            I::map_ip_out(self, |s| &s.v4, |s| &s.v6)
1833        }
1834
1835        /// Adds a fake direct route to `ip` through `device`.
1836        pub fn add_route(&mut self, device: D, ip: SpecifiedAddr<IpAddr>) {
1837            match IpAddr::from(ip) {
1838                IpAddr::V4(ip) => {
1839                    routing::testutil::add_on_link_routing_entry(&mut self.v4.table, ip, device)
1840                }
1841                IpAddr::V6(ip) => {
1842                    routing::testutil::add_on_link_routing_entry(&mut self.v6.table, ip, device)
1843                }
1844            }
1845        }
1846
1847        /// Adds a fake route to `subnet` through `device`.
1848        pub fn add_subnet_route<A: IpAddress>(&mut self, device: D, subnet: Subnet<A>) {
1849            let entry = Entry {
1850                subnet,
1851                device,
1852                gateway: None,
1853                metric: Metric::ExplicitMetric(RawMetric(0)),
1854                route_preference: RoutePreference::Medium,
1855            };
1856            A::Version::map_ip::<_, ()>(
1857                entry,
1858                |entry_v4| {
1859                    let _ = routing::testutil::add_entry(&mut self.v4.table, entry_v4)
1860                        .expect("Failed to add route");
1861                },
1862                |entry_v6| {
1863                    let _ = routing::testutil::add_entry(&mut self.v6.table, entry_v6)
1864                        .expect("Failed to add route");
1865                },
1866            );
1867        }
1868
1869        /// Returns a mutable reference to fake device state.
1870        pub fn get_device_state_mut<I: IpExt>(&mut self, device: &D) -> &mut FakeDeviceState<I> {
1871            self.inner_mut::<I>().get_device_state_mut(device)
1872        }
1873
1874        /// Returns the fake multicast memberships.
1875        pub fn multicast_memberships<I: IpExt>(
1876            &self,
1877        ) -> HashMap<(D, MulticastAddr<I::Addr>), NonZeroUsize> {
1878            self.inner::<I>().multicast_memberships()
1879        }
1880    }
1881
1882    impl<I: IpExt, S: InnerFakeIpSocketCtx<I, D>, Meta, D: FakeStrongDeviceId, BC>
1883        MulticastMembershipHandler<I, BC> for FakeCoreCtx<S, Meta, D>
1884    {
1885        fn join_multicast_group(
1886            &mut self,
1887            bindings_ctx: &mut BC,
1888            device: &Self::DeviceId,
1889            addr: MulticastAddr<<I as Ip>::Addr>,
1890        ) {
1891            MulticastMembershipHandler::<I, BC>::join_multicast_group(
1892                self.state.fake_ip_socket_ctx_mut(),
1893                bindings_ctx,
1894                device,
1895                addr,
1896            )
1897        }
1898
1899        fn leave_multicast_group(
1900            &mut self,
1901            bindings_ctx: &mut BC,
1902            device: &Self::DeviceId,
1903            addr: MulticastAddr<<I as Ip>::Addr>,
1904        ) {
1905            MulticastMembershipHandler::<I, BC>::leave_multicast_group(
1906                self.state.fake_ip_socket_ctx_mut(),
1907                bindings_ctx,
1908                device,
1909                addr,
1910            )
1911        }
1912
1913        fn select_device_for_multicast_group(
1914            &mut self,
1915            addr: MulticastAddr<<I as Ip>::Addr>,
1916            marks: &Marks,
1917        ) -> Result<Self::DeviceId, ResolveRouteError> {
1918            MulticastMembershipHandler::<I, BC>::select_device_for_multicast_group(
1919                self.state.fake_ip_socket_ctx_mut(),
1920                addr,
1921                marks,
1922            )
1923        }
1924    }
1925
1926    impl<I: Ip, D, State: InnerFakeIpSocketCtx<I, D>, Meta> InnerFakeIpSocketCtx<I, D>
1927        for FakeCoreCtx<State, Meta, D>
1928    {
1929        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
1930            self.state.fake_ip_socket_ctx_mut()
1931        }
1932    }
1933
1934    impl<I: Ip, D: FakeStrongDeviceId> InnerFakeIpSocketCtx<I, D> for FakeDualStackIpSocketCtx<D> {
1935        fn fake_ip_socket_ctx_mut(&mut self) -> &mut FakeIpSocketCtx<I, D> {
1936            self.inner_mut::<I>()
1937        }
1938    }
1939
    /// A device configuration for fake socket contexts.
    ///
    /// `A` is the address type; [`FakeIpSocketCtx::new`] takes configs whose
    /// addresses are `SpecifiedAddr<I::Addr>`.
    #[derive(Clone, GenericOverIp)]
    #[generic_over_ip()]
    pub struct FakeDeviceConfig<D, A> {
        /// The device.
        pub device: D,
        /// The device's local IPs.
        ///
        /// These become the device's assigned addresses.
        pub local_ips: Vec<A>,
        /// The remote IPs reachable from this device.
        ///
        /// An on-link route through `device` is installed for each of them.
        pub remote_ips: Vec<A>,
    }
1951
    /// State associated with a fake device in [`FakeIpSocketCtx`].
    pub struct FakeDeviceState<I: Ip> {
        /// The default hop limit used by the device.
        pub default_hop_limit: NonZeroU8,
        /// The assigned device addresses.
        pub addresses: Vec<SpecifiedAddr<I::Addr>>,
        /// The joined multicast groups.
        ///
        /// Maps each group to the number of outstanding joins. Leaving a
        /// group only decrements the count, so an entry may remain with a
        /// count of zero.
        pub multicast_groups: HashMap<MulticastAddr<I::Addr>, usize>,
    }
1961
1962    impl<I: Ip> FakeDeviceState<I> {
1963        /// Returns whether this fake device has joined multicast group `addr`.
1964        pub fn is_in_multicast_group(&self, addr: &MulticastAddr<I::Addr>) -> bool {
1965            self.multicast_groups.get(addr).is_some_and(|v| *v != 0)
1966        }
1967    }
1968
1969    impl<I: IpExt, D: FakeStrongDeviceId> FakeIpSocketCtx<I, D> {
1970        /// Creates a new `FakeIpSocketCtx` with the given device
1971        /// configs.
1972        pub fn new(
1973            device_configs: impl IntoIterator<Item = FakeDeviceConfig<D, SpecifiedAddr<I::Addr>>>,
1974        ) -> Self {
1975            let mut table = RoutingTable::default();
1976            let mut devices = HashMap::default();
1977            for FakeDeviceConfig { device, local_ips, remote_ips } in device_configs {
1978                for addr in remote_ips {
1979                    routing::testutil::add_on_link_routing_entry(&mut table, addr, device.clone())
1980                }
1981                let state = FakeDeviceState {
1982                    default_hop_limit: DEFAULT_HOP_LIMITS.unicast,
1983                    addresses: local_ips,
1984                    multicast_groups: Default::default(),
1985                };
1986                assert!(
1987                    devices.insert(device.clone(), state).is_none(),
1988                    "duplicate entries for {device:?}",
1989                );
1990            }
1991
1992            Self { table, devices, forwarding: Default::default() }
1993        }
1994
1995        /// Returns an immutable reference to the fake device state.
1996        pub fn get_device_state(&self, device: &D) -> &FakeDeviceState<I> {
1997            self.devices.get(device).unwrap_or_else(|| panic!("no device {device:?}"))
1998        }
1999
2000        /// Returns a mutable reference to the fake device state.
2001        pub fn get_device_state_mut(&mut self, device: &D) -> &mut FakeDeviceState<I> {
2002            self.devices.get_mut(device).unwrap_or_else(|| panic!("no device {device:?}"))
2003        }
2004
2005        pub(crate) fn multicast_memberships(
2006            &self,
2007        ) -> HashMap<(D, MulticastAddr<I::Addr>), NonZeroUsize> {
2008            self.devices
2009                .iter()
2010                .map(|(device, state)| {
2011                    state.multicast_groups.iter().filter_map(|(group, count)| {
2012                        NonZeroUsize::new(*count).map(|count| ((device.clone(), *group), count))
2013                    })
2014                })
2015                .flatten()
2016                .collect()
2017        }
2018
2019        fn new_ip_socket<O>(
2020            &mut self,
2021            args: IpSocketArgs<'_, D, I, O>,
2022        ) -> Result<IpSock<I, D::Weak>, IpSockCreationError>
2023        where
2024            O: RouteResolutionOptions<I>,
2025        {
2026            let IpSocketArgs { device, local_ip, remote_ip, proto, options } = args;
2027            let device = device
2028                .as_ref()
2029                .map(|d| d.as_strong_ref().ok_or(ResolveRouteError::Unreachable))
2030                .transpose()?;
2031            let device = device.as_ref().map(|d| d.as_ref());
2032            let resolved_route =
2033                self.lookup_route(device, local_ip, remote_ip, options.transparent())?;
2034            Ok(new_ip_socket(device, resolved_route, remote_ip, proto))
2035        }
2036
2037        fn lookup_route(
2038            &mut self,
2039            device: Option<&D>,
2040            local_ip: Option<IpDeviceAddr<I::Addr>>,
2041            addr: RoutableIpAddr<I::Addr>,
2042            transparent: bool,
2043        ) -> Result<ResolvedRoute<I, D>, ResolveRouteError> {
2044            let Self { table, devices, forwarding } = self;
2045            let (destination, ()) = table
2046                .lookup_filter_map(forwarding, device, addr.addr(), |_, d| match &local_ip {
2047                    None => Some(()),
2048                    Some(local_ip) => {
2049                        if transparent {
2050                            return Some(());
2051                        }
2052                        devices.get(d).and_then(|state| {
2053                            state.addresses.contains(local_ip.as_ref()).then_some(())
2054                        })
2055                    }
2056                })
2057                .next()
2058                .ok_or(ResolveRouteError::Unreachable)?;
2059
2060            let Destination { device, next_hop } = destination;
2061            let mut addrs = devices.get(device).unwrap().addresses.iter();
2062            let local_ip = match local_ip {
2063                None => {
2064                    let addr = addrs.next().ok_or(ResolveRouteError::NoSrcAddr)?;
2065                    IpDeviceAddr::new(addr.get()).expect("not valid device addr")
2066                }
2067                Some(local_ip) => {
2068                    if !transparent {
2069                        // We already constrained the set of devices so this
2070                        // should be a given.
2071                        assert!(
2072                            addrs.any(|a| a.get() == local_ip.addr()),
2073                            "didn't find IP {:?} in {:?}",
2074                            local_ip,
2075                            addrs.collect::<Vec<_>>()
2076                        );
2077                    }
2078                    local_ip
2079                }
2080            };
2081
2082            Ok(ResolvedRoute {
2083                src_addr: local_ip,
2084                device: device.clone(),
2085                local_delivery_device: None,
2086                next_hop,
2087                // NB: Keep unit tests simple and skip internal forwarding
2088                // logic. Instead, this is verified by integration tests.
2089                internal_forwarding: InternalForwarding::NotUsed,
2090            })
2091        }
2092
2093        fn resolve_send_meta<O>(
2094            &mut self,
2095            socket: &IpSock<I, D::Weak>,
2096            options: &O,
2097        ) -> Result<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>, IpSockSendError>
2098        where
2099            O: SendOptions<I> + RouteResolutionOptions<I>,
2100        {
2101            let IpSockDefinition { remote_ip, local_ip, device, proto } = &socket.definition;
2102            let device = device
2103                .as_ref()
2104                .map(|d| d.upgrade().ok_or(ResolveRouteError::Unreachable))
2105                .transpose()?;
2106            let ResolvedRoute {
2107                src_addr,
2108                device,
2109                next_hop,
2110                local_delivery_device: _,
2111                internal_forwarding: _,
2112            } = self.lookup_route(
2113                device.as_ref(),
2114                Some(*local_ip),
2115                *remote_ip,
2116                options.transparent(),
2117            )?;
2118
2119            let remote_ip: &SpecifiedAddr<_> = remote_ip.as_ref();
2120
2121            let destination = IpPacketDestination::from_next_hop(next_hop, *remote_ip);
2122            Ok(SendIpPacketMeta {
2123                device,
2124                src_ip: src_addr.into(),
2125                dst_ip: *remote_ip,
2126                destination,
2127                proto: *proto,
2128                ttl: options.hop_limit(remote_ip),
2129                mtu: options.mtu(),
2130                dscp_and_ecn: DscpAndEcn::default(),
2131            })
2132        }
2133    }
2134}