netstack3_ip/
base.rs

1// Copyright 2018 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use alloc::boxed::Box;
6use alloc::vec::Vec;
7use core::convert::Infallible as Never;
8use core::fmt::Debug;
9use core::hash::Hash;
10use core::marker::PhantomData;
11use core::num::NonZeroU8;
12use core::ops::ControlFlow;
13#[cfg(test)]
14use core::ops::DerefMut;
15use core::sync::atomic::{self, AtomicU16};
16
17use derivative::Derivative;
18use explicit::ResultExt as _;
19use lock_order::lock::{OrderedLockAccess, OrderedLockRef};
20use log::{debug, trace};
21use net_types::ip::{
22    GenericOverIp, Ip, Ipv4, Ipv4Addr, Ipv4SourceAddr, Ipv6, Ipv6Addr, Ipv6SourceAddr, Mtu, Subnet,
23};
24use net_types::{
25    LinkLocalAddress, MulticastAddr, MulticastAddress, NonMappedAddr, NonMulticastAddr,
26    SpecifiedAddr, SpecifiedAddress as _, Witness,
27};
28use netstack3_base::socket::{SocketCookie, SocketIpAddrExt as _};
29use netstack3_base::sync::{Mutex, PrimaryRc, RwLock, StrongRc, WeakRc};
30use netstack3_base::{
31    AnyDevice, BroadcastIpExt, CoreTimerContext, Counter, CounterCollectionSpec, CounterContext,
32    DeviceIdContext, DeviceIdentifier as _, ErrorAndSerializer, EventContext, FrameDestination,
33    HandleableTimer, InstantContext, InterfaceProperties, IpAddressId, IpDeviceAddr,
34    IpDeviceAddressIdContext, IpExt, MarkDomain, Marks, Matcher as _, MatcherBindingsTypes,
35    NestedIntoCoreTimerCtx, NotFoundError, ResourceCounterContext, RngContext,
36    SendFrameErrorReason, StrongDeviceIdentifier, TimerBindingsTypes, TimerContext, TimerHandler,
37    TxMetadata as _, TxMetadataBindingsTypes, WeakIpAddressId, WrapBroadcastMarker,
38};
39use netstack3_filter::{
40    self as filter, ConnectionDirection, ConntrackConnection, FilterBindingsContext,
41    FilterBindingsTypes, FilterHandler as _, FilterIpContext, FilterIpExt, FilterIpMetadata,
42    FilterIpPacket, FilterPacketMetadata, FilterTimerId, ForwardedPacket, IngressVerdict, IpPacket,
43    MarkAction, MaybeTransportPacket as _, TransportPacketSerializer, Tuple, WeakConnectionError,
44    WeakConntrackConnection,
45};
46use netstack3_hashmap::HashMap;
47use packet::{
48    Buf, BufferMut, GrowBuffer, LayoutBufferAlloc, PacketBuilder as _, PacketConstraints,
49    ParseBuffer, ParseBufferMut, ParseMetadata, SerializeError, Serializer as _,
50};
51use packet_formats::error::IpParseError;
52use packet_formats::ip::{DscpAndEcn, IpPacket as _, IpPacketBuilder as _};
53use packet_formats::ipv4::{Ipv4FragmentType, Ipv4Packet};
54use packet_formats::ipv6::Ipv6Packet;
55use thiserror::Error;
56use zerocopy::SplitByteSlice;
57
58use crate::internal::counters::{IpCounters, IpCountersIpExt};
59use crate::internal::device::opaque_iid::IidSecret;
60use crate::internal::device::slaac::SlaacCounters;
61use crate::internal::device::state::{
62    IpAddressData, IpAddressFlags, IpDeviceStateBindingsTypes, IpDeviceStateIpExt, WeakAddressId,
63};
64use crate::internal::device::{
65    self, IpDeviceAddressContext, IpDeviceBindingsContext, IpDeviceIpExt, IpDeviceSendContext,
66};
67use crate::internal::fragmentation::{FragmentableIpSerializer, FragmentationIpExt, IpFragmenter};
68use crate::internal::gmp::GmpQueryHandler;
69use crate::internal::gmp::igmp::IgmpCounters;
70use crate::internal::gmp::mld::MldCounters;
71use crate::internal::icmp::counters::IcmpCountersIpExt;
72use crate::internal::icmp::{
73    IcmpBindingsTypes, IcmpErrorHandler, IcmpHandlerIpExt, Icmpv4Error, Icmpv4ErrorKind,
74    Icmpv4State, Icmpv4StateBuilder, Icmpv6ErrorKind, Icmpv6State, Icmpv6StateBuilder,
75};
76use crate::internal::ipv6::Ipv6PacketAction;
77use crate::internal::local_delivery::{
78    IpHeaderInfo, Ipv4HeaderInfo, Ipv6HeaderInfo, LocalDeliveryPacketInfo, ReceiveIpPacketMeta,
79    TransparentLocalDelivery,
80};
81use crate::internal::multicast_forwarding::counters::MulticastForwardingCounters;
82use crate::internal::multicast_forwarding::route::{
83    MulticastRouteIpExt, MulticastRouteTarget, MulticastRouteTargets,
84};
85use crate::internal::multicast_forwarding::state::{
86    MulticastForwardingState, MulticastForwardingStateContext,
87};
88use crate::internal::multicast_forwarding::{
89    MulticastForwardingBindingsTypes, MulticastForwardingDeviceContext, MulticastForwardingEvent,
90    MulticastForwardingTimerId,
91};
92use crate::internal::path_mtu::{PmtuBindingsTypes, PmtuCache, PmtuTimerId};
93use crate::internal::raw::counters::RawIpSocketCounters;
94use crate::internal::raw::{RawIpSocketHandler, RawIpSocketMap, RawIpSocketsBindingsTypes};
95use crate::internal::reassembly::{
96    FragmentBindingsTypes, FragmentHandler, FragmentProcessingState, FragmentTimerId,
97    FragmentablePacket, IpPacketFragmentCache, ReassemblyIpExt,
98};
99use crate::internal::routing::rules::{Rule, RuleAction, RuleInput, RulesTable};
100use crate::internal::routing::{
101    IpRoutingBindingsTypes, IpRoutingDeviceContext, NonLocalSrcAddrPolicy, PacketOrigin,
102    RoutingTable,
103};
104use crate::internal::socket::{IpSocketBindingsContext, IpSocketContext, IpSocketHandler};
105use crate::internal::types::{
106    self, Destination, InternalForwarding, NextHop, ResolvedRoute, RoutableIpAddr,
107};
108use crate::internal::{ipv6, multicast_forwarding};
109
110#[cfg(test)]
111mod tests;
112
/// Default IPv4 TTL.
///
/// Also used as the unicast entry of [`DEFAULT_HOP_LIMITS`].
pub const DEFAULT_TTL: NonZeroU8 = NonZeroU8::new(64).expect("64 is non-zero");
115
/// The hop limits applied to outgoing packets, split by the kind of
/// destination (unicast or multicast).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct HopLimits {
    /// Hop limit for packets sent to unicast destinations.
    pub unicast: NonZeroU8,
    /// Hop limit for packets sent to multicast destinations.
    pub multicast: NonZeroU8,
}
123
124/// Default hop limits for sockets.
125pub const DEFAULT_HOP_LIMITS: HopLimits =
126    HopLimits { unicast: DEFAULT_TTL, multicast: NonZeroU8::new(1).unwrap() };
127
128/// The IPv6 subnet that contains all addresses; `::/0`.
129// Safe because 0 is less than the number of IPv6 address bits.
130pub const IPV6_DEFAULT_SUBNET: Subnet<Ipv6Addr> =
131    unsafe { Subnet::new_unchecked(Ipv6::UNSPECIFIED_ADDRESS, 0) };
132
/// The reasons a transport-layer protocol can give for rejecting a received
/// packet.
#[derive(Debug)]
pub enum TransportReceiveError {
    /// The packet's transport protocol is not supported.
    ProtocolUnsupported,
    /// The packet was addressed to a port that is unreachable.
    PortUnreachable,
}
140
141impl TransportReceiveError {
142    fn into_icmpv4_error(self, header_len: usize) -> Icmpv4Error {
143        let kind = match self {
144            TransportReceiveError::ProtocolUnsupported => Icmpv4ErrorKind::ProtocolUnreachable,
145            TransportReceiveError::PortUnreachable => Icmpv4ErrorKind::PortUnreachable,
146        };
147        Icmpv4Error { kind, header_len }
148    }
149
150    fn into_icmpv6_error(self, header_len: usize) -> Icmpv6ErrorKind {
151        match self {
152            TransportReceiveError::ProtocolUnsupported => {
153                Icmpv6ErrorKind::ProtocolUnreachable { header_len }
154            }
155            TransportReceiveError::PortUnreachable => Icmpv6ErrorKind::PortUnreachable,
156        }
157    }
158}
159
160/// Sidecar metadata passed along with the packet.
161///
162/// Note: This metadata may be regenerated when packet handling requires
163/// performing multiple actions (e.g. sending the packet out multiple interfaces
164/// as part of multicast forwarding).
165#[derive(Derivative)]
166#[derivative(Default(bound = ""))]
167pub struct IpLayerPacketMetadata<
168    I: packet_formats::ip::IpExt,
169    A,
170    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
171> {
172    conntrack_connection_and_direction:
173        Option<(ConntrackConnection<I, A, BT>, ConnectionDirection)>,
174
175    /// Tx metadata associated with this packet.
176    ///
177    /// This may be non-default even in the rx path for looped back packets that
178    /// are still forcing tx frame ownership for sockets.
179    tx_metadata: BT::TxMetadata,
180
181    /// Marks attached to the packet that can be acted upon by routing/filtering.
182    marks: Marks,
183
184    /// Socket cookie of the associate socket if any. The value should be
185    /// passed to eBPF programs that process the packet, but it should not be
186    /// used as a unique identifier of the resource inside the netstack.
187    socket_cookie: Option<SocketCookie>,
188
189    #[cfg(debug_assertions)]
190    drop_check: IpLayerPacketMetadataDropCheck,
191}
192
/// A guard that verifies, when dropped, that the drop was intentional.
///
/// NOTE: Unfortunately, tracking down an unintentional drop requires
/// backtraces, because `track_caller` won't do what we want
/// (https://github.com/rust-lang/rust/issues/116942). This type only exists in
/// debug builds, where stack traces are assumed to be enabled.
#[cfg(debug_assertions)]
#[derive(Default)]
struct IpLayerPacketMetadataDropCheck {
    /// Set to `true` once dropping has been acknowledged; `false` by default.
    okay_to_drop: bool,
}
204
205/// Metadata that is produced and consumed by the IP layer for each packet, but
206/// which also traverses the device layer.
207#[derive(Derivative)]
208#[derivative(Debug(bound = ""), Default(bound = ""))]
209pub struct DeviceIpLayerMetadata<BT: TxMetadataBindingsTypes> {
210    /// Weak reference to this packet's connection tracking entry, if the packet is
211    /// tracked.
212    ///
213    /// This allows NAT to consistently associate locally-generated, looped-back
214    /// packets with the same connection at every filtering hook even when NAT may
215    /// have been performed on them, causing them to no longer match the original or
216    /// reply tuples of the connection.
217    conntrack_entry: Option<(WeakConntrackConnection, ConnectionDirection)>,
218    /// Tx metadata associated with this packet.
219    ///
220    /// This may be non-default even in the rx path for looped back packets that
221    /// are still forcing tx frame ownership for sockets.
222    tx_metadata: BT::TxMetadata,
223    /// Marks attached to this packet. For all the incoming packets, they are None
224    /// by default but can be changed by a filtering rule.
225    ///
226    /// Note: The marks will be preserved if the packet is being looped back, i.e.,
227    /// the receiver will be able to observe the marks set by the sender. This is
228    /// consistent with Linux behavior.
229    marks: Marks,
230}
231
232impl<BT: TxMetadataBindingsTypes> DeviceIpLayerMetadata<BT> {
233    /// Discards the remaining IP layer information and returns only the tx
234    /// metadata used for buffer ownership.
235    pub fn into_tx_metadata(self) -> BT::TxMetadata {
236        self.tx_metadata
237    }
238    /// Creates new IP layer metadata with the marks.
239    #[cfg(any(test, feature = "testutils"))]
240    pub fn with_marks(marks: Marks) -> Self {
241        Self { conntrack_entry: None, tx_metadata: Default::default(), marks }
242    }
243}
244
245impl<
246    I: IpLayerIpExt,
247    A: WeakIpAddressId<I::Addr>,
248    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
249> IpLayerPacketMetadata<I, A, BT>
250{
251    fn from_device_ip_layer_metadata<CC, D>(
252        core_ctx: &mut CC,
253        device: &D,
254        DeviceIpLayerMetadata { conntrack_entry, tx_metadata, marks }: DeviceIpLayerMetadata<BT>,
255    ) -> Self
256    where
257        CC: ResourceCounterContext<D, IpCounters<I>>,
258    {
259        let conntrack_connection_and_direction = match conntrack_entry
260            .map(|(conn, dir)| conn.into_inner().map(|conn| (conn, dir)))
261            .transpose()
262        {
263            // Either the packet was tracked and we've preserved its conntrack entry across
264            // loopback, or it was untracked and we just stash the `None`.
265            Ok(conn_and_dir) => conn_and_dir,
266            // Conntrack entry was removed from table after packet was enqueued in loopback.
267            Err(WeakConnectionError::EntryRemoved) => None,
268            // Conntrack entry no longer matches the packet (for example, it could be that
269            // this is an IPv6 packet that was modified at the device layer and therefore it
270            // no longer matches its IPv4 conntrack entry).
271            Err(WeakConnectionError::InvalidEntry) => {
272                core_ctx.increment_both(device, |c| &c.invalid_cached_conntrack_entry);
273                None
274            }
275        };
276
277        let socket_cookie = tx_metadata.socket_cookie();
278
279        Self {
280            conntrack_connection_and_direction,
281            tx_metadata,
282            marks,
283            socket_cookie,
284            #[cfg(debug_assertions)]
285            drop_check: Default::default(),
286        }
287    }
288}
289
290impl<I: IpExt, A, BT: FilterBindingsTypes + TxMetadataBindingsTypes>
291    IpLayerPacketMetadata<I, A, BT>
292{
293    pub(crate) fn from_tx_metadata_and_marks(tx_metadata: BT::TxMetadata, marks: Marks) -> Self {
294        let socket_cookie = tx_metadata.socket_cookie();
295        Self {
296            conntrack_connection_and_direction: None,
297            tx_metadata,
298            marks,
299            socket_cookie,
300            #[cfg(debug_assertions)]
301            drop_check: Default::default(),
302        }
303    }
304
305    pub(crate) fn into_parts(
306        self,
307    ) -> (
308        Option<(ConntrackConnection<I, A, BT>, ConnectionDirection)>,
309        BT::TxMetadata,
310        Marks,
311        Option<SocketCookie>,
312    ) {
313        let Self {
314            tx_metadata,
315            marks,
316            conntrack_connection_and_direction,
317            socket_cookie,
318            #[cfg(debug_assertions)]
319            mut drop_check,
320        } = self;
321        #[cfg(debug_assertions)]
322        {
323            drop_check.okay_to_drop = true;
324        }
325        (conntrack_connection_and_direction, tx_metadata, marks, socket_cookie)
326    }
327
328    /// Acknowledge that it's okay to drop this packet metadata.
329    ///
330    /// When compiled with debug assertions, dropping [`IplayerPacketMetadata`]
331    /// will panic if this method has not previously been called.
332    pub(crate) fn acknowledge_drop(self) {
333        #[cfg(debug_assertions)]
334        {
335            let mut this = self;
336            this.drop_check.okay_to_drop = true;
337        }
338    }
339
340    /// Returns the tx metadata associated with this packet.
341    pub(crate) fn tx_metadata(&self) -> &BT::TxMetadata {
342        &self.tx_metadata
343    }
344
345    /// Returns the marks attached to this packet.
346    pub(crate) fn marks(&self) -> &Marks {
347        &self.marks
348    }
349}
350
351#[cfg(debug_assertions)]
352impl Drop for IpLayerPacketMetadataDropCheck {
353    fn drop(&mut self) {
354        if !self.okay_to_drop {
355            panic!(
356                "IpLayerPacketMetadata dropped without acknowledgement.  https://fxbug.dev/334127474"
357            );
358        }
359    }
360}
361
362impl<I: packet_formats::ip::IpExt, A, BT: FilterBindingsTypes + TxMetadataBindingsTypes>
363    FilterIpMetadata<I, A, BT> for IpLayerPacketMetadata<I, A, BT>
364{
365    fn take_connection_and_direction(
366        &mut self,
367    ) -> Option<(ConntrackConnection<I, A, BT>, ConnectionDirection)> {
368        self.conntrack_connection_and_direction.take()
369    }
370
371    fn replace_connection_and_direction(
372        &mut self,
373        conn: ConntrackConnection<I, A, BT>,
374        direction: ConnectionDirection,
375    ) -> Option<ConntrackConnection<I, A, BT>> {
376        self.conntrack_connection_and_direction.replace((conn, direction)).map(|(conn, _dir)| conn)
377    }
378}
379
380impl<I: packet_formats::ip::IpExt, A, BT: FilterBindingsTypes + TxMetadataBindingsTypes>
381    FilterPacketMetadata for IpLayerPacketMetadata<I, A, BT>
382{
383    fn apply_mark_action(&mut self, domain: MarkDomain, action: MarkAction) {
384        action.apply(self.marks.get_mut(domain))
385    }
386
387    fn cookie(&self) -> Option<SocketCookie> {
388        self.socket_cookie.clone()
389    }
390
391    fn marks(&self) -> &Marks {
392        &self.marks
393    }
394}
395
396/// Send errors observed at or above the IP layer that carry a serializer.
397pub type IpSendFrameError<S> = ErrorAndSerializer<IpSendFrameErrorReason, S>;
398
399/// Send error cause for [`IpSendFrameError`].
400#[derive(Debug, PartialEq)]
401pub enum IpSendFrameErrorReason {
402    /// Error comes from the device layer.
403    Device(SendFrameErrorReason),
404    /// The frame's source or destination address is in the loopback subnet, but
405    /// the target device is not the loopback device.
406    IllegalLoopbackAddress,
407}
408
409impl From<SendFrameErrorReason> for IpSendFrameErrorReason {
410    fn from(value: SendFrameErrorReason) -> Self {
411        Self::Device(value)
412    }
413}
414
415/// The execution context provided by a transport layer protocol to the IP
416/// layer.
417///
418/// An implementation for `()` is provided which indicates that a particular
419/// transport layer protocol is unsupported.
420pub trait IpTransportContext<I: IpExt, BC, CC: DeviceIdContext<AnyDevice> + ?Sized> {
421    /// Type used to identify sockets for early demux.
422    type EarlyDemuxSocket;
423
424    /// Performs early demux.
425    ///
426    /// Tries to match the packet with a connected socket that will receive the
427    /// packet. If a match is found, the socket information is passed to
428    /// `LOCAL_INGRESS` filters. The socket is also passed to
429    /// `receive_ip_packet` to avoid demuxing the packet twice.
430    ///
431    /// The socket may be invalidated if the source address is changed by SNAT.
432    /// In that case, `receive_ip_packet` is called with `early_demux_socket`
433    /// set to `None`.
434    fn early_demux<B: ParseBuffer>(
435        core_ctx: &mut CC,
436        device: &CC::DeviceId,
437        src_ip: I::Addr,
438        dst_ip: I::Addr,
439        buffer: B,
440    ) -> Option<Self::EarlyDemuxSocket>;
441
442    /// Receive an ICMP error message.
443    ///
444    /// All arguments beginning with `original_` are fields from the IP packet
445    /// that triggered the error. The `original_body` is provided here so that
446    /// the error can be associated with a transport-layer socket. `device`
447    /// identifies the device that received the ICMP error message packet.
448    ///
449    /// While ICMPv4 error messages are supposed to contain the first 8 bytes of
450    /// the body of the offending packet, and ICMPv6 error messages are supposed
451    /// to contain as much of the offending packet as possible without violating
452    /// the IPv6 minimum MTU, the caller does NOT guarantee that either of these
453    /// hold. It is `receive_icmp_error`'s responsibility to handle any length
454    /// of `original_body`, and to perform any necessary validation.
455    fn receive_icmp_error(
456        core_ctx: &mut CC,
457        bindings_ctx: &mut BC,
458        device: &CC::DeviceId,
459        original_src_ip: Option<SpecifiedAddr<I::Addr>>,
460        original_dst_ip: SpecifiedAddr<I::Addr>,
461        original_body: &[u8],
462        err: I::ErrorCode,
463    );
464
465    /// Receive a transport layer packet in an IP packet.
466    ///
467    /// In the event of an unreachable port, `receive_ip_packet` returns the
468    /// buffer in its original state (with the transport packet un-parsed) in
469    /// the `Err` variant.
470    fn receive_ip_packet<B: BufferMut, H: IpHeaderInfo<I>>(
471        core_ctx: &mut CC,
472        bindings_ctx: &mut BC,
473        device: &CC::DeviceId,
474        src_ip: I::RecvSrcAddr,
475        dst_ip: SpecifiedAddr<I::Addr>,
476        buffer: B,
477        info: &LocalDeliveryPacketInfo<I, H>,
478        early_demux_socket: Option<Self::EarlyDemuxSocket>,
479    ) -> Result<(), (B, TransportReceiveError)>;
480}
481
482impl<I: IpExt, BC, CC: DeviceIdContext<AnyDevice> + ?Sized> IpTransportContext<I, BC, CC> for () {
483    type EarlyDemuxSocket = Never;
484
485    fn early_demux<B: ParseBuffer>(
486        _core_ctx: &mut CC,
487        _device: &CC::DeviceId,
488        _src_ip: I::Addr,
489        _dst_ip: I::Addr,
490        _buffer: B,
491    ) -> Option<Self::EarlyDemuxSocket> {
492        None
493    }
494
495    fn receive_icmp_error(
496        _core_ctx: &mut CC,
497        _bindings_ctx: &mut BC,
498        _device: &CC::DeviceId,
499        _original_src_ip: Option<SpecifiedAddr<I::Addr>>,
500        _original_dst_ip: SpecifiedAddr<I::Addr>,
501        _original_body: &[u8],
502        err: I::ErrorCode,
503    ) {
504        trace!(
505            "IpTransportContext::receive_icmp_error: Received ICMP error message ({:?}) for unsupported IP protocol",
506            err
507        );
508    }
509
510    fn receive_ip_packet<B: BufferMut, H: IpHeaderInfo<I>>(
511        _core_ctx: &mut CC,
512        _bindings_ctx: &mut BC,
513        _device: &CC::DeviceId,
514        _src_ip: I::RecvSrcAddr,
515        _dst_ip: SpecifiedAddr<I::Addr>,
516        buffer: B,
517        _info: &LocalDeliveryPacketInfo<I, H>,
518        _early_demux_socket: Option<Never>,
519    ) -> Result<(), (B, TransportReceiveError)> {
520        Err((buffer, TransportReceiveError::ProtocolUnsupported))
521    }
522}
523
524/// The base execution context provided by the IP layer to transport layer
525/// protocols.
526pub trait BaseTransportIpContext<I: IpExt, BC>: DeviceIdContext<AnyDevice> {
527    /// The iterator given to
528    /// [`BaseTransportIpContext::with_devices_with_assigned_addr`].
529    type DevicesWithAddrIter<'s>: Iterator<Item = Self::DeviceId>;
530
531    /// Is this one of our local addresses, and is it in the assigned state?
532    ///
533    /// Calls `cb` with an iterator over all the local interfaces for which
534    /// `addr` is an associated address, and, for IPv6, for which it is in the
535    /// "assigned" state.
536    fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
537        &mut self,
538        addr: SpecifiedAddr<I::Addr>,
539        cb: F,
540    ) -> O;
541
542    /// Get default hop limits.
543    ///
544    /// If `device` is not `None` and exists, its hop limits will be returned.
545    /// Otherwise the system defaults are returned.
546    fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits;
547
548    /// Gets the original destination for the tracked connection indexed by
549    /// `tuple`, which includes the source and destination addresses and
550    /// transport-layer ports as well as the transport protocol number.
551    fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)>;
552}
553
554/// A marker trait for the traits required by the transport layer from the IP
555/// layer.
556pub trait TransportIpContext<I: IpExt + FilterIpExt, BC: TxMetadataBindingsTypes>:
557    BaseTransportIpContext<I, BC> + IpSocketHandler<I, BC>
558{
559}
560
561impl<I, CC, BC> TransportIpContext<I, BC> for CC
562where
563    I: IpExt + FilterIpExt,
564    CC: BaseTransportIpContext<I, BC> + IpSocketHandler<I, BC>,
565    BC: TxMetadataBindingsTypes,
566{
567}
568
569/// Abstraction over the ability to join and leave multicast groups.
570pub trait MulticastMembershipHandler<I: Ip, BC>: DeviceIdContext<AnyDevice> {
571    /// Requests that the specified device join the given multicast group.
572    ///
573    /// If this method is called multiple times with the same device and
574    /// address, the device will remain joined to the multicast group until
575    /// [`MulticastTransportIpContext::leave_multicast_group`] has been called
576    /// the same number of times.
577    fn join_multicast_group(
578        &mut self,
579        bindings_ctx: &mut BC,
580        device: &Self::DeviceId,
581        addr: MulticastAddr<I::Addr>,
582    );
583
584    /// Requests that the specified device leave the given multicast group.
585    ///
586    /// Each call to this method must correspond to an earlier call to
587    /// [`MulticastTransportIpContext::join_multicast_group`]. The device
588    /// remains a member of the multicast group so long as some call to
589    /// `join_multicast_group` has been made without a corresponding call to
590    /// `leave_multicast_group`.
591    fn leave_multicast_group(
592        &mut self,
593        bindings_ctx: &mut BC,
594        device: &Self::DeviceId,
595        addr: MulticastAddr<I::Addr>,
596    );
597
598    /// Selects a default device with which to join the given multicast group.
599    ///
600    /// The selection is made by consulting the routing table; If there is no
601    /// route available to the given address, an error is returned.
602    fn select_device_for_multicast_group(
603        &mut self,
604        addr: MulticastAddr<I::Addr>,
605        marks: &Marks,
606    ) -> Result<Self::DeviceId, ResolveRouteError>;
607}
608
609// TODO(joshlf): With all 256 protocol numbers (minus reserved ones) given their
610// own associated type in both traits, running `cargo check` on a 2018 MacBook
611// Pro takes over a minute. Eventually - and before we formally publish this as
612// a library - we should identify the bottleneck in the compiler and optimize
613// it. For the time being, however, we only support protocol numbers that we
614// actually use (TCP and UDP).
615
/// Opts a type into the blanket implementation of
/// [`BaseTransportIpContext`], and through it [`TransportIpContext`].
///
/// The blanket implementation of `BaseTransportIpContext` requires this
/// marker in addition to its other bounds, so implementing it for a type makes
/// that implementation available once those other requirements are met.
pub trait UseTransportIpContextBlanket {}
621
/// The iterator backing the blanket implementation of
/// [`BaseTransportIpContext::with_devices_with_assigned_addr`].
///
/// It wraps an iterator `Iter`; `I` and `D` are carried only as type-level
/// markers.
pub struct AssignedAddressDeviceIterator<Iter, I, D>(Iter, PhantomData<(I, D)>);
625
626impl<Iter, I, D> Iterator for AssignedAddressDeviceIterator<Iter, I, D>
627where
628    Iter: Iterator<Item = (D, I::AddressStatus)>,
629    I: IpLayerIpExt,
630{
631    type Item = D;
632    fn next(&mut self) -> Option<D> {
633        let Self(iter, PhantomData) = self;
634        iter.by_ref().find_map(|(device, state)| is_unicast_assigned::<I>(&state).then_some(device))
635    }
636}
637
638impl<
639    I: IpLayerIpExt,
640    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes + IpRoutingBindingsTypes,
641    CC: IpDeviceContext<I>
642        + IpSocketHandler<I, BC>
643        + IpStateContext<I, BC>
644        + FilterIpContext<I, BC>
645        + UseTransportIpContextBlanket,
646> BaseTransportIpContext<I, BC> for CC
647{
648    type DevicesWithAddrIter<'s> =
649        AssignedAddressDeviceIterator<CC::DeviceAndAddressStatusIter<'s>, I, CC::DeviceId>;
650
651    fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
652        &mut self,
653        addr: SpecifiedAddr<I::Addr>,
654        cb: F,
655    ) -> O {
656        self.with_address_statuses(addr, |it| cb(AssignedAddressDeviceIterator(it, PhantomData)))
657    }
658
659    fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits {
660        match device {
661            Some(device) => HopLimits {
662                unicast: IpDeviceEgressStateContext::<I>::get_hop_limit(self, device),
663                ..DEFAULT_HOP_LIMITS
664            },
665            None => DEFAULT_HOP_LIMITS,
666        }
667    }
668
669    fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
670        self.with_filter_state(|state| {
671            let conn = state.conntrack.get_connection(&tuple)?;
672
673            if !conn.destination_nat() {
674                return None;
675            }
676
677            // The tuple marking the original direction of the connection is
678            // never modified by NAT. This means it can be used to recover the
679            // destination before NAT was performed.
680            let original = conn.original_tuple();
681            Some((original.dst_addr, original.dst_port_or_id))
682        })
683    }
684}
685
/// The status of an IP address on an interface.
#[derive(Debug, PartialEq)]
pub enum AddressStatus<S> {
    /// The address is present; `S` carries the detailed status.
    Present(S),
    /// The address is not assigned.
    Unassigned,
}

impl<S> AddressStatus<S> {
    /// Returns the detailed status of a present address, or `None` for an
    /// unassigned one.
    fn into_present(self) -> Option<S> {
        match self {
            Self::Unassigned => None,
            Self::Present(status) => Some(status),
        }
    }
}
702
703impl AddressStatus<Ipv4PresentAddressStatus> {
704    /// Creates an IPv4 `AddressStatus` for `addr` on `device`.
705    pub fn from_context_addr_v4<
706        BC: IpDeviceStateBindingsTypes,
707        CC: device::IpDeviceStateContext<Ipv4, BC> + GmpQueryHandler<Ipv4, BC>,
708    >(
709        core_ctx: &mut CC,
710        device: &CC::DeviceId,
711        addr: SpecifiedAddr<Ipv4Addr>,
712    ) -> AddressStatus<Ipv4PresentAddressStatus> {
713        if addr.is_limited_broadcast() {
714            return AddressStatus::Present(Ipv4PresentAddressStatus::LimitedBroadcast);
715        }
716
717        if MulticastAddr::new(addr.get())
718            .is_some_and(|addr| GmpQueryHandler::gmp_is_in_group(core_ctx, device, addr))
719        {
720            return AddressStatus::Present(Ipv4PresentAddressStatus::Multicast);
721        }
722
723        core_ctx.with_address_ids(device, |mut addrs, core_ctx| {
724            addrs
725                .find_map(|addr_id| {
726                    let dev_addr = addr_id.addr_sub();
727                    let (dev_addr, subnet) = dev_addr.addr_subnet();
728
729                    if **dev_addr == addr {
730                        let assigned = core_ctx.with_ip_address_data(
731                            device,
732                            &addr_id,
733                            |IpAddressData { flags: IpAddressFlags { assigned }, config: _ }| {
734                                *assigned
735                            },
736                        );
737
738                        if assigned {
739                            Some(AddressStatus::Present(Ipv4PresentAddressStatus::UnicastAssigned))
740                        } else {
741                            Some(AddressStatus::Present(Ipv4PresentAddressStatus::UnicastTentative))
742                        }
743                    } else if addr.get() == subnet.broadcast() {
744                        Some(AddressStatus::Present(Ipv4PresentAddressStatus::SubnetBroadcast))
745                    } else if device.is_loopback() && subnet.contains(addr.as_ref()) {
746                        Some(AddressStatus::Present(Ipv4PresentAddressStatus::LoopbackSubnet))
747                    } else {
748                        None
749                    }
750                })
751                .unwrap_or(AddressStatus::Unassigned)
752        })
753    }
754}
755
756impl AddressStatus<Ipv6PresentAddressStatus> {
757    /// /// Creates an IPv6 `AddressStatus` for `addr` on `device`.
758    pub fn from_context_addr_v6<
759        BC: IpDeviceBindingsContext<Ipv6, CC::DeviceId>,
760        CC: device::Ipv6DeviceContext<BC> + GmpQueryHandler<Ipv6, BC>,
761    >(
762        core_ctx: &mut CC,
763        device: &CC::DeviceId,
764        addr: SpecifiedAddr<Ipv6Addr>,
765    ) -> AddressStatus<Ipv6PresentAddressStatus> {
766        if MulticastAddr::new(addr.get())
767            .is_some_and(|addr| GmpQueryHandler::gmp_is_in_group(core_ctx, device, addr))
768        {
769            return AddressStatus::Present(Ipv6PresentAddressStatus::Multicast);
770        }
771
772        let addr_id = match core_ctx.get_address_id(device, addr) {
773            Ok(o) => o,
774            Err(NotFoundError) => return AddressStatus::Unassigned,
775        };
776
777        let assigned = core_ctx.with_ip_address_data(
778            device,
779            &addr_id,
780            |IpAddressData { flags: IpAddressFlags { assigned }, config: _ }| *assigned,
781        );
782
783        if assigned {
784            AddressStatus::Present(Ipv6PresentAddressStatus::UnicastAssigned)
785        } else {
786            AddressStatus::Present(Ipv6PresentAddressStatus::UnicastTentative)
787        }
788    }
789}
790
// `AddressStatus<S>` is generic over the IP version whenever its inner status
// `S` is: the mapped type is `AddressStatus` wrapping the mapped inner type.
impl<S: GenericOverIp<I>, I: Ip> GenericOverIp<I> for AddressStatus<S> {
    type Type = AddressStatus<S::Type>;
}
794
/// The status of an IPv4 address.
#[derive(Debug, PartialEq)]
#[allow(missing_docs)]
pub enum Ipv4PresentAddressStatus {
    LimitedBroadcast,
    SubnetBroadcast,
    Multicast,
    UnicastAssigned,
    UnicastTentative,
    /// This status indicates that the queried device was Loopback. The address
    /// belongs to a subnet that is assigned to the interface. This status
    /// takes lower precedence than the unicast statuses and `SubnetBroadcast`.
    /// E.g. if the loopback device is assigned `127.0.0.1/8`:
    ///   * address `127.0.0.1` -> `UnicastAssigned` (or `UnicastTentative`)
    ///   * address `127.0.0.2` -> `LoopbackSubnet`
    ///   * address `127.255.255.255` -> `SubnetBroadcast`
    ///
    /// This exists for Linux conformance, which on the Loopback device,
    /// considers an IPv4 address assigned if it belongs to one of the device's
    /// assigned subnets.
    LoopbackSubnet,
}
816
817impl Ipv4PresentAddressStatus {
818    fn to_broadcast_marker(&self) -> Option<<Ipv4 as BroadcastIpExt>::BroadcastMarker> {
819        match self {
820            Self::LimitedBroadcast | Self::SubnetBroadcast => Some(()),
821            Self::Multicast
822            | Self::UnicastAssigned
823            | Self::UnicastTentative
824            | Self::LoopbackSubnet => None,
825        }
826    }
827}
828
/// The status of an IPv6 address.
///
/// As produced by `AddressStatus::from_context_addr_v6`:
///   * `Multicast`: the address is a multicast address whose group the device
///     is a member of.
///   * `UnicastAssigned`: the address is known on the device and its
///     `assigned` flag is set.
///   * `UnicastTentative`: the address is known on the device but its
///     `assigned` flag is not set.
#[derive(Debug, PartialEq)]
#[allow(missing_docs)]
pub enum Ipv6PresentAddressStatus {
    Multicast,
    UnicastAssigned,
    UnicastTentative,
}
837
/// An extension trait providing IP layer properties.
///
/// Bundles the IP-version-specific types used by the IP layer: the address
/// status type, the layer state type, and the packet identifier types.
pub trait IpLayerIpExt:
    IpExt
    + MulticastRouteIpExt
    + IcmpHandlerIpExt
    + FilterIpExt
    + FragmentationIpExt
    + IpDeviceIpExt
    + IpCountersIpExt
    + IcmpCountersIpExt
    + ReassemblyIpExt
{
    /// IP Address status.
    type AddressStatus: Debug;
    /// IP layer state for this IP version.
    ///
    /// The version-generic [`IpStateInner`] must be reachable through `AsRef`.
    type State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>: AsRef<
        IpStateInner<Self, StrongDeviceId, BT>,
    >;
    /// State kept for packet identifiers.
    type PacketIdState;
    /// The type of a single packet identifier.
    type PacketId;
    /// Produces the next packet ID from the state.
    ///
    /// Takes the state by shared reference, so any update to it must go
    /// through interior mutability.
    fn next_packet_id_from_state(state: &Self::PacketIdState) -> Self::PacketId;
}
863
864impl IpLayerIpExt for Ipv4 {
865    type AddressStatus = Ipv4PresentAddressStatus;
866    type State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> =
867        Ipv4State<StrongDeviceId, BT>;
868    type PacketIdState = AtomicU16;
869    type PacketId = u16;
870    fn next_packet_id_from_state(next_packet_id: &Self::PacketIdState) -> Self::PacketId {
871        // Relaxed ordering as we only need atomicity without synchronization. See
872        // https://en.cppreference.com/w/cpp/atomic/memory_order#Relaxed_ordering
873        // for more details.
874        next_packet_id.fetch_add(1, atomic::Ordering::Relaxed)
875    }
876}
877
impl IpLayerIpExt for Ipv6 {
    type AddressStatus = Ipv6PresentAddressStatus;
    type State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> =
        Ipv6State<StrongDeviceId, BT>;
    // No packet ID state is kept for IPv6: both the state and the identifier
    // are the unit type.
    type PacketIdState = ();
    type PacketId = ();
    /// Returns the unit packet ID; there is no state to advance.
    fn next_packet_id_from_state((): &Self::PacketIdState) -> Self::PacketId {
        ()
    }
}
888
/// The state context provided to the IP layer.
///
/// Gives access to the rules table, together with a context through which the
/// routing tables can be reached.
pub trait IpStateContext<I: IpLayerIpExt, BT: IpRoutingBindingsTypes + MatcherBindingsTypes>:
    IpRouteTablesContext<I, BT, DeviceId: InterfaceProperties<BT::DeviceClass>>
{
    /// The context that provides access to the IP routing tables.
    type IpRouteTablesCtx<'a>: IpRouteTablesContext<I, BT, DeviceId = Self::DeviceId>;

    /// Gets an immutable reference to the rules table.
    ///
    /// Calls `cb` with the routing tables context and the rules table, and
    /// returns whatever `cb` returns.
    fn with_rules_table<
        O,
        F: FnOnce(&mut Self::IpRouteTablesCtx<'_>, &RulesTable<I, Self::DeviceId, BT>) -> O,
    >(
        &mut self,
        cb: F,
    ) -> O;

    /// Gets a mutable reference to the rules table.
    ///
    /// Calls `cb` with the routing tables context and the rules table, and
    /// returns whatever `cb` returns.
    fn with_rules_table_mut<
        O,
        F: FnOnce(&mut Self::IpRouteTablesCtx<'_>, &mut RulesTable<I, Self::DeviceId, BT>) -> O,
    >(
        &mut self,
        cb: F,
    ) -> O;
}
914
/// The state context that gives access to routing tables provided to the IP layer.
pub trait IpRouteTablesContext<I: IpLayerIpExt, BT: IpRoutingBindingsTypes>:
    IpRouteTableContext<I, BT> + IpDeviceContext<I>
{
    /// The inner context that can provide access to individual routing tables.
    type Ctx<'a>: IpRouteTableContext<I, BT, DeviceId = Self::DeviceId, WeakDeviceId = Self::WeakDeviceId>;

    /// Gets the main table ID.
    fn main_table_id(&self) -> RoutingTableId<I, Self::DeviceId, BT>;

    /// Gets immutable access to all the routing tables that currently exist.
    ///
    /// `cb` receives the inner context and the map from table ID to table
    /// state; its return value is passed back to the caller.
    fn with_ip_routing_tables<
        O,
        F: FnOnce(
            &mut Self::Ctx<'_>,
            &HashMap<
                RoutingTableId<I, Self::DeviceId, BT>,
                PrimaryRc<BaseRoutingTableState<I, Self::DeviceId, BT>>,
            >,
        ) -> O,
    >(
        &mut self,
        cb: F,
    ) -> O;

    /// Gets mutable access to all the routing tables that currently exist.
    ///
    /// Unlike [`IpRouteTablesContext::with_ip_routing_tables`], `cb` receives
    /// only the map and no inner context.
    fn with_ip_routing_tables_mut<
        O,
        F: FnOnce(
            &mut HashMap<
                RoutingTableId<I, Self::DeviceId, BT>,
                PrimaryRc<BaseRoutingTableState<I, Self::DeviceId, BT>>,
            >,
        ) -> O,
    >(
        &mut self,
        cb: F,
    ) -> O;

    // TODO(https://fxbug.dev/354724171): Remove this function when we no longer
    // make routing decisions starting from the main table.
    /// Calls the function with an immutable reference to IP routing table.
    ///
    /// The table used is the one identified by
    /// [`IpRouteTablesContext::main_table_id`].
    fn with_main_ip_routing_table<
        O,
        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &RoutingTable<I, Self::DeviceId>) -> O,
    >(
        &mut self,
        cb: F,
    ) -> O {
        let main_table_id = self.main_table_id();
        self.with_ip_routing_table(&main_table_id, cb)
    }

    // TODO(https://fxbug.dev/341194323): Remove this function when we no longer
    // only update the main routing table by default.
    /// Calls the function with a mutable reference to IP routing table.
    ///
    /// The table used is the one identified by
    /// [`IpRouteTablesContext::main_table_id`].
    fn with_main_ip_routing_table_mut<
        O,
        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &mut RoutingTable<I, Self::DeviceId>) -> O,
    >(
        &mut self,
        cb: F,
    ) -> O {
        let main_table_id = self.main_table_id();
        self.with_ip_routing_table_mut(&main_table_id, cb)
    }
}
982
/// The state context that gives access to a singular routing table.
pub trait IpRouteTableContext<I: IpLayerIpExt, BT: IpRoutingBindingsTypes>:
    IpDeviceContext<I>
{
    /// The inner device id context.
    ///
    /// Shares this context's device ID types and is passed to the callbacks
    /// below alongside the routing table.
    type IpDeviceIdCtx<'a>: DeviceIdContext<AnyDevice, DeviceId = Self::DeviceId, WeakDeviceId = Self::WeakDeviceId>
        + IpRoutingDeviceContext<I>
        + IpDeviceContext<I>;

    /// Calls the function with an immutable reference to IP routing table.
    ///
    /// `table_id` selects the table; the return value of `cb` is passed back
    /// to the caller.
    fn with_ip_routing_table<
        O,
        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &RoutingTable<I, Self::DeviceId>) -> O,
    >(
        &mut self,
        table_id: &RoutingTableId<I, Self::DeviceId, BT>,
        cb: F,
    ) -> O;

    /// Calls the function with a mutable reference to IP routing table.
    ///
    /// `table_id` selects the table; the return value of `cb` is passed back
    /// to the caller.
    fn with_ip_routing_table_mut<
        O,
        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &mut RoutingTable<I, Self::DeviceId>) -> O,
    >(
        &mut self,
        table_id: &RoutingTableId<I, Self::DeviceId, BT>,
        cb: F,
    ) -> O;
}
1012
/// Provides access to an IP device's state for IP layer egress.
pub trait IpDeviceEgressStateContext<I: IpLayerIpExt>: DeviceIdContext<AnyDevice> {
    /// Calls the callback with the state from which the next packet ID is
    /// produced, and returns the callback's result.
    fn with_next_packet_id<O, F: FnOnce(&I::PacketIdState) -> O>(&self, cb: F) -> O;

    /// Returns the best local address for communicating with the remote.
    ///
    /// Returns `None` if no local address is available on `device_id`.
    fn get_local_addr_for_remote(
        &mut self,
        device_id: &Self::DeviceId,
        remote: Option<SpecifiedAddr<I::Addr>>,
    ) -> Option<IpDeviceAddr<I::Addr>>;

    /// Returns the hop limit.
    fn get_hop_limit(&mut self, device_id: &Self::DeviceId) -> NonZeroU8;
}
1028
/// Provides access to an IP device's state for IP layer ingress.
pub trait IpDeviceIngressStateContext<I: IpLayerIpExt>: DeviceIdContext<AnyDevice> {
    /// Gets the status of an address.
    ///
    /// Only the specified device will be checked for the address. Returns
    /// [`AddressStatus::Unassigned`] if the address is not assigned to the
    /// device, and [`AddressStatus::Present`] with the IP-version-specific
    /// status otherwise.
    fn address_status_for_device(
        &mut self,
        addr: SpecifiedAddr<I::Addr>,
        device_id: &Self::DeviceId,
    ) -> AddressStatus<I::AddressStatus>;
}
1042
/// The IP device context provided to the IP layer.
///
/// Combines the egress and ingress device state contexts with device
/// enablement, address status and unicast forwarding queries.
pub trait IpDeviceContext<I: IpLayerIpExt>:
    IpDeviceEgressStateContext<I> + IpDeviceIngressStateContext<I>
{
    /// Is the device enabled?
    fn is_ip_device_enabled(&mut self, device_id: &Self::DeviceId) -> bool;

    /// The iterator provided to [`IpDeviceContext::with_address_statuses`].
    ///
    /// Yields `(device, status)` pairs.
    type DeviceAndAddressStatusIter<'a>: Iterator<Item = (Self::DeviceId, I::AddressStatus)>;

    /// Provides access to the status of an address.
    ///
    /// Calls the provided callback with an iterator over the devices for which
    /// the address is assigned and the status of the assignment for each
    /// device. Returns the callback's result.
    fn with_address_statuses<F: FnOnce(Self::DeviceAndAddressStatusIter<'_>) -> R, R>(
        &mut self,
        addr: SpecifiedAddr<I::Addr>,
        cb: F,
    ) -> R;

    /// Returns true iff the device has unicast forwarding enabled.
    fn is_device_unicast_forwarding_enabled(&mut self, device_id: &Self::DeviceId) -> bool;
}
1067
/// Provides the ability to check neighbor reachability via a specific device.
pub trait IpDeviceConfirmReachableContext<I: IpLayerIpExt, BC>: DeviceIdContext<AnyDevice> {
    /// Confirm transport-layer forward reachability to the specified neighbor
    /// through the specified device.
    ///
    /// Returns nothing: callers get no indication of the outcome.
    fn confirm_reachable(
        &mut self,
        bindings_ctx: &mut BC,
        device: &Self::DeviceId,
        neighbor: SpecifiedAddr<I::Addr>,
    );
}
1079
/// Provides access to an IP device's MTU for the IP layer.
pub trait IpDeviceMtuContext<I: Ip>: DeviceIdContext<AnyDevice> {
    /// Returns the MTU of the device identified by `device_id`.
    ///
    /// The MTU is the maximum size of an IP packet.
    fn get_mtu(&mut self, device_id: &Self::DeviceId) -> Mtu;
}
1087
/// Events observed at the IP layer.
///
/// Generic over the device identifier type so that it can be converted with
/// `map_device`.
#[derive(Debug, Eq, Hash, PartialEq, GenericOverIp)]
#[generic_over_ip(I, Ip)]
pub enum IpLayerEvent<DeviceId, I: IpLayerIpExt> {
    /// A route needs to be added.
    AddRoute(types::AddableEntry<I::Addr, DeviceId>),
    /// Routes matching these specifiers need to be removed.
    RemoveRoutes {
        /// Destination subnet.
        subnet: Subnet<I::Addr>,
        /// Outgoing interface.
        device: DeviceId,
        /// Gateway/next-hop, if any.
        gateway: Option<SpecifiedAddr<I::Addr>>,
    },
    /// The multicast forwarding engine emitted an event.
    MulticastForwarding(MulticastForwardingEvent<I, DeviceId>),
}
1106
1107impl<DeviceId, I: IpLayerIpExt> From<MulticastForwardingEvent<I, DeviceId>>
1108    for IpLayerEvent<DeviceId, I>
1109{
1110    fn from(event: MulticastForwardingEvent<I, DeviceId>) -> IpLayerEvent<DeviceId, I> {
1111        IpLayerEvent::MulticastForwarding(event)
1112    }
1113}
1114
1115impl<DeviceId, I: IpLayerIpExt> IpLayerEvent<DeviceId, I> {
1116    /// Changes the device id type with `map`.
1117    pub fn map_device<N, F: Fn(DeviceId) -> N>(self, map: F) -> IpLayerEvent<N, I> {
1118        match self {
1119            IpLayerEvent::AddRoute(types::AddableEntry { subnet, device, gateway, metric }) => {
1120                IpLayerEvent::AddRoute(types::AddableEntry {
1121                    subnet,
1122                    device: map(device),
1123                    gateway,
1124                    metric,
1125                })
1126            }
1127            IpLayerEvent::RemoveRoutes { subnet, device, gateway } => {
1128                IpLayerEvent::RemoveRoutes { subnet, device: map(device), gateway }
1129            }
1130            IpLayerEvent::MulticastForwarding(e) => {
1131                IpLayerEvent::MulticastForwarding(e.map_device(map))
1132            }
1133        }
1134    }
1135}
1136
/// An event signifying a router advertisement has been received.
///
/// The `Debug` output omits `options_bytes`.
#[derive(Derivative, PartialEq, Eq, Clone, Hash)]
#[derivative(Debug)]
pub struct RouterAdvertisementEvent<D> {
    /// The raw bytes of the router advertisement message's options.
    // NB: avoid deriving Debug for this since it could contain PII.
    #[derivative(Debug = "ignore")]
    pub options_bytes: Box<[u8]>,
    /// The source address of the RA message.
    pub source: net_types::ip::Ipv6Addr,
    /// The device on which the message was received.
    pub device: D,
}
1150
1151impl<D> RouterAdvertisementEvent<D> {
1152    /// Maps the contained device ID type.
1153    pub fn map_device<N, F: Fn(D) -> N>(self, map: F) -> RouterAdvertisementEvent<N> {
1154        let Self { options_bytes, source, device } = self;
1155        RouterAdvertisementEvent { options_bytes, source, device: map(device) }
1156    }
1157}
1158
/// Ipv6-specific bindings execution context for the IP layer.
///
/// This is an alias-style trait: it adds no methods of its own.
pub trait NdpBindingsContext<DeviceId>: EventContext<RouterAdvertisementEvent<DeviceId>> {}
// Blanket impl: every type that satisfies the supertrait bound implements
// `NdpBindingsContext`.
impl<DeviceId, BC: EventContext<RouterAdvertisementEvent<DeviceId>>> NdpBindingsContext<DeviceId>
    for BC
{
}
1165
/// Defines how socket marks should be handled by the IP layer.
///
/// Both methods are associated functions (no `self`) that return a static
/// list of mark domains.
pub trait MarksBindingsContext {
    /// Mark domains for marks that should be kept when an egress packet is
    /// passed from the IP layer to the device. For egress packets that are
    /// delivered locally through the loopback interface, these marks are
    /// passed to the ingress path and can be observed by ingress filter hooks.
    fn marks_to_keep_on_egress() -> &'static [MarkDomain];

    /// Mark domains for marks that should be copied to ingress packets. If
    /// early demux results in a socket then these marks are copied from the
    /// socket to the packet and can be observed in the `LOCAL_INGRESS` filter
    /// hook.
    fn marks_to_set_on_ingress() -> &'static [MarkDomain];
}
1180
/// The bindings execution context for the IP layer.
///
/// This is an alias-style trait: it adds no methods of its own and only
/// bundles its supertraits.
pub trait IpLayerBindingsContext<I: IpLayerIpExt, DeviceId>:
    InstantContext
    + EventContext<IpLayerEvent<DeviceId, I>>
    + FilterBindingsContext<DeviceId>
    + TxMetadataBindingsTypes
    + IpRoutingBindingsTypes
    + MarksBindingsContext
{
}
// Blanket impl: the bounds below mirror the supertrait list above, so every
// type that satisfies them implements `IpLayerBindingsContext`.
impl<
    I: IpLayerIpExt,
    DeviceId,
    BC: InstantContext
        + EventContext<IpLayerEvent<DeviceId, I>>
        + FilterBindingsContext<DeviceId>
        + TxMetadataBindingsTypes
        + IpRoutingBindingsTypes
        + MarksBindingsContext,
> IpLayerBindingsContext<I, DeviceId> for BC
{
}
1203
/// A marker trait for bindings types at the IP layer.
///
/// Bundles the ICMP, IP state and IP routing bindings types.
pub trait IpLayerBindingsTypes:
    IcmpBindingsTypes + IpStateBindingsTypes + IpRoutingBindingsTypes
{
}
// Blanket impl: every type that provides all three bundled traits implements
// `IpLayerBindingsTypes`.
impl<BT: IcmpBindingsTypes + IpStateBindingsTypes + IpRoutingBindingsTypes> IpLayerBindingsTypes
    for BT
{
}
1213
/// The execution context for the IP layer.
///
/// This is an alias-style trait: it adds no methods of its own and only
/// bundles its supertraits.
pub trait IpLayerContext<
    I: IpLayerIpExt,
    BC: IpLayerBindingsContext<I, <Self as DeviceIdContext<AnyDevice>>::DeviceId>,
>:
    IpStateContext<I, BC>
    + IpDeviceContext<I>
    + IpDeviceMtuContext<I>
    + IpDeviceSendContext<I, BC>
    + IcmpErrorHandler<I, BC>
    + MulticastForwardingStateContext<I, BC>
    + MulticastForwardingDeviceContext<I>
    + CounterContext<MulticastForwardingCounters<I>>
    + ResourceCounterContext<<Self as DeviceIdContext<AnyDevice>>::DeviceId, IpCounters<I>>
{
}

// Blanket impl: the bounds on `CC` mirror the supertrait list above, so every
// type that satisfies them implements `IpLayerContext`.
impl<
    I: IpLayerIpExt,
    BC: IpLayerBindingsContext<I, <CC as DeviceIdContext<AnyDevice>>::DeviceId>,
    CC: IpStateContext<I, BC>
        + IpDeviceContext<I>
        + IpDeviceMtuContext<I>
        + IpDeviceSendContext<I, BC>
        + IcmpErrorHandler<I, BC>
        + MulticastForwardingStateContext<I, BC>
        + MulticastForwardingDeviceContext<I>
        + CounterContext<MulticastForwardingCounters<I>>
        + ResourceCounterContext<<Self as DeviceIdContext<AnyDevice>>::DeviceId, IpCounters<I>>,
> IpLayerContext<I, BC> for CC
{
}
1246
1247fn is_unicast_assigned<I: IpLayerIpExt>(status: &I::AddressStatus) -> bool {
1248    #[derive(GenericOverIp)]
1249    #[generic_over_ip(I, Ip)]
1250    struct WrapAddressStatus<'a, I: IpLayerIpExt>(&'a I::AddressStatus);
1251
1252    I::map_ip(
1253        WrapAddressStatus(status),
1254        |WrapAddressStatus(status)| match status {
1255            Ipv4PresentAddressStatus::UnicastAssigned
1256            | Ipv4PresentAddressStatus::LoopbackSubnet => true,
1257            Ipv4PresentAddressStatus::UnicastTentative
1258            | Ipv4PresentAddressStatus::LimitedBroadcast
1259            | Ipv4PresentAddressStatus::SubnetBroadcast
1260            | Ipv4PresentAddressStatus::Multicast => false,
1261        },
1262        |WrapAddressStatus(status)| match status {
1263            Ipv6PresentAddressStatus::UnicastAssigned => true,
1264            Ipv6PresentAddressStatus::Multicast | Ipv6PresentAddressStatus::UnicastTentative => {
1265                false
1266            }
1267        },
1268    )
1269}
1270
1271fn is_local_assigned_address<I: Ip + IpLayerIpExt, CC: IpDeviceIngressStateContext<I>>(
1272    core_ctx: &mut CC,
1273    device: &CC::DeviceId,
1274    addr: IpDeviceAddr<I::Addr>,
1275) -> bool {
1276    match core_ctx.address_status_for_device(addr.into(), device) {
1277        AddressStatus::Present(status) => is_unicast_assigned::<I>(&status),
1278        AddressStatus::Unassigned => false,
1279    }
1280}
1281
1282fn get_device_with_assigned_address<I, CC>(
1283    core_ctx: &mut CC,
1284    addr: IpDeviceAddr<I::Addr>,
1285) -> Option<(CC::DeviceId, I::AddressStatus)>
1286where
1287    I: IpLayerIpExt,
1288    CC: IpDeviceContext<I>,
1289{
1290    core_ctx.with_address_statuses(addr.into(), |mut it| {
1291        it.find_map(|(device, status)| {
1292            is_unicast_assigned::<I>(&status).then_some((device, status))
1293        })
1294    })
1295}
1296
1297// Returns the local IP address to use for sending packets from the
1298// given device to `addr`, restricting to `local_ip` if it is not
1299// `None`.
1300fn get_local_addr<I: Ip + IpLayerIpExt, CC: IpDeviceContext<I>>(
1301    core_ctx: &mut CC,
1302    local_ip_and_policy: Option<(IpDeviceAddr<I::Addr>, NonLocalSrcAddrPolicy)>,
1303    device: &CC::DeviceId,
1304    remote_addr: Option<RoutableIpAddr<I::Addr>>,
1305) -> Result<IpDeviceAddr<I::Addr>, ResolveRouteError> {
1306    match local_ip_and_policy {
1307        Some((local_ip, NonLocalSrcAddrPolicy::Allow)) => Ok(local_ip),
1308        Some((local_ip, NonLocalSrcAddrPolicy::Deny)) => {
1309            is_local_assigned_address(core_ctx, device, local_ip)
1310                .then_some(local_ip)
1311                .ok_or(ResolveRouteError::NoSrcAddr)
1312        }
1313        None => core_ctx
1314            .get_local_addr_for_remote(device, remote_addr.map(Into::into))
1315            .ok_or(ResolveRouteError::NoSrcAddr),
1316    }
1317}
1318
/// An error occurred while resolving the route to a destination.
#[derive(Error, Copy, Clone, Debug, Eq, GenericOverIp, PartialEq)]
#[generic_over_ip()]
pub enum ResolveRouteError {
    /// A source address could not be selected.
    #[error("a source address could not be selected")]
    NoSrcAddr,
    /// The destination is unreachable.
    #[error("no route exists to the destination IP address")]
    Unreachable,
}
1330
1331/// Like [`get_local_addr`], but willing to forward internally as necessary.
1332fn get_local_addr_with_internal_forwarding<I, CC>(
1333    core_ctx: &mut CC,
1334    local_ip_and_policy: Option<(IpDeviceAddr<I::Addr>, NonLocalSrcAddrPolicy)>,
1335    device: &CC::DeviceId,
1336    remote_addr: Option<RoutableIpAddr<I::Addr>>,
1337) -> Result<(IpDeviceAddr<I::Addr>, InternalForwarding<CC::DeviceId>), ResolveRouteError>
1338where
1339    I: IpLayerIpExt,
1340    CC: IpDeviceContext<I>,
1341{
1342    match get_local_addr(core_ctx, local_ip_and_policy, device, remote_addr) {
1343        Ok(src_addr) => Ok((src_addr, InternalForwarding::NotUsed)),
1344        Err(e) => {
1345            // If a local_ip was specified, the local_ip is assigned to a
1346            // device, and that device has forwarding enabled, use internal
1347            // forwarding.
1348            //
1349            // This enables a weak host model when the Netstack is configured as
1350            // a router. Conceptually the netstack is forwarding the packet from
1351            // the local IP's device to the output device of the selected route.
1352            if let Some((local_ip, _policy)) = local_ip_and_policy {
1353                if let Some((device, _addr_status)) =
1354                    get_device_with_assigned_address(core_ctx, local_ip)
1355                {
1356                    if core_ctx.is_device_unicast_forwarding_enabled(&device) {
1357                        return Ok((local_ip, InternalForwarding::Used(device)));
1358                    }
1359                }
1360            }
1361            Err(e)
1362        }
1363    }
1364}
1365
/// The information about the rule walk in addition to a custom state. This type is introduced so
/// that `walk_rules` can be extended later with more information about the walk if needed.
#[derive(Debug, PartialEq, Eq)]
struct RuleWalkInfo<O> {
    /// Whether there is a rule with a source address matcher during the walk.
    ///
    /// This is set as soon as such a rule is visited, whether or not the rule
    /// matches the input.
    observed_source_address_matcher: bool,
    /// The custom info carried. For example this could be the lookup result from the user provided
    /// function.
    inner: O,
}
1376
1377/// A helper function that traverses through the rules table.
1378///
1379/// To walk through the rules, you need to provide it with an initial value for the loop and a
1380/// callback function that yieds a [`ControlFlow`] result to indicate whether the traversal should
1381/// stop.
1382///
1383/// # Returns
1384///
1385/// - `ControlFlow::Break(RuleAction::Lookup(_))` if we hit a lookup rule and an output is
1386///   yielded from the route table.
1387/// - `ControlFlow::Break(RuleAction::Unreachable)` if we hit an unreachable rule.
1388/// - `ControlFlow::Continue(_)` if we finished walking the rules table without yielding any
1389///   result.
1390fn walk_rules<
1391    I: IpLayerIpExt,
1392    BT: IpRoutingBindingsTypes + MatcherBindingsTypes,
1393    CC: IpRouteTablesContext<I, BT, DeviceId: InterfaceProperties<BT::DeviceClass>>,
1394    O,
1395    State,
1396    F: FnMut(
1397        State,
1398        &mut CC::IpDeviceIdCtx<'_>,
1399        &RoutingTable<I, CC::DeviceId>,
1400    ) -> ControlFlow<O, State>,
1401>(
1402    core_ctx: &mut CC,
1403    rules: &RulesTable<I, CC::DeviceId, BT>,
1404    init: State,
1405    rule_input: &RuleInput<'_, I, CC::DeviceId>,
1406    mut lookup_table: F,
1407) -> ControlFlow<RuleAction<RuleWalkInfo<O>>, RuleWalkInfo<State>> {
1408    rules.iter().try_fold(
1409        RuleWalkInfo { inner: init, observed_source_address_matcher: false },
1410        |RuleWalkInfo { inner: state, observed_source_address_matcher },
1411         Rule { action, matcher }| {
1412            let observed_source_address_matcher =
1413                observed_source_address_matcher || matcher.source_address_matcher.is_some();
1414            if !matcher.matches(rule_input) {
1415                return ControlFlow::Continue(RuleWalkInfo {
1416                    inner: state,
1417                    observed_source_address_matcher,
1418                });
1419            }
1420            match action {
1421                RuleAction::Unreachable => return ControlFlow::Break(RuleAction::Unreachable),
1422                RuleAction::Lookup(table_id) => core_ctx.with_ip_routing_table(
1423                    &table_id,
1424                    |core_ctx, table| match lookup_table(state, core_ctx, table) {
1425                        ControlFlow::Break(out) => {
1426                            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
1427                                inner: out,
1428                                observed_source_address_matcher,
1429                            }))
1430                        }
1431                        ControlFlow::Continue(state) => ControlFlow::Continue(RuleWalkInfo {
1432                            inner: state,
1433                            observed_source_address_matcher,
1434                        }),
1435                    },
1436                ),
1437            }
1438        },
1439    )
1440}
1441
1442/// Returns the outgoing routing instructions for reaching the given destination.
1443///
1444/// If a `device` is specified, the resolved route is limited to those that
1445/// egress over the device.
1446///
1447/// If `src_ip` is specified the resolved route is limited to those that egress
1448/// over a device with the address assigned.
1449///
1450/// This function should only be used for calculating a route for an outgoing packet
1451/// that is generated by us.
1452pub fn resolve_output_route_to_destination<
1453    I: Ip + IpDeviceStateIpExt + IpDeviceIpExt + IpLayerIpExt,
1454    BC: IpDeviceBindingsContext<I, CC::DeviceId> + IpLayerBindingsContext<I, CC::DeviceId>,
1455    CC: IpStateContext<I, BC> + IpDeviceContext<I> + device::IpDeviceConfigurationContext<I, BC>,
1456>(
1457    core_ctx: &mut CC,
1458    device: Option<&CC::DeviceId>,
1459    src_ip_and_policy: Option<(IpDeviceAddr<I::Addr>, NonLocalSrcAddrPolicy)>,
1460    dst_ip: Option<RoutableIpAddr<I::Addr>>,
1461    marks: &Marks,
1462) -> Result<ResolvedRoute<I, CC::DeviceId>, ResolveRouteError> {
1463    enum LocalDelivery<A, D> {
1464        WeakLoopback { dst_ip: A, device: D },
1465        StrongForDevice(D),
1466    }
1467
1468    // Check if locally destined. If the destination is an address assigned
1469    // on an interface, and an egress interface wasn't specifically
1470    // selected, route via the loopback device. This lets us operate as a
1471    // strong host when an outgoing interface is explicitly requested while
1472    // still enabling local delivery via the loopback interface, which is
1473    // acting as a weak host. Note that if the loopback interface is
1474    // requested as an outgoing interface, route selection is still
1475    // performed as a strong host! This makes the loopback interface behave
1476    // more like the other interfaces on the system.
1477    //
1478    // TODO(https://fxbug.dev/42175703): Encode the delivery of locally-
1479    // destined packets to loopback in the route table.
1480    //
1481    // TODO(https://fxbug.dev/322539434): Linux is more permissive about
1482    // allowing cross-device local delivery even when SO_BINDTODEVICE or
1483    // link-local addresses are involved, and this behavior may need to be
1484    // emulated.
1485    let local_delivery_instructions: Option<LocalDelivery<IpDeviceAddr<I::Addr>, CC::DeviceId>> = {
1486        let dst_ip = dst_ip.and_then(IpDeviceAddr::new_from_socket_ip_addr);
1487        match (device, dst_ip) {
1488            (Some(device), Some(dst_ip)) => is_local_assigned_address(core_ctx, device, dst_ip)
1489                .then_some(LocalDelivery::StrongForDevice(device.clone())),
1490            (None, Some(dst_ip)) => {
1491                get_device_with_assigned_address(core_ctx, dst_ip).map(
1492                    |(dst_device, _addr_status)| {
1493                        // If either the source or destination addresses needs
1494                        // a zone ID, then use strong host to enforce that the
1495                        // source and destination addresses are assigned to the
1496                        // same interface.
1497                        if src_ip_and_policy
1498                            .is_some_and(|(ip, _policy)| ip.as_ref().must_have_zone())
1499                            || dst_ip.as_ref().must_have_zone()
1500                        {
1501                            LocalDelivery::StrongForDevice(dst_device)
1502                        } else {
1503                            LocalDelivery::WeakLoopback { dst_ip, device: dst_device }
1504                        }
1505                    },
1506                )
1507            }
1508            (_, None) => None,
1509        }
1510    };
1511
1512    if let Some(local_delivery) = local_delivery_instructions {
1513        let loopback = core_ctx.loopback_id().ok_or(ResolveRouteError::Unreachable)?;
1514
1515        let (src_addr, dest_device) = match local_delivery {
1516            LocalDelivery::WeakLoopback { dst_ip, device } => {
1517                let src_ip = match src_ip_and_policy {
1518                    Some((src_ip, NonLocalSrcAddrPolicy::Deny)) => {
1519                        let _device = get_device_with_assigned_address(core_ctx, src_ip)
1520                            .ok_or(ResolveRouteError::NoSrcAddr)?;
1521                        src_ip
1522                    }
1523                    Some((src_ip, NonLocalSrcAddrPolicy::Allow)) => src_ip,
1524                    None => dst_ip,
1525                };
1526                (src_ip, device)
1527            }
1528            LocalDelivery::StrongForDevice(device) => {
1529                (get_local_addr(core_ctx, src_ip_and_policy, &device, dst_ip)?, device)
1530            }
1531        };
1532        return Ok(ResolvedRoute {
1533            src_addr,
1534            local_delivery_device: Some(dest_device),
1535            device: loopback,
1536            next_hop: NextHop::RemoteAsNeighbor,
1537            internal_forwarding: InternalForwarding::NotUsed,
1538        });
1539    }
1540    let bound_address = src_ip_and_policy.map(|(sock_addr, _policy)| sock_addr.into_inner().get());
1541    let rule_input = RuleInput {
1542        packet_origin: PacketOrigin::Local { bound_address, bound_device: device },
1543        marks,
1544    };
1545    core_ctx.with_rules_table(|core_ctx, rules: &RulesTable<_, _, BC>| {
1546        let mut walk_rules = |rule_input, src_ip_and_policy| {
1547            walk_rules(
1548                core_ctx,
1549                rules,
1550                None, /* first error encountered */
1551                rule_input,
1552                |first_error, core_ctx, table| {
1553                    let mut matching_with_addr = table.lookup_filter_map(
1554                        core_ctx,
1555                        device,
1556                        dst_ip.map_or(I::UNSPECIFIED_ADDRESS, |a| a.addr()),
1557                        |core_ctx, d| {
1558                            Some(get_local_addr_with_internal_forwarding(
1559                                core_ctx,
1560                                src_ip_and_policy,
1561                                d,
1562                                dst_ip,
1563                            ))
1564                        },
1565                    );
1566
1567                    let first_error_in_this_table = match matching_with_addr.next() {
1568                        Some((
1569                            Destination { device, next_hop },
1570                            Ok((local_addr, internal_forwarding)),
1571                        )) => {
1572                            return ControlFlow::Break(Ok((
1573                                Destination { device: device.clone(), next_hop },
1574                                local_addr,
1575                                internal_forwarding,
1576                            )));
1577                        }
1578                        Some((_, Err(e))) => e,
1579                        // Note: rule evaluation will continue on to the next rule, if the
1580                        // previous rule was `Lookup` but the table didn't have the route
1581                        // inside of it.
1582                        None => return ControlFlow::Continue(first_error),
1583                    };
1584
1585                    matching_with_addr
1586                        .filter_map(|(destination, local_addr)| {
1587                            // Select successful routes. We ignore later errors
1588                            // since we've already saved the first one.
1589                            local_addr.ok_checked::<ResolveRouteError>().map(
1590                                |(local_addr, internal_forwarding)| {
1591                                    (destination, local_addr, internal_forwarding)
1592                                },
1593                            )
1594                        })
1595                        .next()
1596                        .map_or(
1597                            ControlFlow::Continue(first_error.or(Some(first_error_in_this_table))),
1598                            |(
1599                                Destination { device, next_hop },
1600                                local_addr,
1601                                internal_forwarding,
1602                            )| {
1603                                ControlFlow::Break(Ok((
1604                                    Destination { device: device.clone(), next_hop },
1605                                    local_addr,
1606                                    internal_forwarding,
1607                                )))
1608                            },
1609                        )
1610                },
1611            )
1612        };
1613
1614        let result = match walk_rules(&rule_input, src_ip_and_policy) {
1615            // Only try to resolve a route again if all of the following are true:
1616            // 1. The source address is not provided by the caller.
1617            // 2. A route is successfully resolved so we selected a source address.
1618            // 3. There is a rule with a source address matcher during the resolution.
1619            // The rationale is to make sure the route resolution converges to a sensible route
1620            // after considering the source address we select.
1621            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
1622                inner: Ok((_dst, selected_src_addr, _internal_forwarding)),
1623                observed_source_address_matcher: true,
1624            })) if src_ip_and_policy.is_none() => walk_rules(
1625                &RuleInput {
1626                    packet_origin: PacketOrigin::Local {
1627                        bound_address: Some(selected_src_addr.into()),
1628                        bound_device: device,
1629                    },
1630                    marks,
1631                },
1632                Some((selected_src_addr, NonLocalSrcAddrPolicy::Deny)),
1633            ),
1634            result => result,
1635        };
1636
1637        match result {
1638            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
1639                inner: result,
1640                observed_source_address_matcher: _,
1641            })) => {
1642                result.map(|(Destination { device, next_hop }, src_addr, internal_forwarding)| {
1643                    ResolvedRoute {
1644                        src_addr,
1645                        device,
1646                        local_delivery_device: None,
1647                        next_hop,
1648                        internal_forwarding,
1649                    }
1650                })
1651            }
1652            ControlFlow::Break(RuleAction::Unreachable) => Err(ResolveRouteError::Unreachable),
1653            ControlFlow::Continue(RuleWalkInfo {
1654                inner: first_error,
1655                observed_source_address_matcher: _,
1656            }) => Err(first_error.unwrap_or(ResolveRouteError::Unreachable)),
1657        }
1658    })
1659}
1660
/// Enables a blanket implementation of [`IpSocketContext`].
///
/// Implementing this marker trait for a type opts that type into the blanket
/// implementation of `IpSocketContext`, provided the type also satisfies the
/// other bounds required by that implementation. The trait has no methods.
pub trait UseIpSocketContextBlanket {}
1666
1667impl<
1668    I: Ip + IpDeviceStateIpExt + IpDeviceIpExt + IpLayerIpExt,
1669    BC: IpDeviceBindingsContext<I, CC::DeviceId>
1670        + IpLayerBindingsContext<I, CC::DeviceId>
1671        + IpSocketBindingsContext<CC::DeviceId>,
1672    CC: IpLayerEgressContext<I, BC>
1673        + IpStateContext<I, BC>
1674        + IpDeviceContext<I>
1675        + IpDeviceConfirmReachableContext<I, BC>
1676        + IpDeviceMtuContext<I>
1677        + device::IpDeviceConfigurationContext<I, BC>
1678        + UseIpSocketContextBlanket,
1679> IpSocketContext<I, BC> for CC
1680{
1681    fn lookup_route(
1682        &mut self,
1683        _bindings_ctx: &mut BC,
1684        device: Option<&CC::DeviceId>,
1685        local_ip: Option<IpDeviceAddr<I::Addr>>,
1686        addr: RoutableIpAddr<I::Addr>,
1687        transparent: bool,
1688        marks: &Marks,
1689    ) -> Result<ResolvedRoute<I, CC::DeviceId>, ResolveRouteError> {
1690        let src_ip_and_policy = local_ip.map(|local_ip| {
1691            (
1692                local_ip,
1693                if transparent {
1694                    NonLocalSrcAddrPolicy::Allow
1695                } else {
1696                    NonLocalSrcAddrPolicy::Deny
1697                },
1698            )
1699        });
1700        let res =
1701            resolve_output_route_to_destination(self, device, src_ip_and_policy, Some(addr), marks);
1702        trace!(
1703            "lookup_route(\
1704                device={device:?}, \
1705                local_ip={local_ip:?}, \
1706                addr={addr:?}, \
1707                transparent={transparent:?}, \
1708                marks={marks:?}) => {res:?}"
1709        );
1710        res
1711    }
1712
1713    fn send_ip_packet<S>(
1714        &mut self,
1715        bindings_ctx: &mut BC,
1716        meta: SendIpPacketMeta<
1717            I,
1718            &<CC as DeviceIdContext<AnyDevice>>::DeviceId,
1719            SpecifiedAddr<I::Addr>,
1720        >,
1721        body: S,
1722        packet_metadata: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
1723    ) -> Result<(), IpSendFrameError<S>>
1724    where
1725        S: TransportPacketSerializer<I>,
1726        S::Buffer: BufferMut,
1727    {
1728        send_ip_packet_from_device(self, bindings_ctx, meta.into(), body, packet_metadata)
1729    }
1730
1731    fn get_loopback_device(&mut self) -> Option<Self::DeviceId> {
1732        device::IpDeviceConfigurationContext::<I, _>::loopback_id(self)
1733    }
1734
1735    fn confirm_reachable(
1736        &mut self,
1737        bindings_ctx: &mut BC,
1738        dst: SpecifiedAddr<I::Addr>,
1739        input: RuleInput<'_, I, Self::DeviceId>,
1740    ) {
1741        match lookup_route_table(self, dst.get(), input) {
1742            Some(Destination { next_hop, device }) => {
1743                let neighbor = match next_hop {
1744                    NextHop::RemoteAsNeighbor => dst,
1745                    NextHop::Gateway(gateway) => gateway,
1746                    NextHop::Broadcast(marker) => {
1747                        I::map_ip::<_, ()>(
1748                            WrapBroadcastMarker(marker),
1749                            |WrapBroadcastMarker(())| {
1750                                debug!(
1751                                    "can't confirm {dst:?}@{device:?} as reachable: \
1752                                    dst is a broadcast address"
1753                                );
1754                            },
1755                            |WrapBroadcastMarker(never)| match never {},
1756                        );
1757                        return;
1758                    }
1759                };
1760                IpDeviceConfirmReachableContext::confirm_reachable(
1761                    self,
1762                    bindings_ctx,
1763                    &device,
1764                    neighbor,
1765                );
1766            }
1767            None => {
1768                debug!("can't confirm {dst:?} as reachable: no route");
1769            }
1770        }
1771    }
1772}
1773
/// Trait that provides basic socket information for types that carry a socket
/// ID.
///
/// `CC` is the core context type handed to each accessor; it may be unsized.
pub trait SocketMetadata<CC>
where
    CC: ?Sized,
{
    /// Returns the [`SocketCookie`] for the socket.
    fn socket_cookie(&self, core_ctx: &mut CC) -> SocketCookie;
    /// Returns the socket's [`Marks`].
    fn marks(&self, core_ctx: &mut CC) -> Marks;
}
1785
/// The IP context providing dispatch to the available transport protocols.
///
/// This trait acts like a demux on the transport protocol for ingress IP
/// packets.
pub trait IpTransportDispatchContext<I: IpLayerIpExt, BC>: DeviceIdContext<AnyDevice> {
    /// The socket type produced by a successful early demux.
    type EarlyDemuxSocket: SocketMetadata<Self>;

    /// Performs early demux, optionally yielding a [`Self::EarlyDemuxSocket`].
    ///
    /// The lookup is keyed on the receiving `device`, the frame destination,
    /// the packet's source and destination addresses, its protocol and its
    /// `body`.
    fn early_demux<B: ParseBuffer>(
        &mut self,
        device: &Self::DeviceId,
        frame_dst: Option<FrameDestination>,
        src_ip: I::Addr,
        dst_ip: I::Addr,
        proto: I::Proto,
        body: B,
    ) -> Option<Self::EarlyDemuxSocket>;

    /// Dispatches a received incoming IP packet to the appropriate protocol.
    ///
    /// `early_demux_socket` carries an optional [`Self::EarlyDemuxSocket`],
    /// the same type that [`Self::early_demux`] returns.
    fn dispatch_receive_ip_packet<B: BufferMut, H: IpHeaderInfo<I>>(
        &mut self,
        bindings_ctx: &mut BC,
        device: &Self::DeviceId,
        src_ip: I::RecvSrcAddr,
        dst_ip: SpecifiedAddr<I::Addr>,
        proto: I::Proto,
        body: B,
        info: &LocalDeliveryPacketInfo<I, H>,
        early_demux_socket: Option<Self::EarlyDemuxSocket>,
    ) -> Result<(), TransportReceiveError>;
}
1818
/// A marker trait for all the contexts required for IP ingress.
///
/// The trait has no items of its own; it only bundles the listed supertraits
/// and is blanket-implemented for every type that satisfies them.
pub trait IpLayerIngressContext<I: IpLayerIpExt, BC: IpLayerBindingsContext<I, Self::DeviceId>>:
    IpTransportDispatchContext<
        I,
        BC,
        DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
    > + IpDeviceIngressStateContext<I>
    + IpDeviceMtuContext<I>
    + IpDeviceSendContext<I, BC>
    + IcmpErrorHandler<I, BC>
    + IpLayerContext<I, BC>
    + FragmentHandler<I, BC>
    + FilterHandlerProvider<I, BC>
    + RawIpSocketHandler<I, BC>
{
}
1835
// Blanket implementation: any `CC` that satisfies every supertrait bound of
// `IpLayerIngressContext` is an `IpLayerIngressContext`.
impl<
    I: IpLayerIpExt,
    BC: IpLayerBindingsContext<I, CC::DeviceId>,
    CC: IpTransportDispatchContext<
            I,
            BC,
            DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
        > + IpDeviceIngressStateContext<I>
        + IpDeviceMtuContext<I>
        + IpDeviceSendContext<I, BC>
        + IcmpErrorHandler<I, BC>
        + IpLayerContext<I, BC>
        + FragmentHandler<I, BC>
        + FilterHandlerProvider<I, BC>
        + RawIpSocketHandler<I, BC>,
> IpLayerIngressContext<I, BC> for CC
{
}
1854
/// A marker trait for all the contexts required for IP egress.
///
/// The trait has no items of its own; it only bundles the listed supertraits
/// and is blanket-implemented for every type that satisfies them.
pub trait IpLayerEgressContext<I, BC>:
    IpDeviceSendContext<I, BC, DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>>
    + FilterHandlerProvider<I, BC>
    + ResourceCounterContext<Self::DeviceId, IpCounters<I>>
where
    I: IpLayerIpExt,
    BC: FilterBindingsContext<Self::DeviceId> + TxMetadataBindingsTypes,
{
}
1865
// Blanket implementation: any `CC` that satisfies every supertrait bound of
// `IpLayerEgressContext` is an `IpLayerEgressContext`.
impl<I, BC, CC> IpLayerEgressContext<I, BC> for CC
where
    I: IpLayerIpExt,
    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes,
    CC: IpDeviceSendContext<I, BC, DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>>
        + FilterHandlerProvider<I, BC>
        + ResourceCounterContext<Self::DeviceId, IpCounters<I>>,
{
}
1875
/// A marker trait for all the contexts required for IP forwarding.
///
/// Forwarding needs everything egress needs ([`IpLayerEgressContext`]) plus
/// ICMP error handling and device MTU access. The trait is blanket-implemented
/// for every type that satisfies those supertraits.
pub trait IpLayerForwardingContext<I: IpLayerIpExt, BC: IpLayerBindingsContext<I, Self::DeviceId>>:
    IpLayerEgressContext<I, BC> + IcmpErrorHandler<I, BC> + IpDeviceMtuContext<I>
{
}
1881
// Blanket implementation: any `CC` that satisfies every supertrait bound of
// `IpLayerForwardingContext` is an `IpLayerForwardingContext`.
impl<
    I: IpLayerIpExt,
    BC: IpLayerBindingsContext<I, CC::DeviceId>,
    CC: IpLayerEgressContext<I, BC> + IcmpErrorHandler<I, BC> + IpDeviceMtuContext<I>,
> IpLayerForwardingContext<I, BC> for CC
{
}
1889
/// A builder for IPv4 state.
///
/// Call [`Ipv4StateBuilder::build`] to obtain the resulting [`Ipv4State`].
#[derive(Copy, Clone, Default)]
pub struct Ipv4StateBuilder {
    /// Builder for the ICMPv4 portion of the state.
    icmp: Icmpv4StateBuilder,
}
1895
1896impl Ipv4StateBuilder {
1897    /// Get the builder for the ICMPv4 state.
1898    #[cfg(any(test, feature = "testutils"))]
1899    pub fn icmpv4_builder(&mut self) -> &mut Icmpv4StateBuilder {
1900        &mut self.icmp
1901    }
1902
1903    /// Builds the [`Ipv4State`].
1904    pub fn build<
1905        CC: CoreTimerContext<IpLayerTimerId, BC>,
1906        StrongDeviceId: StrongDeviceIdentifier,
1907        BC: TimerContext + RngContext + IpLayerBindingsTypes,
1908    >(
1909        self,
1910        bindings_ctx: &mut BC,
1911    ) -> Ipv4State<StrongDeviceId, BC> {
1912        let Ipv4StateBuilder { icmp } = self;
1913
1914        Ipv4State {
1915            inner: IpStateInner::new::<CC>(bindings_ctx),
1916            icmp: icmp.build(),
1917            next_packet_id: Default::default(),
1918        }
1919    }
1920}
1921
/// A builder for IPv6 state.
///
/// The stable SLAAC secret key must be provided through
/// [`Ipv6StateBuilder::slaac_stable_secret_key`] before calling
/// [`Ipv6StateBuilder::build`], which panics if the key is unset. The
/// `Default` implementation pre-populates the key only in tests or when the
/// `testutils` feature is enabled.
#[derive(Copy, Clone)]
pub struct Ipv6StateBuilder {
    /// Builder for the ICMPv6 portion of the state.
    icmp: Icmpv6StateBuilder,
    /// Secret key used to generate stable SLAAC addresses; `None` until set.
    slaac_stable_secret_key: Option<IidSecret>,
}
1930
1931impl Ipv6StateBuilder {
1932    /// Sets the secret key used to generate stable SLAAC addresses.
1933    ///
1934    /// If `slaac_stable_secret_key` is left unset, opaque IIDs will not be used to
1935    /// generate stable SLAAC addresses.
1936    pub fn slaac_stable_secret_key(&mut self, secret_key: IidSecret) -> &mut Self {
1937        self.slaac_stable_secret_key = Some(secret_key);
1938        self
1939    }
1940
1941    /// Builds the [`Ipv6State`].
1942    ///
1943    /// # Panics
1944    ///
1945    /// Panics if the `slaac_stable_secret_key` has not been set.
1946    pub fn build<
1947        CC: CoreTimerContext<IpLayerTimerId, BC>,
1948        StrongDeviceId: StrongDeviceIdentifier,
1949        BC: TimerContext + RngContext + IpLayerBindingsTypes,
1950    >(
1951        self,
1952        bindings_ctx: &mut BC,
1953    ) -> Ipv6State<StrongDeviceId, BC> {
1954        let Ipv6StateBuilder { icmp, slaac_stable_secret_key } = self;
1955
1956        let slaac_stable_secret_key = slaac_stable_secret_key
1957            .expect("stable SLAAC secret key was not provided to `Ipv6StateBuilder`");
1958
1959        Ipv6State {
1960            inner: IpStateInner::new::<CC>(bindings_ctx),
1961            icmp: icmp.build(),
1962            slaac_counters: Default::default(),
1963            slaac_temp_secret_key: IidSecret::new_random(&mut bindings_ctx.rng()),
1964            slaac_stable_secret_key,
1965        }
1966    }
1967}
1968
1969impl Default for Ipv6StateBuilder {
1970    fn default() -> Self {
1971        #[cfg(any(test, feature = "testutils"))]
1972        let slaac_stable_secret_key = Some(IidSecret::ALL_ONES);
1973
1974        #[cfg(not(any(test, feature = "testutils")))]
1975        let slaac_stable_secret_key = None;
1976
1977        Self { icmp: Icmpv6StateBuilder::default(), slaac_stable_secret_key }
1978    }
1979}
1980
/// The stack's IPv4 state.
///
/// Produced by [`Ipv4StateBuilder::build`].
pub struct Ipv4State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> {
    /// The common inner IP layer state.
    pub inner: IpStateInner<Ipv4, StrongDeviceId, BT>,
    /// The ICMP state.
    pub icmp: Icmpv4State<BT>,
    /// The atomic counter providing IPv4 packet identifiers.
    pub next_packet_id: AtomicU16,
}
1990
1991impl<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
1992    AsRef<IpStateInner<Ipv4, StrongDeviceId, BT>> for Ipv4State<StrongDeviceId, BT>
1993{
1994    fn as_ref(&self) -> &IpStateInner<Ipv4, StrongDeviceId, BT> {
1995        &self.inner
1996    }
1997}
1998
/// Generates an IP packet ID.
///
/// This is only meaningful for IPv4, see [`IpLayerIpExt`].
///
/// The ID is produced by passing the packet ID state exposed by `core_ctx`
/// (via `with_next_packet_id`) to `I::next_packet_id_from_state`.
pub fn gen_ip_packet_id<I: IpLayerIpExt, CC: IpDeviceEgressStateContext<I>>(
    core_ctx: &mut CC,
) -> I::PacketId {
    core_ctx.with_next_packet_id(|state| I::next_packet_id_from_state(state))
}
2007
/// The stack's IPv6 state.
///
/// Produced by [`Ipv6StateBuilder::build`].
pub struct Ipv6State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> {
    /// The common inner IP layer state.
    pub inner: IpStateInner<Ipv6, StrongDeviceId, BT>,
    /// ICMPv6 state.
    pub icmp: Icmpv6State<BT>,
    /// Stateless address autoconfiguration counters.
    pub slaac_counters: SlaacCounters,
    /// Secret key used for generating SLAAC temporary addresses.
    pub slaac_temp_secret_key: IidSecret,
    /// Secret key used for generating SLAAC stable addresses.
    ///
    /// Always present: [`Ipv6StateBuilder::build`] panics if no key was
    /// configured on the builder.
    pub slaac_stable_secret_key: IidSecret,
}
2024
2025impl<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2026    AsRef<IpStateInner<Ipv6, StrongDeviceId, BT>> for Ipv6State<StrongDeviceId, BT>
2027{
2028    fn as_ref(&self) -> &IpStateInner<Ipv6, StrongDeviceId, BT> {
2029        &self.inner
2030    }
2031}
2032
2033impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2034    OrderedLockAccess<IpPacketFragmentCache<I, BT>> for IpStateInner<I, D, BT>
2035{
2036    type Lock = Mutex<IpPacketFragmentCache<I, BT>>;
2037    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2038        OrderedLockRef::new(&self.fragment_cache)
2039    }
2040}
2041
2042impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2043    OrderedLockAccess<PmtuCache<I, BT>> for IpStateInner<I, D, BT>
2044{
2045    type Lock = Mutex<PmtuCache<I, BT>>;
2046    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2047        OrderedLockRef::new(&self.pmtu_cache)
2048    }
2049}
2050
2051impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2052    OrderedLockAccess<RulesTable<I, D, BT>> for IpStateInner<I, D, BT>
2053{
2054    type Lock = RwLock<RulesTable<I, D, BT>>;
2055    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2056        OrderedLockRef::new(&self.rules_table)
2057    }
2058}
2059
2060impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2061    OrderedLockAccess<HashMap<RoutingTableId<I, D, BT>, PrimaryRc<BaseRoutingTableState<I, D, BT>>>>
2062    for IpStateInner<I, D, BT>
2063{
2064    type Lock =
2065        Mutex<HashMap<RoutingTableId<I, D, BT>, PrimaryRc<BaseRoutingTableState<I, D, BT>>>>;
2066    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2067        OrderedLockRef::new(&self.tables)
2068    }
2069}
2070
2071impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpRoutingBindingsTypes>
2072    OrderedLockAccess<RoutingTable<I, D>> for RoutingTableId<I, D, BT>
2073{
2074    type Lock = RwLock<RoutingTable<I, D>>;
2075    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2076        let Self(inner) = self;
2077        OrderedLockRef::new(&inner.routing_table)
2078    }
2079}
2080
2081impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2082    OrderedLockAccess<MulticastForwardingState<I, D, BT>> for IpStateInner<I, D, BT>
2083{
2084    type Lock = RwLock<MulticastForwardingState<I, D, BT>>;
2085    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2086        OrderedLockRef::new(&self.multicast_forwarding)
2087    }
2088}
2089
2090impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2091    OrderedLockAccess<RawIpSocketMap<I, D::Weak, BT>> for IpStateInner<I, D, BT>
2092{
2093    type Lock = RwLock<RawIpSocketMap<I, D::Weak, BT>>;
2094    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2095        OrderedLockRef::new(&self.raw_sockets)
2096    }
2097}
2098
2099impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2100    OrderedLockAccess<filter::State<I, WeakAddressId<I, BT>, BT>> for IpStateInner<I, D, BT>
2101{
2102    type Lock = RwLock<filter::State<I, WeakAddressId<I, BT>, BT>>;
2103    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2104        OrderedLockRef::new(&self.filter)
2105    }
2106}
2107
/// Marker trait for the bindings types required by the IP layer's inner state.
///
/// The trait has no items of its own; it only bundles the listed supertraits
/// and is blanket-implemented for every type that satisfies them.
pub trait IpStateBindingsTypes:
    PmtuBindingsTypes
    + FragmentBindingsTypes
    + RawIpSocketsBindingsTypes
    + FilterBindingsTypes
    + MulticastForwardingBindingsTypes
    + IpDeviceStateBindingsTypes
    + IpRoutingBindingsTypes
{
}
// Blanket implementation: any `BT` that satisfies every supertrait bound of
// `IpStateBindingsTypes` is an `IpStateBindingsTypes`.
impl<BT> IpStateBindingsTypes for BT where
    BT: PmtuBindingsTypes
        + FragmentBindingsTypes
        + RawIpSocketsBindingsTypes
        + FilterBindingsTypes
        + MulticastForwardingBindingsTypes
        + IpDeviceStateBindingsTypes
        + IpRoutingBindingsTypes
{
}
2129
/// Bindings ID for a routing table.
///
/// Distinguishes the main table from tables that carry a bindings-provided
/// `BT::RoutingTableId`.
#[derive(Derivative)]
#[derivative(Debug(bound = ""))]
#[derivative(Clone(bound = "BT::RoutingTableId: Clone"))]
pub enum RoutingTableCookie<BT: IpRoutingBindingsTypes> {
    /// Main table.
    Main,
    /// A table added by user (Bindings).
    BindingsId(BT::RoutingTableId),
}
2140
/// State for a routing table.
///
/// Pairs the lock-protected [`RoutingTable`] with the cookie identifying it
/// to bindings.
#[derive(Derivative)]
#[derivative(Debug(bound = "D: Debug"))]
pub struct BaseRoutingTableState<I: Ip, D, BT: IpRoutingBindingsTypes> {
    /// The routing table itself, behind a read-write lock.
    routing_table: RwLock<RoutingTable<I, D>>,
    /// The cookie identifying this table (main or bindings-provided).
    bindings_id: RoutingTableCookie<BT>,
}
2148
2149impl<I: Ip, D, BT: IpRoutingBindingsTypes> BaseRoutingTableState<I, D, BT> {
2150    pub(crate) fn with_bindings_id(bindings_id: RoutingTableCookie<BT>) -> Self {
2151        Self { bindings_id, routing_table: Default::default() }
2152    }
2153}
2154
/// Identifier to a routing table.
///
/// A newtype around a strong reference to the table's
/// [`BaseRoutingTableState`]. Equality, hashing and cloning are all derived
/// from that reference and place no bounds on the type parameters.
#[derive(Derivative)]
#[derivative(PartialEq(bound = ""))]
#[derivative(Eq(bound = ""))]
#[derivative(Hash(bound = ""))]
#[derivative(Clone(bound = ""))]
pub struct RoutingTableId<I: Ip, D, BT: IpRoutingBindingsTypes>(
    StrongRc<BaseRoutingTableState<I, D, BT>>,
);
2164
2165impl<I: Ip, D, BT: IpRoutingBindingsTypes> Debug for RoutingTableId<I, D, BT> {
2166    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2167        let Self(rc) = self;
2168        f.debug_tuple("RoutingTableId").field(&I::NAME).field(&rc.bindings_id).finish()
2169    }
2170}
2171
2172impl<I: Ip, D, BT: IpRoutingBindingsTypes> RoutingTableId<I, D, BT> {
2173    /// Creates a new table ID.
2174    pub(crate) fn new(rc: StrongRc<BaseRoutingTableState<I, D, BT>>) -> Self {
2175        Self(rc)
2176    }
2177
2178    /// Provides direct access to the forwarding table.
2179    #[cfg(any(test, feature = "testutils"))]
2180    pub fn table(&self) -> &RwLock<RoutingTable<I, D>> {
2181        let Self(inner) = self;
2182        &inner.routing_table
2183    }
2184
2185    /// Downgrades the strong ID into a weak one.
2186    pub fn downgrade(&self) -> WeakRoutingTableId<I, D, BT>
2187    where
2188        BT::RoutingTableId: Clone,
2189    {
2190        let Self(rc) = self;
2191        WeakRoutingTableId { rc: StrongRc::downgrade(rc), bindings_id: rc.bindings_id.clone() }
2192    }
2193
2194    #[cfg(test)]
2195    fn get_mut(&self) -> impl DerefMut<Target = RoutingTable<I, D>> + '_ {
2196        let Self(rc) = self;
2197        rc.routing_table.write()
2198    }
2199
2200    /// Gets the bindings cookie for this routing table.
2201    pub fn bindings_id(&self) -> &RoutingTableCookie<BT> {
2202        let Self(rc) = self;
2203        &rc.bindings_id
2204    }
2205}
2206
/// Weak Identifier to a routing table.
///
/// Equality and hashing consider only the weak reference `rc`; the
/// `bindings_id` field is excluded from both.
#[derive(Derivative)]
#[derivative(Clone(bound = "BT::RoutingTableId: Clone"))]
#[derivative(PartialEq, Eq, Hash)]
pub struct WeakRoutingTableId<I: Ip, D, BT: IpRoutingBindingsTypes> {
    /// Weak reference to the table's state.
    rc: WeakRc<BaseRoutingTableState<I, D, BT>>,
    /// Copy of the table's bindings cookie; used by the `Debug` impl.
    #[derivative(PartialEq = "ignore")]
    #[derivative(Hash = "ignore")]
    bindings_id: RoutingTableCookie<BT>,
}
2217
2218impl<I: Ip, D, BT: IpRoutingBindingsTypes> Debug for WeakRoutingTableId<I, D, BT> {
2219    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2220        let Self { bindings_id, .. } = self;
2221        f.debug_tuple("WeakRoutingTableId").field(&I::NAME).field(bindings_id).finish()
2222    }
2223}
2224
/// The inner state for the IP layer for IP version `I`.
///
/// Field order matters: fields are dropped in declaration order, and `tables`
/// is deliberately declared after `main_table_id` (see the comment on
/// `tables`).
#[derive(GenericOverIp)]
#[generic_over_ip(I, Ip)]
pub struct IpStateInner<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpStateBindingsTypes> {
    /// The routing rules table.
    rules_table: RwLock<RulesTable<I, D, BT>>,
    // TODO(https://fxbug.dev/355059838): Explore the option to let Bindings create the main table.
    /// Strong ID of the main routing table.
    main_table_id: RoutingTableId<I, D, BT>,
    /// Multicast forwarding state.
    multicast_forwarding: RwLock<MulticastForwardingState<I, D, BT>>,
    /// Multicast forwarding counters.
    multicast_forwarding_counters: MulticastForwardingCounters<I>,
    /// Cache of IP packet fragments.
    fragment_cache: Mutex<IpPacketFragmentCache<I, BT>>,
    /// Path MTU cache.
    pmtu_cache: Mutex<PmtuCache<I, BT>>,
    /// IP layer counters.
    counters: IpCounters<I>,
    /// Map of raw IP sockets.
    raw_sockets: RwLock<RawIpSocketMap<I, D::Weak, BT>>,
    /// Aggregate raw IP socket counters.
    raw_socket_counters: RawIpSocketCounters<I>,
    /// Filtering state.
    filter: RwLock<filter::State<I, WeakAddressId<I, BT>, BT>>,
    // Make sure the primary IDs are dropped last. Also note that the following hash map also stores
    // the primary ID to the main table, and if the user (Bindings) attempts to remove the main
    // table without dropping `main_table_id` first, it will panic. This serves as an assertion
    // that the main table cannot be removed and Bindings must never attempt to remove the main
    // routing table.
    tables: Mutex<HashMap<RoutingTableId<I, D, BT>, PrimaryRc<BaseRoutingTableState<I, D, BT>>>>,
    /// Stack-wide IGMP counters.
    igmp_counters: IgmpCounters,
    /// Stack-wide MLD counters.
    mld_counters: MldCounters,
}
2249
2250impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpStateBindingsTypes> IpStateInner<I, D, BT> {
2251    /// Gets the IP counters.
2252    pub fn counters(&self) -> &IpCounters<I> {
2253        &self.counters
2254    }
2255
2256    /// Gets the multicast forwarding counters.
2257    pub fn multicast_forwarding_counters(&self) -> &MulticastForwardingCounters<I> {
2258        &self.multicast_forwarding_counters
2259    }
2260
2261    /// Gets the aggregate raw IP socket counters.
2262    pub fn raw_ip_socket_counters(&self) -> &RawIpSocketCounters<I> {
2263        &self.raw_socket_counters
2264    }
2265
2266    /// Gets the main table ID.
2267    pub fn main_table_id(&self) -> &RoutingTableId<I, D, BT> {
2268        &self.main_table_id
2269    }
2270
2271    /// Provides direct access to the path MTU cache.
2272    #[cfg(any(test, feature = "testutils"))]
2273    pub fn pmtu_cache(&self) -> &Mutex<PmtuCache<I, BT>> {
2274        &self.pmtu_cache
2275    }
2276
2277    /// Provides direct access to the filtering state.
2278    #[cfg(any(test, feature = "testutils"))]
2279    pub fn filter(&self) -> &RwLock<filter::State<I, WeakAddressId<I, BT>, BT>> {
2280        &self.filter
2281    }
2282
2283    /// Gets the stack-wide IGMP counters.
2284    pub fn igmp_counters(&self) -> &IgmpCounters {
2285        &self.igmp_counters
2286    }
2287
2288    /// Gets the stack-wide MLD counters.
2289    pub fn mld_counters(&self) -> &MldCounters {
2290        &self.mld_counters
2291    }
2292}
2293
2294impl<
2295    I: IpLayerIpExt,
2296    D: StrongDeviceIdentifier,
2297    BC: TimerContext + RngContext + IpStateBindingsTypes + IpRoutingBindingsTypes,
2298> IpStateInner<I, D, BC>
2299{
2300    /// Creates a new inner IP layer state.
2301    fn new<CC: CoreTimerContext<IpLayerTimerId, BC>>(bindings_ctx: &mut BC) -> Self {
2302        let main_table: PrimaryRc<BaseRoutingTableState<I, D, BC>> =
2303            PrimaryRc::new(BaseRoutingTableState::with_bindings_id(RoutingTableCookie::Main));
2304        let main_table_id = RoutingTableId(PrimaryRc::clone_strong(&main_table));
2305        Self {
2306            rules_table: RwLock::new(RulesTable::new(main_table_id.clone())),
2307            tables: Mutex::new(HashMap::from_iter(core::iter::once((
2308                main_table_id.clone(),
2309                main_table,
2310            )))),
2311            main_table_id,
2312            multicast_forwarding: Default::default(),
2313            multicast_forwarding_counters: Default::default(),
2314            fragment_cache: Mutex::new(
2315                IpPacketFragmentCache::new::<NestedIntoCoreTimerCtx<CC, _>>(bindings_ctx),
2316            ),
2317            pmtu_cache: Mutex::new(PmtuCache::new::<NestedIntoCoreTimerCtx<CC, _>>(bindings_ctx)),
2318            counters: Default::default(),
2319            raw_sockets: Default::default(),
2320            raw_socket_counters: Default::default(),
2321            filter: RwLock::new(filter::State::new::<NestedIntoCoreTimerCtx<CC, _>>(bindings_ctx)),
2322            igmp_counters: Default::default(),
2323            mld_counters: Default::default(),
2324        }
2325    }
2326}
2327
/// The identifier for timer events in the IP layer.
///
/// Each IP-version-generic timer ID (fragment reassembly, PMTU, filter,
/// multicast forwarding) has one variant per IP version; the `From`
/// implementations on this type select the variant matching the version.
#[derive(Debug, Clone, Eq, PartialEq, Hash, GenericOverIp)]
#[generic_over_ip()]
pub enum IpLayerTimerId {
    /// A timer event for IPv4 packet reassembly timers.
    ReassemblyTimeoutv4(FragmentTimerId<Ipv4>),
    /// A timer event for IPv6 packet reassembly timers.
    ReassemblyTimeoutv6(FragmentTimerId<Ipv6>),
    /// A timer event for IPv4 path MTU discovery.
    PmtuTimeoutv4(PmtuTimerId<Ipv4>),
    /// A timer event for IPv6 path MTU discovery.
    PmtuTimeoutv6(PmtuTimerId<Ipv6>),
    /// A timer event for IPv4 filtering timers.
    FilterTimerv4(FilterTimerId<Ipv4>),
    /// A timer event for IPv6 filtering timers.
    FilterTimerv6(FilterTimerId<Ipv6>),
    /// A timer event for IPv4 Multicast forwarding timers.
    MulticastForwardingTimerv4(MulticastForwardingTimerId<Ipv4>),
    /// A timer event for IPv6 Multicast forwarding timers.
    MulticastForwardingTimerv6(MulticastForwardingTimerId<Ipv6>),
}
2349
2350impl<I: Ip> From<FragmentTimerId<I>> for IpLayerTimerId {
2351    fn from(timer: FragmentTimerId<I>) -> IpLayerTimerId {
2352        I::map_ip(timer, IpLayerTimerId::ReassemblyTimeoutv4, IpLayerTimerId::ReassemblyTimeoutv6)
2353    }
2354}
2355
2356impl<I: Ip> From<PmtuTimerId<I>> for IpLayerTimerId {
2357    fn from(timer: PmtuTimerId<I>) -> IpLayerTimerId {
2358        I::map_ip(timer, IpLayerTimerId::PmtuTimeoutv4, IpLayerTimerId::PmtuTimeoutv6)
2359    }
2360}
2361
2362impl<I: Ip> From<FilterTimerId<I>> for IpLayerTimerId {
2363    fn from(timer: FilterTimerId<I>) -> IpLayerTimerId {
2364        I::map_ip(timer, IpLayerTimerId::FilterTimerv4, IpLayerTimerId::FilterTimerv6)
2365    }
2366}
2367
2368impl<I: Ip> From<MulticastForwardingTimerId<I>> for IpLayerTimerId {
2369    fn from(timer: MulticastForwardingTimerId<I>) -> IpLayerTimerId {
2370        I::map_ip(
2371            timer,
2372            IpLayerTimerId::MulticastForwardingTimerv4,
2373            IpLayerTimerId::MulticastForwardingTimerv6,
2374        )
2375    }
2376}
2377
2378impl<CC, BC> HandleableTimer<CC, BC> for IpLayerTimerId
2379where
2380    CC: TimerHandler<BC, FragmentTimerId<Ipv4>>
2381        + TimerHandler<BC, FragmentTimerId<Ipv6>>
2382        + TimerHandler<BC, PmtuTimerId<Ipv4>>
2383        + TimerHandler<BC, PmtuTimerId<Ipv6>>
2384        + TimerHandler<BC, FilterTimerId<Ipv4>>
2385        + TimerHandler<BC, FilterTimerId<Ipv6>>
2386        + TimerHandler<BC, MulticastForwardingTimerId<Ipv4>>
2387        + TimerHandler<BC, MulticastForwardingTimerId<Ipv6>>,
2388    BC: TimerBindingsTypes,
2389{
2390    fn handle(self, core_ctx: &mut CC, bindings_ctx: &mut BC, timer: BC::UniqueTimerId) {
2391        match self {
2392            IpLayerTimerId::ReassemblyTimeoutv4(id) => {
2393                core_ctx.handle_timer(bindings_ctx, id, timer)
2394            }
2395            IpLayerTimerId::ReassemblyTimeoutv6(id) => {
2396                core_ctx.handle_timer(bindings_ctx, id, timer)
2397            }
2398            IpLayerTimerId::PmtuTimeoutv4(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2399            IpLayerTimerId::PmtuTimeoutv6(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2400            IpLayerTimerId::FilterTimerv4(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2401            IpLayerTimerId::FilterTimerv6(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2402            IpLayerTimerId::MulticastForwardingTimerv4(id) => {
2403                core_ctx.handle_timer(bindings_ctx, id, timer)
2404            }
2405            IpLayerTimerId::MulticastForwardingTimerv6(id) => {
2406                core_ctx.handle_timer(bindings_ctx, id, timer)
2407            }
2408        }
2409    }
2410}
2411
/// An ICMP error, and the metadata required to send it.
///
/// This allows the sending of the ICMP error to be decoupled from the
/// generation of the error, which is advantageous because sending the error
/// requires the underlying packet buffer, which cannot be "moved" in certain
/// contexts.
pub(crate) struct IcmpErrorSender<'a, I: IcmpHandlerIpExt, D> {
    /// The ICMP error that should be sent.
    err: I::IcmpError,
    /// The original source IP address of the packet (before the local-ingress
    /// hook evaluation).
    src_ip: I::SourceAddress,
    /// The original destination IP address of the packet (before the
    /// local-ingress hook evaluation).
    dst_ip: SpecifiedAddr<I::Addr>,
    /// The frame destination of the packet.
    frame_dst: Option<FrameDestination>,
    /// The device out of which to send the error.
    device: &'a D,
    /// The metadata from the packet, allowing the packet's backing buffer to be
    /// returned to its pre-IP-parse state with [`GrowBuffer::undo_parse`].
    meta: ParseMetadata,
    /// The marks used to send the ICMP error.
    marks: Marks,
}
2437
2438impl<'a, I: IcmpHandlerIpExt, D> IcmpErrorSender<'a, I, D> {
2439    /// Generate an send an appropriate ICMP error in response to this error.
2440    ///
2441    /// The provided `body` must be the original buffer from which the IP
2442    /// packet responsible for this error was parsed. It is expected to be in a
2443    /// state that allows undoing the IP packet parse (e.g. unmodified after the
2444    /// IP packet was parsed).
2445    fn respond_with_icmp_error<B, BC, CC>(
2446        self,
2447        core_ctx: &mut CC,
2448        bindings_ctx: &mut BC,
2449        mut body: B,
2450    ) where
2451        B: BufferMut,
2452        CC: IcmpErrorHandler<I, BC, DeviceId = D>,
2453    {
2454        let IcmpErrorSender { err, src_ip, dst_ip, frame_dst, device, meta, marks } = self;
2455        // Undo the parsing of the IP Packet, moving the buffer's cursor so that
2456        // it points at the start of the IP header. This way, the sent ICMP
2457        // error will contain the entire original IP packet.
2458        body.undo_parse(meta);
2459
2460        core_ctx.send_icmp_error_message(
2461            bindings_ctx,
2462            device,
2463            frame_dst,
2464            src_ip,
2465            dst_ip,
2466            body,
2467            err,
2468            &marks,
2469        );
2470    }
2471}
2472
// Early demux results may be invalidated by SNAT in the LOCAL_INGRESS hook.
// This struct is used to check if the early demux result is still valid.
//
// TODO(https://fxbug.dev/476507679): Add tests to ensure this works properly
// once SNAT is fully implemented.
#[derive(PartialEq, Eq)]
struct EarlyDemuxResult<I: Ip, S> {
    // The socket that early demux selected for the packet.
    socket: S,
    // The packet's source address, as observed when this result was created.
    src_addr: I::Addr,
    // The packet's transport-layer source port, as observed when this result
    // was created. `None` if no transport packet data was available.
    src_port: Option<u16>,
}
2484
2485impl<I: FilterIpExt, S> EarlyDemuxResult<I, S> {
2486    fn new<P: IpPacket<I>>(socket: S, packet: &P) -> Self {
2487        let src_port =
2488            packet.maybe_transport_packet().transport_packet_data().map(|t| t.src_port());
2489        Self { socket, src_addr: packet.src_addr(), src_port }
2490    }
2491
2492    // Returns the socket if it's still the right socket to handle the packet.
2493    fn take_socket<P: IpPacket<I>>(self, packet: &P) -> Option<S> {
2494        let src_port =
2495            packet.maybe_transport_packet().transport_packet_data().map(|t| t.src_port());
2496        (self.src_addr == packet.src_addr() && self.src_port == src_port).then_some(self.socket)
2497    }
2498
2499    fn update_packet_metadata<CC, BC>(
2500        &self,
2501        core_ctx: &mut CC,
2502        packet_metadata: &mut IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
2503    ) where
2504        I: IpLayerIpExt,
2505        S: SocketMetadata<CC>,
2506        BC: IpLayerBindingsContext<I, CC::DeviceId>,
2507        CC: IpLayerIngressContext<I, BC>,
2508    {
2509        packet_metadata.socket_cookie = Some(self.socket.socket_cookie(core_ctx));
2510        for mark in BC::marks_to_set_on_ingress() {
2511            *packet_metadata.marks.get_mut(*mark) = self.socket.marks(core_ctx).get(*mark).clone();
2512        }
2513    }
2514}
2515
2516// TODO(joshlf): Once we support multiple extension headers in IPv6, we will
2517// need to verify that the callers of this function are still sound. In
2518// particular, they may accidentally pass a parse_metadata argument which
2519// corresponds to a single extension header rather than all of the IPv6 headers.
2520
2521/// Dispatch a received IPv4 packet to the appropriate protocol.
2522///
2523/// `device` is the device the packet was received on. `parse_metadata` is the
2524/// parse metadata associated with parsing the IP headers. It is used to undo
2525/// that parsing. Both `device` and `parse_metadata` are required in order to
2526/// send ICMP messages in response to unrecognized protocols or ports. If either
2527/// of `device` or `parse_metadata` is `None`, the caller promises that the
2528/// protocol and port are recognized.
2529///
2530/// # Panics
2531///
2532/// `dispatch_receive_ipv4_packet` panics if the protocol is unrecognized and
2533/// `parse_metadata` is `None`. If an IGMP message is received but it is not
2534/// coming from a device, i.e., `device` given is `None`,
2535/// `dispatch_receive_ip_packet` will also panic.
2536fn dispatch_receive_ipv4_packet<
2537    'a,
2538    'b,
2539    BC: IpLayerBindingsContext<Ipv4, CC::DeviceId>,
2540    CC: IpLayerIngressContext<Ipv4, BC>,
2541>(
2542    core_ctx: &'a mut CC,
2543    bindings_ctx: &'a mut BC,
2544    device: &'b CC::DeviceId,
2545    frame_dst: Option<FrameDestination>,
2546    mut packet: Ipv4Packet<&'a mut [u8]>,
2547    mut packet_metadata: IpLayerPacketMetadata<Ipv4, CC::WeakAddressId, BC>,
2548    receive_meta: ReceiveIpPacketMeta<Ipv4>,
2549) -> Result<(), IcmpErrorSender<'b, Ipv4, CC::DeviceId>> {
2550    core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet);
2551
2552    match frame_dst {
2553        Some(FrameDestination::Individual { local: false }) => {
2554            core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet_other_host);
2555        }
2556        Some(FrameDestination::Individual { local: true })
2557        | Some(FrameDestination::Multicast)
2558        | Some(FrameDestination::Broadcast)
2559        | None => (),
2560    };
2561
2562    // Skip early demux if the packet was redirected to a TPROXY.
2563    // TODO(https://fxbug.dev/475851987): Handle TPROXY in early_demux.
2564    let early_demux_result = receive_meta
2565        .transparent_override
2566        .is_none()
2567        .then(|| {
2568            core_ctx.early_demux(
2569                device,
2570                frame_dst,
2571                packet.src_ip(),
2572                packet.dst_ip(),
2573                packet.proto(),
2574                packet.body(),
2575            )
2576        })
2577        .flatten()
2578        .map(|socket| {
2579            let early_demux_result = EarlyDemuxResult::new(socket, &packet);
2580            early_demux_result.update_packet_metadata(core_ctx, &mut packet_metadata);
2581            early_demux_result
2582        });
2583
2584    let proto = packet.proto();
2585
2586    match core_ctx.filter_handler().local_ingress_hook(
2587        bindings_ctx,
2588        &mut packet,
2589        device,
2590        &mut packet_metadata,
2591    ) {
2592        filter::Verdict::Drop => {
2593            packet_metadata.acknowledge_drop();
2594            return Ok(());
2595        }
2596        filter::Verdict::Accept(()) => {}
2597    }
2598    let marks = packet_metadata.marks;
2599    packet_metadata.acknowledge_drop();
2600
2601    // These invariants are validated by the caller of this function, but it's
2602    // possible for the LOCAL_INGRESS hook to rewrite the packet, so we have to
2603    // check them again.
2604    let Some(src_ip) = packet.src_ipv4() else {
2605        debug!(
2606            "dispatch_receive_ipv4_packet: received packet from invalid source {} after the \
2607            LOCAL_INGRESS hook; dropping",
2608            packet.src_ip()
2609        );
2610        core_ctx.increment_both(device, |c| &c.invalid_source);
2611        return Ok(());
2612    };
2613    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
2614        core_ctx.increment_both(device, |c| &c.unspecified_destination);
2615        debug!(
2616            "dispatch_receive_ipv4_packet: Received packet with unspecified destination IP address \
2617            after the LOCAL_INGRESS hook; dropping"
2618        );
2619        return Ok(());
2620    };
2621
2622    core_ctx.deliver_packet_to_raw_ip_sockets(bindings_ctx, &packet, &device);
2623
2624    // Check if the early demux result is still valid.
2625    let early_demux_socket = early_demux_result.and_then(|result| result.take_socket(&packet));
2626
2627    let (prefix, options, body) = packet.parts_with_body_mut();
2628    let buffer = Buf::new(body, ..);
2629    let header_info = Ipv4HeaderInfo { prefix, options: options.as_ref() };
2630    let receive_info = LocalDeliveryPacketInfo { meta: receive_meta, header_info, marks };
2631
2632    core_ctx
2633        .dispatch_receive_ip_packet(
2634            bindings_ctx,
2635            device,
2636            src_ip,
2637            dst_ip,
2638            proto,
2639            buffer,
2640            &receive_info,
2641            early_demux_socket,
2642        )
2643        .or_else(|err| {
2644            if let Ipv4SourceAddr::Specified(src_ip) = src_ip {
2645                let (_, _, _, meta) = packet.into_metadata();
2646                Err(IcmpErrorSender {
2647                    err: err.into_icmpv4_error(meta.header_len()),
2648                    src_ip,
2649                    dst_ip,
2650                    frame_dst,
2651                    device,
2652                    meta,
2653                    marks,
2654                })
2655            } else {
2656                Ok(())
2657            }
2658        })
2659}
2660
2661/// Dispatch a received IPv6 packet to the appropriate protocol.
2662///
2663/// `dispatch_receive_ipv6_packet` has the same semantics as
2664/// `dispatch_receive_ipv4_packet`, but for IPv6.
2665fn dispatch_receive_ipv6_packet<
2666    'a,
2667    'b,
2668    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
2669    CC: IpLayerIngressContext<Ipv6, BC>,
2670>(
2671    core_ctx: &'a mut CC,
2672    bindings_ctx: &'a mut BC,
2673    device: &'b CC::DeviceId,
2674    frame_dst: Option<FrameDestination>,
2675    mut packet: Ipv6Packet<&'a mut [u8]>,
2676    mut packet_metadata: IpLayerPacketMetadata<Ipv6, CC::WeakAddressId, BC>,
2677    meta: ReceiveIpPacketMeta<Ipv6>,
2678) -> Result<(), IcmpErrorSender<'b, Ipv6, CC::DeviceId>> {
2679    // TODO(https://fxbug.dev/42095067): Once we support multiple extension
2680    // headers in IPv6, we will need to verify that the callers of this
2681    // function are still sound. In particular, they may accidentally pass a
2682    // parse_metadata argument which corresponds to a single extension
2683    // header rather than all of the IPv6 headers.
2684
2685    core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet);
2686
2687    match frame_dst {
2688        Some(FrameDestination::Individual { local: false }) => {
2689            core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet_other_host);
2690        }
2691        Some(FrameDestination::Individual { local: true })
2692        | Some(FrameDestination::Multicast)
2693        | Some(FrameDestination::Broadcast)
2694        | None => (),
2695    }
2696
2697    // Skip early demux if the packet was redirected to a TPROXY.
2698    // TODO(https://fxbug.dev/475851987): Handle TPROXY in early_demux.
2699    let early_demux_result = meta
2700        .transparent_override
2701        .is_none()
2702        .then(|| {
2703            core_ctx.early_demux(
2704                device,
2705                frame_dst,
2706                packet.src_ip(),
2707                packet.dst_ip(),
2708                packet.proto(),
2709                packet.body(),
2710            )
2711        })
2712        .flatten()
2713        .map(|socket| {
2714            let early_demux_result = EarlyDemuxResult::new(socket, &packet);
2715            early_demux_result.update_packet_metadata(core_ctx, &mut packet_metadata);
2716            early_demux_result
2717        });
2718
2719    let proto = packet.proto();
2720
2721    match core_ctx.filter_handler().local_ingress_hook(
2722        bindings_ctx,
2723        &mut packet,
2724        device,
2725        &mut packet_metadata,
2726    ) {
2727        filter::Verdict::Drop => {
2728            packet_metadata.acknowledge_drop();
2729            return Ok(());
2730        }
2731        filter::Verdict::Accept(()) => {}
2732    }
2733
2734    // These invariants are validated by the caller of this function, but it's
2735    // possible for the LOCAL_INGRESS hook to rewrite the packet, so we have to
2736    // check them again.
2737    let Some(src_ip) = packet.src_ipv6() else {
2738        debug!(
2739            "dispatch_receive_ipv6_packet: received packet from invalid source {} after the \
2740            LOCAL_INGRESS hook; dropping",
2741            packet.src_ip()
2742        );
2743
2744        core_ctx.increment_both(device, |c| &c.invalid_source);
2745        return Ok(());
2746    };
2747    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
2748        core_ctx.increment_both(device, |c| &c.unspecified_destination);
2749        debug!(
2750            "dispatch_receive_ipv6_packet: Received packet with unspecified destination IP address \
2751            after the LOCAL_INGRESS hook; dropping"
2752        );
2753        return Ok(());
2754    };
2755
2756    core_ctx.deliver_packet_to_raw_ip_sockets(bindings_ctx, &packet, &device);
2757
2758    // Check if the early demux result is still valid.
2759    let early_demux_socket = early_demux_result.and_then(|result| result.take_socket(&packet));
2760
2761    let (fixed, extension, body) = packet.parts_with_body_mut();
2762    let buffer = Buf::new(body, ..);
2763    let header_info = Ipv6HeaderInfo { fixed, extension };
2764    let receive_info = LocalDeliveryPacketInfo { meta, header_info, marks: packet_metadata.marks };
2765
2766    let result = core_ctx
2767        .dispatch_receive_ip_packet(
2768            bindings_ctx,
2769            device,
2770            src_ip,
2771            dst_ip,
2772            proto,
2773            buffer,
2774            &receive_info,
2775            early_demux_socket,
2776        )
2777        .or_else(|err| {
2778            if let Ipv6SourceAddr::Unicast(src_ip) = src_ip {
2779                let (_, _, _, meta) = packet.into_metadata();
2780                Err(IcmpErrorSender {
2781                    err: err.into_icmpv6_error(meta.header_len()),
2782                    src_ip: *src_ip,
2783                    dst_ip,
2784                    frame_dst,
2785                    device,
2786                    meta,
2787                    marks: receive_info.marks,
2788                })
2789            } else {
2790                Ok(())
2791            }
2792        });
2793    packet_metadata.acknowledge_drop();
2794    result
2795}
2796
/// The metadata required to forward an IP Packet.
///
/// This allows the forwarding of the packet to be decoupled from the
/// determination of how to forward. This is advantageous because forwarding
/// requires the underlying packet buffer, which cannot be "moved" in certain
/// contexts.
pub(crate) struct IpPacketForwarder<
    'a,
    I: IpLayerIpExt,
    D,
    A,
    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
> {
    /// The inbound device for the packet. ICMP errors generated while
    /// forwarding are sent out of this device.
    inbound_device: &'a D,
    /// The device out of which the packet is forwarded.
    outbound_device: &'a D,
    /// The IP layer metadata that is handed to `send_ip_frame` along with the
    /// packet.
    packet_meta: IpLayerPacketMetadata<I, A, BT>,
    /// The source address of the packet being forwarded.
    src_ip: I::RecvSrcAddr,
    /// The destination address of the packet being forwarded.
    dst_ip: SpecifiedAddr<I::Addr>,
    /// The destination that is handed to `send_ip_frame`.
    destination: IpPacketDestination<I, &'a D>,
    /// The protocol of the packet being forwarded.
    proto: I::Proto,
    /// The metadata from parsing the packet's IP headers.
    parse_meta: ParseMetadata,
    /// The frame destination of the packet, used when sending an ICMP error.
    frame_dst: Option<FrameDestination>,
}
2820
2821impl<'a, I, D, A, BC> IpPacketForwarder<'a, I, D, A, BC>
2822where
2823    I: IpLayerIpExt,
2824    BC: IpLayerBindingsContext<I, D>,
2825{
2826    // Forward the provided buffer as specified by this [`IpPacketForwarder`].
2827    fn forward_with_buffer<CC, B>(self, core_ctx: &mut CC, bindings_ctx: &mut BC, buffer: B)
2828    where
2829        B: BufferMut,
2830        CC: IpLayerForwardingContext<I, BC, DeviceId = D, WeakAddressId = A>,
2831    {
2832        let Self {
2833            inbound_device,
2834            outbound_device,
2835            packet_meta,
2836            src_ip,
2837            dst_ip,
2838            destination,
2839            proto,
2840            parse_meta,
2841            frame_dst,
2842        } = self;
2843
2844        let packet = ForwardedPacket::new(src_ip.get(), dst_ip.get(), proto, parse_meta, buffer);
2845
2846        trace!("forward_with_buffer: forwarding {} packet", I::NAME);
2847
2848        let marks = packet_meta.marks;
2849        match send_ip_frame(
2850            core_ctx,
2851            bindings_ctx,
2852            outbound_device,
2853            destination,
2854            packet,
2855            packet_meta,
2856            Mtu::no_limit(),
2857        ) {
2858            Ok(()) => (),
2859            Err(IpSendFrameError { serializer, error }) => {
2860                match error {
2861                    IpSendFrameErrorReason::Device(
2862                        SendFrameErrorReason::SizeConstraintsViolation,
2863                    ) => {
2864                        debug!("failed to forward {} packet: MTU exceeded", I::NAME);
2865                        core_ctx.increment_both(outbound_device, |c| &c.mtu_exceeded);
2866                        let mtu = core_ctx.get_mtu(inbound_device);
2867                        // NB: Ipv6 sends a PacketTooBig error. Ipv4 sends nothing.
2868                        let Some(err) = I::new_mtu_exceeded(proto, parse_meta.header_len(), mtu)
2869                        else {
2870                            return;
2871                        };
2872                        // NB: Only send an ICMP error if the sender's src
2873                        // is specified.
2874                        let Some(src_ip) = I::received_source_as_icmp_source(src_ip) else {
2875                            return;
2876                        };
2877                        // TODO(https://fxbug.dev/362489447): Increment the TTL since we
2878                        // just decremented it. The fact that we don't do this is
2879                        // technically a violation of the ICMP spec (we're not
2880                        // encapsulating the original packet that caused the
2881                        // issue, but a slightly modified version of it), but
2882                        // it's not that big of a deal because it won't affect
2883                        // the sender's ability to figure out the minimum path
2884                        // MTU. This may break other logic, though, so we should
2885                        // still fix it eventually.
2886                        core_ctx.send_icmp_error_message(
2887                            bindings_ctx,
2888                            inbound_device,
2889                            frame_dst,
2890                            src_ip,
2891                            dst_ip,
2892                            serializer.into_buffer(),
2893                            err,
2894                            &marks,
2895                        );
2896                    }
2897                    IpSendFrameErrorReason::Device(SendFrameErrorReason::QueueFull)
2898                    | IpSendFrameErrorReason::Device(SendFrameErrorReason::Alloc)
2899                    | IpSendFrameErrorReason::IllegalLoopbackAddress => (),
2900                }
2901                debug!("failed to forward {} packet: {error:?}", I::NAME);
2902            }
2903        }
2904    }
2905}
2906
/// The action to take for a packet that was a candidate for forwarding.
///
/// The action is carried out with the packet's buffer by
/// `perform_action_with_buffer`.
pub(crate) enum ForwardingAction<
    'a,
    I: IpLayerIpExt,
    D,
    A,
    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
> {
    /// Drop the packet without forwarding it or generating an ICMP error.
    SilentlyDrop,
    /// Forward the packet, as specified by the [`IpPacketForwarder`].
    Forward(IpPacketForwarder<'a, I, D, A, BT>),
    /// Drop the packet without forwarding, and generate an ICMP error as
    /// specified by the [`IcmpErrorSender`].
    DropWithIcmpError(IcmpErrorSender<'a, I, D>),
}
2923
2924impl<'a, I, D, A, BC> ForwardingAction<'a, I, D, A, BC>
2925where
2926    I: IpLayerIpExt,
2927    BC: IpLayerBindingsContext<I, D>,
2928{
2929    /// Perform the action prescribed by self, with the provided packet buffer.
2930    pub(crate) fn perform_action_with_buffer<CC, B>(
2931        self,
2932        core_ctx: &mut CC,
2933        bindings_ctx: &mut BC,
2934        buffer: B,
2935    ) where
2936        B: BufferMut,
2937        CC: IpLayerForwardingContext<I, BC, DeviceId = D, WeakAddressId = A>,
2938    {
2939        match self {
2940            ForwardingAction::SilentlyDrop => {}
2941            ForwardingAction::Forward(forwarder) => {
2942                forwarder.forward_with_buffer(core_ctx, bindings_ctx, buffer)
2943            }
2944            ForwardingAction::DropWithIcmpError(icmp_sender) => {
2945                icmp_sender.respond_with_icmp_error(core_ctx, bindings_ctx, buffer)
2946            }
2947        }
2948    }
2949}
2950
2951/// Determine which [`ForwardingAction`] should be taken for an IP packet.
2952pub(crate) fn determine_ip_packet_forwarding_action<'a, 'b, I, BC, CC>(
2953    core_ctx: &'a mut CC,
2954    mut packet: I::Packet<&'a mut [u8]>,
2955    mut packet_meta: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
2956    minimum_ttl: Option<u8>,
2957    inbound_device: &'b CC::DeviceId,
2958    outbound_device: &'b CC::DeviceId,
2959    destination: IpPacketDestination<I, &'b CC::DeviceId>,
2960    frame_dst: Option<FrameDestination>,
2961    src_ip: I::RecvSrcAddr,
2962    dst_ip: SpecifiedAddr<I::Addr>,
2963) -> ForwardingAction<'b, I, CC::DeviceId, CC::WeakAddressId, BC>
2964where
2965    I: IpLayerIpExt,
2966    BC: IpLayerBindingsContext<I, CC::DeviceId>,
2967    CC: IpLayerForwardingContext<I, BC>,
2968{
2969    // When forwarding, if a datagram's TTL is one or zero, discard it, as
2970    // decrementing the TTL would put it below the allowed minimum value.
2971    // For IPv4, see "TTL" section, https://tools.ietf.org/html/rfc791#page-14.
2972    // For IPv6, see "Hop Limit" section, https://datatracker.ietf.org/doc/html/rfc2460#page-5.
2973    const DEFAULT_MINIMUM_FORWARDING_TTL: u8 = 2;
2974    let minimum_ttl = minimum_ttl.unwrap_or(DEFAULT_MINIMUM_FORWARDING_TTL);
2975
2976    let ttl = packet.ttl();
2977    if ttl < minimum_ttl {
2978        debug!(
2979            "{} packet not forwarded due to inadequate TTL: got={ttl} minimum={minimum_ttl}",
2980            I::NAME
2981        );
2982        // As per RFC 792's specification of the Time Exceeded Message:
2983        //     If the gateway processing a datagram finds the time to live
2984        //     field is zero it must discard the datagram. The gateway may
2985        //     also notify the source host via the time exceeded message.
2986        // And RFC 4443 section 3.3:
2987        //    If a router receives a packet with a Hop Limit of zero, or if
2988        //    a router decrements a packet's Hop Limit to zero, it MUST
2989        //    discard the packet and originate an ICMPv6 Time Exceeded
2990        //    message with Code 0 to the source of the packet.
2991        // Don't send a Time Exceeded Message in cases where the netstack is
2992        // enforcing a higher minimum TTL (e.g. as part of a multicast route).
2993        if ttl > 1 {
2994            packet_meta.acknowledge_drop();
2995            return ForwardingAction::SilentlyDrop;
2996        }
2997
2998        core_ctx.increment_both(inbound_device, |c| &c.ttl_expired);
2999
3000        // Only send an ICMP error if the src_ip is specified.
3001        let Some(src_ip) = I::received_source_as_icmp_source(src_ip) else {
3002            core_ctx.increment_both(inbound_device, |c| &c.unspecified_source);
3003            packet_meta.acknowledge_drop();
3004            return ForwardingAction::SilentlyDrop;
3005        };
3006
3007        // Construct and send the appropriate ICMP error for the IP version.
3008        let version_specific_meta = packet.version_specific_meta();
3009        let (_, _, proto, parse_meta): (I::Addr, I::Addr, _, _) = packet.into_metadata();
3010        let err = I::new_ttl_expired(proto, parse_meta.header_len(), version_specific_meta);
3011        let action = ForwardingAction::DropWithIcmpError(IcmpErrorSender {
3012            err,
3013            src_ip,
3014            dst_ip,
3015            frame_dst,
3016            device: inbound_device,
3017            meta: parse_meta,
3018            marks: packet_meta.marks,
3019        });
3020        packet_meta.acknowledge_drop();
3021        return action;
3022    }
3023
3024    trace!("determine_ip_packet_forwarding_action: adequate TTL");
3025
3026    // For IPv6 packets, handle extension headers first.
3027    //
3028    // Any previous handling of extension headers was done under the
3029    // assumption that we are the final destination of the packet. Now that
3030    // we know we're forwarding, we need to re-examine them.
3031    let maybe_ipv6_packet_action = I::map_ip_in(
3032        &packet,
3033        |_packet| None,
3034        |packet| {
3035            Some(ipv6::handle_extension_headers(core_ctx, inbound_device, frame_dst, packet, false))
3036        },
3037    );
3038    match maybe_ipv6_packet_action {
3039        None => {} // NB: Ipv4 case.
3040        Some(Ipv6PacketAction::_Discard) => {
3041            core_ctx.increment_both(inbound_device, |c| {
3042                #[derive(GenericOverIp)]
3043                #[generic_over_ip(I, Ip)]
3044                struct InCounters<'a, I: IpLayerIpExt>(
3045                    &'a <I::RxCounters as CounterCollectionSpec>::CounterCollection<Counter>,
3046                );
3047                I::map_ip_in::<_, _>(
3048                    InCounters(&c.version_rx),
3049                    |_counters| {
3050                        unreachable!(
3051                            "`I` must be `Ipv6` because we're handling IPv6 extension headers"
3052                        )
3053                    },
3054                    |InCounters(counters)| &counters.extension_header_discard,
3055                )
3056            });
3057            trace!(
3058                "determine_ip_packet_forwarding_action: handled IPv6 extension headers: \
3059                discarding packet"
3060            );
3061            packet_meta.acknowledge_drop();
3062            return ForwardingAction::SilentlyDrop;
3063        }
3064        Some(Ipv6PacketAction::Continue) => {
3065            trace!(
3066                "determine_ip_packet_forwarding_action: handled IPv6 extension headers: \
3067                forwarding packet"
3068            );
3069        }
3070        Some(Ipv6PacketAction::ProcessFragment) => {
3071            unreachable!(
3072                "When forwarding packets, we should only ever look at the hop by hop \
3073                    options extension header (if present)"
3074            )
3075        }
3076    };
3077
3078    match core_ctx.filter_handler().forwarding_hook(
3079        I::as_filter_packet(&mut packet),
3080        inbound_device,
3081        outbound_device,
3082        &mut packet_meta,
3083    ) {
3084        filter::Verdict::Drop => {
3085            packet_meta.acknowledge_drop();
3086            trace!("determine_ip_packet_forwarding_action: filter verdict: Drop");
3087            return ForwardingAction::SilentlyDrop;
3088        }
3089        filter::Verdict::Accept(()) => {}
3090    }
3091
3092    packet.set_ttl(ttl - 1);
3093    let (_, _, proto, parse_meta): (I::Addr, I::Addr, _, _) = packet.into_metadata();
3094    ForwardingAction::Forward(IpPacketForwarder {
3095        inbound_device,
3096        outbound_device,
3097        packet_meta,
3098        src_ip,
3099        dst_ip,
3100        destination,
3101        proto,
3102        parse_meta,
3103        frame_dst,
3104    })
3105}
3106
3107pub(crate) fn send_ip_frame<I, CC, BC, S>(
3108    core_ctx: &mut CC,
3109    bindings_ctx: &mut BC,
3110    device: &CC::DeviceId,
3111    destination: IpPacketDestination<I, &CC::DeviceId>,
3112    mut body: S,
3113    mut packet_metadata: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
3114    limit_mtu: Mtu,
3115) -> Result<(), IpSendFrameError<S>>
3116where
3117    I: IpLayerIpExt,
3118    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes + MarksBindingsContext,
3119    CC: IpLayerEgressContext<I, BC> + IpDeviceMtuContext<I> + IpDeviceAddressIdContext<I>,
3120    S: FragmentableIpSerializer<I, Buffer: BufferMut> + FilterIpPacket<I>,
3121{
3122    let (verdict, proof) = core_ctx.filter_handler().egress_hook(
3123        bindings_ctx,
3124        &mut body,
3125        device,
3126        &mut packet_metadata,
3127    );
3128    match verdict {
3129        filter::Verdict::Drop => {
3130            packet_metadata.acknowledge_drop();
3131            return Ok(());
3132        }
3133        filter::Verdict::Accept(()) => {}
3134    }
3135
3136    // If the packet is leaving through the loopback device, attempt to extract a
3137    // weak reference to the packet's conntrack entry to plumb that through the
3138    // device layer so it can be reused on ingress to the IP layer.
3139    let (conntrack_connection_and_direction, tx_metadata, marks, _socket_cookie) =
3140        packet_metadata.into_parts();
3141    let conntrack_entry = if device.is_loopback() {
3142        conntrack_connection_and_direction
3143            .and_then(|(conn, dir)| WeakConntrackConnection::new(&conn).map(|conn| (conn, dir)))
3144    } else {
3145        None
3146    };
3147
3148    let mut device_layer_marks = Marks::default();
3149    for mark in BC::marks_to_keep_on_egress() {
3150        *device_layer_marks.get_mut(*mark) = *marks.get(*mark);
3151    }
3152
3153    let device_ip_layer_metadata =
3154        DeviceIpLayerMetadata { conntrack_entry, tx_metadata, marks: device_layer_marks };
3155
3156    // The filtering layer may have changed our address. Perform a last moment
3157    // check to protect against sending loopback addresses on the wire for
3158    // non-loopback devices, which is an RFC violation.
3159    if !device.is_loopback()
3160        && (I::LOOPBACK_SUBNET.contains(&body.src_addr())
3161            || I::LOOPBACK_SUBNET.contains(&body.dst_addr()))
3162    {
3163        core_ctx.increment_both(device, |c| &c.tx_illegal_loopback_address);
3164        return Err(IpSendFrameError {
3165            serializer: body,
3166            error: IpSendFrameErrorReason::IllegalLoopbackAddress,
3167        });
3168    }
3169
3170    // Use the minimum MTU between the target device and the requested mtu.
3171    let mtu = limit_mtu.min(core_ctx.get_mtu(device));
3172
3173    let body = body.with_size_limit(mtu.into());
3174
3175    let fits_mtu =
3176        match body.serialize_new_buf(PacketConstraints::UNCONSTRAINED, AlwaysFailBufferAlloc) {
3177            // We hit the allocator that refused to allocate new data, which
3178            // means the MTU is respected.
3179            Err(SerializeError::Alloc(())) => true,
3180            // MTU failure, we should try to fragment.
3181            Err(SerializeError::SizeLimitExceeded) => false,
3182        };
3183
3184    if fits_mtu {
3185        return core_ctx
3186            .send_ip_frame(bindings_ctx, device, destination, device_ip_layer_metadata, body, proof)
3187            .map_err(|ErrorAndSerializer { serializer, error }| IpSendFrameError {
3188                serializer: serializer.into_inner(),
3189                error: error.into(),
3190            });
3191    }
3192
3193    // Body doesn't fit MTU, we must fragment this serializer in order to send
3194    // it out.
3195    core_ctx.increment_both(device, |c| &c.fragmentation.fragmentation_required);
3196
3197    // Taken on the last frame.
3198    let mut device_ip_layer_metadata = Some(device_ip_layer_metadata);
3199    let body = body.into_inner();
3200    let result = match IpFragmenter::new(bindings_ctx, &body, mtu) {
3201        Ok(mut fragmenter) => loop {
3202            let (fragment, has_more) = match fragmenter.next() {
3203                None => break Ok(()),
3204                Some(f) => f,
3205            };
3206
3207            // TODO(https://fxbug.dev/391953082): We should penalize sockets
3208            // via the tx metadata when we incur IP fragmentation instead of
3209            // just attaching the ownership to the last fragment. For now, we
3210            // attach the tx metadata to the last frame only.
3211            let device_ip_layer_metadata = if has_more {
3212                // Unwrap here because only the last frame can take it.
3213                let device_ip_layer_metadata = device_ip_layer_metadata.as_ref().unwrap();
3214                DeviceIpLayerMetadata {
3215                    conntrack_entry: device_ip_layer_metadata.conntrack_entry.clone(),
3216                    tx_metadata: Default::default(),
3217                    marks: device_ip_layer_metadata.marks,
3218                }
3219            } else {
3220                // Unwrap here because the last frame can only happen once.
3221                device_ip_layer_metadata.take().unwrap()
3222            };
3223
3224            match core_ctx.send_ip_frame(
3225                bindings_ctx,
3226                device,
3227                destination.clone(),
3228                device_ip_layer_metadata,
3229                fragment,
3230                proof.clone_for_fragmentation(),
3231            ) {
3232                Ok(()) => {
3233                    core_ctx.increment_both(device, |c| &c.fragmentation.fragments);
3234                }
3235                Err(ErrorAndSerializer { serializer: _, error }) => {
3236                    core_ctx
3237                        .increment_both(device, |c| &c.fragmentation.error_fragmented_serializer);
3238                    break Err(error);
3239                }
3240            }
3241        },
3242        Err(e) => {
3243            core_ctx.increment_both(device, |c| &c.fragmentation.error_counter(&e));
3244            Err(SendFrameErrorReason::SizeConstraintsViolation)
3245        }
3246    };
3247    result.map_err(|e| IpSendFrameError { serializer: body, error: e.into() })
3248}
3249
3250/// A buffer allocator that always fails to allocate a new buffer.
3251///
3252/// Can be used to check for packet size constraints in serializer without in
3253/// fact serializing the buffer.
3254struct AlwaysFailBufferAlloc;
3255
3256impl LayoutBufferAlloc<Never> for AlwaysFailBufferAlloc {
3257    type Error = ();
3258    fn layout_alloc(
3259        self,
3260        _prefix: usize,
3261        _body: usize,
3262        _suffix: usize,
3263    ) -> Result<Never, Self::Error> {
3264        Err(())
3265    }
3266}
3267
/// Consume a parsed packet and undo the effects of parsing it on its buffer.
///
/// `drop_packet_and_undo_parse!` takes a `$packet` and the `$buffer` it was
/// parsed from. `$packet` is consumed through `into_metadata()`, whose fourth
/// element (the parse metadata) is then passed to `$buffer.undo_parse(..)`.
/// The macro evaluates to the full tuple returned by `into_metadata()`: source
/// IP, destination IP, protocol, and parse metadata.
macro_rules! drop_packet_and_undo_parse {
    ($packet:expr, $buffer:expr) => {{
        let (source, destination, protocol, parse_metadata) = $packet.into_metadata();
        $buffer.undo_parse(parse_metadata);
        (source, destination, protocol, parse_metadata)
    }};
}
3283
3284/// The result of calling [`process_fragment`], depending on what action needs
3285/// to be taken by the caller.
3286enum ProcessFragmentResult<'a, I: IpLayerIpExt> {
3287    /// Processing of the packet is complete and no more action should be
3288    /// taken.
3289    Done,
3290
3291    /// Reassembly is not needed. The returned packet is the same one that was
3292    /// passed in the call to [`process_fragment`].
3293    NotNeeded(I::Packet<&'a mut [u8]>),
3294
3295    /// A packet was successfully reassembled into the provided buffer. If a
3296    /// parsed packet is needed, then the caller must perform that parsing.
3297    Reassembled(Vec<u8>),
3298}
3299
3300/// Process a fragment and reassemble if required.
3301///
3302/// Attempts to process a potential fragment packet and reassemble if we are
3303/// ready to do so. Returns an enum to the caller with the result of processing
3304/// the potential fragment.
3305fn process_fragment<'a, I, CC, BC>(
3306    core_ctx: &mut CC,
3307    bindings_ctx: &mut BC,
3308    device: &CC::DeviceId,
3309    packet: I::Packet<&'a mut [u8]>,
3310) -> ProcessFragmentResult<'a, I>
3311where
3312    I: IpLayerIpExt,
3313    for<'b> I::Packet<&'b mut [u8]>: FragmentablePacket,
3314    CC: IpLayerIngressContext<I, BC>,
3315    BC: IpLayerBindingsContext<I, CC::DeviceId>,
3316{
3317    match FragmentHandler::<I, _>::process_fragment::<&mut [u8]>(core_ctx, bindings_ctx, packet) {
3318        // Handle the packet right away since reassembly is not needed.
3319        FragmentProcessingState::NotNeeded(packet) => {
3320            trace!("receive_ip_packet: not fragmented");
3321            ProcessFragmentResult::NotNeeded(packet)
3322        }
3323        // Ready to reassemble a packet.
3324        FragmentProcessingState::Ready { key, packet_len } => {
3325            trace!("receive_ip_packet: fragmented, ready for reassembly");
3326            // Allocate a buffer of `packet_len` bytes.
3327            let mut buffer = Buf::new(alloc::vec![0; packet_len], ..);
3328
3329            // Attempt to reassemble the packet.
3330            let reassemble_result = match FragmentHandler::<I, _>::reassemble_packet(
3331                core_ctx,
3332                bindings_ctx,
3333                &key,
3334                buffer.buffer_view_mut(),
3335            ) {
3336                // Successfully reassembled the packet, handle it.
3337                Ok(()) => ProcessFragmentResult::Reassembled(buffer.into_inner()),
3338                Err(e) => {
3339                    core_ctx.increment_both(device, |c| &c.fragment_reassembly_error);
3340                    debug!("receive_ip_packet: fragmented, failed to reassemble: {:?}", e);
3341                    ProcessFragmentResult::Done
3342                }
3343            };
3344            reassemble_result
3345        }
3346        // Cannot proceed since we need more fragments before we
3347        // can reassemble a packet.
3348        FragmentProcessingState::NeedMoreFragments => {
3349            core_ctx.increment_both(device, |c| &c.need_more_fragments);
3350            trace!("receive_ip_packet: fragmented, need more before reassembly");
3351            ProcessFragmentResult::Done
3352        }
3353        // TODO(ghanan): Handle invalid fragments.
3354        FragmentProcessingState::InvalidFragment => {
3355            core_ctx.increment_both(device, |c| &c.invalid_fragment);
3356            trace!("receive_ip_packet: fragmented, invalid");
3357            ProcessFragmentResult::Done
3358        }
3359        FragmentProcessingState::OutOfMemory => {
3360            core_ctx.increment_both(device, |c| &c.fragment_cache_full);
3361            trace!("receive_ip_packet: fragmented, dropped because OOM");
3362            ProcessFragmentResult::Done
3363        }
3364    }
3365}
3366
3367// TODO(joshlf): Can we turn `try_parse_ip_packet` into a function? So far, I've
3368// been unable to get the borrow checker to accept it.
3369
/// Try to parse an IP packet from a buffer.
///
/// If parsing fails, return the buffer to its original state so that its
/// contents can be used to send an ICMP error message. When invoked, the macro
/// expands to an expression whose type is `Result<P, P::Error>`, where `P` is
/// the parsed packet type.
macro_rules! try_parse_ip_packet {
    ($buffer:expr) => {{
        // Remember how much prefix/suffix the buffer had before parsing.
        let p_len = $buffer.prefix_len();
        let s_len = $buffer.suffix_len();

        let result = $buffer.parse_mut();

        if let Err(err) = result {
            // Revert `buffer` to its original state.
            //
            // Parsing consumes bytes from the body, which moves them into the
            // prefix/suffix. A failed parse may therefore leave the prefix
            // and suffix *longer* than they were; growing the body back by
            // exactly that difference restores the original body.
            let n_p_len = $buffer.prefix_len();
            let n_s_len = $buffer.suffix_len();

            if n_p_len > p_len {
                $buffer.grow_front(n_p_len - p_len);
            }

            if n_s_len > s_len {
                $buffer.grow_back(n_s_len - s_len);
            }

            Err(err)
        } else {
            result
        }
    }};
}
3402
/// Make an independent copy of an IP packet for one multicast route target.
///
/// The invocation names three new bindings: the copied bytes, a buffer over
/// those bytes, and the packet parsed from that buffer.
///
/// Note: The underlying data has to be copied because the filtering engine
/// may modify each instance differently while forwarding it.
///
/// Possible future optimizations include:
///   * Copy-on-write semantics for the buffer/packet, so that the data is
///     only copied when it is actually needed.
///   * Skipping the re-parse. The copy is byte-for-byte a packet that is
///     already known to be good, so it would be safe to adopt the data as an
///     IP packet without validating it again.
// NB: This is a macro, not a function, because Rust's "move" semantics prevent
// us from returning both a buffer and a packet referencing that buffer.
macro_rules! clone_packet_for_mcast_forwarding {
    {let ($new_data:ident, $new_buffer:ident, $new_packet:ident) = $packet:ident} => {
        // The bytes live in their own binding so that the buffer and packet
        // below can borrow from them.
        let mut $new_data = $packet.to_vec();
        let mut $new_buffer: Buf<&mut [u8]> = Buf::new($new_data.as_mut_slice(), ..);
        // Re-parsing an exact copy of an already-parsed packet is expected to
        // succeed.
        let $new_packet = try_parse_ip_packet!($new_buffer).unwrap();
    };
}
3427
3428/// Receive an IPv4 packet from a device.
3429///
3430/// `frame_dst` specifies how this packet was received; see [`FrameDestination`]
3431/// for options.
3432pub fn receive_ipv4_packet<
3433    BC: IpLayerBindingsContext<Ipv4, CC::DeviceId>,
3434    B: BufferMut,
3435    CC: IpLayerIngressContext<Ipv4, BC>,
3436>(
3437    core_ctx: &mut CC,
3438    bindings_ctx: &mut BC,
3439    device: &CC::DeviceId,
3440    frame_dst: Option<FrameDestination>,
3441    device_ip_layer_metadata: DeviceIpLayerMetadata<BC>,
3442    buffer: B,
3443) {
3444    if !core_ctx.is_ip_device_enabled(&device) {
3445        return;
3446    }
3447
3448    // This is required because we may need to process the buffer that was
3449    // passed in or a reassembled one, which have different types.
3450    let mut buffer: packet::Either<B, Buf<Vec<u8>>> = packet::Either::A(buffer);
3451
3452    core_ctx.increment_both(device, |c| &c.receive_ip_packet);
3453    trace!("receive_ip_packet({device:?})");
3454
3455    let packet: Ipv4Packet<_> = match try_parse_ip_packet!(buffer) {
3456        Ok(packet) => packet,
3457        // Conditionally send an ICMP response if we encountered a parameter
3458        // problem error when parsing an IPv4 packet. Note, we do not always
3459        // send back an ICMP response as it can be used as an attack vector for
3460        // DDoS attacks. We only send back an ICMP response if the RFC requires
3461        // that we MUST send one, as noted by `must_send_icmp` and `action`.
3462        // TODO(https://fxbug.dev/42157630): test this code path once
3463        // `Ipv4Packet::parse` can return an `IpParseError::ParameterProblem`
3464        // error.
3465        Err(IpParseError::ParameterProblem {
3466            src_ip,
3467            dst_ip,
3468            code,
3469            pointer,
3470            must_send_icmp,
3471            header_len,
3472            action,
3473        }) if must_send_icmp && action.should_send_icmp(&dst_ip) => {
3474            core_ctx.increment_both(device, |c| &c.parameter_problem);
3475            // `should_send_icmp_to_multicast` should never return `true` for IPv4.
3476            assert!(!action.should_send_icmp_to_multicast());
3477            let dst_ip = match SpecifiedAddr::new(dst_ip) {
3478                Some(ip) => ip,
3479                None => {
3480                    core_ctx.increment_both(device, |c| &c.unspecified_destination);
3481                    debug!(
3482                        "receive_ipv4_packet: Received packet with unspecified destination IP address; dropping"
3483                    );
3484                    return;
3485                }
3486            };
3487            let src_ip = match Ipv4SourceAddr::new(src_ip) {
3488                None => {
3489                    core_ctx.increment_both(device, |c| &c.invalid_source);
3490                    return;
3491                }
3492                Some(Ipv4SourceAddr::Unspecified) => {
3493                    core_ctx.increment_both(device, |c| &c.unspecified_source);
3494                    return;
3495                }
3496                Some(Ipv4SourceAddr::Specified(src_ip)) => src_ip,
3497            };
3498            IcmpErrorHandler::<Ipv4, _>::send_icmp_error_message(
3499                core_ctx,
3500                bindings_ctx,
3501                device,
3502                frame_dst,
3503                src_ip,
3504                dst_ip,
3505                buffer,
3506                Icmpv4Error {
3507                    kind: Icmpv4ErrorKind::ParameterProblem {
3508                        code,
3509                        pointer,
3510                        // When the call to `action.should_send_icmp` returns true, it always means that
3511                        // the IPv4 packet that failed parsing is an initial fragment.
3512                        fragment_type: Ipv4FragmentType::InitialFragment,
3513                    },
3514                    header_len,
3515                },
3516                &device_ip_layer_metadata.marks,
3517            );
3518            return;
3519        }
3520        _ => return, // TODO(joshlf): Do something with ICMP here?
3521    };
3522
3523    // We verify these properties later by actually creating the corresponding
3524    // witness types after the INGRESS filtering hook, but we keep these checks
3525    // here as an optimization to return early and save some work.
3526    if packet.src_ipv4().is_none() {
3527        debug!(
3528            "receive_ipv4_packet: received packet from invalid source {}; dropping",
3529            packet.src_ip()
3530        );
3531        core_ctx.increment_both(device, |c| &c.invalid_source);
3532        return;
3533    };
3534    if !packet.dst_ip().is_specified() {
3535        core_ctx.increment_both(device, |c| &c.unspecified_destination);
3536        debug!("receive_ipv4_packet: Received packet with unspecified destination IP; dropping");
3537        return;
3538    };
3539
3540    // Reassemble all packets before local delivery or forwarding. Reassembly
3541    // before forwarding is not RFC-compliant, but it's the easiest way to
3542    // ensure that fragments are filtered properly. Linux does this and it
3543    // doesn't seem to create major problems.
3544    //
3545    // TODO(https://fxbug.dev/345814518): Forward fragments without reassembly.
3546    //
3547    // Note, the `process_fragment` function could panic if the packet does not
3548    // have fragment data. However, we are guaranteed that it will not panic
3549    // because the fragment data is in the fixed header so it is always present
3550    // (even if the fragment data has values that implies that the packet is not
3551    // fragmented).
3552    let mut packet = match process_fragment(core_ctx, bindings_ctx, device, packet) {
3553        ProcessFragmentResult::Done => return,
3554        ProcessFragmentResult::NotNeeded(packet) => packet,
3555        ProcessFragmentResult::Reassembled(buf) => {
3556            let buf = Buf::new(buf, ..);
3557            buffer = packet::Either::B(buf);
3558
3559            match buffer.parse_mut() {
3560                Ok(packet) => packet,
3561                Err(err) => {
3562                    core_ctx.increment_both(device, |c| &c.fragment_reassembly_error);
3563                    debug!("receive_ip_packet: fragmented, failed to reassemble: {:?}", err);
3564                    return;
3565                }
3566            }
3567        }
3568    };
3569
3570    // TODO(ghanan): Act upon options.
3571
3572    let mut packet_metadata = IpLayerPacketMetadata::from_device_ip_layer_metadata(
3573        core_ctx,
3574        device,
3575        device_ip_layer_metadata,
3576    );
3577    let mut filter = core_ctx.filter_handler();
3578    match filter.ingress_hook(bindings_ctx, &mut packet, device, &mut packet_metadata) {
3579        IngressVerdict::Verdict(filter::Verdict::Accept(())) => {}
3580        IngressVerdict::Verdict(filter::Verdict::Drop) => {
3581            packet_metadata.acknowledge_drop();
3582            return;
3583        }
3584        IngressVerdict::TransparentLocalDelivery { addr, port } => {
3585            // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
3586            // we need to provide to the packet dispatch function.
3587            drop(filter);
3588
3589            let Some(addr) = SpecifiedAddr::new(addr) else {
3590                core_ctx.increment_both(device, |c| &c.unspecified_destination);
3591                debug!("cannot perform transparent delivery to unspecified destination; dropping");
3592                return;
3593            };
3594
3595            let receive_meta = ReceiveIpPacketMeta {
3596                // It's possible that the packet was actually sent to a
3597                // broadcast address, but it doesn't matter here since it's
3598                // being delivered to a transparent proxy.
3599                broadcast: None,
3600                transparent_override: Some(TransparentLocalDelivery { addr, port }),
3601            };
3602
3603            // Short-circuit the routing process and override local demux, providing a local
3604            // address and port to which the packet should be transparently delivered at the
3605            // transport layer.
3606            dispatch_receive_ipv4_packet(
3607                core_ctx,
3608                bindings_ctx,
3609                device,
3610                frame_dst,
3611                packet,
3612                packet_metadata,
3613                receive_meta,
3614            )
3615            .unwrap_or_else(|err| err.respond_with_icmp_error(core_ctx, bindings_ctx, buffer));
3616            return;
3617        }
3618    }
3619    // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
3620    // we need below.
3621    drop(filter);
3622
3623    let Some(src_ip) = packet.src_ipv4() else {
3624        core_ctx.increment_both(device, |c| &c.invalid_source);
3625        debug!(
3626            "receive_ipv4_packet: received packet from invalid source {}; dropping",
3627            packet.src_ip()
3628        );
3629        return;
3630    };
3631
3632    let action = receive_ipv4_packet_action(
3633        core_ctx,
3634        bindings_ctx,
3635        device,
3636        &packet,
3637        frame_dst,
3638        &packet_metadata.marks,
3639    );
3640    match action {
3641        ReceivePacketAction::MulticastForward { targets, address_status, dst_ip } => {
3642            // TOOD(https://fxbug.dev/364242513): Support connection tracking of
3643            // the multiplexed flows created by multicast forwarding. Here, we
3644            // use the existing metadata for the first action taken, and then
3645            // a default instance for each subsequent action. The first action
3646            // will populate the conntrack table with an entry, which will then
3647            // be used by all subsequent forwards.
3648            let mut packet_metadata = Some(packet_metadata);
3649            for MulticastRouteTarget { output_interface, min_ttl } in targets.as_ref() {
3650                clone_packet_for_mcast_forwarding! {
3651                    let (copy_of_data, copy_of_buffer, copy_of_packet) = packet
3652                };
3653                determine_ip_packet_forwarding_action::<Ipv4, _, _>(
3654                    core_ctx,
3655                    copy_of_packet,
3656                    packet_metadata.take().unwrap_or_default(),
3657                    Some(*min_ttl),
3658                    device,
3659                    &output_interface,
3660                    IpPacketDestination::from_addr(dst_ip),
3661                    frame_dst,
3662                    src_ip,
3663                    dst_ip,
3664                )
3665                .perform_action_with_buffer(core_ctx, bindings_ctx, copy_of_buffer);
3666            }
3667
3668            // If we also have an interest in the packet, deliver it locally.
3669            if let Some(address_status) = address_status {
3670                let receive_meta = ReceiveIpPacketMeta {
3671                    broadcast: address_status.to_broadcast_marker(),
3672                    transparent_override: None,
3673                };
3674                dispatch_receive_ipv4_packet(
3675                    core_ctx,
3676                    bindings_ctx,
3677                    device,
3678                    frame_dst,
3679                    packet,
3680                    packet_metadata.take().unwrap_or_default(),
3681                    receive_meta,
3682                )
3683                .unwrap_or_else(|err| err.respond_with_icmp_error(core_ctx, bindings_ctx, buffer));
3684            }
3685        }
3686        ReceivePacketAction::Deliver { address_status, internal_forwarding } => {
3687            // NB: when performing internal forwarding, hit the
3688            // forwarding hook.
3689            match internal_forwarding {
3690                InternalForwarding::Used(outbound_device) => {
3691                    core_ctx.increment_both(device, |c| &c.forward);
3692                    match core_ctx.filter_handler().forwarding_hook(
3693                        &mut packet,
3694                        device,
3695                        &outbound_device,
3696                        &mut packet_metadata,
3697                    ) {
3698                        filter::Verdict::Drop => {
3699                            packet_metadata.acknowledge_drop();
3700                            return;
3701                        }
3702                        filter::Verdict::Accept(()) => {}
3703                    }
3704                }
3705                InternalForwarding::NotUsed => {}
3706            }
3707
3708            let receive_meta = ReceiveIpPacketMeta {
3709                broadcast: address_status.to_broadcast_marker(),
3710                transparent_override: None,
3711            };
3712            dispatch_receive_ipv4_packet(
3713                core_ctx,
3714                bindings_ctx,
3715                device,
3716                frame_dst,
3717                packet,
3718                packet_metadata,
3719                receive_meta,
3720            )
3721            .unwrap_or_else(|err| err.respond_with_icmp_error(core_ctx, bindings_ctx, buffer));
3722        }
3723        ReceivePacketAction::Forward {
3724            original_dst,
3725            dst: Destination { device: dst_device, next_hop },
3726        } => {
3727            determine_ip_packet_forwarding_action::<Ipv4, _, _>(
3728                core_ctx,
3729                packet,
3730                packet_metadata,
3731                None,
3732                device,
3733                &dst_device,
3734                IpPacketDestination::from_next_hop(next_hop, original_dst),
3735                frame_dst,
3736                src_ip,
3737                original_dst,
3738            )
3739            .perform_action_with_buffer(core_ctx, bindings_ctx, buffer);
3740        }
3741        ReceivePacketAction::SendNoRouteToDest { dst: dst_ip } => {
3742            use packet_formats::ipv4::Ipv4Header as _;
3743            core_ctx.increment_both(device, |c| &c.no_route_to_host);
3744            debug!("received IPv4 packet with no known route to destination {}", dst_ip);
3745            let fragment_type = packet.fragment_type();
3746            let (_, _, proto, meta): (Ipv4Addr, Ipv4Addr, _, _) =
3747                drop_packet_and_undo_parse!(packet, buffer);
3748            let marks = packet_metadata.marks;
3749            packet_metadata.acknowledge_drop();
3750            let src_ip = match src_ip {
3751                Ipv4SourceAddr::Unspecified => {
3752                    core_ctx.increment_both(device, |c| &c.unspecified_source);
3753                    return;
3754                }
3755                Ipv4SourceAddr::Specified(src_ip) => src_ip,
3756            };
3757            IcmpErrorHandler::<Ipv4, _>::send_icmp_error_message(
3758                core_ctx,
3759                bindings_ctx,
3760                device,
3761                frame_dst,
3762                src_ip,
3763                dst_ip,
3764                buffer,
3765                Icmpv4Error {
3766                    kind: Icmpv4ErrorKind::NetUnreachable { proto, fragment_type },
3767                    header_len: meta.header_len(),
3768                },
3769                &marks,
3770            );
3771        }
3772        ReceivePacketAction::Drop { reason } => {
3773            let src_ip = packet.src_ip();
3774            let dst_ip = packet.dst_ip();
3775            packet_metadata.acknowledge_drop();
3776            core_ctx.increment_both(device, |c| &c.dropped);
3777            debug!(
3778                "receive_ipv4_packet: dropping packet from {src_ip} to {dst_ip} received on \
3779                {device:?}: {reason:?}",
3780            );
3781        }
3782    }
3783}
3784
3785/// Receive an IPv6 packet from a device.
3786///
3787/// `frame_dst` specifies how this packet was received; see [`FrameDestination`]
3788/// for options.
3789pub fn receive_ipv6_packet<
3790    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
3791    B: BufferMut,
3792    CC: IpLayerIngressContext<Ipv6, BC>,
3793>(
3794    core_ctx: &mut CC,
3795    bindings_ctx: &mut BC,
3796    device: &CC::DeviceId,
3797    frame_dst: Option<FrameDestination>,
3798    device_ip_layer_metadata: DeviceIpLayerMetadata<BC>,
3799    buffer: B,
3800) {
3801    if !core_ctx.is_ip_device_enabled(&device) {
3802        return;
3803    }
3804
3805    // This is required because we may need to process the buffer that was
3806    // passed in or a reassembled one, which have different types.
3807    let mut buffer: packet::Either<B, Buf<Vec<u8>>> = packet::Either::A(buffer);
3808
3809    core_ctx.increment_both(device, |c| &c.receive_ip_packet);
3810    trace!("receive_ipv6_packet({:?})", device);
3811
3812    let packet: Ipv6Packet<_> = match try_parse_ip_packet!(buffer) {
3813        Ok(packet) => packet,
3814        // Conditionally send an ICMP response if we encountered a parameter
        // problem error when parsing an IPv6 packet. Note, we do not always
3816        // send back an ICMP response as it can be used as an attack vector for
3817        // DDoS attacks. We only send back an ICMP response if the RFC requires
3818        // that we MUST send one, as noted by `must_send_icmp` and `action`.
3819        Err(IpParseError::ParameterProblem {
3820            src_ip,
3821            dst_ip,
3822            code,
3823            pointer,
3824            must_send_icmp,
3825            header_len: _,
3826            action,
3827        }) if must_send_icmp && action.should_send_icmp(&dst_ip) => {
3828            core_ctx.increment_both(device, |c| &c.parameter_problem);
3829            let dst_ip = match SpecifiedAddr::new(dst_ip) {
3830                Some(ip) => ip,
3831                None => {
3832                    core_ctx.increment_both(device, |c| &c.unspecified_destination);
3833                    debug!(
3834                        "receive_ipv6_packet: Received packet with unspecified destination IP address; dropping"
3835                    );
3836                    return;
3837                }
3838            };
3839            let src_ip = match Ipv6SourceAddr::new(src_ip) {
3840                None => {
3841                    core_ctx.increment_both(device, |c| &c.invalid_source);
3842                    return;
3843                }
3844                Some(Ipv6SourceAddr::Unspecified) => {
3845                    core_ctx.increment_both(device, |c| &c.unspecified_source);
3846                    return;
3847                }
3848                Some(Ipv6SourceAddr::Unicast(src_ip)) => src_ip,
3849            };
3850            IcmpErrorHandler::<Ipv6, _>::send_icmp_error_message(
3851                core_ctx,
3852                bindings_ctx,
3853                device,
3854                frame_dst,
3855                *src_ip,
3856                dst_ip,
3857                buffer,
3858                Icmpv6ErrorKind::ParameterProblem {
3859                    code,
3860                    pointer,
3861                    allow_dst_multicast: action.should_send_icmp_to_multicast(),
3862                },
3863                &device_ip_layer_metadata.marks,
3864            );
3865            return;
3866        }
3867        _ => return, // TODO(joshlf): Do something with ICMP here?
3868    };
3869
3870    trace!("receive_ipv6_packet: parsed packet: {:?}", packet);
3871
3872    // TODO(ghanan): Act upon extension headers.
3873
3874    // We verify these properties later by actually creating the corresponding
3875    // witness types after the INGRESS filtering hook, but we keep these checks
3876    // here as an optimization to return early and save some work.
3877    if packet.src_ipv6().is_none() {
3878        debug!(
3879            "receive_ipv6_packet: received packet from invalid source {}; dropping",
3880            packet.src_ip()
3881        );
3882        core_ctx.increment_both(device, |c| &c.invalid_source);
3883        return;
3884    };
3885    if !packet.dst_ip().is_specified() {
3886        core_ctx.increment_both(device, |c| &c.unspecified_destination);
3887        debug!("receive_ipv6_packet: Received packet with unspecified destination IP; dropping");
3888        return;
3889    };
3890
3891    // Reassemble all packets before local delivery or forwarding. Reassembly
3892    // before forwarding is not RFC-compliant, but it's the easiest way to
3893    // ensure that fragments are filtered properly. Linux does this and it
3894    // doesn't seem to create major problems.
3895    //
3896    // TODO(https://fxbug.dev/345814518): Forward fragments without reassembly.
3897    //
3898    // delivery_extension_header_action is used to prevent looking at the
3899    // extension headers twice when a non-fragmented packet is delivered
3900    // locally.
3901    let (mut packet, delivery_extension_header_action) =
3902        match ipv6::handle_extension_headers(core_ctx, device, frame_dst, &packet, true) {
3903            Ipv6PacketAction::_Discard => {
3904                core_ctx.increment_both(device, |c| &c.version_rx.extension_header_discard);
3905                trace!("receive_ipv6_packet: handled IPv6 extension headers: discarding packet");
3906                return;
3907            }
3908            Ipv6PacketAction::Continue => {
3909                trace!("receive_ipv6_packet: handled IPv6 extension headers: dispatching packet");
3910                (packet, Some(Ipv6PacketAction::Continue))
3911            }
3912            Ipv6PacketAction::ProcessFragment => {
3913                trace!(
3914                    "receive_ipv6_packet: handled IPv6 extension headers: handling \
3915                    fragmented packet"
3916                );
3917
3918                // Note, `IpPacketFragmentCache::process_fragment`
3919                // could panic if the packet does not have fragment data.
3920                // However, we are guaranteed that it will not panic for an
3921                // IPv6 packet because the fragment data is in an (optional)
3922                // fragment extension header which we attempt to handle by
3923                // calling `ipv6::handle_extension_headers`. We will only
3924                // end up here if its return value is
3925                // `Ipv6PacketAction::ProcessFragment` which is only
3926                // possible when the packet has the fragment extension
3927                // header (even if the fragment data has values that implies
3928                // that the packet is not fragmented).
3929                match process_fragment(core_ctx, bindings_ctx, device, packet) {
3930                    ProcessFragmentResult::Done => return,
3931                    ProcessFragmentResult::NotNeeded(packet) => {
3932                        // While strange, it's possible for there to be a Fragment
3933                        // header that says the packet doesn't need defragmentation.
3934                        // As per RFC 8200 4.5:
3935                        //
3936                        //   If the fragment is a whole datagram (that is, both the
3937                        //   Fragment Offset field and the M flag are zero), then it
3938                        //   does not need any further reassembly and should be
3939                        //   processed as a fully reassembled packet (i.e., updating
3940                        //   Next Header, adjust Payload Length, removing the
3941                        //   Fragment header, etc.).
3942                        //
3943                        // In this case, we're not technically reassembling the
3944                        // packet, since, per the RFC, that would mean removing the
3945                        // Fragment header.
3946                        (packet, Some(Ipv6PacketAction::Continue))
3947                    }
3948                    ProcessFragmentResult::Reassembled(buf) => {
3949                        let buf = Buf::new(buf, ..);
3950                        buffer = packet::Either::B(buf);
3951
3952                        match buffer.parse_mut() {
3953                            Ok(packet) => (packet, None),
3954                            Err(err) => {
3955                                core_ctx.increment_both(device, |c| &c.fragment_reassembly_error);
3956                                debug!(
3957                                    "receive_ip_packet: fragmented, failed to reassemble: {:?}",
3958                                    err
3959                                );
3960                                return;
3961                            }
3962                        }
3963                    }
3964                }
3965            }
3966        };
3967
3968    let mut packet_metadata = IpLayerPacketMetadata::from_device_ip_layer_metadata(
3969        core_ctx,
3970        device,
3971        device_ip_layer_metadata,
3972    );
3973    let mut filter = core_ctx.filter_handler();
3974
3975    match filter.ingress_hook(bindings_ctx, &mut packet, device, &mut packet_metadata) {
3976        IngressVerdict::Verdict(filter::Verdict::Accept(())) => {}
3977        IngressVerdict::Verdict(filter::Verdict::Drop) => {
3978            packet_metadata.acknowledge_drop();
3979            return;
3980        }
3981        IngressVerdict::TransparentLocalDelivery { addr, port } => {
3982            // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
3983            // we need to provide to the packet dispatch function.
3984            drop(filter);
3985
3986            let Some(addr) = SpecifiedAddr::new(addr) else {
3987                core_ctx.increment_both(device, |c| &c.unspecified_destination);
3988                debug!("cannot perform transparent delivery to unspecified destination; dropping");
3989                return;
3990            };
3991
3992            let receive_meta = ReceiveIpPacketMeta {
3993                broadcast: None,
3994                transparent_override: Some(TransparentLocalDelivery { addr, port }),
3995            };
3996
3997            // Short-circuit the routing process and override local demux, providing a local
3998            // address and port to which the packet should be transparently delivered at the
3999            // transport layer.
4000            dispatch_receive_ipv6_packet(
4001                core_ctx,
4002                bindings_ctx,
4003                device,
4004                frame_dst,
4005                packet,
4006                packet_metadata,
4007                receive_meta,
4008            )
4009            .unwrap_or_else(|err| err.respond_with_icmp_error(core_ctx, bindings_ctx, buffer));
4010            return;
4011        }
4012    }
4013    // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
4014    // we need below.
4015    drop(filter);
4016
4017    let Some(src_ip) = packet.src_ipv6() else {
4018        debug!(
4019            "receive_ipv6_packet: received packet from invalid source {}; dropping",
4020            packet.src_ip()
4021        );
4022        core_ctx.increment_both(device, |c| &c.invalid_source);
4023        return;
4024    };
4025
4026    match receive_ipv6_packet_action(
4027        core_ctx,
4028        bindings_ctx,
4029        device,
4030        &packet,
4031        frame_dst,
4032        &packet_metadata.marks,
4033    ) {
4034        ReceivePacketAction::MulticastForward { targets, address_status, dst_ip } => {
4035            // TOOD(https://fxbug.dev/364242513): Support connection tracking of
4036            // the multiplexed flows created by multicast forwarding. Here, we
4037            // use the existing metadata for the first action taken, and then
4038            // a default instance for each subsequent action. The first action
4039            // will populate the conntrack table with an entry, which will then
4040            // be used by all subsequent forwards.
4041            let mut packet_metadata = Some(packet_metadata);
4042            for MulticastRouteTarget { output_interface, min_ttl } in targets.as_ref() {
4043                clone_packet_for_mcast_forwarding! {
4044                    let (copy_of_data, copy_of_buffer, copy_of_packet) = packet
4045                };
4046                determine_ip_packet_forwarding_action::<Ipv6, _, _>(
4047                    core_ctx,
4048                    copy_of_packet,
4049                    packet_metadata.take().unwrap_or_default(),
4050                    Some(*min_ttl),
4051                    device,
4052                    &output_interface,
4053                    IpPacketDestination::from_addr(dst_ip),
4054                    frame_dst,
4055                    src_ip,
4056                    dst_ip,
4057                )
4058                .perform_action_with_buffer(core_ctx, bindings_ctx, copy_of_buffer);
4059            }
4060
4061            // If we also have an interest in the packet, deliver it locally.
4062            if let Some(_) = address_status {
4063                let receive_meta =
4064                    ReceiveIpPacketMeta { broadcast: None, transparent_override: None };
4065
4066                dispatch_receive_ipv6_packet(
4067                    core_ctx,
4068                    bindings_ctx,
4069                    device,
4070                    frame_dst,
4071                    packet,
4072                    packet_metadata.take().unwrap_or_default(),
4073                    receive_meta,
4074                )
4075                .unwrap_or_else(|err| err.respond_with_icmp_error(core_ctx, bindings_ctx, buffer));
4076            }
4077        }
4078        ReceivePacketAction::Deliver { address_status: _, internal_forwarding } => {
4079            trace!("receive_ipv6_packet: delivering locally");
4080
4081            let action = if let Some(action) = delivery_extension_header_action {
4082                action
4083            } else {
4084                ipv6::handle_extension_headers(core_ctx, device, frame_dst, &packet, true)
4085            };
4086            match action {
4087                Ipv6PacketAction::_Discard => {
4088                    core_ctx.increment_both(device, |c| &c.version_rx.extension_header_discard);
4089                    trace!(
4090                        "receive_ipv6_packet: handled IPv6 extension headers: discarding packet"
4091                    );
4092                    packet_metadata.acknowledge_drop();
4093                }
4094                Ipv6PacketAction::Continue => {
4095                    trace!(
4096                        "receive_ipv6_packet: handled IPv6 extension headers: dispatching packet"
4097                    );
4098
4099                    // NB: when performing internal forwarding, hit the
4100                    // forwarding hook.
4101                    match internal_forwarding {
4102                        InternalForwarding::Used(outbound_device) => {
4103                            core_ctx.increment_both(device, |c| &c.forward);
4104                            match core_ctx.filter_handler().forwarding_hook(
4105                                &mut packet,
4106                                device,
4107                                &outbound_device,
4108                                &mut packet_metadata,
4109                            ) {
4110                                filter::Verdict::Drop => {
4111                                    packet_metadata.acknowledge_drop();
4112                                    return;
4113                                }
4114                                filter::Verdict::Accept(()) => {}
4115                            }
4116                        }
4117                        InternalForwarding::NotUsed => {}
4118                    }
4119
4120                    let meta = ReceiveIpPacketMeta { broadcast: None, transparent_override: None };
4121
4122                    // TODO(joshlf):
4123                    // - Do something with ICMP if we don't have a handler for
4124                    //   that protocol?
4125                    // - Check for already-expired TTL?
4126                    dispatch_receive_ipv6_packet(
4127                        core_ctx,
4128                        bindings_ctx,
4129                        device,
4130                        frame_dst,
4131                        packet,
4132                        packet_metadata,
4133                        meta,
4134                    )
4135                    .unwrap_or_else(|err| {
4136                        err.respond_with_icmp_error(core_ctx, bindings_ctx, buffer)
4137                    });
4138                }
4139                Ipv6PacketAction::ProcessFragment => {
4140                    debug!("receive_ipv6_packet: found fragment header after reassembly; dropping");
4141                    packet_metadata.acknowledge_drop();
4142                }
4143            }
4144        }
4145        ReceivePacketAction::Forward {
4146            original_dst,
4147            dst: Destination { device: dst_device, next_hop },
4148        } => {
4149            determine_ip_packet_forwarding_action::<Ipv6, _, _>(
4150                core_ctx,
4151                packet,
4152                packet_metadata,
4153                None,
4154                device,
4155                &dst_device,
4156                IpPacketDestination::from_next_hop(next_hop, original_dst),
4157                frame_dst,
4158                src_ip,
4159                original_dst,
4160            )
4161            .perform_action_with_buffer(core_ctx, bindings_ctx, buffer);
4162        }
4163        ReceivePacketAction::SendNoRouteToDest { dst: dst_ip } => {
4164            core_ctx.increment_both(device, |c| &c.no_route_to_host);
4165            let (_, _, proto, meta): (Ipv6Addr, Ipv6Addr, _, _) =
4166                drop_packet_and_undo_parse!(packet, buffer);
4167            debug!("received IPv6 packet with no known route to destination {}", dst_ip);
4168            let marks = packet_metadata.marks;
4169            packet_metadata.acknowledge_drop();
4170
4171            let src_ip = match src_ip {
4172                Ipv6SourceAddr::Unspecified => {
4173                    core_ctx.increment_both(device, |c| &c.unspecified_source);
4174                    return;
4175                }
4176                Ipv6SourceAddr::Unicast(src_ip) => src_ip,
4177            };
4178            IcmpErrorHandler::<Ipv6, _>::send_icmp_error_message(
4179                core_ctx,
4180                bindings_ctx,
4181                device,
4182                frame_dst,
4183                *src_ip,
4184                dst_ip,
4185                buffer,
4186                Icmpv6ErrorKind::NetUnreachable { proto, header_len: meta.header_len() },
4187                &marks,
4188            );
4189        }
4190        ReceivePacketAction::Drop { reason } => {
4191            core_ctx.increment_both(device, |c| &c.dropped);
4192            let src_ip = packet.src_ip();
4193            let dst_ip = packet.dst_ip();
4194            packet_metadata.acknowledge_drop();
4195            debug!(
4196                "receive_ipv6_packet: dropping packet from {src_ip} to {dst_ip} received on \
4197                {device:?}: {reason:?}",
4198            );
4199        }
4200    }
4201}
4202
4203/// The action to take in order to process a received IP packet.
4204#[derive(Debug, PartialEq)]
4205pub enum ReceivePacketAction<I: BroadcastIpExt + IpLayerIpExt, DeviceId: StrongDeviceIdentifier> {
4206    /// Deliver the packet locally.
4207    Deliver {
4208        /// Status of the receiving IP address.
4209        address_status: I::AddressStatus,
4210        /// `InternalForwarding::Used(d)` if we're delivering the packet as a
4211        /// Weak Host performing internal forwarding via output device `d`.
4212        internal_forwarding: InternalForwarding<DeviceId>,
4213    },
4214
4215    /// Forward the packet to the given destination.
4216    Forward {
4217        /// The original destination IP address of the packet.
4218        original_dst: SpecifiedAddr<I::Addr>,
4219        /// The destination that the packet should be forwarded to.
4220        dst: Destination<I::Addr, DeviceId>,
4221    },
4222
4223    /// A multicast packet that should be forwarded (& optional local delivery).
4224    ///
4225    /// The packet should be forwarded to each of the given targets. This case
4226    /// is only returned when the packet is eligible for multicast forwarding;
4227    /// `Self::Deliver` is used for packets that are ineligible (either because
4228    /// multicast forwarding is disabled, or because there are no applicable
4229    /// multicast routes with which to forward the packet).
4230    MulticastForward {
4231        /// The multicast targets to forward the packet via.
4232        targets: MulticastRouteTargets<DeviceId>,
4233        /// Some if the host is a member of the multicast group and the packet
4234        /// should be delivered locally (in addition to forwarding).
4235        address_status: Option<I::AddressStatus>,
4236        /// The multicast address the packet should be forwarded to.
4237        dst_ip: SpecifiedAddr<I::Addr>,
4238    },
4239
4240    /// Send a Destination Unreachable ICMP error message to the packet's sender
4241    /// and drop the packet.
4242    ///
4243    /// For ICMPv4, use the code "net unreachable". For ICMPv6, use the code "no
4244    /// route to destination".
4245    SendNoRouteToDest {
4246        /// The destination IP Address to which there was no route.
4247        dst: SpecifiedAddr<I::Addr>,
4248    },
4249
4250    /// Silently drop the packet.
4251    ///
4252    /// `reason` describes why the packet was dropped.
4253    #[allow(missing_docs)]
4254    Drop { reason: DropReason },
4255}
4256
4257// It's possible that there is more than one device with the address
4258// present. Prefer any address status over `UnicastTentative`.
4259fn choose_highest_priority_address_status<I: IpLayerIpExt>(
4260    address_statuses: impl Iterator<Item = I::AddressStatus>,
4261) -> Option<I::AddressStatus> {
4262    address_statuses.max_by_key(|status| {
4263        #[derive(GenericOverIp)]
4264        #[generic_over_ip(I, Ip)]
4265        struct Wrap<'a, I: IpLayerIpExt>(&'a I::AddressStatus);
4266        I::map_ip_in(
4267            Wrap(status),
4268            |Wrap(v4_status)| match v4_status {
4269                Ipv4PresentAddressStatus::UnicastTentative => 0,
4270                _ => 1,
4271            },
4272            |Wrap(v6_status)| match v6_status {
4273                Ipv6PresentAddressStatus::UnicastTentative => 0,
4274                _ => 1,
4275            },
4276        )
4277    })
4278}
4279
/// The reason a received IP packet is dropped.
///
/// This is a plain, fieldless reason code, so it is cheap to copy and
/// supports total equality.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DropReason {
    /// Remote packet destined to tentative address.
    Tentative,
    /// Remote packet destined to the unspecified address.
    UnspecifiedDestination,
    /// Cannot forward a packet with unspecified source address.
    ForwardUnspecifiedSource,
    /// Cannot forward a packet with link-local source or destination address.
    ForwardLinkLocal,
    /// Packet should be forwarded but packet's inbound interface has forwarding
    /// disabled.
    ForwardingDisabledInboundIface,
    /// Remote packet destined to a multicast address that could not be:
    /// * delivered locally (because we are not a member of the multicast
    ///   group), or
    /// * forwarded (either because multicast forwarding is disabled, or no
    ///   applicable multicast route has been installed).
    MulticastNoInterest,
}
4301
4302/// Computes the action to take in order to process a received IPv4 packet.
4303pub fn receive_ipv4_packet_action<BC, CC, B>(
4304    core_ctx: &mut CC,
4305    bindings_ctx: &mut BC,
4306    device: &CC::DeviceId,
4307    packet: &Ipv4Packet<B>,
4308    frame_dst: Option<FrameDestination>,
4309    marks: &Marks,
4310) -> ReceivePacketAction<Ipv4, CC::DeviceId>
4311where
4312    BC: IpLayerBindingsContext<Ipv4, CC::DeviceId>,
4313    CC: IpLayerContext<Ipv4, BC>,
4314    B: SplitByteSlice,
4315{
4316    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
4317        core_ctx.increment_both(device, |c| &c.unspecified_destination);
4318        return ReceivePacketAction::Drop { reason: DropReason::UnspecifiedDestination };
4319    };
4320
4321    // If the packet arrived at the loopback interface, check if any local
4322    // interface has the destination address assigned. This effectively lets
4323    // the loopback interface operate as a weak host for incoming packets.
4324    //
4325    // Note that (as of writing) the stack sends all locally destined traffic to
4326    // the loopback interface so we need this hack to allow the stack to accept
4327    // packets that arrive at the loopback interface (after being looped back)
4328    // but destined to an address that is assigned to another local interface.
4329    //
4330    // TODO(https://fxbug.dev/42175703): This should instead be controlled by the
4331    // routing table.
4332
4333    let highest_priority = if device.is_loopback() {
4334        core_ctx.with_address_statuses(dst_ip, |it| {
4335            let it = it.map(|(_device, status)| status);
4336            choose_highest_priority_address_status::<Ipv4>(it)
4337        })
4338    } else {
4339        core_ctx.address_status_for_device(dst_ip, device).into_present()
4340    };
4341    match highest_priority {
4342        Some(
4343            address_status @ (Ipv4PresentAddressStatus::UnicastAssigned
4344            | Ipv4PresentAddressStatus::LoopbackSubnet),
4345        ) => {
4346            core_ctx.increment_both(device, |c| &c.deliver_unicast);
4347            ReceivePacketAction::Deliver {
4348                address_status,
4349                internal_forwarding: InternalForwarding::NotUsed,
4350            }
4351        }
4352        Some(Ipv4PresentAddressStatus::UnicastTentative) => {
4353            // If the destination address is tentative (which implies that
4354            // we are still performing Duplicate Address Detection on
4355            // it), then we don't consider the address "assigned to an
4356            // interface", and so we drop packets instead of delivering them
4357            // locally.
4358            core_ctx.increment_both(device, |c| &c.drop_for_tentative);
4359            ReceivePacketAction::Drop { reason: DropReason::Tentative }
4360        }
4361
4362        Some(address_status @ Ipv4PresentAddressStatus::Multicast) => {
4363            receive_ip_multicast_packet_action(
4364                core_ctx,
4365                bindings_ctx,
4366                device,
4367                packet,
4368                Some(address_status),
4369                dst_ip,
4370                frame_dst,
4371            )
4372        }
4373        Some(
4374            address_status @ (Ipv4PresentAddressStatus::LimitedBroadcast
4375            | Ipv4PresentAddressStatus::SubnetBroadcast),
4376        ) => {
4377            core_ctx.increment_both(device, |c| &c.version_rx.deliver_broadcast);
4378            ReceivePacketAction::Deliver {
4379                address_status,
4380                internal_forwarding: InternalForwarding::NotUsed,
4381            }
4382        }
4383        None => receive_ip_packet_action_common::<Ipv4, _, _, _>(
4384            core_ctx,
4385            bindings_ctx,
4386            dst_ip,
4387            device,
4388            packet,
4389            frame_dst,
4390            marks,
4391        ),
4392    }
4393}
4394
4395/// Computes the action to take in order to process a received IPv6 packet.
4396pub fn receive_ipv6_packet_action<BC, CC, B>(
4397    core_ctx: &mut CC,
4398    bindings_ctx: &mut BC,
4399    device: &CC::DeviceId,
4400    packet: &Ipv6Packet<B>,
4401    frame_dst: Option<FrameDestination>,
4402    marks: &Marks,
4403) -> ReceivePacketAction<Ipv6, CC::DeviceId>
4404where
4405    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
4406    CC: IpLayerContext<Ipv6, BC>,
4407    B: SplitByteSlice,
4408{
4409    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
4410        core_ctx.increment_both(device, |c| &c.unspecified_destination);
4411        return ReceivePacketAction::Drop { reason: DropReason::UnspecifiedDestination };
4412    };
4413
4414    // If the packet arrived at the loopback interface, check if any local
4415    // interface has the destination address assigned. This effectively lets
4416    // the loopback interface operate as a weak host for incoming packets.
4417    //
4418    // Note that (as of writing) the stack sends all locally destined traffic to
4419    // the loopback interface so we need this hack to allow the stack to accept
4420    // packets that arrive at the loopback interface (after being looped back)
4421    // but destined to an address that is assigned to another local interface.
4422    //
4423    // TODO(https://fxbug.dev/42175703): This should instead be controlled by the
4424    // routing table.
4425
4426    let highest_priority = if device.is_loopback() {
4427        core_ctx.with_address_statuses(dst_ip, |it| {
4428            let it = it.map(|(_device, status)| status);
4429            choose_highest_priority_address_status::<Ipv6>(it)
4430        })
4431    } else {
4432        core_ctx.address_status_for_device(dst_ip, device).into_present()
4433    };
4434    match highest_priority {
4435        Some(address_status @ Ipv6PresentAddressStatus::Multicast) => {
4436            receive_ip_multicast_packet_action(
4437                core_ctx,
4438                bindings_ctx,
4439                device,
4440                packet,
4441                Some(address_status),
4442                dst_ip,
4443                frame_dst,
4444            )
4445        }
4446        Some(address_status @ Ipv6PresentAddressStatus::UnicastAssigned) => {
4447            core_ctx.increment_both(device, |c| &c.deliver_unicast);
4448            ReceivePacketAction::Deliver {
4449                address_status,
4450                internal_forwarding: InternalForwarding::NotUsed,
4451            }
4452        }
4453        Some(Ipv6PresentAddressStatus::UnicastTentative) => {
4454            // If the destination address is tentative (which implies that
4455            // we are still performing NDP's Duplicate Address Detection on
4456            // it), then we don't consider the address "assigned to an
4457            // interface", and so we drop packets instead of delivering them
4458            // locally.
4459            //
4460            // As per RFC 4862 section 5.4:
4461            //
4462            //   An address on which the Duplicate Address Detection
4463            //   procedure is applied is said to be tentative until the
4464            //   procedure has completed successfully. A tentative address
4465            //   is not considered "assigned to an interface" in the
4466            //   traditional sense.  That is, the interface must accept
4467            //   Neighbor Solicitation and Advertisement messages containing
4468            //   the tentative address in the Target Address field, but
4469            //   processes such packets differently from those whose Target
4470            //   Address matches an address assigned to the interface. Other
4471            //   packets addressed to the tentative address should be
4472            //   silently discarded. Note that the "other packets" include
4473            //   Neighbor Solicitation and Advertisement messages that have
4474            //   the tentative (i.e., unicast) address as the IP destination
4475            //   address and contain the tentative address in the Target
4476            //   Address field.  Such a case should not happen in normal
4477            //   operation, though, since these messages are multicasted in
4478            //   the Duplicate Address Detection procedure.
4479            //
4480            // That is, we accept no packets destined to a tentative
4481            // address. NS and NA packets should be addressed to a multicast
4482            // address that we would have joined during DAD so that we can
4483            // receive those packets.
4484            core_ctx.increment_both(device, |c| &c.drop_for_tentative);
4485            ReceivePacketAction::Drop { reason: DropReason::Tentative }
4486        }
4487        None => receive_ip_packet_action_common::<Ipv6, _, _, _>(
4488            core_ctx,
4489            bindings_ctx,
4490            dst_ip,
4491            device,
4492            packet,
4493            frame_dst,
4494            marks,
4495        ),
4496    }
4497}
4498
4499/// Computes the action to take for multicast packets on behalf of
4500/// [`receive_ipv4_packet_action`] and [`receive_ipv6_packet_action`].
4501fn receive_ip_multicast_packet_action<
4502    I: IpLayerIpExt,
4503    B: SplitByteSlice,
4504    BC: IpLayerBindingsContext<I, CC::DeviceId>,
4505    CC: IpLayerContext<I, BC>,
4506>(
4507    core_ctx: &mut CC,
4508    bindings_ctx: &mut BC,
4509    device: &CC::DeviceId,
4510    packet: &I::Packet<B>,
4511    address_status: Option<I::AddressStatus>,
4512    dst_ip: SpecifiedAddr<I::Addr>,
4513    frame_dst: Option<FrameDestination>,
4514) -> ReceivePacketAction<I, CC::DeviceId> {
4515    let targets = multicast_forwarding::lookup_multicast_route_or_stash_packet(
4516        core_ctx,
4517        bindings_ctx,
4518        packet,
4519        device,
4520        frame_dst,
4521    );
4522    match (targets, address_status) {
4523        (Some(targets), address_status) => {
4524            if address_status.is_some() {
4525                core_ctx.increment_both(device, |c| &c.deliver_multicast);
4526            }
4527            ReceivePacketAction::MulticastForward { targets, address_status, dst_ip }
4528        }
4529        (None, Some(address_status)) => {
4530            // If the address was present on the device (e.g. the host is a
4531            // member of the multicast group), fallback to local delivery.
4532            core_ctx.increment_both(device, |c| &c.deliver_multicast);
4533            ReceivePacketAction::Deliver {
4534                address_status,
4535                internal_forwarding: InternalForwarding::NotUsed,
4536            }
4537        }
4538        (None, None) => {
4539            // As per RFC 1122 Section 3.2.2
4540            //   An ICMP error message MUST NOT be sent as the result of
4541            //   receiving:
4542            //   ...
4543            //   * a datagram destined to an IP broadcast or IP multicast
4544            //     address
4545            //
4546            // As such, drop the packet
4547            core_ctx.increment_both(device, |c| &c.multicast_no_interest);
4548            ReceivePacketAction::Drop { reason: DropReason::MulticastNoInterest }
4549        }
4550    }
4551}
4552
4553/// Computes the remaining protocol-agnostic actions on behalf of
4554/// [`receive_ipv4_packet_action`] and [`receive_ipv6_packet_action`].
4555fn receive_ip_packet_action_common<
4556    I: IpLayerIpExt,
4557    B: SplitByteSlice,
4558    BC: IpLayerBindingsContext<I, CC::DeviceId>,
4559    CC: IpLayerContext<I, BC>,
4560>(
4561    core_ctx: &mut CC,
4562    bindings_ctx: &mut BC,
4563    dst_ip: SpecifiedAddr<I::Addr>,
4564    device_id: &CC::DeviceId,
4565    packet: &I::Packet<B>,
4566    frame_dst: Option<FrameDestination>,
4567    marks: &Marks,
4568) -> ReceivePacketAction<I, CC::DeviceId> {
4569    if dst_ip.is_multicast() {
4570        return receive_ip_multicast_packet_action(
4571            core_ctx,
4572            bindings_ctx,
4573            device_id,
4574            packet,
4575            None,
4576            dst_ip,
4577            frame_dst,
4578        );
4579    }
4580
4581    // The packet is not destined locally, so we attempt to forward it.
4582    if !core_ctx.is_device_unicast_forwarding_enabled(device_id) {
4583        // Forwarding is disabled; we are operating only as a host.
4584        //
4585        // For IPv4, per RFC 1122 Section 3.2.1.3, "A host MUST silently discard
4586        // an incoming datagram that is not destined for the host."
4587        //
4588        // For IPv6, per RFC 4443 Section 3.1, the only instance in which a host
4589        // sends an ICMPv6 Destination Unreachable message is when a packet is
4590        // destined to that host but on an unreachable port (Code 4 - "Port
4591        // unreachable"). Since the only sensible error message to send in this
4592        // case is a Destination Unreachable message, we interpret the RFC text
4593        // to mean that, consistent with IPv4's behavior, we should silently
4594        // discard the packet in this case.
4595        core_ctx.increment_both(device_id, |c| &c.forwarding_disabled);
4596        return ReceivePacketAction::Drop { reason: DropReason::ForwardingDisabledInboundIface };
4597    }
4598    // Per https://www.rfc-editor.org/rfc/rfc4291.html#section-2.5.2:
4599    //   An IPv6 packet with a source address of unspecified must never be forwarded by an IPv6
4600    //   router.
4601    // Per https://datatracker.ietf.org/doc/html/rfc1812#section-5.3.7:
4602    //   A router SHOULD NOT forward any packet that has an invalid IP source address or a source
4603    //   address on network 0
4604    let Some(source_address) = SpecifiedAddr::new(packet.src_ip()) else {
4605        return ReceivePacketAction::Drop { reason: DropReason::ForwardUnspecifiedSource };
4606    };
4607
4608    // If forwarding is enabled, allow local delivery if the packet is destined
4609    // for an IP assigned to a different interface.
4610    //
4611    // This enables a weak host model when the Netstack is configured as a
4612    // router. Conceptually, the netstack is forwarding the packet from the
4613    // input device, to the destination IP's device.
4614    if let Some(dst_ip) = NonMappedAddr::new(dst_ip).and_then(NonMulticastAddr::new) {
4615        if let Some((outbound_device, address_status)) =
4616            get_device_with_assigned_address(core_ctx, IpDeviceAddr::new_from_witness(dst_ip))
4617        {
4618            return ReceivePacketAction::Deliver {
4619                address_status,
4620                internal_forwarding: InternalForwarding::Used(outbound_device),
4621            };
4622        }
4623    }
4624
4625    // For IPv4, RFC 3927 Section 2.7 states:
4626    //
4627    //   An IPv4 packet whose source and/or destination address is in the
4628    //   169.254/16 prefix MUST NOT be sent to any router for forwarding, and
4629    //   any network device receiving such a packet MUST NOT forward it,
4630    //   regardless of the TTL in the IPv4 header.
4631    //
4632    // However, to maintain behavioral similarity to both gVisor/Netstack2 and
4633    // Linux, we omit this check.
4634    //
4635    // For IPv6, RFC 4291 Section 2.5.6 states:
4636    //
4637    //   Routers must not forward any packets with Link-Local source or
4638    //   destination addresses to other links.
4639    if I::map_ip_in(
4640        &packet,
4641        |_| false,
4642        |packet| packet.src_ip().is_link_local() || packet.dst_ip().is_link_local(),
4643    ) {
4644        return ReceivePacketAction::Drop { reason: DropReason::ForwardLinkLocal };
4645    }
4646
4647    match lookup_route_table(
4648        core_ctx,
4649        *dst_ip,
4650        RuleInput {
4651            packet_origin: PacketOrigin::NonLocal { source_address, incoming_device: device_id },
4652            marks,
4653        },
4654    ) {
4655        Some(dst) => {
4656            core_ctx.increment_both(device_id, |c| &c.forward);
4657            ReceivePacketAction::Forward { original_dst: dst_ip, dst }
4658        }
4659        None => {
4660            core_ctx.increment_both(device_id, |c| &c.no_route_to_host);
4661            ReceivePacketAction::SendNoRouteToDest { dst: dst_ip }
4662        }
4663    }
4664}
4665
4666// Look up the route to a host.
4667fn lookup_route_table<
4668    I: IpLayerIpExt,
4669    BC: IpLayerBindingsContext<I, CC::DeviceId>,
4670    CC: IpStateContext<I, BC>,
4671>(
4672    core_ctx: &mut CC,
4673    dst_ip: I::Addr,
4674    rule_input: RuleInput<'_, I, CC::DeviceId>,
4675) -> Option<Destination<I::Addr, CC::DeviceId>> {
4676    let bound_device = match rule_input.packet_origin {
4677        PacketOrigin::Local { bound_address: _, bound_device } => bound_device,
4678        PacketOrigin::NonLocal { source_address: _, incoming_device: _ } => None,
4679    };
4680    core_ctx.with_rules_table(|core_ctx, rules: &RulesTable<_, _, BC>| {
4681        match walk_rules(core_ctx, rules, (), &rule_input, |(), core_ctx, table| {
4682            match table.lookup(core_ctx, bound_device, dst_ip) {
4683                Some(dst) => ControlFlow::Break(Some(dst)),
4684                None => ControlFlow::Continue(()),
4685            }
4686        }) {
4687            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
4688                inner: dst,
4689                observed_source_address_matcher: _,
4690            })) => dst,
4691            ControlFlow::Break(RuleAction::Unreachable) => None,
4692            ControlFlow::Continue(RuleWalkInfo {
4693                inner: (),
4694                observed_source_address_matcher: _,
4695            }) => None,
4696        }
4697    })
4698}
4699
/// Packed destination passed to [`IpDeviceSendContext::send_ip_frame`].
///
/// `I` selects the IP version (and with it the address and broadcast marker
/// types); `D` is the device identifier type carried by the
/// [`IpPacketDestination::Loopback`] variant.
#[derive(Debug, Derivative, Clone)]
#[derivative(Eq(bound = "D: Eq"), PartialEq(bound = "D: PartialEq"))]
pub enum IpPacketDestination<I: BroadcastIpExt, D> {
    /// Broadcast packet.
    Broadcast(I::BroadcastMarker),

    /// Multicast packet to the specified IP.
    Multicast(MulticastAddr<I::Addr>),

    /// Send packet to the neighbor with the specified IP (the receiving
    /// node is either a router or the final recipient of the packet).
    Neighbor(SpecifiedAddr<I::Addr>),

    /// Loop the packet back to the specified device. Can be used only when
    /// sending to the loopback device.
    Loopback(D),
}
4718
4719impl<I: BroadcastIpExt, D> IpPacketDestination<I, D> {
4720    /// Creates `IpPacketDestination` for IP address.
4721    pub fn from_addr(addr: SpecifiedAddr<I::Addr>) -> Self {
4722        match MulticastAddr::new(addr.into_addr()) {
4723            Some(mc_addr) => Self::Multicast(mc_addr),
4724            None => Self::Neighbor(addr),
4725        }
4726    }
4727
4728    /// Create `IpPacketDestination` from `NextHop`.
4729    pub fn from_next_hop(next_hop: NextHop<I::Addr>, dst_ip: SpecifiedAddr<I::Addr>) -> Self {
4730        match next_hop {
4731            NextHop::RemoteAsNeighbor => Self::from_addr(dst_ip),
4732            NextHop::Gateway(gateway) => Self::Neighbor(gateway),
4733            NextHop::Broadcast(marker) => Self::Broadcast(marker),
4734        }
4735    }
4736}
4737
/// The metadata associated with an outgoing IP packet.
///
/// `I` is the IP version, `D` the device identifier type, and `Src` the
/// representation of the source address (this file uses both
/// `SpecifiedAddr<I::Addr>` and `Option<SpecifiedAddr<I::Addr>>`).
#[derive(Debug, Clone)]
pub struct SendIpPacketMeta<I: IpExt, D, Src> {
    /// The outgoing device.
    pub device: D,

    /// The source address of the packet.
    pub src_ip: Src,

    /// The destination address of the packet.
    pub dst_ip: SpecifiedAddr<I::Addr>,

    /// The destination for the send operation.
    pub destination: IpPacketDestination<I, D>,

    /// The upper-layer protocol held in the packet's payload.
    pub proto: I::Proto,

    /// The time-to-live (IPv4) or hop limit (IPv6) for the packet.
    ///
    /// If not set, a default TTL may be used.
    pub ttl: Option<NonZeroU8>,

    /// An MTU to artificially impose on the whole IP packet.
    ///
    /// Note that the device's and discovered path MTU may still be imposed on
    /// the packet.
    pub mtu: Mtu,

    /// Traffic Class (IPv6) or Type of Service (IPv4) field for the packet.
    pub dscp_and_ecn: DscpAndEcn,
}
4770
4771impl<I: IpExt, D> From<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>>
4772    for SendIpPacketMeta<I, D, Option<SpecifiedAddr<I::Addr>>>
4773{
4774    fn from(
4775        SendIpPacketMeta { device, src_ip, dst_ip, destination, proto, ttl, mtu, dscp_and_ecn }: SendIpPacketMeta<
4776            I,
4777            D,
4778            SpecifiedAddr<I::Addr>,
4779        >,
4780    ) -> SendIpPacketMeta<I, D, Option<SpecifiedAddr<I::Addr>>> {
4781        SendIpPacketMeta {
4782            device,
4783            src_ip: Some(src_ip),
4784            dst_ip,
4785            destination,
4786            proto,
4787            ttl,
4788            mtu,
4789            dscp_and_ecn,
4790        }
4791    }
4792}
4793
/// Trait for abstracting the IP layer for locally-generated traffic.  That is,
/// traffic generated by the netstack itself (e.g. ICMP, IGMP, or MLD).
///
/// NOTE: Due to filtering rules, it is possible that the device provided in
/// `meta` will not be the device that the final IP packet is actually sent
/// from.
pub trait IpLayerHandler<I: IpExt + FragmentationIpExt + FilterIpExt, BC>:
    DeviceIdContext<AnyDevice>
{
    /// Encapsulate the provided transport packet and send it from the device
    /// provided in `meta`.
    ///
    /// On failure, the returned [`IpSendFrameError`] is parameterized over
    /// the serializer type `S` that was passed in as `body`.
    fn send_ip_packet_from_device<S>(
        &mut self,
        bindings_ctx: &mut BC,
        meta: SendIpPacketMeta<I, &Self::DeviceId, Option<SpecifiedAddr<I::Addr>>>,
        body: S,
    ) -> Result<(), IpSendFrameError<S>>
    where
        S: TransportPacketSerializer<I>,
        S::Buffer: BufferMut;

    /// Send an IP packet that doesn't require the encapsulation and other
    /// processing of [`send_ip_packet_from_device`] from the device specified
    /// by `device`, towards `destination`.
    // TODO(https://fxbug.dev/333908066): The packets going through this
    // function only hit the EGRESS filter hook, bypassing LOCAL_EGRESS.
    // Refactor callers and other functions to prevent this.
    fn send_ip_frame<S>(
        &mut self,
        bindings_ctx: &mut BC,
        device: &Self::DeviceId,
        destination: IpPacketDestination<I, &Self::DeviceId>,
        body: S,
    ) -> Result<(), IpSendFrameError<S>>
    where
        S: FragmentableIpSerializer<I, Buffer: BufferMut> + FilterIpPacket<I>;
}
4830
4831impl<
4832    I: IpLayerIpExt,
4833    BC: IpLayerBindingsContext<I, <CC as DeviceIdContext<AnyDevice>>::DeviceId>,
4834    CC: IpLayerEgressContext<I, BC> + IpDeviceEgressStateContext<I> + IpDeviceMtuContext<I>,
4835> IpLayerHandler<I, BC> for CC
4836{
4837    fn send_ip_packet_from_device<S>(
4838        &mut self,
4839        bindings_ctx: &mut BC,
4840        meta: SendIpPacketMeta<I, &CC::DeviceId, Option<SpecifiedAddr<I::Addr>>>,
4841        body: S,
4842    ) -> Result<(), IpSendFrameError<S>>
4843    where
4844        S: TransportPacketSerializer<I>,
4845        S::Buffer: BufferMut,
4846    {
4847        send_ip_packet_from_device(self, bindings_ctx, meta, body, IpLayerPacketMetadata::default())
4848    }
4849
4850    fn send_ip_frame<S>(
4851        &mut self,
4852        bindings_ctx: &mut BC,
4853        device: &Self::DeviceId,
4854        destination: IpPacketDestination<I, &Self::DeviceId>,
4855        body: S,
4856    ) -> Result<(), IpSendFrameError<S>>
4857    where
4858        S: FragmentableIpSerializer<I, Buffer: BufferMut> + FilterIpPacket<I>,
4859    {
4860        send_ip_frame(
4861            self,
4862            bindings_ctx,
4863            device,
4864            destination,
4865            body,
4866            IpLayerPacketMetadata::default(),
4867            Mtu::no_limit(),
4868        )
4869    }
4870}
4871
4872/// Sends an Ip packet with the specified metadata.
4873///
4874/// # Panics
4875///
4876/// Panics if either the source or destination address is the loopback address
4877/// and the device is a non-loopback device.
4878pub(crate) fn send_ip_packet_from_device<I, BC, CC, S>(
4879    core_ctx: &mut CC,
4880    bindings_ctx: &mut BC,
4881    meta: SendIpPacketMeta<
4882        I,
4883        &<CC as DeviceIdContext<AnyDevice>>::DeviceId,
4884        Option<SpecifiedAddr<I::Addr>>,
4885    >,
4886    body: S,
4887    packet_metadata: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
4888) -> Result<(), IpSendFrameError<S>>
4889where
4890    I: IpLayerIpExt,
4891    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes + MarksBindingsContext,
4892    CC: IpLayerEgressContext<I, BC> + IpDeviceEgressStateContext<I> + IpDeviceMtuContext<I>,
4893    S: TransportPacketSerializer<I>,
4894    S::Buffer: BufferMut,
4895{
4896    let SendIpPacketMeta { device, src_ip, dst_ip, destination, proto, ttl, mtu, dscp_and_ecn } =
4897        meta;
4898    core_ctx.increment_both(device, |c| &c.send_ip_packet);
4899    let next_packet_id = gen_ip_packet_id(core_ctx);
4900    let ttl = ttl.unwrap_or_else(|| core_ctx.get_hop_limit(device)).get();
4901    let src_ip = src_ip.map_or(I::UNSPECIFIED_ADDRESS, |a| a.get());
4902    let mut builder = I::PacketBuilder::new(src_ip, dst_ip.get(), ttl, proto);
4903
4904    #[derive(GenericOverIp)]
4905    #[generic_over_ip(I, Ip)]
4906    struct Wrap<'a, I: IpLayerIpExt> {
4907        builder: &'a mut I::PacketBuilder,
4908        next_packet_id: I::PacketId,
4909    }
4910
4911    I::map_ip::<_, ()>(
4912        Wrap { builder: &mut builder, next_packet_id },
4913        |Wrap { builder, next_packet_id }| {
4914            builder.id(next_packet_id);
4915        },
4916        |Wrap { builder: _, next_packet_id: () }| {
4917            // IPv6 doesn't have packet IDs.
4918        },
4919    );
4920
4921    builder.set_dscp_and_ecn(dscp_and_ecn);
4922
4923    let ip_frame = builder.wrap_body(body);
4924    send_ip_frame(core_ctx, bindings_ctx, device, destination, ip_frame, packet_metadata, mtu)
4925        .map_err(|ser| ser.map_serializer(|s| s.into_inner()))
4926}
4927
/// Abstracts access to a [`filter::FilterHandler`] for core contexts.
///
/// `I` is the IP version being filtered and `BT` supplies the bindings types
/// (including the device class that the context's device IDs must expose
/// through `InterfaceProperties`).
pub trait FilterHandlerProvider<I: FilterIpExt, BT: FilterBindingsTypes>:
    IpDeviceAddressIdContext<I, DeviceId: netstack3_base::InterfaceProperties<BT::DeviceClass>>
{
    /// The filter handler.
    ///
    /// It operates on the same device ID and weak address ID types as this
    /// context.
    type Handler<'a>: filter::FilterHandler<I, BT, DeviceId = Self::DeviceId, WeakAddressId = Self::WeakAddressId>
    where
        Self: 'a;

    /// Gets the filter handler for this context.
    fn filter_handler(&mut self) -> Self::Handler<'_>;
}
4940
/// Test-only helpers: a dual-stack wrapper around [`SendIpPacketMeta`] plus
/// fake-context implementations of the filter and marks traits.
#[cfg(any(test, feature = "testutils"))]
pub(crate) mod testutil {
    use super::*;

    use netstack3_base::testutil::{FakeBindingsCtx, FakeCoreCtx, FakeStrongDeviceId};
    use netstack3_base::{AssignedAddrIpExt, SendFrameContext, SendFrameError, SendableFrameMeta};
    use packet::Serializer;

    /// A [`SendIpPacketMeta`] for dual stack contexts.
    #[derive(Debug, GenericOverIp)]
    #[generic_over_ip()]
    #[allow(missing_docs)]
    pub enum DualStackSendIpPacketMeta<D> {
        V4(SendIpPacketMeta<Ipv4, D, SpecifiedAddr<Ipv4Addr>>),
        V6(SendIpPacketMeta<Ipv6, D, SpecifiedAddr<Ipv6Addr>>),
    }

    /// Wraps version-specific metadata in the variant matching `I`.
    impl<I: IpExt, D> From<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>>
        for DualStackSendIpPacketMeta<D>
    {
        fn from(value: SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>) -> Self {
            #[derive(GenericOverIp)]
            #[generic_over_ip(I, Ip)]
            struct Versioned<I: IpExt, D>(SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>);

            I::map_ip_in(
                Versioned(value),
                |Versioned(v4_meta)| Self::V4(v4_meta),
                |Versioned(v6_meta)| Self::V6(v6_meta),
            )
        }
    }

    /// Lets version-specific metadata be sent through a fake core context
    /// whose frames are keyed by [`DualStackSendIpPacketMeta`].
    impl<I: IpExt, S, DeviceId, BC>
        SendableFrameMeta<FakeCoreCtx<S, DualStackSendIpPacketMeta<DeviceId>, DeviceId>, BC>
        for SendIpPacketMeta<I, DeviceId, SpecifiedAddr<I::Addr>>
    {
        fn send_meta<SS>(
            self,
            core_ctx: &mut FakeCoreCtx<S, DualStackSendIpPacketMeta<DeviceId>, DeviceId>,
            bindings_ctx: &mut BC,
            frame: SS,
        ) -> Result<(), SendFrameError<SS>>
        where
            SS: Serializer,
            SS::Buffer: BufferMut,
        {
            let dual_stack_meta = DualStackSendIpPacketMeta::from(self);
            SendFrameContext::send_frame(&mut core_ctx.frames, bindings_ctx, dual_stack_meta, frame)
        }
    }

    /// Error returned when the IP version doesn't match.
    #[derive(Debug)]
    pub struct WrongIpVersion;

    impl<D> DualStackSendIpPacketMeta<D> {
        /// Borrows the inner [`SendIpPacketMeta`] when this value carries the
        /// IP version `I`, and reports [`WrongIpVersion`] otherwise.
        pub fn try_as<I: IpExt>(
            &self,
        ) -> Result<&SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>, WrongIpVersion> {
            #[derive(GenericOverIp)]
            #[generic_over_ip(I, Ip)]
            struct Matching<'a, I: IpExt, D>(
                Option<&'a SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>>,
            );

            let Matching(inner) = I::map_ip(
                self,
                |this| {
                    Matching(match this {
                        Self::V4(v4_meta) => Some(v4_meta),
                        Self::V6(_) => None,
                    })
                },
                |this| {
                    Matching(match this {
                        Self::V6(v6_meta) => Some(v6_meta),
                        Self::V4(_) => None,
                    })
                },
            );
            inner.ok_or(WrongIpVersion)
        }
    }

    /// Fake core contexts use the no-op filter handler.
    impl<I, BC, S, Meta, DeviceId> FilterHandlerProvider<I, BC> for FakeCoreCtx<S, Meta, DeviceId>
    where
        I: AssignedAddrIpExt + FilterIpExt,
        BC: FilterBindingsContext<DeviceId>,
        DeviceId: FakeStrongDeviceId + netstack3_base::InterfaceProperties<BC::DeviceClass>,
    {
        type Handler<'a>
            = filter::testutil::NoopImpl<DeviceId>
        where
            Self: 'a;

        fn filter_handler(&mut self) -> Self::Handler<'_> {
            Default::default()
        }
    }

    /// Fake bindings keep `Mark1` on egress and set `Mark2` on ingress.
    impl<TimerId, Event: Debug, State, FrameMeta> MarksBindingsContext
        for FakeBindingsCtx<TimerId, Event, State, FrameMeta>
    {
        fn marks_to_keep_on_egress() -> &'static [MarkDomain] {
            &[MarkDomain::Mark1]
        }

        fn marks_to_set_on_ingress() -> &'static [MarkDomain] {
            &[MarkDomain::Mark2]
        }
    }
}
5058
#[cfg(test)]
mod test {
    use super::*;

    /// An assigned IPv4 address wins over a tentative one.
    #[test]
    fn highest_priority_address_status_v4() {
        let candidates = [
            Ipv4PresentAddressStatus::UnicastAssigned,
            Ipv4PresentAddressStatus::UnicastTentative,
        ];
        let chosen = choose_highest_priority_address_status::<Ipv4>(candidates.into_iter());
        assert_eq!(chosen, Some(Ipv4PresentAddressStatus::UnicastAssigned));
    }

    /// An assigned IPv6 address wins over a tentative one.
    #[test]
    fn highest_priority_address_status_v6() {
        let candidates = [
            Ipv6PresentAddressStatus::UnicastAssigned,
            Ipv6PresentAddressStatus::UnicastTentative,
        ];
        let chosen = choose_highest_priority_address_status::<Ipv6>(candidates.into_iter());
        assert_eq!(chosen, Some(Ipv6PresentAddressStatus::UnicastAssigned));
    }
}