// File: netstack3_ip/base.rs

// Copyright 2018 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5use alloc::boxed::Box;
6use alloc::vec::Vec;
7use core::convert::Infallible as Never;
8use core::fmt::Debug;
9use core::hash::Hash;
10use core::marker::PhantomData;
11use core::num::NonZeroU8;
12use core::ops::ControlFlow;
13#[cfg(test)]
14use core::ops::DerefMut;
15use core::sync::atomic::{self, AtomicU16};
16
17use derivative::Derivative;
18use explicit::ResultExt as _;
19use lock_order::lock::{OrderedLockAccess, OrderedLockRef};
20use log::{debug, trace};
21use net_types::ip::{
22    GenericOverIp, Ip, Ipv4, Ipv4Addr, Ipv6, Ipv6Addr, Ipv6SourceAddr, Mtu, Subnet,
23};
24use net_types::{
25    LinkLocalAddress, MulticastAddr, MulticastAddress, NonMappedAddr, NonMulticastAddr,
26    SpecifiedAddr, SpecifiedAddress as _, Witness,
27};
28use netstack3_base::socket::{EitherStack, SocketCookie, SocketIpAddr, SocketIpAddrExt as _};
29use netstack3_base::sync::{Mutex, PrimaryRc, RwLock, StrongRc, WeakRc};
30use netstack3_base::{
31    AnyDevice, BroadcastIpExt, CoreTimerContext, Counter, CounterCollectionSpec, CounterContext,
32    DeviceIdContext, DeviceIdentifier as _, ErrorAndSerializer, EventContext, FrameDestination,
33    HandleableTimer, InstantContext, InterfaceProperties, IpAddressId, IpDeviceAddr,
34    IpDeviceAddressIdContext, IpExt, MarkDomain, Marks, Matcher as _, MatcherBindingsTypes,
35    NestedIntoCoreTimerCtx, NotFoundError, ResourceCounterContext, RngContext,
36    SendFrameErrorReason, StrongDeviceIdentifier, TimerBindingsTypes, TimerContext, TimerHandler,
37    TxMetadata as _, TxMetadataBindingsTypes, WeakIpAddressId, WrapBroadcastMarker,
38};
39use netstack3_filter::{
40    self as filter, ConnectionDirection, ConntrackConnection, FilterBindingsContext,
41    FilterBindingsTypes, FilterHandler as _, FilterIpContext, FilterIpExt, FilterIpMetadata,
42    FilterIpPacket, FilterPacketMetadata, FilterTimerId, ForwardedPacket, IpPacket, MarkAction,
43    MaybeTransportPacket as _, RejectType, TransportPacketSerializer, Tuple, WeakConnectionError,
44    WeakConntrackConnection,
45};
46use netstack3_hashmap::HashMap;
47use packet::{
48    Buf, BufferMut, GrowBuffer, LayoutBufferAlloc, PacketBuilder as _, PacketConstraints,
49    ParsablePacket as _, ParseBuffer, ParseBufferMut, ParseMetadata, SerializeError,
50    Serializer as _,
51};
52use packet_formats::error::{Ipv6ParseError, ParseError};
53use packet_formats::ip::{DscpAndEcn, IpPacket as _, IpPacketBuilder as _};
54use packet_formats::ipv4::{Ipv4FragmentType, Ipv4Packet};
55use packet_formats::ipv6::{Ipv6Packet, Ipv6PacketRaw};
56use thiserror::Error;
57use zerocopy::SplitByteSlice;
58
59use crate::internal::counters::{IpCounters, IpCountersIpExt};
60use crate::internal::device::opaque_iid::IidSecret;
61use crate::internal::device::slaac::SlaacCounters;
62use crate::internal::device::state::{
63    IpAddressData, IpAddressFlags, IpDeviceStateBindingsTypes, IpDeviceStateIpExt, WeakAddressId,
64};
65use crate::internal::device::{
66    self, IpDeviceAddressContext, IpDeviceBindingsContext, IpDeviceIpExt, IpDeviceSendContext,
67};
68use crate::internal::fragmentation::{FragmentableIpSerializer, FragmentationIpExt, IpFragmenter};
69use crate::internal::gmp::GmpQueryHandler;
70use crate::internal::gmp::igmp::IgmpCounters;
71use crate::internal::gmp::mld::MldCounters;
72use crate::internal::icmp::counters::IcmpCountersIpExt;
73use crate::internal::icmp::{
74    IcmpBindingsTypes, IcmpError, IcmpErrorHandler, IcmpHandlerIpExt, Icmpv4Error, Icmpv4State,
75    Icmpv4StateBuilder, Icmpv6Error, Icmpv6State, Icmpv6StateBuilder,
76};
77use crate::internal::ipv6::Ipv6PacketAction;
78use crate::internal::local_delivery::{
79    IpHeaderInfo, Ipv4HeaderInfo, Ipv6HeaderInfo, LocalDeliveryPacketInfo, ReceiveIpPacketMeta,
80    TransparentLocalDelivery,
81};
82use crate::internal::multicast_forwarding::counters::MulticastForwardingCounters;
83use crate::internal::multicast_forwarding::route::{
84    MulticastRouteIpExt, MulticastRouteTarget, MulticastRouteTargets,
85};
86use crate::internal::multicast_forwarding::state::{
87    MulticastForwardingState, MulticastForwardingStateContext,
88};
89use crate::internal::multicast_forwarding::{
90    MulticastForwardingBindingsTypes, MulticastForwardingDeviceContext, MulticastForwardingEvent,
91    MulticastForwardingTimerId,
92};
93use crate::internal::path_mtu::{PmtuBindingsTypes, PmtuCache, PmtuTimerId};
94use crate::internal::raw::counters::RawIpSocketCounters;
95use crate::internal::raw::{RawIpSocketHandler, RawIpSocketMap, RawIpSocketsBindingsTypes};
96use crate::internal::reassembly::{
97    FragmentBindingsTypes, FragmentHandler, FragmentProcessingState, FragmentTimerId,
98    FragmentablePacket, IpPacketFragmentCache, ReassemblyIpExt,
99};
100use crate::internal::routing::rules::{Rule, RuleAction, RuleInput, RulesTable};
101use crate::internal::routing::{
102    IpRoutingBindingsTypes, IpRoutingDeviceContext, NonLocalSrcAddrPolicy, PacketOrigin,
103    RoutingTable,
104};
105use crate::internal::socket::{IpSocketBindingsContext, IpSocketContext, IpSocketHandler};
106use crate::internal::types::{
107    self, Destination, InternalForwarding, NextHop, ResolvedRoute, RoutableIpAddr,
108};
109use crate::internal::{ipv6, multicast_forwarding};
110
111#[cfg(test)]
112mod tests;
113
/// Default IPv4 TTL.
///
/// Also used as the unicast hop limit in [`DEFAULT_HOP_LIMITS`].
pub const DEFAULT_TTL: NonZeroU8 = NonZeroU8::new(64).unwrap();
116
/// Hop limits for packets sent to multicast and unicast destinations.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct HopLimits {
    /// Hop limit applied to packets sent to unicast destinations.
    pub unicast: NonZeroU8,
    /// Hop limit applied to packets sent to multicast destinations.
    pub multicast: NonZeroU8,
}
124
125/// Default hop limits for sockets.
126pub const DEFAULT_HOP_LIMITS: HopLimits =
127    HopLimits { unicast: DEFAULT_TTL, multicast: NonZeroU8::new(1).unwrap() };
128
129/// The IPv6 subnet that contains all addresses; `::/0`.
130// Safe because 0 is less than the number of IPv6 address bits.
131pub const IPV6_DEFAULT_SUBNET: Subnet<Ipv6Addr> =
132    unsafe { Subnet::new_unchecked(Ipv6::UNSPECIFIED_ADDRESS, 0) };
133
134/// Sidecar metadata passed along with the packet.
135///
136/// Note: This metadata may be regenerated when packet handling requires
137/// performing multiple actions (e.g. sending the packet out multiple interfaces
138/// as part of multicast forwarding).
139#[derive(Derivative)]
140#[derivative(Default(bound = ""))]
141pub struct IpLayerPacketMetadata<
142    I: packet_formats::ip::IpExt,
143    A,
144    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
145> {
146    conntrack_connection_and_direction:
147        Option<(ConntrackConnection<I, A, BT>, ConnectionDirection)>,
148
149    /// Tx metadata associated with this packet.
150    ///
151    /// This may be non-default even in the rx path for looped back packets that
152    /// are still forcing tx frame ownership for sockets.
153    tx_metadata: BT::TxMetadata,
154
155    /// Marks attached to the packet that can be acted upon by routing/filtering.
156    marks: Marks,
157
158    /// Socket cookie of the associate socket if any. The value should be
159    /// passed to eBPF programs that process the packet, but it should not be
160    /// used as a unique identifier of the resource inside the netstack.
161    socket_cookie: Option<SocketCookie>,
162
163    #[cfg(debug_assertions)]
164    drop_check: IpLayerPacketMetadataDropCheck,
165}
166
/// A type that asserts, on drop, that it was intentionally being dropped.
///
/// NOTE: Unfortunately, debugging this requires backtraces, since track_caller
/// won't do what we want (https://github.com/rust-lang/rust/issues/116942).
/// Since this is only enabled in debug, the assumption is that stacktraces are
/// enabled.
#[cfg(debug_assertions)]
#[derive(Default)]
struct IpLayerPacketMetadataDropCheck {
    /// Whether dropping has been acknowledged. Defaults to `false`.
    okay_to_drop: bool,
}
178
179/// Metadata that is produced and consumed by the IP layer for each packet, but
180/// which also traverses the device layer.
181#[derive(Derivative)]
182#[derivative(Debug(bound = ""), Default(bound = ""))]
183pub struct DeviceIpLayerMetadata<BT: TxMetadataBindingsTypes> {
184    /// Weak reference to this packet's connection tracking entry, if the packet is
185    /// tracked.
186    ///
187    /// This allows NAT to consistently associate locally-generated, looped-back
188    /// packets with the same connection at every filtering hook even when NAT may
189    /// have been performed on them, causing them to no longer match the original or
190    /// reply tuples of the connection.
191    conntrack_entry: Option<(WeakConntrackConnection, ConnectionDirection)>,
192    /// Tx metadata associated with this packet.
193    ///
194    /// This may be non-default even in the rx path for looped back packets that
195    /// are still forcing tx frame ownership for sockets.
196    tx_metadata: BT::TxMetadata,
197    /// Marks attached to this packet. For all the incoming packets, they are None
198    /// by default but can be changed by a filtering rule.
199    ///
200    /// Note: The marks will be preserved if the packet is being looped back, i.e.,
201    /// the receiver will be able to observe the marks set by the sender. This is
202    /// consistent with Linux behavior.
203    marks: Marks,
204}
205
206impl<BT: TxMetadataBindingsTypes> DeviceIpLayerMetadata<BT> {
207    /// Discards the remaining IP layer information and returns only the tx
208    /// metadata used for buffer ownership.
209    pub fn into_tx_metadata(self) -> BT::TxMetadata {
210        self.tx_metadata
211    }
212    /// Creates new IP layer metadata with the marks.
213    #[cfg(any(test, feature = "testutils"))]
214    pub fn with_marks(marks: Marks) -> Self {
215        Self { conntrack_entry: None, tx_metadata: Default::default(), marks }
216    }
217}
218
219impl<
220    I: IpLayerIpExt,
221    A: WeakIpAddressId<I::Addr>,
222    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
223> IpLayerPacketMetadata<I, A, BT>
224{
225    fn from_device_ip_layer_metadata<CC, D>(
226        core_ctx: &mut CC,
227        device: &D,
228        DeviceIpLayerMetadata { conntrack_entry, tx_metadata, marks }: DeviceIpLayerMetadata<BT>,
229    ) -> Self
230    where
231        CC: ResourceCounterContext<D, IpCounters<I>>,
232    {
233        let conntrack_connection_and_direction = match conntrack_entry
234            .map(|(conn, dir)| conn.into_inner().map(|conn| (conn, dir)))
235            .transpose()
236        {
237            // Either the packet was tracked and we've preserved its conntrack entry across
238            // loopback, or it was untracked and we just stash the `None`.
239            Ok(conn_and_dir) => conn_and_dir,
240            // Conntrack entry was removed from table after packet was enqueued in loopback.
241            Err(WeakConnectionError::EntryRemoved) => None,
242            // Conntrack entry no longer matches the packet (for example, it could be that
243            // this is an IPv6 packet that was modified at the device layer and therefore it
244            // no longer matches its IPv4 conntrack entry).
245            Err(WeakConnectionError::InvalidEntry) => {
246                core_ctx.increment_both(device, |c| &c.invalid_cached_conntrack_entry);
247                None
248            }
249        };
250
251        let socket_cookie = tx_metadata.socket_cookie();
252
253        Self {
254            conntrack_connection_and_direction,
255            tx_metadata,
256            marks,
257            socket_cookie,
258            #[cfg(debug_assertions)]
259            drop_check: Default::default(),
260        }
261    }
262}
263
264impl<I: IpExt, A, BT: FilterBindingsTypes + TxMetadataBindingsTypes>
265    IpLayerPacketMetadata<I, A, BT>
266{
267    pub(crate) fn from_tx_metadata_and_marks(tx_metadata: BT::TxMetadata, marks: Marks) -> Self {
268        let socket_cookie = tx_metadata.socket_cookie();
269        Self {
270            conntrack_connection_and_direction: None,
271            tx_metadata,
272            marks,
273            socket_cookie,
274            #[cfg(debug_assertions)]
275            drop_check: Default::default(),
276        }
277    }
278
279    pub(crate) fn into_parts(
280        self,
281    ) -> (
282        Option<(ConntrackConnection<I, A, BT>, ConnectionDirection)>,
283        BT::TxMetadata,
284        Marks,
285        Option<SocketCookie>,
286    ) {
287        let Self {
288            tx_metadata,
289            marks,
290            conntrack_connection_and_direction,
291            socket_cookie,
292            #[cfg(debug_assertions)]
293            mut drop_check,
294        } = self;
295        #[cfg(debug_assertions)]
296        {
297            drop_check.okay_to_drop = true;
298        }
299        (conntrack_connection_and_direction, tx_metadata, marks, socket_cookie)
300    }
301
302    /// Acknowledge that it's okay to drop this packet metadata.
303    ///
304    /// When compiled with debug assertions, dropping [`IplayerPacketMetadata`]
305    /// will panic if this method has not previously been called.
306    pub(crate) fn acknowledge_drop(self) {
307        #[cfg(debug_assertions)]
308        {
309            let mut this = self;
310            this.drop_check.okay_to_drop = true;
311        }
312    }
313
314    /// Returns the tx metadata associated with this packet.
315    pub(crate) fn tx_metadata(&self) -> &BT::TxMetadata {
316        &self.tx_metadata
317    }
318
319    /// Returns the marks attached to this packet.
320    pub(crate) fn marks(&self) -> &Marks {
321        &self.marks
322    }
323}
324
325#[cfg(debug_assertions)]
326impl Drop for IpLayerPacketMetadataDropCheck {
327    fn drop(&mut self) {
328        if !self.okay_to_drop {
329            panic!(
330                "IpLayerPacketMetadata dropped without acknowledgement.  https://fxbug.dev/334127474"
331            );
332        }
333    }
334}
335
336impl<I: packet_formats::ip::IpExt, A, BT: FilterBindingsTypes + TxMetadataBindingsTypes>
337    FilterIpMetadata<I, A, BT> for IpLayerPacketMetadata<I, A, BT>
338{
339    fn take_connection_and_direction(
340        &mut self,
341    ) -> Option<(ConntrackConnection<I, A, BT>, ConnectionDirection)> {
342        self.conntrack_connection_and_direction.take()
343    }
344
345    fn replace_connection_and_direction(
346        &mut self,
347        conn: ConntrackConnection<I, A, BT>,
348        direction: ConnectionDirection,
349    ) -> Option<ConntrackConnection<I, A, BT>> {
350        self.conntrack_connection_and_direction.replace((conn, direction)).map(|(conn, _dir)| conn)
351    }
352}
353
354impl<I: packet_formats::ip::IpExt, A, BT: FilterBindingsTypes + TxMetadataBindingsTypes>
355    FilterPacketMetadata for IpLayerPacketMetadata<I, A, BT>
356{
357    fn apply_mark_action(&mut self, domain: MarkDomain, action: MarkAction) {
358        action.apply(self.marks.get_mut(domain))
359    }
360
361    fn cookie(&self) -> Option<SocketCookie> {
362        self.socket_cookie.clone()
363    }
364
365    fn marks(&self) -> &Marks {
366        &self.marks
367    }
368}
369
370/// Send errors observed at or above the IP layer that carry a serializer.
371pub type IpSendFrameError<S> = ErrorAndSerializer<IpSendFrameErrorReason, S>;
372
373/// Send error cause for [`IpSendFrameError`].
374#[derive(Debug, PartialEq)]
375pub enum IpSendFrameErrorReason {
376    /// Error comes from the device layer.
377    Device(SendFrameErrorReason),
378    /// The frame's source or destination address is in the loopback subnet, but
379    /// the target device is not the loopback device.
380    IllegalLoopbackAddress,
381}
382
383impl From<SendFrameErrorReason> for IpSendFrameErrorReason {
384    fn from(value: SendFrameErrorReason) -> Self {
385        Self::Device(value)
386    }
387}
388
389/// The execution context provided by a transport layer protocol to the IP
390/// layer.
391///
392/// An implementation for `()` is provided which indicates that a particular
393/// transport layer protocol is unsupported.
394pub trait IpTransportContext<I, BC, CC>
395where
396    I: IpLayerIpExt,
397    CC: DeviceIdContext<AnyDevice> + ?Sized,
398{
399    /// Type used to identify sockets for early demux.
400    type EarlyDemuxSocket;
401
402    /// Performs early demux.
403    ///
404    /// Tries to match the packet with a connected socket that will receive the
405    /// packet. If a match is found, the socket information is passed to
406    /// `LOCAL_INGRESS` filters. The socket is also passed to
407    /// `receive_ip_packet` to avoid demuxing the packet twice.
408    ///
409    /// The socket may be invalidated if the source address is changed by SNAT.
410    /// In that case, `receive_ip_packet` is called with `early_demux_socket`
411    /// set to `None`.
412    fn early_demux<B: ParseBuffer>(
413        core_ctx: &mut CC,
414        device: &CC::DeviceId,
415        src_ip: I::Addr,
416        dst_ip: I::Addr,
417        buffer: B,
418    ) -> Option<Self::EarlyDemuxSocket>;
419
420    /// Receive an ICMP error message.
421    ///
422    /// All arguments beginning with `original_` are fields from the IP packet
423    /// that triggered the error. The `original_body` is provided here so that
424    /// the error can be associated with a transport-layer socket. `device`
425    /// identifies the device that received the ICMP error message packet.
426    ///
427    /// While ICMPv4 error messages are supposed to contain the first 8 bytes of
428    /// the body of the offending packet, and ICMPv6 error messages are supposed
429    /// to contain as much of the offending packet as possible without violating
430    /// the IPv6 minimum MTU, the caller does NOT guarantee that either of these
431    /// hold. It is `receive_icmp_error`'s responsibility to handle any length
432    /// of `original_body`, and to perform any necessary validation.
433    fn receive_icmp_error(
434        core_ctx: &mut CC,
435        bindings_ctx: &mut BC,
436        device: &CC::DeviceId,
437        original_src_ip: Option<SpecifiedAddr<I::Addr>>,
438        original_dst_ip: SpecifiedAddr<I::Addr>,
439        original_body: &[u8],
440        err: I::ErrorCode,
441    );
442
443    /// Receive a transport layer packet in an IP packet.
444    ///
445    /// In the event of an unreachable port, `receive_ip_packet` returns the
446    /// buffer in its original state (with the transport packet un-parsed) in
447    /// the `Err` variant.
448    fn receive_ip_packet<B: BufferMut, H: IpHeaderInfo<I>>(
449        core_ctx: &mut CC,
450        bindings_ctx: &mut BC,
451        device: &CC::DeviceId,
452        src_ip: I::RecvSrcAddr,
453        dst_ip: SpecifiedAddr<I::Addr>,
454        buffer: B,
455        info: &LocalDeliveryPacketInfo<I, H>,
456        early_demux_socket: Option<Self::EarlyDemuxSocket>,
457    ) -> Result<(), (B, I::IcmpError)>;
458}
459
460/// The base execution context provided by the IP layer to transport layer
461/// protocols.
462pub trait BaseTransportIpContext<I: IpExt, BC>: DeviceIdContext<AnyDevice> {
463    /// The iterator given to
464    /// [`BaseTransportIpContext::with_devices_with_assigned_addr`].
465    type DevicesWithAddrIter<'s>: Iterator<Item = Self::DeviceId>;
466
467    /// Is this one of our local addresses, and is it in the assigned state?
468    ///
469    /// Calls `cb` with an iterator over all the local interfaces for which
470    /// `addr` is an associated address, and, for IPv6, for which it is in the
471    /// "assigned" state.
472    fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
473        &mut self,
474        addr: SpecifiedAddr<I::Addr>,
475        cb: F,
476    ) -> O;
477
478    /// Get default hop limits.
479    ///
480    /// If `device` is not `None` and exists, its hop limits will be returned.
481    /// Otherwise the system defaults are returned.
482    fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits;
483
484    /// Gets the original destination for the tracked connection indexed by
485    /// `tuple`, which includes the source and destination addresses and
486    /// transport-layer ports as well as the transport protocol number.
487    fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)>;
488}
489
490/// A marker trait for the traits required by the transport layer from the IP
491/// layer.
492pub trait TransportIpContext<I: IpExt + FilterIpExt, BC: TxMetadataBindingsTypes>:
493    BaseTransportIpContext<I, BC> + IpSocketHandler<I, BC>
494{
495}
496
497impl<I, CC, BC> TransportIpContext<I, BC> for CC
498where
499    I: IpExt + FilterIpExt,
500    CC: BaseTransportIpContext<I, BC> + IpSocketHandler<I, BC>,
501    BC: TxMetadataBindingsTypes,
502{
503}
504
505/// Abstraction over the ability to join and leave multicast groups.
506pub trait MulticastMembershipHandler<I: Ip, BC>: DeviceIdContext<AnyDevice> {
507    /// Requests that the specified device join the given multicast group.
508    ///
509    /// If this method is called multiple times with the same device and
510    /// address, the device will remain joined to the multicast group until
511    /// [`MulticastTransportIpContext::leave_multicast_group`] has been called
512    /// the same number of times.
513    fn join_multicast_group(
514        &mut self,
515        bindings_ctx: &mut BC,
516        device: &Self::DeviceId,
517        addr: MulticastAddr<I::Addr>,
518    );
519
520    /// Requests that the specified device leave the given multicast group.
521    ///
522    /// Each call to this method must correspond to an earlier call to
523    /// [`MulticastTransportIpContext::join_multicast_group`]. The device
524    /// remains a member of the multicast group so long as some call to
525    /// `join_multicast_group` has been made without a corresponding call to
526    /// `leave_multicast_group`.
527    fn leave_multicast_group(
528        &mut self,
529        bindings_ctx: &mut BC,
530        device: &Self::DeviceId,
531        addr: MulticastAddr<I::Addr>,
532    );
533
534    /// Selects a default device with which to join the given multicast group.
535    ///
536    /// The selection is made by consulting the routing table; If there is no
537    /// route available to the given address, an error is returned.
538    fn select_device_for_multicast_group(
539        &mut self,
540        addr: MulticastAddr<I::Addr>,
541        marks: &Marks,
542    ) -> Result<Self::DeviceId, ResolveRouteError>;
543}
544
// TODO(joshlf): With all 256 protocol numbers (minus reserved ones) given their
// own associated type in both traits, running `cargo check` on a 2018 MacBook
// Pro takes over a minute. Eventually - and before we formally publish this as
// a library - we should identify the bottleneck in the compiler and optimize
// it. For the time being, however, we only support protocol numbers that we
// actually use (TCP and UDP).
551
/// Enables a blanket implementation of [`BaseTransportIpContext`] and, through
/// it, of [`TransportIpContext`].
///
/// Implementing this marker trait for a type opts it into the blanket
/// implementation, given the other requirements are met.
pub trait UseTransportIpContextBlanket {}
557
/// An iterator supporting the blanket implementation of
/// [`BaseTransportIpContext::with_devices_with_assigned_addr`].
///
/// Wraps the iterator `Iter`; `I` and `D` are carried as type-level markers
/// only.
pub struct AssignedAddressDeviceIterator<Iter, I, D>(Iter, PhantomData<(I, D)>);
561
562impl<Iter, I, D> Iterator for AssignedAddressDeviceIterator<Iter, I, D>
563where
564    Iter: Iterator<Item = (D, I::AddressStatus)>,
565    I: IpLayerIpExt,
566{
567    type Item = D;
568    fn next(&mut self) -> Option<D> {
569        let Self(iter, PhantomData) = self;
570        iter.by_ref().find_map(|(device, state)| is_unicast_assigned::<I>(&state).then_some(device))
571    }
572}
573
574impl<
575    I: IpLayerIpExt,
576    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes + IpRoutingBindingsTypes,
577    CC: IpDeviceContext<I>
578        + IpSocketHandler<I, BC>
579        + IpStateContext<I, BC>
580        + FilterIpContext<I, BC>
581        + UseTransportIpContextBlanket,
582> BaseTransportIpContext<I, BC> for CC
583{
584    type DevicesWithAddrIter<'s> =
585        AssignedAddressDeviceIterator<CC::DeviceAndAddressStatusIter<'s>, I, CC::DeviceId>;
586
587    fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
588        &mut self,
589        addr: SpecifiedAddr<I::Addr>,
590        cb: F,
591    ) -> O {
592        self.with_address_statuses(addr, |it| cb(AssignedAddressDeviceIterator(it, PhantomData)))
593    }
594
595    fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits {
596        match device {
597            Some(device) => HopLimits {
598                unicast: IpDeviceEgressStateContext::<I>::get_hop_limit(self, device),
599                ..DEFAULT_HOP_LIMITS
600            },
601            None => DEFAULT_HOP_LIMITS,
602        }
603    }
604
605    fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
606        self.with_filter_state(|state| {
607            let conn = state.conntrack.get_connection(&tuple)?;
608
609            if !conn.destination_nat() {
610                return None;
611            }
612
613            // The tuple marking the original direction of the connection is
614            // never modified by NAT. This means it can be used to recover the
615            // destination before NAT was performed.
616            let original = conn.original_tuple();
617            Some((original.dst_addr, original.dst_port_or_id))
618        })
619    }
620}
621
/// The status of an IP address on an interface.
#[derive(Debug, PartialEq)]
pub enum AddressStatus<S> {
    /// The address is present on the interface, with the given status.
    Present(S),
    /// The address is not assigned to the interface.
    Unassigned,
}
629
630impl<S> AddressStatus<S> {
631    fn into_present(self) -> Option<S> {
632        match self {
633            Self::Present(s) => Some(s),
634            Self::Unassigned => None,
635        }
636    }
637}
638
639impl AddressStatus<Ipv4PresentAddressStatus> {
640    /// Creates an IPv4 `AddressStatus` for `addr` on `device`.
641    pub fn from_context_addr_v4<
642        BC: IpDeviceStateBindingsTypes,
643        CC: device::IpDeviceStateContext<Ipv4, BC> + GmpQueryHandler<Ipv4, BC>,
644    >(
645        core_ctx: &mut CC,
646        device: &CC::DeviceId,
647        addr: SpecifiedAddr<Ipv4Addr>,
648    ) -> AddressStatus<Ipv4PresentAddressStatus> {
649        if addr.is_limited_broadcast() {
650            return AddressStatus::Present(Ipv4PresentAddressStatus::LimitedBroadcast);
651        }
652
653        if MulticastAddr::new(addr.get())
654            .is_some_and(|addr| GmpQueryHandler::gmp_is_in_group(core_ctx, device, addr))
655        {
656            return AddressStatus::Present(Ipv4PresentAddressStatus::Multicast);
657        }
658
659        core_ctx.with_address_ids(device, |mut addrs, core_ctx| {
660            addrs
661                .find_map(|addr_id| {
662                    let dev_addr = addr_id.addr_sub();
663                    let (dev_addr, subnet) = dev_addr.addr_subnet();
664
665                    if **dev_addr == addr {
666                        let assigned = core_ctx.with_ip_address_data(
667                            device,
668                            &addr_id,
669                            |IpAddressData { flags: IpAddressFlags { assigned }, config: _ }| {
670                                *assigned
671                            },
672                        );
673
674                        if assigned {
675                            Some(AddressStatus::Present(Ipv4PresentAddressStatus::UnicastAssigned))
676                        } else {
677                            Some(AddressStatus::Present(Ipv4PresentAddressStatus::UnicastTentative))
678                        }
679                    } else if addr.get() == subnet.broadcast() {
680                        Some(AddressStatus::Present(Ipv4PresentAddressStatus::SubnetBroadcast))
681                    } else if device.is_loopback() && subnet.contains(addr.as_ref()) {
682                        Some(AddressStatus::Present(Ipv4PresentAddressStatus::LoopbackSubnet))
683                    } else {
684                        None
685                    }
686                })
687                .unwrap_or(AddressStatus::Unassigned)
688        })
689    }
690}
691
692impl AddressStatus<Ipv6PresentAddressStatus> {
693    /// /// Creates an IPv6 `AddressStatus` for `addr` on `device`.
694    pub fn from_context_addr_v6<
695        BC: IpDeviceBindingsContext<Ipv6, CC::DeviceId>,
696        CC: device::Ipv6DeviceContext<BC> + GmpQueryHandler<Ipv6, BC>,
697    >(
698        core_ctx: &mut CC,
699        device: &CC::DeviceId,
700        addr: SpecifiedAddr<Ipv6Addr>,
701    ) -> AddressStatus<Ipv6PresentAddressStatus> {
702        if MulticastAddr::new(addr.get())
703            .is_some_and(|addr| GmpQueryHandler::gmp_is_in_group(core_ctx, device, addr))
704        {
705            return AddressStatus::Present(Ipv6PresentAddressStatus::Multicast);
706        }
707
708        let addr_id = match core_ctx.get_address_id(device, addr) {
709            Ok(o) => o,
710            Err(NotFoundError) => return AddressStatus::Unassigned,
711        };
712
713        let assigned = core_ctx.with_ip_address_data(
714            device,
715            &addr_id,
716            |IpAddressData { flags: IpAddressFlags { assigned }, config: _ }| *assigned,
717        );
718
719        if assigned {
720            AddressStatus::Present(Ipv6PresentAddressStatus::UnicastAssigned)
721        } else {
722            AddressStatus::Present(Ipv6PresentAddressStatus::UnicastTentative)
723        }
724    }
725}
726
727impl<S: GenericOverIp<I>, I: Ip> GenericOverIp<I> for AddressStatus<S> {
728    type Type = AddressStatus<S::Type>;
729}
730
/// The status of an IPv4 address.
#[derive(Debug, PartialEq)]
pub enum Ipv4PresentAddressStatus {
    /// The address is the limited broadcast address.
    LimitedBroadcast,
    /// The address is the broadcast address of a subnet assigned to the
    /// interface.
    SubnetBroadcast,
    /// The address is a multicast address whose group the interface is in.
    Multicast,
    /// The address matches an address on the interface whose `assigned` flag
    /// is set.
    UnicastAssigned,
    /// The address matches an address on the interface whose `assigned` flag
    /// is not set.
    UnicastTentative,
    /// This status indicates that the queried device was Loopback. The address
    /// belongs to a subnet that is assigned to the interface. This status
    /// takes lower precedence than the unicast statuses and
    /// `SubnetBroadcast`. E.g. if the loopback device is assigned
    /// `127.0.0.1/8`:
    ///   * address `127.0.0.1` -> `UnicastAssigned` (or `UnicastTentative`)
    ///   * address `127.0.0.2` -> `LoopbackSubnet`
    ///   * address `127.255.255.255` -> `SubnetBroadcast`
    ///
    /// This exists for Linux conformance, which on the Loopback device,
    /// considers an IPv4 address assigned if it belongs to one of the device's
    /// assigned subnets.
    LoopbackSubnet,
}
752
753impl Ipv4PresentAddressStatus {
754    fn to_broadcast_marker(&self) -> Option<<Ipv4 as BroadcastIpExt>::BroadcastMarker> {
755        match self {
756            Self::LimitedBroadcast | Self::SubnetBroadcast => Some(()),
757            Self::Multicast
758            | Self::UnicastAssigned
759            | Self::UnicastTentative
760            | Self::LoopbackSubnet => None,
761        }
762    }
763}
764
/// The status of an IPv6 address.
#[derive(Debug, PartialEq)]
pub enum Ipv6PresentAddressStatus {
    /// The address is a multicast address whose group the interface is in.
    Multicast,
    /// The address is on the interface and its `assigned` flag is set.
    UnicastAssigned,
    /// The address is on the interface and its `assigned` flag is not set.
    UnicastTentative,
}
773
/// An extension trait providing IP layer properties.
///
/// Bundles the per-IP-version types the IP layer needs: the address status
/// representation, the layer state, and the packet identifier machinery.
pub trait IpLayerIpExt:
    IpExt
    + MulticastRouteIpExt
    + IcmpHandlerIpExt
    + FilterIpExt
    + FragmentationIpExt
    + IpDeviceIpExt
    + IpCountersIpExt
    + IcmpCountersIpExt
    + ReassemblyIpExt
{
    /// IP Address status.
    ///
    /// This is the payload carried by `AddressStatus::Present`.
    type AddressStatus: Debug;
    /// The IP layer state for this IP version.
    ///
    /// The state must expose the version-generic [`IpStateInner`] through
    /// `AsRef`.
    type State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>: AsRef<
        IpStateInner<Self, StrongDeviceId, BT>,
    >;
    /// State kept for packet identifiers.
    type PacketIdState;
    /// The type of a single packet identifier.
    type PacketId;
    /// Produces the next packet ID from the state.
    ///
    /// The state is taken by shared reference, so implementations that need
    /// to advance it must do so through interior mutability.
    fn next_packet_id_from_state(state: &Self::PacketIdState) -> Self::PacketId;
}
799
800impl IpLayerIpExt for Ipv4 {
801    type AddressStatus = Ipv4PresentAddressStatus;
802    type State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> =
803        Ipv4State<StrongDeviceId, BT>;
804    type PacketIdState = AtomicU16;
805    type PacketId = u16;
806    fn next_packet_id_from_state(next_packet_id: &Self::PacketIdState) -> Self::PacketId {
807        // Relaxed ordering as we only need atomicity without synchronization. See
808        // https://en.cppreference.com/w/cpp/atomic/memory_order#Relaxed_ordering
809        // for more details.
810        next_packet_id.fetch_add(1, atomic::Ordering::Relaxed)
811    }
812}
813
814impl IpLayerIpExt for Ipv6 {
815    type AddressStatus = Ipv6PresentAddressStatus;
816    type State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> =
817        Ipv6State<StrongDeviceId, BT>;
818    type PacketIdState = ();
819    type PacketId = ();
820    fn next_packet_id_from_state((): &Self::PacketIdState) -> Self::PacketId {
821        ()
822    }
823}
824
825/// The state context provided to the IP layer.
826pub trait IpStateContext<I: IpLayerIpExt, BT: IpRoutingBindingsTypes + MatcherBindingsTypes>:
827    IpRouteTablesContext<I, BT, DeviceId: InterfaceProperties<BT::DeviceClass>>
828{
829    /// The context that provides access to the IP routing tables.
830    type IpRouteTablesCtx<'a>: IpRouteTablesContext<I, BT, DeviceId = Self::DeviceId>;
831
832    /// Gets an immutable reference to the rules table.
833    fn with_rules_table<
834        O,
835        F: FnOnce(&mut Self::IpRouteTablesCtx<'_>, &RulesTable<I, Self::DeviceId, BT>) -> O,
836    >(
837        &mut self,
838        cb: F,
839    ) -> O;
840
841    /// Gets a mutable reference to the rules table.
842    fn with_rules_table_mut<
843        O,
844        F: FnOnce(&mut Self::IpRouteTablesCtx<'_>, &mut RulesTable<I, Self::DeviceId, BT>) -> O,
845    >(
846        &mut self,
847        cb: F,
848    ) -> O;
849}
850
851/// The state context that gives access to routing tables provided to the IP layer.
852pub trait IpRouteTablesContext<I: IpLayerIpExt, BT: IpRoutingBindingsTypes>:
853    IpRouteTableContext<I, BT> + IpDeviceContext<I>
854{
855    /// The inner context that can provide access to individual routing tables.
856    type Ctx<'a>: IpRouteTableContext<I, BT, DeviceId = Self::DeviceId, WeakDeviceId = Self::WeakDeviceId>;
857
858    /// Gets the main table ID.
859    fn main_table_id(&self) -> RoutingTableId<I, Self::DeviceId, BT>;
860
861    /// Gets immutable access to all the routing tables that currently exist.
862    fn with_ip_routing_tables<
863        O,
864        F: FnOnce(
865            &mut Self::Ctx<'_>,
866            &HashMap<
867                RoutingTableId<I, Self::DeviceId, BT>,
868                PrimaryRc<BaseRoutingTableState<I, Self::DeviceId, BT>>,
869            >,
870        ) -> O,
871    >(
872        &mut self,
873        cb: F,
874    ) -> O;
875
876    /// Gets mutable access to all the routing tables that currently exist.
877    fn with_ip_routing_tables_mut<
878        O,
879        F: FnOnce(
880            &mut HashMap<
881                RoutingTableId<I, Self::DeviceId, BT>,
882                PrimaryRc<BaseRoutingTableState<I, Self::DeviceId, BT>>,
883            >,
884        ) -> O,
885    >(
886        &mut self,
887        cb: F,
888    ) -> O;
889
890    // TODO(https://fxbug.dev/354724171): Remove this function when we no longer
891    // make routing decisions starting from the main table.
892    /// Calls the function with an immutable reference to IP routing table.
893    fn with_main_ip_routing_table<
894        O,
895        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &RoutingTable<I, Self::DeviceId>) -> O,
896    >(
897        &mut self,
898        cb: F,
899    ) -> O {
900        let main_table_id = self.main_table_id();
901        self.with_ip_routing_table(&main_table_id, cb)
902    }
903
904    // TODO(https://fxbug.dev/341194323): Remove this function when we no longer
905    // only update the main routing table by default.
906    /// Calls the function with a mutable reference to IP routing table.
907    fn with_main_ip_routing_table_mut<
908        O,
909        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &mut RoutingTable<I, Self::DeviceId>) -> O,
910    >(
911        &mut self,
912        cb: F,
913    ) -> O {
914        let main_table_id = self.main_table_id();
915        self.with_ip_routing_table_mut(&main_table_id, cb)
916    }
917}
918
919/// The state context that gives access to a singular routing table.
920pub trait IpRouteTableContext<I: IpLayerIpExt, BT: IpRoutingBindingsTypes>:
921    IpDeviceContext<I>
922{
923    /// The inner device id context.
924    type IpDeviceIdCtx<'a>: DeviceIdContext<AnyDevice, DeviceId = Self::DeviceId, WeakDeviceId = Self::WeakDeviceId>
925        + IpRoutingDeviceContext<I>
926        + IpDeviceContext<I>;
927
928    /// Calls the function with an immutable reference to IP routing table.
929    fn with_ip_routing_table<
930        O,
931        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &RoutingTable<I, Self::DeviceId>) -> O,
932    >(
933        &mut self,
934        table_id: &RoutingTableId<I, Self::DeviceId, BT>,
935        cb: F,
936    ) -> O;
937
938    /// Calls the function with a mutable reference to IP routing table.
939    fn with_ip_routing_table_mut<
940        O,
941        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &mut RoutingTable<I, Self::DeviceId>) -> O,
942    >(
943        &mut self,
944        table_id: &RoutingTableId<I, Self::DeviceId, BT>,
945        cb: F,
946    ) -> O;
947}
948
949/// Provides access to an IP device's state for IP layer egress.
950pub trait IpDeviceEgressStateContext<I: IpLayerIpExt>: DeviceIdContext<AnyDevice> {
951    /// Calls the callback with the next packet ID.
952    fn with_next_packet_id<O, F: FnOnce(&I::PacketIdState) -> O>(&self, cb: F) -> O;
953
954    /// Returns the best local address for communicating with the remote.
955    fn get_local_addr_for_remote(
956        &mut self,
957        device_id: &Self::DeviceId,
958        remote: Option<SpecifiedAddr<I::Addr>>,
959    ) -> Option<IpDeviceAddr<I::Addr>>;
960
961    /// Returns the hop limit.
962    fn get_hop_limit(&mut self, device_id: &Self::DeviceId) -> NonZeroU8;
963}
964
/// Provides access to an IP device's state for IP layer ingress.
pub trait IpDeviceIngressStateContext<I: IpLayerIpExt>: DeviceIdContext<AnyDevice> {
    /// Gets the status of an address.
    ///
    /// Only the specified device will be checked for the address. Returns
    /// [`AddressStatus::Unassigned`] if the address is not assigned to the
    /// device.
    ///
    /// Otherwise the IP-version-specific status is returned wrapped in
    /// `AddressStatus::Present`.
    fn address_status_for_device(
        &mut self,
        addr: SpecifiedAddr<I::Addr>,
        device_id: &Self::DeviceId,
    ) -> AddressStatus<I::AddressStatus>;
}
978
979/// The IP device context provided to the IP layer.
980pub trait IpDeviceContext<I: IpLayerIpExt>:
981    IpDeviceEgressStateContext<I> + IpDeviceIngressStateContext<I>
982{
983    /// Is the device enabled?
984    fn is_ip_device_enabled(&mut self, device_id: &Self::DeviceId) -> bool;
985
986    /// The iterator provided to [`IpDeviceContext::with_address_statuses`].
987    type DeviceAndAddressStatusIter<'a>: Iterator<Item = (Self::DeviceId, I::AddressStatus)>;
988
989    /// Provides access to the status of an address.
990    ///
991    /// Calls the provided callback with an iterator over the devices for which
992    /// the address is assigned and the status of the assignment for each
993    /// device.
994    fn with_address_statuses<F: FnOnce(Self::DeviceAndAddressStatusIter<'_>) -> R, R>(
995        &mut self,
996        addr: SpecifiedAddr<I::Addr>,
997        cb: F,
998    ) -> R;
999
1000    /// Returns true iff the device has unicast forwarding enabled.
1001    fn is_device_unicast_forwarding_enabled(&mut self, device_id: &Self::DeviceId) -> bool;
1002}
1003
/// Provides the ability to check neighbor reachability via a specific device.
pub trait IpDeviceConfirmReachableContext<I: IpLayerIpExt, BC>: DeviceIdContext<AnyDevice> {
    /// Confirm transport-layer forward reachability to the specified neighbor
    /// through the specified device.
    ///
    /// The method returns nothing, so callers get no indication of whether
    /// the confirmation had any effect.
    fn confirm_reachable(
        &mut self,
        bindings_ctx: &mut BC,
        device: &Self::DeviceId,
        neighbor: SpecifiedAddr<I::Addr>,
    );
}
1015
/// Provides access to an IP device's MTU for the IP layer.
///
/// Unlike most contexts in this file, this trait only requires `I: Ip`
/// rather than `I: IpLayerIpExt`.
pub trait IpDeviceMtuContext<I: Ip>: DeviceIdContext<AnyDevice> {
    /// Returns the MTU of the device.
    ///
    /// The MTU is the maximum size of an IP packet.
    fn get_mtu(&mut self, device_id: &Self::DeviceId) -> Mtu;
}
1023
/// Events observed at the IP layer.
///
/// The enum is generic over both the device identifier type and the IP
/// version; see [`IpLayerEvent::map_device`] for converting between device
/// identifier types.
#[derive(Debug, Eq, Hash, PartialEq, GenericOverIp)]
#[generic_over_ip(I, Ip)]
pub enum IpLayerEvent<DeviceId, I: IpLayerIpExt> {
    /// A route needs to be added.
    AddRoute(types::AddableEntry<I::Addr, DeviceId>),
    /// Routes matching these specifiers need to be removed.
    RemoveRoutes {
        /// Destination subnet.
        subnet: Subnet<I::Addr>,
        /// Outgoing interface.
        device: DeviceId,
        /// Gateway/next-hop, if any.
        gateway: Option<SpecifiedAddr<I::Addr>>,
    },
    /// The multicast forwarding engine emitted an event.
    MulticastForwarding(MulticastForwardingEvent<I, DeviceId>),
}
1042
1043impl<DeviceId, I: IpLayerIpExt> From<MulticastForwardingEvent<I, DeviceId>>
1044    for IpLayerEvent<DeviceId, I>
1045{
1046    fn from(event: MulticastForwardingEvent<I, DeviceId>) -> IpLayerEvent<DeviceId, I> {
1047        IpLayerEvent::MulticastForwarding(event)
1048    }
1049}
1050
1051impl<DeviceId, I: IpLayerIpExt> IpLayerEvent<DeviceId, I> {
1052    /// Changes the device id type with `map`.
1053    pub fn map_device<N, F: Fn(DeviceId) -> N>(self, map: F) -> IpLayerEvent<N, I> {
1054        match self {
1055            IpLayerEvent::AddRoute(types::AddableEntry { subnet, device, gateway, metric }) => {
1056                IpLayerEvent::AddRoute(types::AddableEntry {
1057                    subnet,
1058                    device: map(device),
1059                    gateway,
1060                    metric,
1061                })
1062            }
1063            IpLayerEvent::RemoveRoutes { subnet, device, gateway } => {
1064                IpLayerEvent::RemoveRoutes { subnet, device: map(device), gateway }
1065            }
1066            IpLayerEvent::MulticastForwarding(e) => {
1067                IpLayerEvent::MulticastForwarding(e.map_device(map))
1068            }
1069        }
1070    }
1071}
1072
/// An event signifying a router advertisement has been received.
///
/// `Debug` is implemented through `Derivative` rather than the standard
/// derive so that `options_bytes` can be left out of the debug output.
#[derive(Derivative, PartialEq, Eq, Clone, Hash)]
#[derivative(Debug)]
pub struct RouterAdvertisementEvent<D> {
    /// The raw bytes of the router advertisement message's options.
    // NB: avoid deriving Debug for this since it could contain PII.
    #[derivative(Debug = "ignore")]
    pub options_bytes: Box<[u8]>,
    /// The source address of the RA message.
    pub source: net_types::ip::Ipv6Addr,
    /// The device on which the message was received.
    pub device: D,
}
1086
1087impl<D> RouterAdvertisementEvent<D> {
1088    /// Maps the contained device ID type.
1089    pub fn map_device<N, F: Fn(D) -> N>(self, map: F) -> RouterAdvertisementEvent<N> {
1090        let Self { options_bytes, source, device } = self;
1091        RouterAdvertisementEvent { options_bytes, source, device: map(device) }
1092    }
1093}
1094
1095/// Ipv6-specific bindings execution context for the IP layer.
1096pub trait NdpBindingsContext<DeviceId>: EventContext<RouterAdvertisementEvent<DeviceId>> {}
1097impl<DeviceId, BC: EventContext<RouterAdvertisementEvent<DeviceId>>> NdpBindingsContext<DeviceId>
1098    for BC
1099{
1100}
1101
/// Defines how socket marks should be handled by the IP layer.
///
/// Both methods are associated functions (no `self`) returning static
/// slices, so the answers are fixed per implementing type.
pub trait MarksBindingsContext {
    /// Mark domains for marks that should be kept when an egress packet is
    /// passed from the IP layer to the device. For egress packets that are
    /// delivered locally through the loopback interface, these marks are
    /// passed to the ingress path and can be observed by ingress filter hooks.
    fn marks_to_keep_on_egress() -> &'static [MarkDomain];

    /// Mark domains for marks that should be copied to ingress packets. If
    /// early demux results in a socket then these marks are copied from the
    /// socket to the packet and can be observed in the `LOCAL_INGRESS` filter
    /// hook.
    fn marks_to_set_on_ingress() -> &'static [MarkDomain];
}
1116
1117/// The bindings execution context for the IP layer.
1118pub trait IpLayerBindingsContext<I: IpLayerIpExt, DeviceId>:
1119    InstantContext
1120    + EventContext<IpLayerEvent<DeviceId, I>>
1121    + FilterBindingsContext<DeviceId>
1122    + TxMetadataBindingsTypes
1123    + IpRoutingBindingsTypes
1124    + MarksBindingsContext
1125{
1126}
1127impl<
1128    I: IpLayerIpExt,
1129    DeviceId,
1130    BC: InstantContext
1131        + EventContext<IpLayerEvent<DeviceId, I>>
1132        + FilterBindingsContext<DeviceId>
1133        + TxMetadataBindingsTypes
1134        + IpRoutingBindingsTypes
1135        + MarksBindingsContext,
1136> IpLayerBindingsContext<I, DeviceId> for BC
1137{
1138}
1139
1140/// A marker trait for bindings types at the IP layer.
1141pub trait IpLayerBindingsTypes:
1142    IcmpBindingsTypes + IpStateBindingsTypes + IpRoutingBindingsTypes
1143{
1144}
1145impl<BT: IcmpBindingsTypes + IpStateBindingsTypes + IpRoutingBindingsTypes> IpLayerBindingsTypes
1146    for BT
1147{
1148}
1149
1150/// The execution context for the IP layer.
1151pub trait IpLayerContext<
1152    I: IpLayerIpExt,
1153    BC: IpLayerBindingsContext<I, <Self as DeviceIdContext<AnyDevice>>::DeviceId>,
1154>:
1155    IpStateContext<I, BC>
1156    + IpDeviceContext<I>
1157    + IpDeviceMtuContext<I>
1158    + IpDeviceSendContext<I, BC>
1159    + IcmpErrorHandler<I, BC>
1160    + MulticastForwardingStateContext<I, BC>
1161    + MulticastForwardingDeviceContext<I>
1162    + CounterContext<MulticastForwardingCounters<I>>
1163    + ResourceCounterContext<<Self as DeviceIdContext<AnyDevice>>::DeviceId, IpCounters<I>>
1164{
1165}
1166
1167impl<
1168    I: IpLayerIpExt,
1169    BC: IpLayerBindingsContext<I, <CC as DeviceIdContext<AnyDevice>>::DeviceId>,
1170    CC: IpStateContext<I, BC>
1171        + IpDeviceContext<I>
1172        + IpDeviceMtuContext<I>
1173        + IpDeviceSendContext<I, BC>
1174        + IcmpErrorHandler<I, BC>
1175        + MulticastForwardingStateContext<I, BC>
1176        + MulticastForwardingDeviceContext<I>
1177        + CounterContext<MulticastForwardingCounters<I>>
1178        + ResourceCounterContext<<Self as DeviceIdContext<AnyDevice>>::DeviceId, IpCounters<I>>,
1179> IpLayerContext<I, BC> for CC
1180{
1181}
1182
1183fn is_unicast_assigned<I: IpLayerIpExt>(status: &I::AddressStatus) -> bool {
1184    #[derive(GenericOverIp)]
1185    #[generic_over_ip(I, Ip)]
1186    struct WrapAddressStatus<'a, I: IpLayerIpExt>(&'a I::AddressStatus);
1187
1188    I::map_ip(
1189        WrapAddressStatus(status),
1190        |WrapAddressStatus(status)| match status {
1191            Ipv4PresentAddressStatus::UnicastAssigned
1192            | Ipv4PresentAddressStatus::LoopbackSubnet => true,
1193            Ipv4PresentAddressStatus::UnicastTentative
1194            | Ipv4PresentAddressStatus::LimitedBroadcast
1195            | Ipv4PresentAddressStatus::SubnetBroadcast
1196            | Ipv4PresentAddressStatus::Multicast => false,
1197        },
1198        |WrapAddressStatus(status)| match status {
1199            Ipv6PresentAddressStatus::UnicastAssigned => true,
1200            Ipv6PresentAddressStatus::Multicast | Ipv6PresentAddressStatus::UnicastTentative => {
1201                false
1202            }
1203        },
1204    )
1205}
1206
1207fn is_local_assigned_address<I: Ip + IpLayerIpExt, CC: IpDeviceIngressStateContext<I>>(
1208    core_ctx: &mut CC,
1209    device: &CC::DeviceId,
1210    addr: IpDeviceAddr<I::Addr>,
1211) -> bool {
1212    match core_ctx.address_status_for_device(addr.into(), device) {
1213        AddressStatus::Present(status) => is_unicast_assigned::<I>(&status),
1214        AddressStatus::Unassigned => false,
1215    }
1216}
1217
1218fn get_device_with_assigned_address<I, CC>(
1219    core_ctx: &mut CC,
1220    addr: IpDeviceAddr<I::Addr>,
1221) -> Option<(CC::DeviceId, I::AddressStatus)>
1222where
1223    I: IpLayerIpExt,
1224    CC: IpDeviceContext<I>,
1225{
1226    core_ctx.with_address_statuses(addr.into(), |mut it| {
1227        it.find_map(|(device, status)| {
1228            is_unicast_assigned::<I>(&status).then_some((device, status))
1229        })
1230    })
1231}
1232
1233// Returns the local IP address to use for sending packets from the
1234// given device to `addr`, restricting to `local_ip` if it is not
1235// `None`.
1236fn get_local_addr<I: Ip + IpLayerIpExt, CC: IpDeviceContext<I>>(
1237    core_ctx: &mut CC,
1238    local_ip_and_policy: Option<(IpDeviceAddr<I::Addr>, NonLocalSrcAddrPolicy)>,
1239    device: &CC::DeviceId,
1240    remote_addr: Option<RoutableIpAddr<I::Addr>>,
1241) -> Result<IpDeviceAddr<I::Addr>, ResolveRouteError> {
1242    match local_ip_and_policy {
1243        Some((local_ip, NonLocalSrcAddrPolicy::Allow)) => Ok(local_ip),
1244        Some((local_ip, NonLocalSrcAddrPolicy::Deny)) => {
1245            is_local_assigned_address(core_ctx, device, local_ip)
1246                .then_some(local_ip)
1247                .ok_or(ResolveRouteError::NoSrcAddr)
1248        }
1249        None => core_ctx
1250            .get_local_addr_for_remote(device, remote_addr.map(Into::into))
1251            .ok_or(ResolveRouteError::NoSrcAddr),
1252    }
1253}
1254
/// An error that occurred while resolving the route to a destination.
#[derive(Error, Copy, Clone, Debug, Eq, GenericOverIp, PartialEq)]
#[generic_over_ip()]
pub enum ResolveRouteError {
    /// A source address could not be selected.
    #[error("a source address could not be selected")]
    NoSrcAddr,
    /// The destination is unreachable.
    #[error("no route exists to the destination IP address")]
    Unreachable,
}
1266
1267/// Like [`get_local_addr`], but willing to forward internally as necessary.
1268fn get_local_addr_with_internal_forwarding<I, CC>(
1269    core_ctx: &mut CC,
1270    local_ip_and_policy: Option<(IpDeviceAddr<I::Addr>, NonLocalSrcAddrPolicy)>,
1271    device: &CC::DeviceId,
1272    remote_addr: Option<RoutableIpAddr<I::Addr>>,
1273) -> Result<(IpDeviceAddr<I::Addr>, InternalForwarding<CC::DeviceId>), ResolveRouteError>
1274where
1275    I: IpLayerIpExt,
1276    CC: IpDeviceContext<I>,
1277{
1278    match get_local_addr(core_ctx, local_ip_and_policy, device, remote_addr) {
1279        Ok(src_addr) => Ok((src_addr, InternalForwarding::NotUsed)),
1280        Err(e) => {
1281            // If a local_ip was specified, the local_ip is assigned to a
1282            // device, and that device has forwarding enabled, use internal
1283            // forwarding.
1284            //
1285            // This enables a weak host model when the Netstack is configured as
1286            // a router. Conceptually the netstack is forwarding the packet from
1287            // the local IP's device to the output device of the selected route.
1288            if let Some((local_ip, _policy)) = local_ip_and_policy {
1289                if let Some((device, _addr_status)) =
1290                    get_device_with_assigned_address(core_ctx, local_ip)
1291                {
1292                    if core_ctx.is_device_unicast_forwarding_enabled(&device) {
1293                        return Ok((local_ip, InternalForwarding::Used(device)));
1294                    }
1295                }
1296            }
1297            Err(e)
1298        }
1299    }
1300}
1301
/// Information gathered during a rule walk, bundled with a caller-defined
/// value. This type is introduced so that `walk_rules` can be extended later
/// with more information about the walk if needed.
#[derive(Debug, PartialEq, Eq)]
struct RuleWalkInfo<O> {
    /// Whether there is a rule with a source address matcher during the walk.
    observed_source_address_matcher: bool,
    /// The custom info carried. For example this could be the lookup result from the user provided
    /// function.
    inner: O,
}
1312
1313/// A helper function that traverses through the rules table.
1314///
1315/// To walk through the rules, you need to provide it with an initial value for the loop and a
1316/// callback function that yieds a [`ControlFlow`] result to indicate whether the traversal should
1317/// stop.
1318///
1319/// # Returns
1320///
1321/// - `ControlFlow::Break(RuleAction::Lookup(_))` if we hit a lookup rule and an output is
1322///   yielded from the route table.
1323/// - `ControlFlow::Break(RuleAction::Unreachable)` if we hit an unreachable rule.
1324/// - `ControlFlow::Continue(_)` if we finished walking the rules table without yielding any
1325///   result.
1326fn walk_rules<
1327    I: IpLayerIpExt,
1328    BT: IpRoutingBindingsTypes + MatcherBindingsTypes,
1329    CC: IpRouteTablesContext<I, BT, DeviceId: InterfaceProperties<BT::DeviceClass>>,
1330    O,
1331    State,
1332    F: FnMut(
1333        State,
1334        &mut CC::IpDeviceIdCtx<'_>,
1335        &RoutingTable<I, CC::DeviceId>,
1336    ) -> ControlFlow<O, State>,
1337>(
1338    core_ctx: &mut CC,
1339    rules: &RulesTable<I, CC::DeviceId, BT>,
1340    init: State,
1341    rule_input: &RuleInput<'_, I, CC::DeviceId>,
1342    mut lookup_table: F,
1343) -> ControlFlow<RuleAction<RuleWalkInfo<O>>, RuleWalkInfo<State>> {
1344    rules.iter().try_fold(
1345        RuleWalkInfo { inner: init, observed_source_address_matcher: false },
1346        |RuleWalkInfo { inner: state, observed_source_address_matcher },
1347         Rule { action, matcher }| {
1348            let observed_source_address_matcher =
1349                observed_source_address_matcher || matcher.source_address_matcher.is_some();
1350            if !matcher.matches(rule_input) {
1351                return ControlFlow::Continue(RuleWalkInfo {
1352                    inner: state,
1353                    observed_source_address_matcher,
1354                });
1355            }
1356            match action {
1357                RuleAction::Unreachable => return ControlFlow::Break(RuleAction::Unreachable),
1358                RuleAction::Lookup(table_id) => core_ctx.with_ip_routing_table(
1359                    &table_id,
1360                    |core_ctx, table| match lookup_table(state, core_ctx, table) {
1361                        ControlFlow::Break(out) => {
1362                            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
1363                                inner: out,
1364                                observed_source_address_matcher,
1365                            }))
1366                        }
1367                        ControlFlow::Continue(state) => ControlFlow::Continue(RuleWalkInfo {
1368                            inner: state,
1369                            observed_source_address_matcher,
1370                        }),
1371                    },
1372                ),
1373            }
1374        },
1375    )
1376}
1377
1378/// Returns the outgoing routing instructions for reaching the given destination.
1379///
1380/// If a `device` is specified, the resolved route is limited to those that
1381/// egress over the device.
1382///
1383/// If `src_ip` is specified the resolved route is limited to those that egress
1384/// over a device with the address assigned.
1385///
1386/// This function should only be used for calculating a route for an outgoing packet
1387/// that is generated by us.
1388pub fn resolve_output_route_to_destination<
1389    I: Ip + IpDeviceStateIpExt + IpDeviceIpExt + IpLayerIpExt,
1390    BC: IpDeviceBindingsContext<I, CC::DeviceId> + IpLayerBindingsContext<I, CC::DeviceId>,
1391    CC: IpStateContext<I, BC> + IpDeviceContext<I> + device::IpDeviceConfigurationContext<I, BC>,
1392>(
1393    core_ctx: &mut CC,
1394    device: Option<&CC::DeviceId>,
1395    src_ip_and_policy: Option<(IpDeviceAddr<I::Addr>, NonLocalSrcAddrPolicy)>,
1396    dst_ip: Option<RoutableIpAddr<I::Addr>>,
1397    marks: &Marks,
1398) -> Result<ResolvedRoute<I, CC::DeviceId>, ResolveRouteError> {
1399    enum LocalDelivery<A, D> {
1400        WeakLoopback { dst_ip: A, device: D },
1401        StrongForDevice(D),
1402    }
1403
1404    // Check if locally destined. If the destination is an address assigned
1405    // on an interface, and an egress interface wasn't specifically
1406    // selected, route via the loopback device. This lets us operate as a
1407    // strong host when an outgoing interface is explicitly requested while
1408    // still enabling local delivery via the loopback interface, which is
1409    // acting as a weak host. Note that if the loopback interface is
1410    // requested as an outgoing interface, route selection is still
1411    // performed as a strong host! This makes the loopback interface behave
1412    // more like the other interfaces on the system.
1413    //
1414    // TODO(https://fxbug.dev/42175703): Encode the delivery of locally-
1415    // destined packets to loopback in the route table.
1416    //
1417    // TODO(https://fxbug.dev/322539434): Linux is more permissive about
1418    // allowing cross-device local delivery even when SO_BINDTODEVICE or
1419    // link-local addresses are involved, and this behavior may need to be
1420    // emulated.
1421    let local_delivery_instructions: Option<LocalDelivery<IpDeviceAddr<I::Addr>, CC::DeviceId>> = {
1422        let dst_ip = dst_ip.and_then(IpDeviceAddr::new_from_socket_ip_addr);
1423        match (device, dst_ip) {
1424            (Some(device), Some(dst_ip)) => is_local_assigned_address(core_ctx, device, dst_ip)
1425                .then_some(LocalDelivery::StrongForDevice(device.clone())),
1426            (None, Some(dst_ip)) => {
1427                get_device_with_assigned_address(core_ctx, dst_ip).map(
1428                    |(dst_device, _addr_status)| {
1429                        // If either the source or destination addresses needs
1430                        // a zone ID, then use strong host to enforce that the
1431                        // source and destination addresses are assigned to the
1432                        // same interface.
1433                        if src_ip_and_policy
1434                            .is_some_and(|(ip, _policy)| ip.as_ref().must_have_zone())
1435                            || dst_ip.as_ref().must_have_zone()
1436                        {
1437                            LocalDelivery::StrongForDevice(dst_device)
1438                        } else {
1439                            LocalDelivery::WeakLoopback { dst_ip, device: dst_device }
1440                        }
1441                    },
1442                )
1443            }
1444            (_, None) => None,
1445        }
1446    };
1447
1448    if let Some(local_delivery) = local_delivery_instructions {
1449        let loopback = core_ctx.loopback_id().ok_or(ResolveRouteError::Unreachable)?;
1450
1451        let (src_addr, dest_device) = match local_delivery {
1452            LocalDelivery::WeakLoopback { dst_ip, device } => {
1453                let src_ip = match src_ip_and_policy {
1454                    Some((src_ip, NonLocalSrcAddrPolicy::Deny)) => {
1455                        let _device = get_device_with_assigned_address(core_ctx, src_ip)
1456                            .ok_or(ResolveRouteError::NoSrcAddr)?;
1457                        src_ip
1458                    }
1459                    Some((src_ip, NonLocalSrcAddrPolicy::Allow)) => src_ip,
1460                    None => dst_ip,
1461                };
1462                (src_ip, device)
1463            }
1464            LocalDelivery::StrongForDevice(device) => {
1465                (get_local_addr(core_ctx, src_ip_and_policy, &device, dst_ip)?, device)
1466            }
1467        };
1468        return Ok(ResolvedRoute {
1469            src_addr,
1470            local_delivery_device: Some(dest_device),
1471            device: loopback,
1472            next_hop: NextHop::RemoteAsNeighbor,
1473            internal_forwarding: InternalForwarding::NotUsed,
1474        });
1475    }
1476    let bound_address = src_ip_and_policy.map(|(sock_addr, _policy)| sock_addr.into_inner().get());
1477    let rule_input = RuleInput {
1478        packet_origin: PacketOrigin::Local { bound_address, bound_device: device },
1479        marks,
1480    };
1481    core_ctx.with_rules_table(|core_ctx, rules: &RulesTable<_, _, BC>| {
1482        let mut walk_rules = |rule_input, src_ip_and_policy| {
1483            walk_rules(
1484                core_ctx,
1485                rules,
1486                None, /* first error encountered */
1487                rule_input,
1488                |first_error, core_ctx, table| {
1489                    let mut matching_with_addr = table.lookup_filter_map(
1490                        core_ctx,
1491                        device,
1492                        dst_ip.map_or(I::UNSPECIFIED_ADDRESS, |a| a.addr()),
1493                        |core_ctx, d| {
1494                            Some(get_local_addr_with_internal_forwarding(
1495                                core_ctx,
1496                                src_ip_and_policy,
1497                                d,
1498                                dst_ip,
1499                            ))
1500                        },
1501                    );
1502
1503                    let first_error_in_this_table = match matching_with_addr.next() {
1504                        Some((
1505                            Destination { device, next_hop },
1506                            Ok((local_addr, internal_forwarding)),
1507                        )) => {
1508                            return ControlFlow::Break(Ok((
1509                                Destination { device: device.clone(), next_hop },
1510                                local_addr,
1511                                internal_forwarding,
1512                            )));
1513                        }
1514                        Some((_, Err(e))) => e,
1515                        // Note: rule evaluation will continue on to the next rule, if the
1516                        // previous rule was `Lookup` but the table didn't have the route
1517                        // inside of it.
1518                        None => return ControlFlow::Continue(first_error),
1519                    };
1520
1521                    matching_with_addr
1522                        .filter_map(|(destination, local_addr)| {
1523                            // Select successful routes. We ignore later errors
1524                            // since we've already saved the first one.
1525                            local_addr.ok_checked::<ResolveRouteError>().map(
1526                                |(local_addr, internal_forwarding)| {
1527                                    (destination, local_addr, internal_forwarding)
1528                                },
1529                            )
1530                        })
1531                        .next()
1532                        .map_or(
1533                            ControlFlow::Continue(first_error.or(Some(first_error_in_this_table))),
1534                            |(
1535                                Destination { device, next_hop },
1536                                local_addr,
1537                                internal_forwarding,
1538                            )| {
1539                                ControlFlow::Break(Ok((
1540                                    Destination { device: device.clone(), next_hop },
1541                                    local_addr,
1542                                    internal_forwarding,
1543                                )))
1544                            },
1545                        )
1546                },
1547            )
1548        };
1549
1550        let result = match walk_rules(&rule_input, src_ip_and_policy) {
1551            // Only try to resolve a route again if all of the following are true:
1552            // 1. The source address is not provided by the caller.
1553            // 2. A route is successfully resolved so we selected a source address.
1554            // 3. There is a rule with a source address matcher during the resolution.
1555            // The rationale is to make sure the route resolution converges to a sensible route
1556            // after considering the source address we select.
1557            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
1558                inner: Ok((_dst, selected_src_addr, _internal_forwarding)),
1559                observed_source_address_matcher: true,
1560            })) if src_ip_and_policy.is_none() => walk_rules(
1561                &RuleInput {
1562                    packet_origin: PacketOrigin::Local {
1563                        bound_address: Some(selected_src_addr.into()),
1564                        bound_device: device,
1565                    },
1566                    marks,
1567                },
1568                Some((selected_src_addr, NonLocalSrcAddrPolicy::Deny)),
1569            ),
1570            result => result,
1571        };
1572
1573        match result {
1574            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
1575                inner: result,
1576                observed_source_address_matcher: _,
1577            })) => {
1578                result.map(|(Destination { device, next_hop }, src_addr, internal_forwarding)| {
1579                    ResolvedRoute {
1580                        src_addr,
1581                        device,
1582                        local_delivery_device: None,
1583                        next_hop,
1584                        internal_forwarding,
1585                    }
1586                })
1587            }
1588            ControlFlow::Break(RuleAction::Unreachable) => Err(ResolveRouteError::Unreachable),
1589            ControlFlow::Continue(RuleWalkInfo {
1590                inner: first_error,
1591                observed_source_address_matcher: _,
1592            }) => Err(first_error.unwrap_or(ResolveRouteError::Unreachable)),
1593        }
1594    })
1595}
1596
/// Enables a blanket implementation of [`IpSocketContext`].
///
/// This is a pure marker trait: it declares no methods. A core context type
/// opts in by implementing it, and the blanket `IpSocketContext`
/// implementation in this module then applies, provided the type also
/// satisfies the other bounds listed on that implementation.
pub trait UseIpSocketContextBlanket {}
1602
1603impl<
1604    I: Ip + IpDeviceStateIpExt + IpDeviceIpExt + IpLayerIpExt,
1605    BC: IpDeviceBindingsContext<I, CC::DeviceId>
1606        + IpLayerBindingsContext<I, CC::DeviceId>
1607        + IpSocketBindingsContext<CC::DeviceId>,
1608    CC: IpLayerEgressContext<I, BC>
1609        + IpStateContext<I, BC>
1610        + IpDeviceContext<I>
1611        + IpDeviceConfirmReachableContext<I, BC>
1612        + IpDeviceMtuContext<I>
1613        + device::IpDeviceConfigurationContext<I, BC>
1614        + UseIpSocketContextBlanket,
1615> IpSocketContext<I, BC> for CC
1616{
1617    fn lookup_route(
1618        &mut self,
1619        _bindings_ctx: &mut BC,
1620        device: Option<&CC::DeviceId>,
1621        local_ip: Option<IpDeviceAddr<I::Addr>>,
1622        addr: RoutableIpAddr<I::Addr>,
1623        transparent: bool,
1624        marks: &Marks,
1625    ) -> Result<ResolvedRoute<I, CC::DeviceId>, ResolveRouteError> {
1626        let src_ip_and_policy = local_ip.map(|local_ip| {
1627            (
1628                local_ip,
1629                if transparent {
1630                    NonLocalSrcAddrPolicy::Allow
1631                } else {
1632                    NonLocalSrcAddrPolicy::Deny
1633                },
1634            )
1635        });
1636        let res =
1637            resolve_output_route_to_destination(self, device, src_ip_and_policy, Some(addr), marks);
1638        trace!(
1639            "lookup_route(\
1640                device={device:?}, \
1641                local_ip={local_ip:?}, \
1642                addr={addr:?}, \
1643                transparent={transparent:?}, \
1644                marks={marks:?}) => {res:?}"
1645        );
1646        res
1647    }
1648
1649    fn send_ip_packet<S>(
1650        &mut self,
1651        bindings_ctx: &mut BC,
1652        meta: SendIpPacketMeta<
1653            I,
1654            &<CC as DeviceIdContext<AnyDevice>>::DeviceId,
1655            SpecifiedAddr<I::Addr>,
1656        >,
1657        body: S,
1658        packet_metadata: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
1659    ) -> Result<(), IpSendFrameError<S>>
1660    where
1661        S: TransportPacketSerializer<I>,
1662        S::Buffer: BufferMut,
1663    {
1664        send_ip_packet_from_device(self, bindings_ctx, meta.into(), body, packet_metadata)
1665    }
1666
1667    fn get_loopback_device(&mut self) -> Option<Self::DeviceId> {
1668        device::IpDeviceConfigurationContext::<I, _>::loopback_id(self)
1669    }
1670
1671    fn confirm_reachable(
1672        &mut self,
1673        bindings_ctx: &mut BC,
1674        dst: SpecifiedAddr<I::Addr>,
1675        input: RuleInput<'_, I, Self::DeviceId>,
1676    ) {
1677        match lookup_route_table(self, dst.get(), input) {
1678            Some(Destination { next_hop, device }) => {
1679                let neighbor = match next_hop {
1680                    NextHop::RemoteAsNeighbor => dst,
1681                    NextHop::Gateway(gateway) => gateway,
1682                    NextHop::Broadcast(marker) => {
1683                        I::map_ip::<_, ()>(
1684                            WrapBroadcastMarker(marker),
1685                            |WrapBroadcastMarker(())| {
1686                                debug!(
1687                                    "can't confirm {dst:?}@{device:?} as reachable: \
1688                                    dst is a broadcast address"
1689                                );
1690                            },
1691                            |WrapBroadcastMarker(never)| match never {},
1692                        );
1693                        return;
1694                    }
1695                };
1696                IpDeviceConfirmReachableContext::confirm_reachable(
1697                    self,
1698                    bindings_ctx,
1699                    &device,
1700                    neighbor,
1701                );
1702            }
1703            None => {
1704                debug!("can't confirm {dst:?} as reachable: no route");
1705            }
1706        }
1707    }
1708}
1709
/// Trait that provides basic socket information for types that carry a socket
/// ID.
///
/// `CC` is the core context type handed to each accessor; it may be unsized.
pub trait SocketMetadata<CC>
where
    CC: ?Sized,
{
    /// Returns the cookie of the socket, using `core_ctx` to look it up.
    fn socket_cookie(&self, core_ctx: &mut CC) -> SocketCookie;
    /// Returns the marks of the socket, using `core_ctx` to look them up.
    fn marks(&self, core_ctx: &mut CC) -> Marks;
}
1721
1722impl<T, O, CC> SocketMetadata<CC> for EitherStack<T, O>
1723where
1724    CC: ?Sized,
1725    T: SocketMetadata<CC>,
1726    O: SocketMetadata<CC>,
1727{
1728    fn socket_cookie(&self, core_ctx: &mut CC) -> SocketCookie {
1729        match self {
1730            Self::ThisStack(t) => t.socket_cookie(core_ctx),
1731            Self::OtherStack(o) => o.socket_cookie(core_ctx),
1732        }
1733    }
1734
1735    fn marks(&self, core_ctx: &mut CC) -> Marks {
1736        match self {
1737            Self::ThisStack(t) => t.marks(core_ctx),
1738            Self::OtherStack(o) => o.marks(core_ctx),
1739        }
1740    }
1741}
1742
/// The IP context providing dispatch to the available transport protocols.
///
/// This trait acts like a demux on the transport protocol for ingress IP
/// packets.
pub trait IpTransportDispatchContext<I: IpLayerIpExt, BC>: DeviceIdContext<AnyDevice> {
    /// The socket type produced by [`Self::early_demux`]; it exposes socket
    /// metadata (cookie and marks) through [`SocketMetadata`].
    type EarlyDemuxSocket: SocketMetadata<Self>;

    /// Performs early demux for a received packet.
    ///
    /// Returns the matching socket, or `None` if early demux did not yield
    /// one.
    fn early_demux<B: ParseBuffer>(
        &mut self,
        device: &Self::DeviceId,
        frame_dst: Option<FrameDestination>,
        src_ip: I::Addr,
        dst_ip: I::Addr,
        proto: I::Proto,
        body: B,
    ) -> Option<Self::EarlyDemuxSocket>;

    /// Dispatches a received incoming IP packet to the appropriate protocol.
    /// In case of a failure returns the kind of the ICMP error that should be
    /// sent back to the source.
    ///
    /// `early_demux_socket` is presumably the result of a prior
    /// [`Self::early_demux`] call for the same packet, if any. TODO confirm
    /// against callers.
    fn dispatch_receive_ip_packet<B: BufferMut, H: IpHeaderInfo<I>>(
        &mut self,
        bindings_ctx: &mut BC,
        device: &Self::DeviceId,
        src_ip: I::RecvSrcAddr,
        dst_ip: SpecifiedAddr<I::Addr>,
        proto: I::Proto,
        body: B,
        info: &LocalDeliveryPacketInfo<I, H>,
        early_demux_socket: Option<Self::EarlyDemuxSocket>,
    ) -> Result<(), I::IcmpError>;
}
1777
/// A marker trait for all the contexts required for IP ingress.
///
/// The trait has no items of its own; it only bundles the supertraits below
/// (and requires the device ID to expose interface properties for the
/// bindings' device class) so that ingress code can name a single bound.
pub trait IpLayerIngressContext<I: IpLayerIpExt, BC: IpLayerBindingsContext<I, Self::DeviceId>>:
    IpTransportDispatchContext<
        I,
        BC,
        DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
    > + IpDeviceIngressStateContext<I>
    + IpDeviceMtuContext<I>
    + IpDeviceSendContext<I, BC>
    + IcmpErrorHandler<I, BC>
    + IpLayerContext<I, BC>
    + FragmentHandler<I, BC>
    + FilterHandlerProvider<I, BC>
    + RawIpSocketHandler<I, BC>
{
}
1794
// Blanket implementation: any `CC` that satisfies every supertrait of
// `IpLayerIngressContext` automatically implements the marker trait.
impl<
    I: IpLayerIpExt,
    BC: IpLayerBindingsContext<I, CC::DeviceId>,
    CC: IpTransportDispatchContext<
            I,
            BC,
            DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
        > + IpDeviceIngressStateContext<I>
        + IpDeviceMtuContext<I>
        + IpDeviceSendContext<I, BC>
        + IcmpErrorHandler<I, BC>
        + IpLayerContext<I, BC>
        + FragmentHandler<I, BC>
        + FilterHandlerProvider<I, BC>
        + RawIpSocketHandler<I, BC>,
> IpLayerIngressContext<I, BC> for CC
{
}
1813
/// A marker trait for all the contexts required for IP egress.
///
/// The trait has no items of its own; it bundles the device send context,
/// the filter handler provider and per-device IP counters into one bound.
pub trait IpLayerEgressContext<I, BC>:
    IpDeviceSendContext<I, BC, DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>>
    + FilterHandlerProvider<I, BC>
    + ResourceCounterContext<Self::DeviceId, IpCounters<I>>
where
    I: IpLayerIpExt,
    BC: FilterBindingsContext<Self::DeviceId> + TxMetadataBindingsTypes,
{
}
1824
// Blanket implementation: any `CC` that satisfies every supertrait of
// `IpLayerEgressContext` automatically implements the marker trait.
impl<I, BC, CC> IpLayerEgressContext<I, BC> for CC
where
    I: IpLayerIpExt,
    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes,
    CC: IpDeviceSendContext<I, BC, DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>>
        + FilterHandlerProvider<I, BC>
        + ResourceCounterContext<Self::DeviceId, IpCounters<I>>,
{
}
1834
/// A marker trait for all the contexts required for IP forwarding.
///
/// Forwarding needs everything egress needs ([`IpLayerEgressContext`]) plus
/// ICMP error handling and access to device MTUs.
pub trait IpLayerForwardingContext<I: IpLayerIpExt, BC: IpLayerBindingsContext<I, Self::DeviceId>>:
    IpLayerEgressContext<I, BC> + IcmpErrorHandler<I, BC> + IpDeviceMtuContext<I>
{
}
1840
// Blanket implementation: any `CC` that satisfies every supertrait of
// `IpLayerForwardingContext` automatically implements the marker trait.
impl<
    I: IpLayerIpExt,
    BC: IpLayerBindingsContext<I, CC::DeviceId>,
    CC: IpLayerEgressContext<I, BC> + IcmpErrorHandler<I, BC> + IpDeviceMtuContext<I>,
> IpLayerForwardingContext<I, BC> for CC
{
}
1848
/// A builder for IPv4 state.
///
/// The default builder wraps a default [`Icmpv4StateBuilder`].
#[derive(Copy, Clone, Default)]
pub struct Ipv4StateBuilder {
    /// Builder for the ICMPv4 portion of the state.
    icmp: Icmpv4StateBuilder,
}
1854
1855impl Ipv4StateBuilder {
1856    /// Get the builder for the ICMPv4 state.
1857    #[cfg(any(test, feature = "testutils"))]
1858    pub fn icmpv4_builder(&mut self) -> &mut Icmpv4StateBuilder {
1859        &mut self.icmp
1860    }
1861
1862    /// Builds the [`Ipv4State`].
1863    pub fn build<
1864        CC: CoreTimerContext<IpLayerTimerId, BC>,
1865        StrongDeviceId: StrongDeviceIdentifier,
1866        BC: TimerContext + RngContext + IpLayerBindingsTypes,
1867    >(
1868        self,
1869        bindings_ctx: &mut BC,
1870    ) -> Ipv4State<StrongDeviceId, BC> {
1871        let Ipv4StateBuilder { icmp } = self;
1872
1873        Ipv4State {
1874            inner: IpStateInner::new::<CC>(bindings_ctx),
1875            icmp: icmp.build(),
1876            next_packet_id: Default::default(),
1877        }
1878    }
1879}
1880
/// A builder for IPv6 state.
///
/// The secret key used to generate stable SLAAC addresses is optional in the
/// builder, but [`Ipv6StateBuilder::build`] panics if it has not been set.
/// The `Default` implementation only pre-populates the key in tests or when
/// the `testutils` feature is enabled; otherwise callers must provide one via
/// [`Ipv6StateBuilder::slaac_stable_secret_key`] before building.
#[derive(Copy, Clone)]
pub struct Ipv6StateBuilder {
    /// Builder for the ICMPv6 portion of the state.
    icmp: Icmpv6StateBuilder,
    /// Secret key for stable SLAAC addresses; `None` until configured.
    slaac_stable_secret_key: Option<IidSecret>,
}
1889
1890impl Ipv6StateBuilder {
1891    /// Sets the secret key used to generate stable SLAAC addresses.
1892    ///
1893    /// If `slaac_stable_secret_key` is left unset, opaque IIDs will not be used to
1894    /// generate stable SLAAC addresses.
1895    pub fn slaac_stable_secret_key(&mut self, secret_key: IidSecret) -> &mut Self {
1896        self.slaac_stable_secret_key = Some(secret_key);
1897        self
1898    }
1899
1900    /// Builds the [`Ipv6State`].
1901    ///
1902    /// # Panics
1903    ///
1904    /// Panics if the `slaac_stable_secret_key` has not been set.
1905    pub fn build<
1906        CC: CoreTimerContext<IpLayerTimerId, BC>,
1907        StrongDeviceId: StrongDeviceIdentifier,
1908        BC: TimerContext + RngContext + IpLayerBindingsTypes,
1909    >(
1910        self,
1911        bindings_ctx: &mut BC,
1912    ) -> Ipv6State<StrongDeviceId, BC> {
1913        let Ipv6StateBuilder { icmp, slaac_stable_secret_key } = self;
1914
1915        let slaac_stable_secret_key = slaac_stable_secret_key
1916            .expect("stable SLAAC secret key was not provided to `Ipv6StateBuilder`");
1917
1918        Ipv6State {
1919            inner: IpStateInner::new::<CC>(bindings_ctx),
1920            icmp: icmp.build(),
1921            slaac_counters: Default::default(),
1922            slaac_temp_secret_key: IidSecret::new_random(&mut bindings_ctx.rng()),
1923            slaac_stable_secret_key,
1924        }
1925    }
1926}
1927
impl Default for Ipv6StateBuilder {
    /// Creates a builder with a default ICMPv6 builder.
    ///
    /// In tests and with the `testutils` feature the stable SLAAC secret key
    /// is pre-populated with `IidSecret::ALL_ONES`; in all other
    /// configurations it is left unset and must be provided before `build`.
    fn default() -> Self {
        // Exactly one of the two cfg-gated bindings below is compiled in.
        #[cfg(any(test, feature = "testutils"))]
        let slaac_stable_secret_key = Some(IidSecret::ALL_ONES);

        #[cfg(not(any(test, feature = "testutils")))]
        let slaac_stable_secret_key = None;

        Self { icmp: Icmpv6StateBuilder::default(), slaac_stable_secret_key }
    }
}
1939
/// The stack's IPv4 state.
///
/// Built by [`Ipv4StateBuilder::build`].
pub struct Ipv4State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> {
    /// The common inner IP layer state.
    pub inner: IpStateInner<Ipv4, StrongDeviceId, BT>,
    /// The ICMP state.
    pub icmp: Icmpv4State<BT>,
    /// The atomic counter providing IPv4 packet identifiers.
    pub next_packet_id: AtomicU16,
}
1949
1950impl<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
1951    AsRef<IpStateInner<Ipv4, StrongDeviceId, BT>> for Ipv4State<StrongDeviceId, BT>
1952{
1953    fn as_ref(&self) -> &IpStateInner<Ipv4, StrongDeviceId, BT> {
1954        &self.inner
1955    }
1956}
1957
1958/// Generates an IP packet ID.
1959///
1960/// This is only meaningful for IPv4, see [`IpLayerIpExt`].
1961pub fn gen_ip_packet_id<I: IpLayerIpExt, CC: IpDeviceEgressStateContext<I>>(
1962    core_ctx: &mut CC,
1963) -> I::PacketId {
1964    core_ctx.with_next_packet_id(|state| I::next_packet_id_from_state(state))
1965}
1966
/// The stack's IPv6 state.
///
/// Built by [`Ipv6StateBuilder::build`].
pub struct Ipv6State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> {
    /// The common inner IP layer state.
    pub inner: IpStateInner<Ipv6, StrongDeviceId, BT>,
    /// ICMPv6 state.
    pub icmp: Icmpv6State<BT>,
    /// Stateless address autoconfiguration counters.
    pub slaac_counters: SlaacCounters,
    /// Secret key used for generating SLAAC temporary addresses.
    pub slaac_temp_secret_key: IidSecret,
    /// Secret key used for generating SLAAC stable addresses.
    ///
    /// This key is always present: [`Ipv6StateBuilder::build`] panics if no
    /// stable secret key was configured on the builder.
    pub slaac_stable_secret_key: IidSecret,
}
1983
1984impl<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
1985    AsRef<IpStateInner<Ipv6, StrongDeviceId, BT>> for Ipv6State<StrongDeviceId, BT>
1986{
1987    fn as_ref(&self) -> &IpStateInner<Ipv6, StrongDeviceId, BT> {
1988        &self.inner
1989    }
1990}
1991
1992impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
1993    OrderedLockAccess<IpPacketFragmentCache<I, BT>> for IpStateInner<I, D, BT>
1994{
1995    type Lock = Mutex<IpPacketFragmentCache<I, BT>>;
1996    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
1997        OrderedLockRef::new(&self.fragment_cache)
1998    }
1999}
2000
2001impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2002    OrderedLockAccess<PmtuCache<I, BT>> for IpStateInner<I, D, BT>
2003{
2004    type Lock = Mutex<PmtuCache<I, BT>>;
2005    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2006        OrderedLockRef::new(&self.pmtu_cache)
2007    }
2008}
2009
2010impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2011    OrderedLockAccess<RulesTable<I, D, BT>> for IpStateInner<I, D, BT>
2012{
2013    type Lock = RwLock<RulesTable<I, D, BT>>;
2014    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2015        OrderedLockRef::new(&self.rules_table)
2016    }
2017}
2018
2019impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2020    OrderedLockAccess<HashMap<RoutingTableId<I, D, BT>, PrimaryRc<BaseRoutingTableState<I, D, BT>>>>
2021    for IpStateInner<I, D, BT>
2022{
2023    type Lock =
2024        Mutex<HashMap<RoutingTableId<I, D, BT>, PrimaryRc<BaseRoutingTableState<I, D, BT>>>>;
2025    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2026        OrderedLockRef::new(&self.tables)
2027    }
2028}
2029
2030impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpRoutingBindingsTypes>
2031    OrderedLockAccess<RoutingTable<I, D>> for RoutingTableId<I, D, BT>
2032{
2033    type Lock = RwLock<RoutingTable<I, D>>;
2034    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2035        let Self(inner) = self;
2036        OrderedLockRef::new(&inner.routing_table)
2037    }
2038}
2039
2040impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2041    OrderedLockAccess<MulticastForwardingState<I, D, BT>> for IpStateInner<I, D, BT>
2042{
2043    type Lock = RwLock<MulticastForwardingState<I, D, BT>>;
2044    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2045        OrderedLockRef::new(&self.multicast_forwarding)
2046    }
2047}
2048
2049impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2050    OrderedLockAccess<RawIpSocketMap<I, D::Weak, BT>> for IpStateInner<I, D, BT>
2051{
2052    type Lock = RwLock<RawIpSocketMap<I, D::Weak, BT>>;
2053    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2054        OrderedLockRef::new(&self.raw_sockets)
2055    }
2056}
2057
2058impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2059    OrderedLockAccess<filter::State<I, WeakAddressId<I, BT>, BT>> for IpStateInner<I, D, BT>
2060{
2061    type Lock = RwLock<filter::State<I, WeakAddressId<I, BT>, BT>>;
2062    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2063        OrderedLockRef::new(&self.filter)
2064    }
2065}
2066
/// Marker trait for the bindings types required by the IP layer's inner state.
///
/// The trait has no items of its own; it bundles the bindings-types traits
/// needed by the components stored in [`IpStateInner`].
pub trait IpStateBindingsTypes:
    PmtuBindingsTypes
    + FragmentBindingsTypes
    + RawIpSocketsBindingsTypes
    + FilterBindingsTypes
    + MulticastForwardingBindingsTypes
    + IpDeviceStateBindingsTypes
    + IpRoutingBindingsTypes
{
}
// Blanket implementation: any `BT` that satisfies every supertrait of
// `IpStateBindingsTypes` automatically implements the marker trait.
impl<BT> IpStateBindingsTypes for BT where
    BT: PmtuBindingsTypes
        + FragmentBindingsTypes
        + RawIpSocketsBindingsTypes
        + FilterBindingsTypes
        + MulticastForwardingBindingsTypes
        + IpDeviceStateBindingsTypes
        + IpRoutingBindingsTypes
{
}
2088
/// Bindings ID for a routing table.
///
/// Distinguishes the main table from tables that Bindings added and tagged
/// with their own identifier. `Clone` is only available when the bindings'
/// routing table ID type is itself `Clone`.
#[derive(Derivative)]
#[derivative(Debug(bound = ""))]
#[derivative(Clone(bound = "BT::RoutingTableId: Clone"))]
pub enum RoutingTableCookie<BT: IpRoutingBindingsTypes> {
    /// Main table.
    Main,
    /// A table added by user (Bindings).
    BindingsId(BT::RoutingTableId),
}
2099
/// State for a routing table.
///
/// Pairs the routing table itself with the cookie Bindings uses to identify
/// it.
#[derive(Derivative)]
#[derivative(Debug(bound = "D: Debug"))]
pub struct BaseRoutingTableState<I: Ip, D, BT: IpRoutingBindingsTypes> {
    /// The routing table, behind a read-write lock.
    routing_table: RwLock<RoutingTable<I, D>>,
    /// The bindings-facing identity of this table.
    bindings_id: RoutingTableCookie<BT>,
}
2107
2108impl<I: Ip, D, BT: IpRoutingBindingsTypes> BaseRoutingTableState<I, D, BT> {
2109    pub(crate) fn with_bindings_id(bindings_id: RoutingTableCookie<BT>) -> Self {
2110        Self { bindings_id, routing_table: Default::default() }
2111    }
2112}
2113
/// Identifier to a routing table.
///
/// Wraps a strong reference to the table's [`BaseRoutingTableState`]. The
/// `PartialEq`, `Eq`, `Hash` and `Clone` implementations are derived with
/// empty bounds, so they place no requirements on `I`, `D` or `BT`.
#[derive(Derivative)]
#[derivative(PartialEq(bound = ""))]
#[derivative(Eq(bound = ""))]
#[derivative(Hash(bound = ""))]
#[derivative(Clone(bound = ""))]
pub struct RoutingTableId<I: Ip, D, BT: IpRoutingBindingsTypes>(
    StrongRc<BaseRoutingTableState<I, D, BT>>,
);
2123
2124impl<I: Ip, D, BT: IpRoutingBindingsTypes> Debug for RoutingTableId<I, D, BT> {
2125    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2126        let Self(rc) = self;
2127        f.debug_tuple("RoutingTableId").field(&I::NAME).field(&rc.bindings_id).finish()
2128    }
2129}
2130
2131impl<I: Ip, D, BT: IpRoutingBindingsTypes> RoutingTableId<I, D, BT> {
2132    /// Creates a new table ID.
2133    pub(crate) fn new(rc: StrongRc<BaseRoutingTableState<I, D, BT>>) -> Self {
2134        Self(rc)
2135    }
2136
2137    /// Provides direct access to the forwarding table.
2138    #[cfg(any(test, feature = "testutils"))]
2139    pub fn table(&self) -> &RwLock<RoutingTable<I, D>> {
2140        let Self(inner) = self;
2141        &inner.routing_table
2142    }
2143
2144    /// Downgrades the strong ID into a weak one.
2145    pub fn downgrade(&self) -> WeakRoutingTableId<I, D, BT>
2146    where
2147        BT::RoutingTableId: Clone,
2148    {
2149        let Self(rc) = self;
2150        WeakRoutingTableId { rc: StrongRc::downgrade(rc), bindings_id: rc.bindings_id.clone() }
2151    }
2152
2153    #[cfg(test)]
2154    fn get_mut(&self) -> impl DerefMut<Target = RoutingTable<I, D>> + '_ {
2155        let Self(rc) = self;
2156        rc.routing_table.write()
2157    }
2158
2159    /// Gets the bindings cookie for this routing table.
2160    pub fn bindings_id(&self) -> &RoutingTableCookie<BT> {
2161        let Self(rc) = self;
2162        &rc.bindings_id
2163    }
2164}
2165
/// Weak Identifier to a routing table.
///
/// Equality and hashing only consider the weak reference; the cached
/// `bindings_id` is excluded from both.
#[derive(Derivative)]
#[derivative(Clone(bound = "BT::RoutingTableId: Clone"))]
#[derivative(PartialEq, Eq, Hash)]
pub struct WeakRoutingTableId<I: Ip, D, BT: IpRoutingBindingsTypes> {
    /// Weak reference to the table state.
    rc: WeakRc<BaseRoutingTableState<I, D, BT>>,
    /// Copy of the table's bindings cookie, taken when the strong ID was
    /// downgraded; used by the `Debug` implementation.
    #[derivative(PartialEq = "ignore")]
    #[derivative(Hash = "ignore")]
    bindings_id: RoutingTableCookie<BT>,
}
2176
2177impl<I: Ip, D, BT: IpRoutingBindingsTypes> Debug for WeakRoutingTableId<I, D, BT> {
2178    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2179        let Self { bindings_id, .. } = self;
2180        f.debug_tuple("WeakRoutingTableId").field(&I::NAME).field(bindings_id).finish()
2181    }
2182}
2183
/// The inner state for the IP layer for IP version `I`.
///
/// Holds the per-IP-version routing, multicast forwarding, fragment
/// reassembly, path MTU, raw socket and filtering state along with the
/// associated counters.
#[derive(GenericOverIp)]
#[generic_over_ip(I, Ip)]
pub struct IpStateInner<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpStateBindingsTypes> {
    // The routing rules table.
    rules_table: RwLock<RulesTable<I, D, BT>>,
    // TODO(https://fxbug.dev/355059838): Explore the option to let Bindings create the main table.
    main_table_id: RoutingTableId<I, D, BT>,
    // Multicast forwarding state and its counters.
    multicast_forwarding: RwLock<MulticastForwardingState<I, D, BT>>,
    multicast_forwarding_counters: MulticastForwardingCounters<I>,
    // Fragment reassembly cache.
    fragment_cache: Mutex<IpPacketFragmentCache<I, BT>>,
    // Path MTU cache.
    pmtu_cache: Mutex<PmtuCache<I, BT>>,
    // IP layer counters.
    counters: IpCounters<I>,
    // Raw IP sockets and their aggregate counters.
    raw_sockets: RwLock<RawIpSocketMap<I, D::Weak, BT>>,
    raw_socket_counters: RawIpSocketCounters<I>,
    // Packet filtering state.
    filter: RwLock<filter::State<I, WeakAddressId<I, BT>, BT>>,
    // Make sure the primary IDs are dropped last. Also note that the following hash map also stores
    // the primary ID to the main table, and if the user (Bindings) attempts to remove the main
    // table without dropping `main_table_id` first, it will panic. This serves as an assertion
    // that the main table cannot be removed and Bindings must never attempt to remove the main
    // routing table.
    //
    // NOTE(review): `igmp_counters` and `mld_counters` are declared after `tables` and so are
    // dropped after it; presumably harmless because counters hold no table IDs. Confirm.
    tables: Mutex<HashMap<RoutingTableId<I, D, BT>, PrimaryRc<BaseRoutingTableState<I, D, BT>>>>,
    // Stack-wide IGMP and MLD counters.
    igmp_counters: IgmpCounters,
    mld_counters: MldCounters,
}
2208
2209impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpStateBindingsTypes> IpStateInner<I, D, BT> {
2210    /// Gets the IP counters.
2211    pub fn counters(&self) -> &IpCounters<I> {
2212        &self.counters
2213    }
2214
2215    /// Gets the multicast forwarding counters.
2216    pub fn multicast_forwarding_counters(&self) -> &MulticastForwardingCounters<I> {
2217        &self.multicast_forwarding_counters
2218    }
2219
2220    /// Gets the aggregate raw IP socket counters.
2221    pub fn raw_ip_socket_counters(&self) -> &RawIpSocketCounters<I> {
2222        &self.raw_socket_counters
2223    }
2224
2225    /// Gets the main table ID.
2226    pub fn main_table_id(&self) -> &RoutingTableId<I, D, BT> {
2227        &self.main_table_id
2228    }
2229
2230    /// Provides direct access to the path MTU cache.
2231    #[cfg(any(test, feature = "testutils"))]
2232    pub fn pmtu_cache(&self) -> &Mutex<PmtuCache<I, BT>> {
2233        &self.pmtu_cache
2234    }
2235
2236    /// Provides direct access to the filtering state.
2237    #[cfg(any(test, feature = "testutils"))]
2238    pub fn filter(&self) -> &RwLock<filter::State<I, WeakAddressId<I, BT>, BT>> {
2239        &self.filter
2240    }
2241
2242    /// Gets the stack-wide IGMP counters.
2243    pub fn igmp_counters(&self) -> &IgmpCounters {
2244        &self.igmp_counters
2245    }
2246
2247    /// Gets the stack-wide MLD counters.
2248    pub fn mld_counters(&self) -> &MldCounters {
2249        &self.mld_counters
2250    }
2251}
2252
2253impl<
2254    I: IpLayerIpExt,
2255    D: StrongDeviceIdentifier,
2256    BC: TimerContext + RngContext + IpStateBindingsTypes + IpRoutingBindingsTypes,
2257> IpStateInner<I, D, BC>
2258{
2259    /// Creates a new inner IP layer state.
2260    fn new<CC: CoreTimerContext<IpLayerTimerId, BC>>(bindings_ctx: &mut BC) -> Self {
2261        let main_table: PrimaryRc<BaseRoutingTableState<I, D, BC>> =
2262            PrimaryRc::new(BaseRoutingTableState::with_bindings_id(RoutingTableCookie::Main));
2263        let main_table_id = RoutingTableId(PrimaryRc::clone_strong(&main_table));
2264        Self {
2265            rules_table: RwLock::new(RulesTable::new(main_table_id.clone())),
2266            tables: Mutex::new(HashMap::from_iter(core::iter::once((
2267                main_table_id.clone(),
2268                main_table,
2269            )))),
2270            main_table_id,
2271            multicast_forwarding: Default::default(),
2272            multicast_forwarding_counters: Default::default(),
2273            fragment_cache: Mutex::new(
2274                IpPacketFragmentCache::new::<NestedIntoCoreTimerCtx<CC, _>>(bindings_ctx),
2275            ),
2276            pmtu_cache: Mutex::new(PmtuCache::new::<NestedIntoCoreTimerCtx<CC, _>>(bindings_ctx)),
2277            counters: Default::default(),
2278            raw_sockets: Default::default(),
2279            raw_socket_counters: Default::default(),
2280            filter: RwLock::new(filter::State::new::<NestedIntoCoreTimerCtx<CC, _>>(bindings_ctx)),
2281            igmp_counters: Default::default(),
2282            mld_counters: Default::default(),
2283        }
2284    }
2285}
2286
/// The identifier for timer events in the IP layer.
///
/// Each IP-layer component that uses timers (fragment reassembly, path MTU
/// discovery, filtering, multicast forwarding) has one variant per IP
/// version wrapping that component's own timer ID.
#[derive(Debug, Clone, Eq, PartialEq, Hash, GenericOverIp)]
#[generic_over_ip()]
pub enum IpLayerTimerId {
    /// A timer event for IPv4 packet reassembly timers.
    ReassemblyTimeoutv4(FragmentTimerId<Ipv4>),
    /// A timer event for IPv6 packet reassembly timers.
    ReassemblyTimeoutv6(FragmentTimerId<Ipv6>),
    /// A timer event for IPv4 path MTU discovery.
    PmtuTimeoutv4(PmtuTimerId<Ipv4>),
    /// A timer event for IPv6 path MTU discovery.
    PmtuTimeoutv6(PmtuTimerId<Ipv6>),
    /// A timer event for IPv4 filtering timers.
    FilterTimerv4(FilterTimerId<Ipv4>),
    /// A timer event for IPv6 filtering timers.
    FilterTimerv6(FilterTimerId<Ipv6>),
    /// A timer event for IPv4 Multicast forwarding timers.
    MulticastForwardingTimerv4(MulticastForwardingTimerId<Ipv4>),
    /// A timer event for IPv6 Multicast forwarding timers.
    MulticastForwardingTimerv6(MulticastForwardingTimerId<Ipv6>),
}
2308
2309impl<I: Ip> From<FragmentTimerId<I>> for IpLayerTimerId {
2310    fn from(timer: FragmentTimerId<I>) -> IpLayerTimerId {
2311        I::map_ip(timer, IpLayerTimerId::ReassemblyTimeoutv4, IpLayerTimerId::ReassemblyTimeoutv6)
2312    }
2313}
2314
2315impl<I: Ip> From<PmtuTimerId<I>> for IpLayerTimerId {
2316    fn from(timer: PmtuTimerId<I>) -> IpLayerTimerId {
2317        I::map_ip(timer, IpLayerTimerId::PmtuTimeoutv4, IpLayerTimerId::PmtuTimeoutv6)
2318    }
2319}
2320
2321impl<I: Ip> From<FilterTimerId<I>> for IpLayerTimerId {
2322    fn from(timer: FilterTimerId<I>) -> IpLayerTimerId {
2323        I::map_ip(timer, IpLayerTimerId::FilterTimerv4, IpLayerTimerId::FilterTimerv6)
2324    }
2325}
2326
2327impl<I: Ip> From<MulticastForwardingTimerId<I>> for IpLayerTimerId {
2328    fn from(timer: MulticastForwardingTimerId<I>) -> IpLayerTimerId {
2329        I::map_ip(
2330            timer,
2331            IpLayerTimerId::MulticastForwardingTimerv4,
2332            IpLayerTimerId::MulticastForwardingTimerv6,
2333        )
2334    }
2335}
2336
2337impl<CC, BC> HandleableTimer<CC, BC> for IpLayerTimerId
2338where
2339    CC: TimerHandler<BC, FragmentTimerId<Ipv4>>
2340        + TimerHandler<BC, FragmentTimerId<Ipv6>>
2341        + TimerHandler<BC, PmtuTimerId<Ipv4>>
2342        + TimerHandler<BC, PmtuTimerId<Ipv6>>
2343        + TimerHandler<BC, FilterTimerId<Ipv4>>
2344        + TimerHandler<BC, FilterTimerId<Ipv6>>
2345        + TimerHandler<BC, MulticastForwardingTimerId<Ipv4>>
2346        + TimerHandler<BC, MulticastForwardingTimerId<Ipv6>>,
2347    BC: TimerBindingsTypes,
2348{
2349    fn handle(self, core_ctx: &mut CC, bindings_ctx: &mut BC, timer: BC::UniqueTimerId) {
2350        match self {
2351            IpLayerTimerId::ReassemblyTimeoutv4(id) => {
2352                core_ctx.handle_timer(bindings_ctx, id, timer)
2353            }
2354            IpLayerTimerId::ReassemblyTimeoutv6(id) => {
2355                core_ctx.handle_timer(bindings_ctx, id, timer)
2356            }
2357            IpLayerTimerId::PmtuTimeoutv4(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2358            IpLayerTimerId::PmtuTimeoutv6(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2359            IpLayerTimerId::FilterTimerv4(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2360            IpLayerTimerId::FilterTimerv6(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2361            IpLayerTimerId::MulticastForwardingTimerv4(id) => {
2362                core_ctx.handle_timer(bindings_ctx, id, timer)
2363            }
2364            IpLayerTimerId::MulticastForwardingTimerv6(id) => {
2365                core_ctx.handle_timer(bindings_ctx, id, timer)
2366            }
2367        }
2368    }
2369}
2370
/// An ICMP error, and the metadata required to send it.
///
/// This allows the sending of the ICMP error to be decoupled from the
/// generation of the error, which is advantageous because sending the error
/// requires the underlying packet buffer, which cannot be "moved" in certain
/// contexts.
pub(crate) struct IcmpErrorSender<'a, I: IcmpHandlerIpExt, D> {
    /// The ICMP error that should be sent.
    err: I::IcmpError,
    /// The original source IP address of the packet (before the local-ingress
    /// hook evaluation).
    src_ip: SocketIpAddr<I::Addr>,
    /// The original destination IP address of the packet (before the
    /// local-ingress hook evaluation).
    dst_ip: SocketIpAddr<I::Addr>,
    /// The frame destination of the packet.
    frame_dst: Option<FrameDestination>,
    /// The device out which to send the error.
    device: &'a D,
    /// The metadata from the packet, allowing the packet's backing buffer to be
    /// returned to its pre-IP-parse state with [`GrowBuffer::undo_parse`].
    meta: ParseMetadata,
    /// The marks used to send the ICMP error.
    marks: Marks,
    /// The protocol of the original packet.
    proto: I::Proto,
}
2398
2399impl<'a, I: IcmpHandlerIpExt, D> IcmpErrorSender<'a, I, D> {
2400    pub fn new<CC, B>(
2401        core_ctx: &mut CC,
2402        err: I::IcmpError,
2403        packet: &I::Packet<B>,
2404        frame_dst: Option<FrameDestination>,
2405        device: &'a D,
2406        marks: Marks,
2407    ) -> Option<Self>
2408    where
2409        I: IpCountersIpExt,
2410        CC: ResourceCounterContext<D, IpCounters<I>>,
2411        B: SplitByteSlice,
2412    {
2413        let Some(src_ip) = SocketIpAddr::new(packet.src_ip()) else {
2414            core_ctx.increment_both(device, |c| &c.unspecified_source);
2415            return None;
2416        };
2417        let Some(dst_ip) = SocketIpAddr::new(packet.dst_ip()) else {
2418            return None;
2419        };
2420
2421        // In IPv4, don't respond to non-initial fragments.
2422        let is_ipv4_fragment = I::map_ip_in(
2423            packet,
2424            |p| {
2425                packet_formats::ipv4::Ipv4Header::fragment_type(p)
2426                    == Ipv4FragmentType::NonInitialFragment
2427            },
2428            |_| false,
2429        );
2430        if is_ipv4_fragment {
2431            return None;
2432        }
2433
2434        let meta = packet.parse_metadata();
2435        let proto = packet.proto();
2436        Some(Self { err, src_ip, dst_ip, frame_dst, device, meta, marks, proto })
2437    }
2438
2439    /// Generate an send an appropriate ICMP error in response to this error.
2440    ///
2441    /// The provided `body` must be the original buffer from which the IP
2442    /// packet responsible for this error was parsed. It is expected to be in a
2443    /// state that allows undoing the IP packet parse (e.g. unmodified after the
2444    /// IP packet was parsed).
2445    pub fn send<B, BC, CC>(self, core_ctx: &mut CC, bindings_ctx: &mut BC, mut body: B)
2446    where
2447        B: BufferMut,
2448        CC: IcmpErrorHandler<I, BC, DeviceId = D>,
2449    {
2450        let IcmpErrorSender { err, src_ip, dst_ip, frame_dst, device, meta, marks, proto } = self;
2451        let header_len = meta.header_len();
2452
2453        // Undo the parsing of the IP Packet, moving the buffer's cursor so that
2454        // it points at the start of the IP header. This way, the sent ICMP
2455        // error will contain the entire original IP packet.
2456        body.undo_parse(meta);
2457
2458        core_ctx.send_icmp_error_message(
2459            bindings_ctx,
2460            Some(device),
2461            frame_dst,
2462            src_ip,
2463            dst_ip,
2464            body,
2465            err,
2466            header_len,
2467            proto,
2468            &marks,
2469        );
2470    }
2471}
2472
// Early demux results may be invalidated by SNAT in the LOCAL_INGRESS hook.
// This struct is used to check if the early demux result is still valid.
//
// The source address and source port are recorded when the result is created
// and compared against the packet again in `take_socket`.
//
// TODO(https://fxbug.dev/476507679): Add tests to ensure this works properly
// once SNAT is fully implemented.
#[derive(PartialEq, Eq)]
struct EarlyDemuxResult<I: Ip, S> {
    /// The socket selected by early demux.
    socket: S,
    /// The packet's source address at the time the result was created.
    src_addr: I::Addr,
    /// The packet's transport-layer source port at the time the result was
    /// created, or `None` if no transport packet data was available.
    src_port: Option<u16>,
}
2484
2485impl<I: FilterIpExt, S> EarlyDemuxResult<I, S> {
2486    fn new<P: IpPacket<I>>(socket: S, packet: &P) -> Self {
2487        let src_port =
2488            packet.maybe_transport_packet().transport_packet_data().map(|t| t.src_port());
2489        Self { socket, src_addr: packet.src_addr(), src_port }
2490    }
2491
2492    // Returns the socket if it's still the right socket to handle the packet.
2493    fn take_socket<P: IpPacket<I>>(self, packet: &P) -> Option<S> {
2494        let src_port =
2495            packet.maybe_transport_packet().transport_packet_data().map(|t| t.src_port());
2496        (self.src_addr == packet.src_addr() && self.src_port == src_port).then_some(self.socket)
2497    }
2498
2499    fn update_packet_metadata<CC, BC>(
2500        &self,
2501        core_ctx: &mut CC,
2502        packet_metadata: &mut IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
2503    ) where
2504        I: IpLayerIpExt,
2505        S: SocketMetadata<CC>,
2506        BC: IpLayerBindingsContext<I, CC::DeviceId>,
2507        CC: IpLayerIngressContext<I, BC>,
2508    {
2509        packet_metadata.socket_cookie = Some(self.socket.socket_cookie(core_ctx));
2510        for mark in BC::marks_to_set_on_ingress() {
2511            *packet_metadata.marks.get_mut(*mark) = self.socket.marks(core_ctx).get(*mark).clone();
2512        }
2513    }
2514}
2515
2516fn reject_type_to_icmpv4_error(reject_type: RejectType) -> Option<Icmpv4Error> {
2517    let error = match reject_type {
2518        RejectType::NetUnreachable => Icmpv4Error::NetUnreachable,
2519        RejectType::ProtoUnreachable => Icmpv4Error::ProtocolUnreachable,
2520        RejectType::PortUnreachable => Icmpv4Error::PortUnreachable,
2521        RejectType::HostUnreachable => Icmpv4Error::HostUnreachable,
2522        RejectType::RoutePolicyFail => Icmpv4Error::NetworkProhibited,
2523        RejectType::RejectRoute => Icmpv4Error::HostProhibited,
2524        RejectType::AdminProhibited => Icmpv4Error::AdminProhibited,
2525        // TODO(https://fxbug.dev/488116504): Implement RejectType::TcpReset.
2526        RejectType::TcpReset => return None,
2527    };
2528    Some(error)
2529}
2530
2531fn reject_type_to_icmpv6_error(reject_type: RejectType) -> Option<Icmpv6Error> {
2532    let error = match reject_type {
2533        RejectType::NetUnreachable => Icmpv6Error::NetUnreachable,
2534        RejectType::PortUnreachable => Icmpv6Error::PortUnreachable,
2535        RejectType::HostUnreachable => Icmpv6Error::AddressUnreachable,
2536        RejectType::AdminProhibited => Icmpv6Error::AdminProhibited,
2537        RejectType::RoutePolicyFail => Icmpv6Error::SourceAddressPolicyFailed,
2538        RejectType::RejectRoute => Icmpv6Error::RejectRoute,
2539        // TODO(https://fxbug.dev/488116504): Implement ProtoUnreachable and TcpReset.
2540        RejectType::TcpReset | RejectType::ProtoUnreachable => return None,
2541    };
2542    Some(error)
2543}
// TODO(joshlf): Once we support multiple extension headers in IPv6, we will
// need to verify that the callers of the packet dispatch functions below are
// still sound. In particular, they may accidentally pass parse metadata which
// corresponds to a single extension header rather than all of the IPv6 headers.
2548
2549/// Dispatch a received IPv4 packet to the appropriate protocol.
2550///
2551/// `device` is the device the packet was received on. `parse_metadata` is the
2552/// parse metadata associated with parsing the IP headers. It is used to undo
2553/// that parsing. Both `device` and `parse_metadata` are required in order to
2554/// send ICMP messages in response to unrecognized protocols or ports. If either
2555/// of `device` or `parse_metadata` is `None`, the caller promises that the
2556/// protocol and port are recognized.
2557///
2558/// # Panics
2559///
2560/// `dispatch_receive_ipv4_packet` panics if the protocol is unrecognized and
2561/// `parse_metadata` is `None`. If an IGMP message is received but it is not
2562/// coming from a device, i.e., `device` given is `None`,
2563/// `dispatch_receive_ip_packet` will also panic.
2564fn dispatch_receive_ipv4_packet<
2565    'a,
2566    'b,
2567    BC: IpLayerBindingsContext<Ipv4, CC::DeviceId>,
2568    CC: IpLayerIngressContext<Ipv4, BC>,
2569>(
2570    core_ctx: &'a mut CC,
2571    bindings_ctx: &'a mut BC,
2572    device: &'b CC::DeviceId,
2573    frame_dst: Option<FrameDestination>,
2574    mut packet: Ipv4Packet<&'a mut [u8]>,
2575    mut packet_metadata: IpLayerPacketMetadata<Ipv4, CC::WeakAddressId, BC>,
2576    receive_meta: ReceiveIpPacketMeta<Ipv4>,
2577) -> Result<(), IcmpErrorSender<'b, Ipv4, CC::DeviceId>> {
2578    core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet);
2579
2580    match frame_dst {
2581        Some(FrameDestination::Individual { local: false }) => {
2582            core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet_other_host);
2583        }
2584        Some(FrameDestination::Individual { local: true })
2585        | Some(FrameDestination::Multicast)
2586        | Some(FrameDestination::Broadcast)
2587        | None => (),
2588    };
2589
2590    // Skip early demux if the packet was redirected to a TPROXY.
2591    // TODO(https://fxbug.dev/475851987): Handle TPROXY in early_demux.
2592    let early_demux_result = receive_meta
2593        .transparent_override
2594        .is_none()
2595        .then(|| {
2596            core_ctx.early_demux(
2597                device,
2598                frame_dst,
2599                packet.src_ip(),
2600                packet.dst_ip(),
2601                packet.proto(),
2602                packet.body(),
2603            )
2604        })
2605        .flatten()
2606        .map(|socket| {
2607            let early_demux_result = EarlyDemuxResult::new(socket, &packet);
2608            early_demux_result.update_packet_metadata(core_ctx, &mut packet_metadata);
2609            early_demux_result
2610        });
2611
2612    let filter_verdict = core_ctx.filter_handler().local_ingress_hook(
2613        bindings_ctx,
2614        &mut packet,
2615        device,
2616        &mut packet_metadata,
2617    );
2618
2619    let marks = packet_metadata.marks;
2620    packet_metadata.acknowledge_drop();
2621
2622    match filter_verdict {
2623        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
2624            return Ok(());
2625        }
2626        filter::Verdict::Stop(filter::DropOrReject::Reject(reject_type)) => {
2627            return match reject_type_to_icmpv4_error(reject_type) {
2628                Some(icmp_error) => {
2629                    match IcmpErrorSender::new(
2630                        core_ctx, icmp_error, &packet, frame_dst, device, marks,
2631                    ) {
2632                        Some(icmp_sender) => Err(icmp_sender),
2633                        None => Ok(()),
2634                    }
2635                }
2636                None => {
2637                    debug!("Unsupported reject type: {:?}", reject_type);
2638                    return Ok(());
2639                }
2640            };
2641        }
2642        filter::Verdict::Proceed(filter::Accept) => (),
2643    };
2644
2645    // These invariants are validated by the caller of this function, but it's
2646    // possible for the LOCAL_INGRESS hook to rewrite the packet, so we have to
2647    // check them again.
2648    let Some(src_ip) = packet.src_ipv4() else {
2649        debug!(
2650            "dispatch_receive_ipv4_packet: received packet from invalid source {} after the \
2651            LOCAL_INGRESS hook; dropping",
2652            packet.src_ip()
2653        );
2654        core_ctx.increment_both(device, |c| &c.invalid_source);
2655        return Ok(());
2656    };
2657    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
2658        core_ctx.increment_both(device, |c| &c.unspecified_destination);
2659        debug!(
2660            "dispatch_receive_ipv4_packet: Received packet with unspecified destination IP address \
2661            after the LOCAL_INGRESS hook; dropping"
2662        );
2663        return Ok(());
2664    };
2665
2666    core_ctx.deliver_packet_to_raw_ip_sockets(bindings_ctx, &packet, &device);
2667
2668    // Check if the early demux result is still valid.
2669    let early_demux_socket = early_demux_result.and_then(|result| result.take_socket(&packet));
2670
2671    let proto = packet.proto();
2672    let (prefix, options, body) = packet.parts_with_body_mut();
2673    let buffer = Buf::new(body, ..);
2674    let header_info = Ipv4HeaderInfo { prefix, options: options.as_ref() };
2675    let receive_info = LocalDeliveryPacketInfo { meta: receive_meta, header_info, marks };
2676
2677    core_ctx
2678        .dispatch_receive_ip_packet(
2679            bindings_ctx,
2680            device,
2681            src_ip,
2682            dst_ip,
2683            proto,
2684            buffer,
2685            &receive_info,
2686            early_demux_socket,
2687        )
2688        .or_else(|icmp_error| {
2689            match IcmpErrorSender::new(core_ctx, icmp_error, &packet, frame_dst, device, marks) {
2690                Some(icmp_sender) => Err(icmp_sender),
2691                None => Ok(()),
2692            }
2693        })
2694}
2695
2696/// Dispatch a received IPv6 packet to the appropriate protocol.
2697///
2698/// `dispatch_receive_ipv6_packet` has the same semantics as
2699/// `dispatch_receive_ipv4_packet`, but for IPv6.
2700fn dispatch_receive_ipv6_packet<
2701    'a,
2702    'b,
2703    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
2704    CC: IpLayerIngressContext<Ipv6, BC>,
2705>(
2706    core_ctx: &'a mut CC,
2707    bindings_ctx: &'a mut BC,
2708    device: &'b CC::DeviceId,
2709    frame_dst: Option<FrameDestination>,
2710    mut packet: Ipv6Packet<&'a mut [u8]>,
2711    mut packet_metadata: IpLayerPacketMetadata<Ipv6, CC::WeakAddressId, BC>,
2712    meta: ReceiveIpPacketMeta<Ipv6>,
2713) -> Result<(), IcmpErrorSender<'b, Ipv6, CC::DeviceId>> {
2714    // TODO(https://fxbug.dev/42095067): Once we support multiple extension
2715    // headers in IPv6, we will need to verify that the callers of this
2716    // function are still sound. In particular, they may accidentally pass a
2717    // parse_metadata argument which corresponds to a single extension
2718    // header rather than all of the IPv6 headers.
2719
2720    core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet);
2721
2722    match frame_dst {
2723        Some(FrameDestination::Individual { local: false }) => {
2724            core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet_other_host);
2725        }
2726        Some(FrameDestination::Individual { local: true })
2727        | Some(FrameDestination::Multicast)
2728        | Some(FrameDestination::Broadcast)
2729        | None => (),
2730    }
2731
2732    // Skip early demux if the packet was redirected to a TPROXY.
2733    // TODO(https://fxbug.dev/475851987): Handle TPROXY in early_demux.
2734    let early_demux_result = meta
2735        .transparent_override
2736        .is_none()
2737        .then(|| {
2738            core_ctx.early_demux(
2739                device,
2740                frame_dst,
2741                packet.src_ip(),
2742                packet.dst_ip(),
2743                packet.proto(),
2744                packet.body(),
2745            )
2746        })
2747        .flatten()
2748        .map(|socket| {
2749            let early_demux_result = EarlyDemuxResult::new(socket, &packet);
2750            early_demux_result.update_packet_metadata(core_ctx, &mut packet_metadata);
2751            early_demux_result
2752        });
2753
2754    let filter_verdict = core_ctx.filter_handler().local_ingress_hook(
2755        bindings_ctx,
2756        &mut packet,
2757        device,
2758        &mut packet_metadata,
2759    );
2760
2761    let marks = packet_metadata.marks;
2762    packet_metadata.acknowledge_drop();
2763
2764    match filter_verdict {
2765        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
2766            return Ok(());
2767        }
2768        filter::Verdict::Stop(filter::DropOrReject::Reject(reject_type)) => {
2769            return match reject_type_to_icmpv6_error(reject_type) {
2770                Some(icmp_error) => {
2771                    match IcmpErrorSender::new(
2772                        core_ctx, icmp_error, &packet, frame_dst, device, marks,
2773                    ) {
2774                        Some(icmp_sender) => Err(icmp_sender),
2775                        None => Ok(()),
2776                    }
2777                }
2778                None => {
2779                    debug!("Unsupported reject type: {:?}", reject_type);
2780                    return Ok(());
2781                }
2782            };
2783        }
2784        filter::Verdict::Proceed(filter::Accept) => {}
2785    }
2786
2787    // These invariants are validated by the caller of this function, but it's
2788    // possible for the LOCAL_INGRESS hook to rewrite the packet, so we have to
2789    // check them again.
2790    let Some(src_ip) = packet.src_ipv6() else {
2791        debug!(
2792            "dispatch_receive_ipv6_packet: received packet from invalid source {} after the \
2793            LOCAL_INGRESS hook; dropping",
2794            packet.src_ip()
2795        );
2796
2797        core_ctx.increment_both(device, |c| &c.invalid_source);
2798        return Ok(());
2799    };
2800    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
2801        core_ctx.increment_both(device, |c| &c.unspecified_destination);
2802        debug!(
2803            "dispatch_receive_ipv6_packet: Received packet with unspecified destination IP address \
2804            after the LOCAL_INGRESS hook; dropping"
2805        );
2806        return Ok(());
2807    };
2808
2809    core_ctx.deliver_packet_to_raw_ip_sockets(bindings_ctx, &packet, &device);
2810
2811    // Check if the early demux result is still valid.
2812    let early_demux_socket = early_demux_result.and_then(|result| result.take_socket(&packet));
2813
2814    let proto = packet.proto();
2815    let (fixed, extension, body) = packet.parts_with_body_mut();
2816    let buffer = Buf::new(body, ..);
2817    let header_info = Ipv6HeaderInfo { fixed, extension };
2818    let receive_info = LocalDeliveryPacketInfo { meta, header_info, marks };
2819
2820    core_ctx
2821        .dispatch_receive_ip_packet(
2822            bindings_ctx,
2823            device,
2824            src_ip,
2825            dst_ip,
2826            proto,
2827            buffer,
2828            &receive_info,
2829            early_demux_socket,
2830        )
2831        .or_else(|icmp_error| {
2832            let marks = receive_info.marks;
2833            match IcmpErrorSender::new(core_ctx, icmp_error, &packet, frame_dst, device, marks) {
2834                Some(icmp_sender) => Err(icmp_sender),
2835                None => Ok(()),
2836            }
2837        })
2838}
2839
/// The metadata required to forward an IP Packet.
///
/// This allows the forwarding of the packet to be decoupled from the
/// determination of how to forward. This is advantageous because forwarding
/// requires the underlying packet buffer, which cannot be "moved" in certain
/// contexts.
pub(crate) struct IpPacketForwarder<
    'a,
    I: IpLayerIpExt,
    D,
    A,
    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
> {
    /// The device the packet arrived on. ICMP errors generated while
    /// forwarding are sent via this device.
    inbound_device: &'a D,
    /// The device the packet is sent out of.
    outbound_device: &'a D,
    /// The IP-layer metadata that accompanies the packet when it is sent.
    packet_meta: IpLayerPacketMetadata<I, A, BT>,
    /// The source address of the packet being forwarded.
    src_ip: I::RecvSrcAddr,
    /// The destination address of the packet being forwarded.
    dst_ip: SpecifiedAddr<I::Addr>,
    /// The destination passed to `send_ip_frame` when forwarding.
    destination: IpPacketDestination<I, &'a D>,
    /// The protocol of the packet being forwarded.
    proto: I::Proto,
    /// The metadata from parsing the packet's IP header. Used to rebuild the
    /// forwarded packet and to report the header length in ICMP errors.
    parse_meta: ParseMetadata,
    /// The frame destination of the packet.
    frame_dst: Option<FrameDestination>,
}
2863
2864impl<'a, I, D, A, BC> IpPacketForwarder<'a, I, D, A, BC>
2865where
2866    I: IpLayerIpExt,
2867    BC: IpLayerBindingsContext<I, D>,
2868{
2869    // Forward the provided buffer as specified by this [`IpPacketForwarder`].
2870    fn forward_with_buffer<CC, B>(self, core_ctx: &mut CC, bindings_ctx: &mut BC, buffer: B)
2871    where
2872        B: BufferMut,
2873        CC: IpLayerForwardingContext<I, BC, DeviceId = D, WeakAddressId = A>,
2874    {
2875        let Self {
2876            inbound_device,
2877            outbound_device,
2878            packet_meta,
2879            src_ip,
2880            dst_ip,
2881            destination,
2882            proto,
2883            parse_meta,
2884            frame_dst,
2885        } = self;
2886
2887        let packet = ForwardedPacket::new(src_ip.get(), dst_ip.get(), proto, parse_meta, buffer);
2888
2889        trace!("forward_with_buffer: forwarding {} packet", I::NAME);
2890
2891        let marks = packet_meta.marks;
2892        match send_ip_frame(
2893            core_ctx,
2894            bindings_ctx,
2895            outbound_device,
2896            destination,
2897            packet,
2898            packet_meta,
2899            Mtu::no_limit(),
2900        ) {
2901            Ok(()) => (),
2902            Err(IpSendFrameError { serializer, error }) => {
2903                match error {
2904                    IpSendFrameErrorReason::Device(
2905                        SendFrameErrorReason::SizeConstraintsViolation,
2906                    ) => {
2907                        debug!("failed to forward {} packet: MTU exceeded", I::NAME);
2908                        core_ctx.increment_both(outbound_device, |c| &c.mtu_exceeded);
2909                        let mtu = core_ctx.get_mtu(inbound_device);
2910                        // NB: Ipv6 sends a PacketTooBig error. Ipv4 sends nothing.
2911                        let Some(err) = I::IcmpError::mtu_exceeded(mtu) else {
2912                            return;
2913                        };
2914                        // NB: Only send an ICMP error if the sender's src
2915                        // is specified.
2916                        let Some(src_ip) = I::received_source_as_icmp_source(src_ip) else {
2917                            return;
2918                        };
2919
2920                        let Some(dst_ip) = SocketIpAddr::new(dst_ip.get()) else {
2921                            return;
2922                        };
2923
2924                        // TODO(https://fxbug.dev/362489447): Increment the TTL since we
2925                        // just decremented it. The fact that we don't do this is
2926                        // technically a violation of the ICMP spec (we're not
2927                        // encapsulating the original packet that caused the
2928                        // issue, but a slightly modified version of it), but
2929                        // it's not that big of a deal because it won't affect
2930                        // the sender's ability to figure out the minimum path
2931                        // MTU. This may break other logic, though, so we should
2932                        // still fix it eventually.
2933                        core_ctx.send_icmp_error_message(
2934                            bindings_ctx,
2935                            Some(inbound_device),
2936                            frame_dst,
2937                            src_ip,
2938                            dst_ip,
2939                            serializer.into_buffer(),
2940                            err,
2941                            parse_meta.header_len(),
2942                            proto,
2943                            &marks,
2944                        );
2945                    }
2946                    IpSendFrameErrorReason::Device(SendFrameErrorReason::QueueFull)
2947                    | IpSendFrameErrorReason::Device(SendFrameErrorReason::Alloc)
2948                    | IpSendFrameErrorReason::IllegalLoopbackAddress => (),
2949                }
2950                debug!("failed to forward {} packet: {error:?}", I::NAME);
2951            }
2952        }
2953    }
2954}
2955
/// The action to take for a packet that was a candidate for forwarding.
///
/// The action is carried out with the packet's buffer by
/// `perform_action_with_buffer`.
pub(crate) enum ForwardingAction<
    'a,
    I: IpLayerIpExt,
    D,
    A,
    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
> {
    /// Drop the packet without forwarding it or generating an ICMP error.
    SilentlyDrop,
    /// Forward the packet, as specified by the [`IpPacketForwarder`].
    Forward(IpPacketForwarder<'a, I, D, A, BT>),
    /// Drop the packet without forwarding, and generate an ICMP error as
    /// specified by the [`IcmpErrorSender`].
    DropWithIcmpError(IcmpErrorSender<'a, I, D>),
}
2972
2973impl<'a, I, D, A, BC> ForwardingAction<'a, I, D, A, BC>
2974where
2975    I: IpLayerIpExt,
2976    BC: IpLayerBindingsContext<I, D>,
2977{
2978    /// Perform the action prescribed by self, with the provided packet buffer.
2979    pub(crate) fn perform_action_with_buffer<CC, B>(
2980        self,
2981        core_ctx: &mut CC,
2982        bindings_ctx: &mut BC,
2983        buffer: B,
2984    ) where
2985        B: BufferMut,
2986        CC: IpLayerForwardingContext<I, BC, DeviceId = D, WeakAddressId = A>,
2987    {
2988        match self {
2989            ForwardingAction::SilentlyDrop => {}
2990            ForwardingAction::Forward(forwarder) => {
2991                forwarder.forward_with_buffer(core_ctx, bindings_ctx, buffer)
2992            }
2993            ForwardingAction::DropWithIcmpError(icmp_sender) => {
2994                icmp_sender.send(core_ctx, bindings_ctx, buffer)
2995            }
2996        }
2997    }
2998}
2999
3000/// Determine which [`ForwardingAction`] should be taken for an IP packet.
3001pub(crate) fn determine_ip_packet_forwarding_action<'a, 'b, I, BC, CC>(
3002    core_ctx: &'a mut CC,
3003    mut packet: I::Packet<&'a mut [u8]>,
3004    mut packet_meta: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
3005    minimum_ttl: Option<u8>,
3006    inbound_device: &'b CC::DeviceId,
3007    outbound_device: &'b CC::DeviceId,
3008    destination: IpPacketDestination<I, &'b CC::DeviceId>,
3009    frame_dst: Option<FrameDestination>,
3010    src_ip: I::RecvSrcAddr,
3011    dst_ip: SpecifiedAddr<I::Addr>,
3012) -> ForwardingAction<'b, I, CC::DeviceId, CC::WeakAddressId, BC>
3013where
3014    I: IpLayerIpExt,
3015    BC: IpLayerBindingsContext<I, CC::DeviceId>,
3016    CC: IpLayerForwardingContext<I, BC>,
3017{
3018    // When forwarding, if a datagram's TTL is one or zero, discard it, as
3019    // decrementing the TTL would put it below the allowed minimum value.
3020    // For IPv4, see "TTL" section, https://tools.ietf.org/html/rfc791#page-14.
3021    // For IPv6, see "Hop Limit" section, https://datatracker.ietf.org/doc/html/rfc2460#page-5.
3022    const DEFAULT_MINIMUM_FORWARDING_TTL: u8 = 2;
3023    let minimum_ttl = minimum_ttl.unwrap_or(DEFAULT_MINIMUM_FORWARDING_TTL);
3024
3025    let ttl = packet.ttl();
3026    if ttl < minimum_ttl {
3027        debug!(
3028            "{} packet not forwarded due to inadequate TTL: got={ttl} minimum={minimum_ttl}",
3029            I::NAME
3030        );
3031        // As per RFC 792's specification of the Time Exceeded Message:
3032        //     If the gateway processing a datagram finds the time to live
3033        //     field is zero it must discard the datagram. The gateway may
3034        //     also notify the source host via the time exceeded message.
3035        // And RFC 4443 section 3.3:
3036        //    If a router receives a packet with a Hop Limit of zero, or if
3037        //    a router decrements a packet's Hop Limit to zero, it MUST
3038        //    discard the packet and originate an ICMPv6 Time Exceeded
3039        //    message with Code 0 to the source of the packet.
3040        // Don't send a Time Exceeded Message in cases where the netstack is
3041        // enforcing a higher minimum TTL (e.g. as part of a multicast route).
3042        if ttl > 1 {
3043            packet_meta.acknowledge_drop();
3044            return ForwardingAction::SilentlyDrop;
3045        }
3046
3047        core_ctx.increment_both(inbound_device, |c| &c.ttl_expired);
3048
3049        let marks = packet_meta.marks;
3050        packet_meta.acknowledge_drop();
3051
3052        // Construct and send the appropriate ICMP error for the IP version.
3053        match IcmpErrorSender::new(
3054            core_ctx,
3055            I::IcmpError::ttl_expired(),
3056            &packet,
3057            frame_dst,
3058            inbound_device,
3059            marks,
3060        ) {
3061            Some(icmp_sender) => return ForwardingAction::DropWithIcmpError(icmp_sender),
3062            None => return ForwardingAction::SilentlyDrop,
3063        }
3064    }
3065
3066    trace!("determine_ip_packet_forwarding_action: adequate TTL");
3067
3068    // For IPv6 packets, handle extension headers first.
3069    //
3070    // Any previous handling of extension headers was done under the
3071    // assumption that we are the final destination of the packet. Now that
3072    // we know we're forwarding, we need to re-examine them.
3073    let maybe_ipv6_packet_action = I::map_ip_in(
3074        &packet,
3075        |_packet| None,
3076        |packet| {
3077            Some(ipv6::handle_extension_headers(core_ctx, inbound_device, frame_dst, packet, false))
3078        },
3079    );
3080    match maybe_ipv6_packet_action {
3081        None => {} // NB: Ipv4 case.
3082        Some(Ipv6PacketAction::_Discard) => {
3083            core_ctx.increment_both(inbound_device, |c| {
3084                #[derive(GenericOverIp)]
3085                #[generic_over_ip(I, Ip)]
3086                struct InCounters<'a, I: IpLayerIpExt>(
3087                    &'a <I::RxCounters as CounterCollectionSpec>::CounterCollection<Counter>,
3088                );
3089                I::map_ip_in::<_, _>(
3090                    InCounters(&c.version_rx),
3091                    |_counters| {
3092                        unreachable!(
3093                            "`I` must be `Ipv6` because we're handling IPv6 extension headers"
3094                        )
3095                    },
3096                    |InCounters(counters)| &counters.extension_header_discard,
3097                )
3098            });
3099            trace!(
3100                "determine_ip_packet_forwarding_action: handled IPv6 extension headers: \
3101                discarding packet"
3102            );
3103            packet_meta.acknowledge_drop();
3104            return ForwardingAction::SilentlyDrop;
3105        }
3106        Some(Ipv6PacketAction::Continue) => {
3107            trace!(
3108                "determine_ip_packet_forwarding_action: handled IPv6 extension headers: \
3109                forwarding packet"
3110            );
3111        }
3112        Some(Ipv6PacketAction::ProcessFragment) => {
3113            unreachable!(
3114                "When forwarding packets, we should only ever look at the hop by hop \
3115                    options extension header (if present)"
3116            )
3117        }
3118    };
3119
3120    match core_ctx.filter_handler().forwarding_hook(
3121        I::as_filter_packet(&mut packet),
3122        inbound_device,
3123        outbound_device,
3124        &mut packet_meta,
3125    ) {
3126        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
3127            packet_meta.acknowledge_drop();
3128            trace!("determine_ip_packet_forwarding_action: filter verdict: Drop");
3129            return ForwardingAction::SilentlyDrop;
3130        }
3131        filter::Verdict::Stop(filter::DropOrReject::Reject(reject_type)) => {
3132            // TODO(https://fxbug.dev/466098884): Send reject packet.
3133            packet_meta.acknowledge_drop();
3134            trace!(
3135                "determine_ip_packet_forwarding_action: filter verdict: Reject({:?})",
3136                reject_type
3137            );
3138            return ForwardingAction::SilentlyDrop;
3139        }
3140        filter::Verdict::Proceed(filter::Accept) => {}
3141    }
3142
3143    packet.set_ttl(ttl - 1);
3144    let (_, _, proto, parse_meta): (I::Addr, I::Addr, _, _) = packet.into_metadata();
3145    ForwardingAction::Forward(IpPacketForwarder {
3146        inbound_device,
3147        outbound_device,
3148        packet_meta,
3149        src_ip,
3150        dst_ip,
3151        destination,
3152        proto,
3153        parse_meta,
3154        frame_dst,
3155    })
3156}
3157
3158pub(crate) fn send_ip_frame<I, CC, BC, S>(
3159    core_ctx: &mut CC,
3160    bindings_ctx: &mut BC,
3161    device: &CC::DeviceId,
3162    destination: IpPacketDestination<I, &CC::DeviceId>,
3163    mut body: S,
3164    mut packet_metadata: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
3165    limit_mtu: Mtu,
3166) -> Result<(), IpSendFrameError<S>>
3167where
3168    I: IpLayerIpExt,
3169    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes + MarksBindingsContext,
3170    CC: IpLayerEgressContext<I, BC> + IpDeviceMtuContext<I> + IpDeviceAddressIdContext<I>,
3171    S: FragmentableIpSerializer<I, Buffer: BufferMut> + FilterIpPacket<I>,
3172{
3173    let (verdict, proof) = core_ctx.filter_handler().egress_hook(
3174        bindings_ctx,
3175        &mut body,
3176        device,
3177        &mut packet_metadata,
3178    );
3179    match verdict {
3180        filter::Verdict::Stop(filter::DropPacket) => {
3181            packet_metadata.acknowledge_drop();
3182            return Ok(());
3183        }
3184        filter::Verdict::Proceed(filter::Accept) => {}
3185    }
3186
3187    // If the packet is leaving through the loopback device, attempt to extract a
3188    // weak reference to the packet's conntrack entry to plumb that through the
3189    // device layer so it can be reused on ingress to the IP layer.
3190    let (conntrack_connection_and_direction, tx_metadata, marks, _socket_cookie) =
3191        packet_metadata.into_parts();
3192    let conntrack_entry = if device.is_loopback() {
3193        conntrack_connection_and_direction
3194            .and_then(|(conn, dir)| WeakConntrackConnection::new(&conn).map(|conn| (conn, dir)))
3195    } else {
3196        None
3197    };
3198
3199    let mut device_layer_marks = Marks::default();
3200    for mark in BC::marks_to_keep_on_egress() {
3201        *device_layer_marks.get_mut(*mark) = *marks.get(*mark);
3202    }
3203
3204    let device_ip_layer_metadata =
3205        DeviceIpLayerMetadata { conntrack_entry, tx_metadata, marks: device_layer_marks };
3206
3207    // The filtering layer may have changed our address. Perform a last moment
3208    // check to protect against sending loopback addresses on the wire for
3209    // non-loopback devices, which is an RFC violation.
3210    if !device.is_loopback()
3211        && (I::LOOPBACK_SUBNET.contains(&body.src_addr())
3212            || I::LOOPBACK_SUBNET.contains(&body.dst_addr()))
3213    {
3214        core_ctx.increment_both(device, |c| &c.tx_illegal_loopback_address);
3215        return Err(IpSendFrameError {
3216            serializer: body,
3217            error: IpSendFrameErrorReason::IllegalLoopbackAddress,
3218        });
3219    }
3220
3221    // Use the minimum MTU between the target device and the requested mtu.
3222    let mtu = limit_mtu.min(core_ctx.get_mtu(device));
3223
3224    let body = body.with_size_limit(mtu.into());
3225
3226    let fits_mtu =
3227        match body.serialize_new_buf(PacketConstraints::UNCONSTRAINED, AlwaysFailBufferAlloc) {
3228            // We hit the allocator that refused to allocate new data, which
3229            // means the MTU is respected.
3230            Err(SerializeError::Alloc(())) => true,
3231            // MTU failure, we should try to fragment.
3232            Err(SerializeError::SizeLimitExceeded) => false,
3233        };
3234
3235    if fits_mtu {
3236        return core_ctx
3237            .send_ip_frame(bindings_ctx, device, destination, device_ip_layer_metadata, body, proof)
3238            .map_err(|ErrorAndSerializer { serializer, error }| IpSendFrameError {
3239                serializer: serializer.into_inner(),
3240                error: error.into(),
3241            });
3242    }
3243
3244    // Body doesn't fit MTU, we must fragment this serializer in order to send
3245    // it out.
3246    core_ctx.increment_both(device, |c| &c.fragmentation.fragmentation_required);
3247
3248    // Taken on the last frame.
3249    let mut device_ip_layer_metadata = Some(device_ip_layer_metadata);
3250    let body = body.into_inner();
3251    let result = match IpFragmenter::new(bindings_ctx, &body, mtu) {
3252        Ok(mut fragmenter) => loop {
3253            let (fragment, has_more) = match fragmenter.next() {
3254                None => break Ok(()),
3255                Some(f) => f,
3256            };
3257
3258            // TODO(https://fxbug.dev/391953082): We should penalize sockets
3259            // via the tx metadata when we incur IP fragmentation instead of
3260            // just attaching the ownership to the last fragment. For now, we
3261            // attach the tx metadata to the last frame only.
3262            let device_ip_layer_metadata = if has_more {
3263                // Unwrap here because only the last frame can take it.
3264                let device_ip_layer_metadata = device_ip_layer_metadata.as_ref().unwrap();
3265                DeviceIpLayerMetadata {
3266                    conntrack_entry: device_ip_layer_metadata.conntrack_entry.clone(),
3267                    tx_metadata: Default::default(),
3268                    marks: device_ip_layer_metadata.marks,
3269                }
3270            } else {
3271                // Unwrap here because the last frame can only happen once.
3272                device_ip_layer_metadata.take().unwrap()
3273            };
3274
3275            match core_ctx.send_ip_frame(
3276                bindings_ctx,
3277                device,
3278                destination.clone(),
3279                device_ip_layer_metadata,
3280                fragment,
3281                proof.clone_for_fragmentation(),
3282            ) {
3283                Ok(()) => {
3284                    core_ctx.increment_both(device, |c| &c.fragmentation.fragments);
3285                }
3286                Err(ErrorAndSerializer { serializer: _, error }) => {
3287                    core_ctx
3288                        .increment_both(device, |c| &c.fragmentation.error_fragmented_serializer);
3289                    break Err(error);
3290                }
3291            }
3292        },
3293        Err(e) => {
3294            core_ctx.increment_both(device, |c| &c.fragmentation.error_counter(&e));
3295            Err(SendFrameErrorReason::SizeConstraintsViolation)
3296        }
3297    };
3298    result.map_err(|e| IpSendFrameError { serializer: body, error: e.into() })
3299}
3300
3301/// A buffer allocator that always fails to allocate a new buffer.
3302///
3303/// Can be used to check for packet size constraints in serializer without in
3304/// fact serializing the buffer.
3305struct AlwaysFailBufferAlloc;
3306
3307impl LayoutBufferAlloc<Never> for AlwaysFailBufferAlloc {
3308    type Error = ();
3309    fn layout_alloc(
3310        self,
3311        _prefix: usize,
3312        _body: usize,
3313        _suffix: usize,
3314    ) -> Result<Never, Self::Error> {
3315        Err(())
3316    }
3317}
3318
/// Consume a parsed packet and restore the buffer it was parsed from.
///
/// `drop_packet_and_undo_parse!` takes a `$packet` and the `$buffer` that the
/// packet was parsed from. The packet is consumed via `into_metadata()`, which
/// yields its source IP, destination IP, protocol, and parse metadata. The
/// parse metadata is then handed to `$buffer.undo_parse(..)` to undo the
/// effects of parsing.
///
/// Evaluates to the tuple `(src_ip, dst_ip, proto, parse_metadata)`.
macro_rules! drop_packet_and_undo_parse {
    ($packet:expr, $buffer:expr) => {{
        let (source, destination, protocol, parse_metadata) = $packet.into_metadata();
        // The packet is gone at this point, so the buffer is free to be
        // restored.
        $buffer.undo_parse(parse_metadata);
        (source, destination, protocol, parse_metadata)
    }};
}
3334
3335/// The result of calling [`process_fragment`], depending on what action needs
3336/// to be taken by the caller.
3337enum ProcessFragmentResult<'a, I: IpLayerIpExt> {
3338    /// Processing of the packet is complete and no more action should be
3339    /// taken.
3340    Done,
3341
3342    /// Reassembly is not needed. The returned packet is the same one that was
3343    /// passed in the call to [`process_fragment`].
3344    NotNeeded(I::Packet<&'a mut [u8]>),
3345
3346    /// A packet was successfully reassembled into the provided buffer. If a
3347    /// parsed packet is needed, then the caller must perform that parsing.
3348    Reassembled(Vec<u8>),
3349}
3350
3351/// Process a fragment and reassemble if required.
3352///
3353/// Attempts to process a potential fragment packet and reassemble if we are
3354/// ready to do so. Returns an enum to the caller with the result of processing
3355/// the potential fragment.
3356fn process_fragment<'a, I, CC, BC>(
3357    core_ctx: &mut CC,
3358    bindings_ctx: &mut BC,
3359    device: &CC::DeviceId,
3360    packet: I::Packet<&'a mut [u8]>,
3361) -> ProcessFragmentResult<'a, I>
3362where
3363    I: IpLayerIpExt,
3364    for<'b> I::Packet<&'b mut [u8]>: FragmentablePacket,
3365    CC: IpLayerIngressContext<I, BC>,
3366    BC: IpLayerBindingsContext<I, CC::DeviceId>,
3367{
3368    match FragmentHandler::<I, _>::process_fragment::<&mut [u8]>(core_ctx, bindings_ctx, packet) {
3369        // Handle the packet right away since reassembly is not needed.
3370        FragmentProcessingState::NotNeeded(packet) => {
3371            trace!("receive_ip_packet: not fragmented");
3372            ProcessFragmentResult::NotNeeded(packet)
3373        }
3374        // Ready to reassemble a packet.
3375        FragmentProcessingState::Ready { key, packet_len } => {
3376            trace!("receive_ip_packet: fragmented, ready for reassembly");
3377            // Allocate a buffer of `packet_len` bytes.
3378            let mut buffer = Buf::new(alloc::vec![0; packet_len], ..);
3379
3380            // Attempt to reassemble the packet.
3381            let reassemble_result = match FragmentHandler::<I, _>::reassemble_packet(
3382                core_ctx,
3383                bindings_ctx,
3384                &key,
3385                buffer.buffer_view_mut(),
3386            ) {
3387                // Successfully reassembled the packet, handle it.
3388                Ok(()) => ProcessFragmentResult::Reassembled(buffer.into_inner()),
3389                Err(e) => {
3390                    core_ctx.increment_both(device, |c| &c.fragment_reassembly_error);
3391                    debug!("receive_ip_packet: fragmented, failed to reassemble: {:?}", e);
3392                    ProcessFragmentResult::Done
3393                }
3394            };
3395            reassemble_result
3396        }
3397        // Cannot proceed since we need more fragments before we
3398        // can reassemble a packet.
3399        FragmentProcessingState::NeedMoreFragments => {
3400            core_ctx.increment_both(device, |c| &c.need_more_fragments);
3401            trace!("receive_ip_packet: fragmented, need more before reassembly");
3402            ProcessFragmentResult::Done
3403        }
3404        // TODO(ghanan): Handle invalid fragments.
3405        FragmentProcessingState::InvalidFragment => {
3406            core_ctx.increment_both(device, |c| &c.invalid_fragment);
3407            trace!("receive_ip_packet: fragmented, invalid");
3408            ProcessFragmentResult::Done
3409        }
3410        FragmentProcessingState::OutOfMemory => {
3411            core_ctx.increment_both(device, |c| &c.fragment_cache_full);
3412            trace!("receive_ip_packet: fragmented, dropped because OOM");
3413            ProcessFragmentResult::Done
3414        }
3415    }
3416}
3417
3418// TODO(joshlf): Can we turn `try_parse_ip_packet` into a function? So far, I've
3419// been unable to get the borrow checker to accept it.
3420
/// Attempt to parse an IP packet out of a buffer.
///
/// On a parse failure the buffer is put back into the state it was in before
/// the attempt, so that its contents can still be used to send an ICMP error
/// message. The macro expands to an expression of type `Result<P, P::Error>`,
/// where `P` is the parsed packet type.
macro_rules! try_parse_ip_packet {
    ($buffer:expr) => {{
        // Record the prefix and suffix lengths before parsing touches them.
        let prefix_before = $buffer.prefix_len();
        let suffix_before = $buffer.suffix_len();

        let parsed = $buffer.parse_mut();

        if let Err(parse_err) = parsed {
            // Revert `buffer` to its original state: whatever the failed parse
            // added to the prefix or suffix is given back to the body.
            let prefix_after = $buffer.prefix_len();
            let suffix_after = $buffer.suffix_len();

            if prefix_after > prefix_before {
                $buffer.grow_front(prefix_after - prefix_before);
            }

            if suffix_after > suffix_before {
                $buffer.grow_back(suffix_after - suffix_before);
            }

            Err(parse_err)
        } else {
            parsed
        }
    }};
}
3453
/// Make an independent copy of an IP packet for one multicast route target.
///
/// The underlying bytes are copied rather than shared, because the filtering
/// engine may modify each instance differently while forwarding it.
///
/// The macro declares three bindings in the caller's scope: `$new_data` (the
/// copied bytes), `$new_buffer` (a buffer over those bytes), and `$new_packet`
/// (the packet parsed from that buffer). It panics if the copy fails to parse.
///
/// Possible future optimizations include:
///   * Copy-on-write semantics for the buffer/packet, so that the data is
///     only copied when it is actually needed.
///   * Skipping the reparse. The copy is byte-for-byte identical to a packet
///     already known to be good, so it would be safe to adopt the data as an
///     IP packet without validating it again.
// NB: This is a macro, not a function, because Rust's "move" semantics prevent
// us from returning both a buffer and a packet referencing that buffer.
macro_rules! clone_packet_for_mcast_forwarding {
    {let ($new_data:ident, $new_buffer:ident, $new_packet:ident) = $packet:ident} => {
        let mut $new_data = $packet.to_vec();
        let mut $new_buffer: Buf<&mut [u8]> = Buf::new($new_data.as_mut_slice(), ..);
        let $new_packet = try_parse_ip_packet!($new_buffer).unwrap();
    };
}
3478
3479/// Receive an IPv4 packet from a device.
3480///
3481/// `frame_dst` specifies how this packet was received; see [`FrameDestination`]
3482/// for options.
3483pub fn receive_ipv4_packet<
3484    BC: IpLayerBindingsContext<Ipv4, CC::DeviceId>,
3485    B: BufferMut,
3486    CC: IpLayerIngressContext<Ipv4, BC>,
3487>(
3488    core_ctx: &mut CC,
3489    bindings_ctx: &mut BC,
3490    device: &CC::DeviceId,
3491    frame_dst: Option<FrameDestination>,
3492    device_ip_layer_metadata: DeviceIpLayerMetadata<BC>,
3493    buffer: B,
3494) {
3495    if !core_ctx.is_ip_device_enabled(&device) {
3496        return;
3497    }
3498
3499    // This is required because we may need to process the buffer that was
3500    // passed in or a reassembled one, which have different types.
3501    let mut buffer: packet::Either<B, Buf<Vec<u8>>> = packet::Either::A(buffer);
3502
3503    core_ctx.increment_both(device, |c| &c.receive_ip_packet);
3504    trace!("receive_ip_packet({device:?})");
3505
3506    let packet: Ipv4Packet<_> = match try_parse_ip_packet!(buffer) {
3507        Ok(packet) => packet,
3508        Err(ParseError::Format)
3509        | Err(ParseError::Checksum)
3510        | Err(ParseError::NotSupported)
3511        | Err(ParseError::NotExpected) => {
3512            core_ctx.increment_both(device, |c| &c.unparsable_packet);
3513            return;
3514        }
3515    };
3516
3517    // We verify these properties later by actually creating the corresponding
3518    // witness types after the INGRESS filtering hook, but we keep these checks
3519    // here as an optimization to return early and save some work.
3520    if packet.src_ipv4().is_none() {
3521        debug!(
3522            "receive_ipv4_packet: received packet from invalid source {}; dropping",
3523            packet.src_ip()
3524        );
3525        core_ctx.increment_both(device, |c| &c.invalid_source);
3526        return;
3527    };
3528    if !packet.dst_ip().is_specified() {
3529        core_ctx.increment_both(device, |c| &c.unspecified_destination);
3530        debug!("receive_ipv4_packet: Received packet with unspecified destination IP; dropping");
3531        return;
3532    };
3533
3534    // Reassemble all packets before local delivery or forwarding. Reassembly
3535    // before forwarding is not RFC-compliant, but it's the easiest way to
3536    // ensure that fragments are filtered properly. Linux does this and it
3537    // doesn't seem to create major problems.
3538    //
3539    // TODO(https://fxbug.dev/345814518): Forward fragments without reassembly.
3540    //
3541    // Note, the `process_fragment` function could panic if the packet does not
3542    // have fragment data. However, we are guaranteed that it will not panic
3543    // because the fragment data is in the fixed header so it is always present
3544    // (even if the fragment data has values that implies that the packet is not
3545    // fragmented).
3546    let mut packet = match process_fragment(core_ctx, bindings_ctx, device, packet) {
3547        ProcessFragmentResult::Done => return,
3548        ProcessFragmentResult::NotNeeded(packet) => packet,
3549        ProcessFragmentResult::Reassembled(buf) => {
3550            let buf = Buf::new(buf, ..);
3551            buffer = packet::Either::B(buf);
3552
3553            match buffer.parse_mut() {
3554                Ok(packet) => packet,
3555                Err(err) => {
3556                    core_ctx.increment_both(device, |c| &c.fragment_reassembly_error);
3557                    debug!("receive_ip_packet: fragmented, failed to reassemble: {:?}", err);
3558                    return;
3559                }
3560            }
3561        }
3562    };
3563
3564    // TODO(ghanan): Act upon options.
3565
3566    let mut packet_metadata = IpLayerPacketMetadata::from_device_ip_layer_metadata(
3567        core_ctx,
3568        device,
3569        device_ip_layer_metadata,
3570    );
3571    let mut filter = core_ctx.filter_handler();
3572    match filter.ingress_hook(bindings_ctx, &mut packet, device, &mut packet_metadata) {
3573        filter::Verdict::Proceed(filter::Accept) => {}
3574        filter::Verdict::Stop(filter::IngressStopReason::Drop) => {
3575            packet_metadata.acknowledge_drop();
3576            return;
3577        }
3578        filter::Verdict::Stop(filter::IngressStopReason::TransparentLocalDelivery {
3579            addr,
3580            port,
3581        }) => {
3582            // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
3583            // we need to provide to the packet dispatch function.
3584            drop(filter);
3585
3586            let Some(addr) = SpecifiedAddr::new(addr) else {
3587                core_ctx.increment_both(device, |c| &c.unspecified_destination);
3588                debug!("cannot perform transparent delivery to unspecified destination; dropping");
3589                return;
3590            };
3591
3592            let receive_meta = ReceiveIpPacketMeta {
3593                // It's possible that the packet was actually sent to a
3594                // broadcast address, but it doesn't matter here since it's
3595                // being delivered to a transparent proxy.
3596                broadcast: None,
3597                transparent_override: Some(TransparentLocalDelivery { addr, port }),
3598            };
3599
3600            // Short-circuit the routing process and override local demux, providing a local
3601            // address and port to which the packet should be transparently delivered at the
3602            // transport layer.
3603            dispatch_receive_ipv4_packet(
3604                core_ctx,
3605                bindings_ctx,
3606                device,
3607                frame_dst,
3608                packet,
3609                packet_metadata,
3610                receive_meta,
3611            )
3612            .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
3613            return;
3614        }
3615    }
3616    // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
3617    // we need below.
3618    drop(filter);
3619
3620    let Some(src_ip) = packet.src_ipv4() else {
3621        core_ctx.increment_both(device, |c| &c.invalid_source);
3622        debug!(
3623            "receive_ipv4_packet: received packet from invalid source {}; dropping",
3624            packet.src_ip()
3625        );
3626        return;
3627    };
3628
3629    let action = receive_ipv4_packet_action(
3630        core_ctx,
3631        bindings_ctx,
3632        device,
3633        &packet,
3634        frame_dst,
3635        &packet_metadata.marks,
3636    );
3637    match action {
3638        ReceivePacketAction::MulticastForward { targets, address_status, dst_ip } => {
3639            // TOOD(https://fxbug.dev/364242513): Support connection tracking of
3640            // the multiplexed flows created by multicast forwarding. Here, we
3641            // use the existing metadata for the first action taken, and then
3642            // a default instance for each subsequent action. The first action
3643            // will populate the conntrack table with an entry, which will then
3644            // be used by all subsequent forwards.
3645            let mut packet_metadata = Some(packet_metadata);
3646            for MulticastRouteTarget { output_interface, min_ttl } in targets.as_ref() {
3647                clone_packet_for_mcast_forwarding! {
3648                    let (copy_of_data, copy_of_buffer, copy_of_packet) = packet
3649                };
3650                determine_ip_packet_forwarding_action::<Ipv4, _, _>(
3651                    core_ctx,
3652                    copy_of_packet,
3653                    packet_metadata.take().unwrap_or_default(),
3654                    Some(*min_ttl),
3655                    device,
3656                    &output_interface,
3657                    IpPacketDestination::from_addr(dst_ip),
3658                    frame_dst,
3659                    src_ip,
3660                    dst_ip,
3661                )
3662                .perform_action_with_buffer(core_ctx, bindings_ctx, copy_of_buffer);
3663            }
3664
3665            // If we also have an interest in the packet, deliver it locally.
3666            if let Some(address_status) = address_status {
3667                let receive_meta = ReceiveIpPacketMeta {
3668                    broadcast: address_status.to_broadcast_marker(),
3669                    transparent_override: None,
3670                };
3671                dispatch_receive_ipv4_packet(
3672                    core_ctx,
3673                    bindings_ctx,
3674                    device,
3675                    frame_dst,
3676                    packet,
3677                    packet_metadata.take().unwrap_or_default(),
3678                    receive_meta,
3679                )
3680                .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
3681            }
3682        }
3683        ReceivePacketAction::Deliver { address_status, internal_forwarding } => {
3684            // NB: when performing internal forwarding, hit the
3685            // forwarding hook.
3686            match internal_forwarding {
3687                InternalForwarding::Used(outbound_device) => {
3688                    core_ctx.increment_both(device, |c| &c.forward);
3689                    match core_ctx.filter_handler().forwarding_hook(
3690                        &mut packet,
3691                        device,
3692                        &outbound_device,
3693                        &mut packet_metadata,
3694                    ) {
3695                        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
3696                            packet_metadata.acknowledge_drop();
3697                            return;
3698                        }
3699                        filter::Verdict::Stop(filter::DropOrReject::Reject(_reject_type)) => {
3700                            // TODO(https://fxbug.dev/466098884): Send reject packet.
3701                            packet_metadata.acknowledge_drop();
3702                            return;
3703                        }
3704                        filter::Verdict::Proceed(filter::Accept) => {}
3705                    }
3706                }
3707                InternalForwarding::NotUsed => {}
3708            }
3709
3710            let receive_meta = ReceiveIpPacketMeta {
3711                broadcast: address_status.to_broadcast_marker(),
3712                transparent_override: None,
3713            };
3714            dispatch_receive_ipv4_packet(
3715                core_ctx,
3716                bindings_ctx,
3717                device,
3718                frame_dst,
3719                packet,
3720                packet_metadata,
3721                receive_meta,
3722            )
3723            .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
3724        }
3725        ReceivePacketAction::Forward {
3726            original_dst,
3727            dst: Destination { device: dst_device, next_hop },
3728        } => {
3729            determine_ip_packet_forwarding_action::<Ipv4, _, _>(
3730                core_ctx,
3731                packet,
3732                packet_metadata,
3733                None,
3734                device,
3735                &dst_device,
3736                IpPacketDestination::from_next_hop(next_hop, original_dst),
3737                frame_dst,
3738                src_ip,
3739                original_dst,
3740            )
3741            .perform_action_with_buffer(core_ctx, bindings_ctx, buffer);
3742        }
3743        ReceivePacketAction::SendNoRouteToDest { dst: dst_ip } => {
3744            core_ctx.increment_both(device, |c| &c.no_route_to_host);
3745            debug!("received IPv4 packet with no known route to destination {}", dst_ip);
3746
3747            let marks = packet_metadata.marks;
3748            packet_metadata.acknowledge_drop();
3749
3750            if let Some(sender) = IcmpErrorSender::new(
3751                core_ctx,
3752                Icmpv4Error::NetUnreachable,
3753                &packet,
3754                frame_dst,
3755                device,
3756                marks,
3757            ) {
3758                sender.send(core_ctx, bindings_ctx, buffer);
3759            }
3760        }
3761        ReceivePacketAction::Drop { reason } => {
3762            let src_ip = packet.src_ip();
3763            let dst_ip = packet.dst_ip();
3764            packet_metadata.acknowledge_drop();
3765            core_ctx.increment_both(device, |c| &c.dropped);
3766            debug!(
3767                "receive_ipv4_packet: dropping packet from {src_ip} to {dst_ip} received on \
3768                {device:?}: {reason:?}",
3769            );
3770        }
3771    }
3772}
3773
3774fn handle_ipv6_parse_error<BC, B, CC>(
3775    core_ctx: &mut CC,
3776    bindings_ctx: &mut BC,
3777    device: &CC::DeviceId,
3778    frame_dst: Option<FrameDestination>,
3779    device_ip_layer_metadata: DeviceIpLayerMetadata<BC>,
3780    mut buffer: B,
3781    error: Ipv6ParseError,
3782) where
3783    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
3784    B: BufferMut,
3785    CC: IpLayerIngressContext<Ipv6, BC>,
3786{
3787    // Conditionally send an ICMP response if we encountered a parameter
3788    // problem error when parsing an IPv6 packet. Note, we do not always
3789    // send back an ICMP response as it can be used as an attack vector for
3790    // DDoS attacks. We only send back an ICMP response if the RFC requires
3791    // that we MUST send one, as noted by `must_send_icmp` and `action`.
3792    let Ipv6ParseError::ParameterProblem { src_ip, dst_ip, code, pointer, must_send_icmp, action } =
3793        error
3794    else {
3795        core_ctx.increment_both(device, |c| &c.unparsable_packet);
3796        debug!("receive_ipv6_packet: Failed to parse IPv6 packet: {:?}", error);
3797        return;
3798    };
3799    if !must_send_icmp || !action.should_send_icmp(&dst_ip) {
3800        return;
3801    }
3802    core_ctx.increment_both(device, |c| &c.parameter_problem);
3803    let dst_ip = match SocketIpAddr::new(dst_ip) {
3804        Some(ip) => ip,
3805        None => {
3806            core_ctx.increment_both(device, |c| &c.unspecified_destination);
3807            debug!("receive_ipv6_packet: Dropping packet with unspecified destination IP");
3808            return;
3809        }
3810    };
3811
3812    let src_ip = match Ipv6SourceAddr::new(src_ip) {
3813        None => {
3814            core_ctx.increment_both(device, |c| &c.invalid_source);
3815            return;
3816        }
3817        Some(Ipv6SourceAddr::Unspecified) => {
3818            core_ctx.increment_both(device, |c| &c.unspecified_source);
3819            return;
3820        }
3821        Some(Ipv6SourceAddr::Unicast(src_ip)) => {
3822            SocketIpAddr::new_from_ipv6_non_mapped_unicast(src_ip)
3823        }
3824    };
3825
3826    // Try raw parser to find main packet protocol and body offset. If this
3827    // fails as well then we can't send an ICMP error message.
3828    let raw_packet: Ipv6PacketRaw<_> = match try_parse_ip_packet!(buffer) {
3829        Ok(packet) => packet,
3830        Err(error) => {
3831            core_ctx.increment_both(device, |c| &c.unparsable_packet);
3832            debug!("receive_ipv6_packet: Failed to parse IPv6 packet: {:?}", error);
3833            return;
3834        }
3835    };
3836    let proto = match raw_packet.proto() {
3837        Ok(proto) => proto,
3838        Err(error) => {
3839            core_ctx.increment_both(device, |c| &c.unparsable_packet);
3840            debug!("receive_ipv6_packet: Failed to get protocol from IPv6 packet: {:?}", error);
3841            return;
3842        }
3843    };
3844    let parse_metadata = raw_packet.parse_metadata();
3845    let header_len = parse_metadata.header_len();
3846    buffer.undo_parse(parse_metadata);
3847
3848    let err = Icmpv6Error::ParameterProblem {
3849        code,
3850        pointer,
3851        allow_dst_multicast: action.should_send_icmp_to_multicast(),
3852    };
3853
3854    IcmpErrorHandler::<Ipv6, _>::send_icmp_error_message(
3855        core_ctx,
3856        bindings_ctx,
3857        Some(device),
3858        frame_dst,
3859        src_ip,
3860        dst_ip,
3861        buffer,
3862        err,
3863        header_len,
3864        proto,
3865        &device_ip_layer_metadata.marks,
3866    );
3867}
3868
/// Receive an IPv6 packet from a device.
///
/// `frame_dst` specifies how this packet was received; see [`FrameDestination`]
/// for options.
///
/// Processing proceeds in the following order:
///
/// 1. Return immediately if `device` is not IP-enabled.
/// 2. Parse `buffer` as an IPv6 packet; parse failures are delegated to
///    `handle_ipv6_parse_error`.
/// 3. Drop early on an invalid source or unspecified destination address.
/// 4. Handle extension headers, reassembling fragments if necessary.
/// 5. Run the INGRESS filtering hook, which may drop the packet or request
///    transparent local delivery.
/// 6. Compute the [`ReceivePacketAction`] via [`receive_ipv6_packet_action`]
///    and carry it out (deliver, forward, multicast-forward, reply with an
///    ICMP error, or drop).
pub fn receive_ipv6_packet<
    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
    B: BufferMut,
    CC: IpLayerIngressContext<Ipv6, BC>,
>(
    core_ctx: &mut CC,
    bindings_ctx: &mut BC,
    device: &CC::DeviceId,
    frame_dst: Option<FrameDestination>,
    device_ip_layer_metadata: DeviceIpLayerMetadata<BC>,
    buffer: B,
) {
    if !core_ctx.is_ip_device_enabled(&device) {
        return;
    }

    // This is required because we may need to process the buffer that was
    // passed in or a reassembled one, which have different types.
    let mut buffer: packet::Either<B, Buf<Vec<u8>>> = packet::Either::A(buffer);

    core_ctx.increment_both(device, |c| &c.receive_ip_packet);
    trace!("receive_ipv6_packet({:?})", device);

    let packet: Ipv6Packet<_> = match try_parse_ip_packet!(buffer) {
        Ok(packet) => packet,
        Err(error) => {
            // Parsing failed; the helper decides whether an ICMP error must
            // be sent in response.
            handle_ipv6_parse_error(
                core_ctx,
                bindings_ctx,
                device,
                frame_dst,
                device_ip_layer_metadata,
                buffer,
                error,
            );
            return;
        }
    };

    trace!("receive_ipv6_packet: parsed packet: {:?}", packet);

    // TODO(ghanan): Act upon extension headers.

    // We verify these properties later by actually creating the corresponding
    // witness types after the INGRESS filtering hook, but we keep these checks
    // here as an optimization to return early and save some work.
    if packet.src_ipv6().is_none() {
        debug!(
            "receive_ipv6_packet: received packet from invalid source {}; dropping",
            packet.src_ip()
        );
        core_ctx.increment_both(device, |c| &c.invalid_source);
        return;
    };
    if !packet.dst_ip().is_specified() {
        core_ctx.increment_both(device, |c| &c.unspecified_destination);
        debug!("receive_ipv6_packet: Received packet with unspecified destination IP; dropping");
        return;
    };

    // Reassemble all packets before local delivery or forwarding. Reassembly
    // before forwarding is not RFC-compliant, but it's the easiest way to
    // ensure that fragments are filtered properly. Linux does this and it
    // doesn't seem to create major problems.
    //
    // TODO(https://fxbug.dev/345814518): Forward fragments without reassembly.
    //
    // delivery_extension_header_action is used to prevent looking at the
    // extension headers twice when a non-fragmented packet is delivered
    // locally.
    let (mut packet, delivery_extension_header_action) =
        match ipv6::handle_extension_headers(core_ctx, device, frame_dst, &packet, true) {
            Ipv6PacketAction::_Discard => {
                core_ctx.increment_both(device, |c| &c.version_rx.extension_header_discard);
                trace!("receive_ipv6_packet: handled IPv6 extension headers: discarding packet");
                return;
            }
            Ipv6PacketAction::Continue => {
                trace!("receive_ipv6_packet: handled IPv6 extension headers: dispatching packet");
                (packet, Some(Ipv6PacketAction::Continue))
            }
            Ipv6PacketAction::ProcessFragment => {
                trace!(
                    "receive_ipv6_packet: handled IPv6 extension headers: handling \
                    fragmented packet"
                );

                // Note, `IpPacketFragmentCache::process_fragment`
                // could panic if the packet does not have fragment data.
                // However, we are guaranteed that it will not panic for an
                // IPv6 packet because the fragment data is in an (optional)
                // fragment extension header which we attempt to handle by
                // calling `ipv6::handle_extension_headers`. We will only
                // end up here if its return value is
                // `Ipv6PacketAction::ProcessFragment` which is only
                // possible when the packet has the fragment extension
                // header (even if the fragment data has values that implies
                // that the packet is not fragmented).
                match process_fragment(core_ctx, bindings_ctx, device, packet) {
                    ProcessFragmentResult::Done => return,
                    ProcessFragmentResult::NotNeeded(packet) => {
                        // While strange, it's possible for there to be a Fragment
                        // header that says the packet doesn't need defragmentation.
                        // As per RFC 8200 4.5:
                        //
                        //   If the fragment is a whole datagram (that is, both the
                        //   Fragment Offset field and the M flag are zero), then it
                        //   does not need any further reassembly and should be
                        //   processed as a fully reassembled packet (i.e., updating
                        //   Next Header, adjust Payload Length, removing the
                        //   Fragment header, etc.).
                        //
                        // In this case, we're not technically reassembling the
                        // packet, since, per the RFC, that would mean removing the
                        // Fragment header.
                        (packet, Some(Ipv6PacketAction::Continue))
                    }
                    ProcessFragmentResult::Reassembled(buf) => {
                        // Switch `buffer` over to the reassembled bytes and
                        // parse them as a fresh packet. No extension header
                        // action is cached for it (`None`), so the headers are
                        // evaluated again on local delivery.
                        let buf = Buf::new(buf, ..);
                        buffer = packet::Either::B(buf);

                        match buffer.parse_mut() {
                            Ok(packet) => (packet, None),
                            Err(err) => {
                                core_ctx.increment_both(device, |c| &c.fragment_reassembly_error);
                                debug!(
                                    "receive_ip_packet: fragmented, failed to reassemble: {:?}",
                                    err
                                );
                                return;
                            }
                        }
                    }
                }
            }
        };

    let mut packet_metadata = IpLayerPacketMetadata::from_device_ip_layer_metadata(
        core_ctx,
        device,
        device_ip_layer_metadata,
    );
    let mut filter = core_ctx.filter_handler();

    match filter.ingress_hook(bindings_ctx, &mut packet, device, &mut packet_metadata) {
        filter::Verdict::Proceed(filter::Accept) => {}
        filter::Verdict::Stop(filter::IngressStopReason::Drop) => {
            packet_metadata.acknowledge_drop();
            return;
        }
        filter::Verdict::Stop(filter::IngressStopReason::TransparentLocalDelivery {
            addr,
            port,
        }) => {
            // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
            // we need to provide to the packet dispatch function.
            drop(filter);

            let Some(addr) = SpecifiedAddr::new(addr) else {
                core_ctx.increment_both(device, |c| &c.unspecified_destination);
                debug!("cannot perform transparent delivery to unspecified destination; dropping");
                return;
            };

            let receive_meta = ReceiveIpPacketMeta {
                broadcast: None,
                transparent_override: Some(TransparentLocalDelivery { addr, port }),
            };

            // Short-circuit the routing process and override local demux, providing a local
            // address and port to which the packet should be transparently delivered at the
            // transport layer.
            dispatch_receive_ipv6_packet(
                core_ctx,
                bindings_ctx,
                device,
                frame_dst,
                packet,
                packet_metadata,
                receive_meta,
            )
            .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
            return;
        }
    }
    // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
    // we need below.
    drop(filter);

    // Re-validate the source address now that the INGRESS hook has run (it was
    // given mutable access to `packet`).
    let Some(src_ip) = packet.src_ipv6() else {
        debug!(
            "receive_ipv6_packet: received packet from invalid source {}; dropping",
            packet.src_ip()
        );
        core_ctx.increment_both(device, |c| &c.invalid_source);
        return;
    };

    match receive_ipv6_packet_action(
        core_ctx,
        bindings_ctx,
        device,
        &packet,
        frame_dst,
        &packet_metadata.marks,
    ) {
        ReceivePacketAction::MulticastForward { targets, address_status, dst_ip } => {
            // TODO(https://fxbug.dev/364242513): Support connection tracking of
            // the multiplexed flows created by multicast forwarding. Here, we
            // use the existing metadata for the first action taken, and then
            // a default instance for each subsequent action. The first action
            // will populate the conntrack table with an entry, which will then
            // be used by all subsequent forwards.
            let mut packet_metadata = Some(packet_metadata);
            for MulticastRouteTarget { output_interface, min_ttl } in targets.as_ref() {
                clone_packet_for_mcast_forwarding! {
                    let (copy_of_data, copy_of_buffer, copy_of_packet) = packet
                };
                determine_ip_packet_forwarding_action::<Ipv6, _, _>(
                    core_ctx,
                    copy_of_packet,
                    packet_metadata.take().unwrap_or_default(),
                    Some(*min_ttl),
                    device,
                    &output_interface,
                    IpPacketDestination::from_addr(dst_ip),
                    frame_dst,
                    src_ip,
                    dst_ip,
                )
                .perform_action_with_buffer(core_ctx, bindings_ctx, copy_of_buffer);
            }

            // If we also have an interest in the packet, deliver it locally.
            if let Some(_) = address_status {
                let receive_meta =
                    ReceiveIpPacketMeta { broadcast: None, transparent_override: None };

                dispatch_receive_ipv6_packet(
                    core_ctx,
                    bindings_ctx,
                    device,
                    frame_dst,
                    packet,
                    packet_metadata.take().unwrap_or_default(),
                    receive_meta,
                )
                .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
            }
        }
        ReceivePacketAction::Deliver { address_status: _, internal_forwarding } => {
            trace!("receive_ipv6_packet: delivering locally");

            // Reuse the extension header verdict computed above when one was
            // cached; otherwise (reassembled packet) evaluate the headers now.
            let action = if let Some(action) = delivery_extension_header_action {
                action
            } else {
                ipv6::handle_extension_headers(core_ctx, device, frame_dst, &packet, true)
            };
            match action {
                Ipv6PacketAction::_Discard => {
                    core_ctx.increment_both(device, |c| &c.version_rx.extension_header_discard);
                    trace!(
                        "receive_ipv6_packet: handled IPv6 extension headers: discarding packet"
                    );
                    packet_metadata.acknowledge_drop();
                }
                Ipv6PacketAction::Continue => {
                    trace!(
                        "receive_ipv6_packet: handled IPv6 extension headers: dispatching packet"
                    );

                    // NB: when performing internal forwarding, hit the
                    // forwarding hook.
                    match internal_forwarding {
                        InternalForwarding::Used(outbound_device) => {
                            core_ctx.increment_both(device, |c| &c.forward);
                            match core_ctx.filter_handler().forwarding_hook(
                                &mut packet,
                                device,
                                &outbound_device,
                                &mut packet_metadata,
                            ) {
                                filter::Verdict::Stop(filter::DropOrReject::Drop) => {
                                    packet_metadata.acknowledge_drop();
                                    return;
                                }
                                filter::Verdict::Stop(filter::DropOrReject::Reject(
                                    _reject_type,
                                )) => {
                                    // TODO(https://fxbug.dev/466098884): Send reject packet.
                                    packet_metadata.acknowledge_drop();
                                    return;
                                }
                                filter::Verdict::Proceed(filter::Accept) => {}
                            }
                        }
                        InternalForwarding::NotUsed => {}
                    }

                    let meta = ReceiveIpPacketMeta { broadcast: None, transparent_override: None };
                    dispatch_receive_ipv6_packet(
                        core_ctx,
                        bindings_ctx,
                        device,
                        frame_dst,
                        packet,
                        packet_metadata,
                        meta,
                    )
                    .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
                }
                Ipv6PacketAction::ProcessFragment => {
                    debug!("receive_ipv6_packet: found fragment header after reassembly; dropping");
                    packet_metadata.acknowledge_drop();
                }
            }
        }
        ReceivePacketAction::Forward {
            original_dst,
            dst: Destination { device: dst_device, next_hop },
        } => {
            determine_ip_packet_forwarding_action::<Ipv6, _, _>(
                core_ctx,
                packet,
                packet_metadata,
                None,
                device,
                &dst_device,
                IpPacketDestination::from_next_hop(next_hop, original_dst),
                frame_dst,
                src_ip,
                original_dst,
            )
            .perform_action_with_buffer(core_ctx, bindings_ctx, buffer);
        }
        ReceivePacketAction::SendNoRouteToDest { dst: dst_ip } => {
            core_ctx.increment_both(device, |c| &c.no_route_to_host);
            let (_, _, proto, meta): (Ipv6Addr, Ipv6Addr, _, _) =
                drop_packet_and_undo_parse!(packet, buffer);
            debug!("received IPv6 packet with no known route to destination {}", dst_ip);
            // Copy the marks out before acknowledging the drop; they are
            // still needed for the ICMP error sent below.
            let marks = packet_metadata.marks;
            packet_metadata.acknowledge_drop();

            let src_ip = match src_ip {
                Ipv6SourceAddr::Unspecified => {
                    core_ctx.increment_both(device, |c| &c.unspecified_source);
                    return;
                }
                Ipv6SourceAddr::Unicast(src_ip) => {
                    SocketIpAddr::new_from_ipv6_non_mapped_unicast(src_ip)
                }
            };

            IcmpErrorHandler::<Ipv6, _>::send_icmp_error_message(
                core_ctx,
                bindings_ctx,
                Some(device),
                frame_dst,
                src_ip,
                SocketIpAddr::new_from_witness(dst_ip),
                buffer,
                Icmpv6Error::NetUnreachable,
                meta.header_len(),
                proto,
                &marks,
            );
        }
        ReceivePacketAction::Drop { reason } => {
            core_ctx.increment_both(device, |c| &c.dropped);
            let src_ip = packet.src_ip();
            let dst_ip = packet.dst_ip();
            packet_metadata.acknowledge_drop();
            debug!(
                "receive_ipv6_packet: dropping packet from {src_ip} to {dst_ip} received on \
                {device:?}: {reason:?}",
            );
        }
    }
}
4252
/// The action to take in order to process a received IP packet.
///
/// Values of this type are produced by [`receive_ipv4_packet_action`] and
/// [`receive_ipv6_packet_action`].
#[derive(Debug, PartialEq)]
pub enum ReceivePacketAction<I: BroadcastIpExt + IpLayerIpExt, DeviceId: StrongDeviceIdentifier> {
    /// Deliver the packet locally.
    Deliver {
        /// Status of the receiving IP address.
        address_status: I::AddressStatus,
        /// `InternalForwarding::Used(d)` if we're delivering the packet as a
        /// Weak Host performing internal forwarding via output device `d`.
        internal_forwarding: InternalForwarding<DeviceId>,
    },

    /// Forward the packet to the given destination.
    Forward {
        /// The original destination IP address of the packet.
        original_dst: SpecifiedAddr<I::Addr>,
        /// The destination that the packet should be forwarded to.
        dst: Destination<I::Addr, DeviceId>,
    },

    /// A multicast packet that should be forwarded (& optional local delivery).
    ///
    /// The packet should be forwarded to each of the given targets. This case
    /// is only returned when the packet is eligible for multicast forwarding;
    /// `Self::Deliver` is used for packets that are ineligible (either because
    /// multicast forwarding is disabled, or because there are no applicable
    /// multicast routes with which to forward the packet).
    MulticastForward {
        /// The multicast targets to forward the packet via.
        targets: MulticastRouteTargets<DeviceId>,
        /// Some if the host is a member of the multicast group and the packet
        /// should be delivered locally (in addition to forwarding).
        address_status: Option<I::AddressStatus>,
        /// The multicast address the packet should be forwarded to.
        dst_ip: SpecifiedAddr<I::Addr>,
    },

    /// Send a Destination Unreachable ICMP error message to the packet's sender
    /// and drop the packet.
    ///
    /// For ICMPv4, use the code "net unreachable". For ICMPv6, use the code "no
    /// route to destination".
    SendNoRouteToDest {
        /// The destination IP Address to which there was no route.
        dst: NonMappedAddr<SpecifiedAddr<I::Addr>>,
    },

    /// Silently drop the packet.
    ///
    /// `reason` describes why the packet was dropped; see [`DropReason`] for
    /// the possible values.
    #[allow(missing_docs)]
    Drop { reason: DropReason },
}
4306
4307// It's possible that there is more than one device with the address
4308// present. Prefer any address status over `UnicastTentative`.
4309fn choose_highest_priority_address_status<I: IpLayerIpExt>(
4310    address_statuses: impl Iterator<Item = I::AddressStatus>,
4311) -> Option<I::AddressStatus> {
4312    address_statuses.max_by_key(|status| {
4313        #[derive(GenericOverIp)]
4314        #[generic_over_ip(I, Ip)]
4315        struct Wrap<'a, I: IpLayerIpExt>(&'a I::AddressStatus);
4316        I::map_ip_in(
4317            Wrap(status),
4318            |Wrap(v4_status)| match v4_status {
4319                Ipv4PresentAddressStatus::UnicastTentative => 0,
4320                _ => 1,
4321            },
4322            |Wrap(v6_status)| match v6_status {
4323                Ipv6PresentAddressStatus::UnicastTentative => 0,
4324                _ => 1,
4325            },
4326        )
4327    })
4328}
4329
/// The reason a received IP packet is dropped.
///
/// Carried by [`ReceivePacketAction::Drop`].
#[derive(Debug, PartialEq)]
pub enum DropReason {
    /// Remote packet destined to tentative address.
    Tentative,
    /// Remote packet destined to the unspecified address.
    UnspecifiedDestination,
    /// Remote packet with an invalid destination address.
    InvalidDestination,
    /// Cannot forward a packet with unspecified source address.
    ForwardUnspecifiedSource,
    /// Cannot forward a packet with link-local source or destination address.
    ForwardLinkLocal,
    /// Packet should be forwarded but packet's inbound interface has forwarding
    /// disabled.
    ForwardingDisabledInboundIface,
    /// Remote packet destined to a multicast address that could not be:
    /// * delivered locally (because we are not a member of the multicast
    ///   group), or
    /// * forwarded (either because multicast forwarding is disabled, or no
    ///   applicable multicast route has been installed).
    MulticastNoInterest,
}
4353
4354/// Computes the action to take in order to process a received IPv4 packet.
4355pub fn receive_ipv4_packet_action<BC, CC, B>(
4356    core_ctx: &mut CC,
4357    bindings_ctx: &mut BC,
4358    device: &CC::DeviceId,
4359    packet: &Ipv4Packet<B>,
4360    frame_dst: Option<FrameDestination>,
4361    marks: &Marks,
4362) -> ReceivePacketAction<Ipv4, CC::DeviceId>
4363where
4364    BC: IpLayerBindingsContext<Ipv4, CC::DeviceId>,
4365    CC: IpLayerContext<Ipv4, BC>,
4366    B: SplitByteSlice,
4367{
4368    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
4369        core_ctx.increment_both(device, |c| &c.unspecified_destination);
4370        return ReceivePacketAction::Drop { reason: DropReason::UnspecifiedDestination };
4371    };
4372
4373    // If the packet arrived at the loopback interface, check if any local
4374    // interface has the destination address assigned. This effectively lets
4375    // the loopback interface operate as a weak host for incoming packets.
4376    //
4377    // Note that (as of writing) the stack sends all locally destined traffic to
4378    // the loopback interface so we need this hack to allow the stack to accept
4379    // packets that arrive at the loopback interface (after being looped back)
4380    // but destined to an address that is assigned to another local interface.
4381    //
4382    // TODO(https://fxbug.dev/42175703): This should instead be controlled by the
4383    // routing table.
4384
4385    let highest_priority = if device.is_loopback() {
4386        core_ctx.with_address_statuses(dst_ip, |it| {
4387            let it = it.map(|(_device, status)| status);
4388            choose_highest_priority_address_status::<Ipv4>(it)
4389        })
4390    } else {
4391        core_ctx.address_status_for_device(dst_ip, device).into_present()
4392    };
4393    match highest_priority {
4394        Some(
4395            address_status @ (Ipv4PresentAddressStatus::UnicastAssigned
4396            | Ipv4PresentAddressStatus::LoopbackSubnet),
4397        ) => {
4398            core_ctx.increment_both(device, |c| &c.deliver_unicast);
4399            ReceivePacketAction::Deliver {
4400                address_status,
4401                internal_forwarding: InternalForwarding::NotUsed,
4402            }
4403        }
4404        Some(Ipv4PresentAddressStatus::UnicastTentative) => {
4405            // If the destination address is tentative (which implies that
4406            // we are still performing Duplicate Address Detection on
4407            // it), then we don't consider the address "assigned to an
4408            // interface", and so we drop packets instead of delivering them
4409            // locally.
4410            core_ctx.increment_both(device, |c| &c.drop_for_tentative);
4411            ReceivePacketAction::Drop { reason: DropReason::Tentative }
4412        }
4413
4414        Some(address_status @ Ipv4PresentAddressStatus::Multicast) => {
4415            receive_ip_multicast_packet_action(
4416                core_ctx,
4417                bindings_ctx,
4418                device,
4419                packet,
4420                Some(address_status),
4421                dst_ip,
4422                frame_dst,
4423            )
4424        }
4425        Some(
4426            address_status @ (Ipv4PresentAddressStatus::LimitedBroadcast
4427            | Ipv4PresentAddressStatus::SubnetBroadcast),
4428        ) => {
4429            core_ctx.increment_both(device, |c| &c.version_rx.deliver_broadcast);
4430            ReceivePacketAction::Deliver {
4431                address_status,
4432                internal_forwarding: InternalForwarding::NotUsed,
4433            }
4434        }
4435        None => receive_ip_packet_action_common::<Ipv4, _, _, _>(
4436            core_ctx,
4437            bindings_ctx,
4438            dst_ip,
4439            device,
4440            packet,
4441            frame_dst,
4442            marks,
4443        ),
4444    }
4445}
4446
4447/// Computes the action to take in order to process a received IPv6 packet.
4448pub fn receive_ipv6_packet_action<BC, CC, B>(
4449    core_ctx: &mut CC,
4450    bindings_ctx: &mut BC,
4451    device: &CC::DeviceId,
4452    packet: &Ipv6Packet<B>,
4453    frame_dst: Option<FrameDestination>,
4454    marks: &Marks,
4455) -> ReceivePacketAction<Ipv6, CC::DeviceId>
4456where
4457    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
4458    CC: IpLayerContext<Ipv6, BC>,
4459    B: SplitByteSlice,
4460{
4461    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
4462        core_ctx.increment_both(device, |c| &c.unspecified_destination);
4463        return ReceivePacketAction::Drop { reason: DropReason::UnspecifiedDestination };
4464    };
4465
4466    // If the packet arrived at the loopback interface, check if any local
4467    // interface has the destination address assigned. This effectively lets
4468    // the loopback interface operate as a weak host for incoming packets.
4469    //
4470    // Note that (as of writing) the stack sends all locally destined traffic to
4471    // the loopback interface so we need this hack to allow the stack to accept
4472    // packets that arrive at the loopback interface (after being looped back)
4473    // but destined to an address that is assigned to another local interface.
4474    //
4475    // TODO(https://fxbug.dev/42175703): This should instead be controlled by the
4476    // routing table.
4477
4478    let highest_priority = if device.is_loopback() {
4479        core_ctx.with_address_statuses(dst_ip, |it| {
4480            let it = it.map(|(_device, status)| status);
4481            choose_highest_priority_address_status::<Ipv6>(it)
4482        })
4483    } else {
4484        core_ctx.address_status_for_device(dst_ip, device).into_present()
4485    };
4486    match highest_priority {
4487        Some(address_status @ Ipv6PresentAddressStatus::Multicast) => {
4488            receive_ip_multicast_packet_action(
4489                core_ctx,
4490                bindings_ctx,
4491                device,
4492                packet,
4493                Some(address_status),
4494                dst_ip,
4495                frame_dst,
4496            )
4497        }
4498        Some(address_status @ Ipv6PresentAddressStatus::UnicastAssigned) => {
4499            core_ctx.increment_both(device, |c| &c.deliver_unicast);
4500            ReceivePacketAction::Deliver {
4501                address_status,
4502                internal_forwarding: InternalForwarding::NotUsed,
4503            }
4504        }
4505        Some(Ipv6PresentAddressStatus::UnicastTentative) => {
4506            // If the destination address is tentative (which implies that
4507            // we are still performing NDP's Duplicate Address Detection on
4508            // it), then we don't consider the address "assigned to an
4509            // interface", and so we drop packets instead of delivering them
4510            // locally.
4511            //
4512            // As per RFC 4862 section 5.4:
4513            //
4514            //   An address on which the Duplicate Address Detection
4515            //   procedure is applied is said to be tentative until the
4516            //   procedure has completed successfully. A tentative address
4517            //   is not considered "assigned to an interface" in the
4518            //   traditional sense.  That is, the interface must accept
4519            //   Neighbor Solicitation and Advertisement messages containing
4520            //   the tentative address in the Target Address field, but
4521            //   processes such packets differently from those whose Target
4522            //   Address matches an address assigned to the interface. Other
4523            //   packets addressed to the tentative address should be
4524            //   silently discarded. Note that the "other packets" include
4525            //   Neighbor Solicitation and Advertisement messages that have
4526            //   the tentative (i.e., unicast) address as the IP destination
4527            //   address and contain the tentative address in the Target
4528            //   Address field.  Such a case should not happen in normal
4529            //   operation, though, since these messages are multicasted in
4530            //   the Duplicate Address Detection procedure.
4531            //
4532            // That is, we accept no packets destined to a tentative
4533            // address. NS and NA packets should be addressed to a multicast
4534            // address that we would have joined during DAD so that we can
4535            // receive those packets.
4536            core_ctx.increment_both(device, |c| &c.drop_for_tentative);
4537            ReceivePacketAction::Drop { reason: DropReason::Tentative }
4538        }
4539        None => receive_ip_packet_action_common::<Ipv6, _, _, _>(
4540            core_ctx,
4541            bindings_ctx,
4542            dst_ip,
4543            device,
4544            packet,
4545            frame_dst,
4546            marks,
4547        ),
4548    }
4549}
4550
4551/// Computes the action to take for multicast packets on behalf of
4552/// [`receive_ipv4_packet_action`] and [`receive_ipv6_packet_action`].
4553fn receive_ip_multicast_packet_action<
4554    I: IpLayerIpExt,
4555    B: SplitByteSlice,
4556    BC: IpLayerBindingsContext<I, CC::DeviceId>,
4557    CC: IpLayerContext<I, BC>,
4558>(
4559    core_ctx: &mut CC,
4560    bindings_ctx: &mut BC,
4561    device: &CC::DeviceId,
4562    packet: &I::Packet<B>,
4563    address_status: Option<I::AddressStatus>,
4564    dst_ip: SpecifiedAddr<I::Addr>,
4565    frame_dst: Option<FrameDestination>,
4566) -> ReceivePacketAction<I, CC::DeviceId> {
4567    let targets = multicast_forwarding::lookup_multicast_route_or_stash_packet(
4568        core_ctx,
4569        bindings_ctx,
4570        packet,
4571        device,
4572        frame_dst,
4573    );
4574    match (targets, address_status) {
4575        (Some(targets), address_status) => {
4576            if address_status.is_some() {
4577                core_ctx.increment_both(device, |c| &c.deliver_multicast);
4578            }
4579            ReceivePacketAction::MulticastForward { targets, address_status, dst_ip }
4580        }
4581        (None, Some(address_status)) => {
4582            // If the address was present on the device (e.g. the host is a
4583            // member of the multicast group), fallback to local delivery.
4584            core_ctx.increment_both(device, |c| &c.deliver_multicast);
4585            ReceivePacketAction::Deliver {
4586                address_status,
4587                internal_forwarding: InternalForwarding::NotUsed,
4588            }
4589        }
4590        (None, None) => {
4591            // As per RFC 1122 Section 3.2.2
4592            //   An ICMP error message MUST NOT be sent as the result of
4593            //   receiving:
4594            //   ...
4595            //   * a datagram destined to an IP broadcast or IP multicast
4596            //     address
4597            //
4598            // As such, drop the packet
4599            core_ctx.increment_both(device, |c| &c.multicast_no_interest);
4600            ReceivePacketAction::Drop { reason: DropReason::MulticastNoInterest }
4601        }
4602    }
4603}
4604
4605/// Computes the remaining protocol-agnostic actions on behalf of
4606/// [`receive_ipv4_packet_action`] and [`receive_ipv6_packet_action`].
4607fn receive_ip_packet_action_common<
4608    I: IpLayerIpExt,
4609    B: SplitByteSlice,
4610    BC: IpLayerBindingsContext<I, CC::DeviceId>,
4611    CC: IpLayerContext<I, BC>,
4612>(
4613    core_ctx: &mut CC,
4614    bindings_ctx: &mut BC,
4615    dst_ip: SpecifiedAddr<I::Addr>,
4616    device_id: &CC::DeviceId,
4617    packet: &I::Packet<B>,
4618    frame_dst: Option<FrameDestination>,
4619    marks: &Marks,
4620) -> ReceivePacketAction<I, CC::DeviceId> {
4621    if dst_ip.is_multicast() {
4622        return receive_ip_multicast_packet_action(
4623            core_ctx,
4624            bindings_ctx,
4625            device_id,
4626            packet,
4627            None,
4628            dst_ip,
4629            frame_dst,
4630        );
4631    }
4632
4633    // Don't allow mapped IPv6 addresses.
4634    let Some(dst_ip) = NonMappedAddr::new(dst_ip) else {
4635        return ReceivePacketAction::Drop { reason: DropReason::InvalidDestination };
4636    };
4637
4638    // The packet is not destined locally, so we attempt to forward it.
4639    if !core_ctx.is_device_unicast_forwarding_enabled(device_id) {
4640        // Forwarding is disabled; we are operating only as a host.
4641        //
4642        // For IPv4, per RFC 1122 Section 3.2.1.3, "A host MUST silently discard
4643        // an incoming datagram that is not destined for the host."
4644        //
4645        // For IPv6, per RFC 4443 Section 3.1, the only instance in which a host
4646        // sends an ICMPv6 Destination Unreachable message is when a packet is
4647        // destined to that host but on an unreachable port (Code 4 - "Port
4648        // unreachable"). Since the only sensible error message to send in this
4649        // case is a Destination Unreachable message, we interpret the RFC text
4650        // to mean that, consistent with IPv4's behavior, we should silently
4651        // discard the packet in this case.
4652        core_ctx.increment_both(device_id, |c| &c.forwarding_disabled);
4653        return ReceivePacketAction::Drop { reason: DropReason::ForwardingDisabledInboundIface };
4654    }
4655    // Per https://www.rfc-editor.org/rfc/rfc4291.html#section-2.5.2:
4656    //   An IPv6 packet with a source address of unspecified must never be forwarded by an IPv6
4657    //   router.
4658    // Per https://datatracker.ietf.org/doc/html/rfc1812#section-5.3.7:
4659    //   A router SHOULD NOT forward any packet that has an invalid IP source address or a source
4660    //   address on network 0
4661    let Some(source_address) = SpecifiedAddr::new(packet.src_ip()) else {
4662        return ReceivePacketAction::Drop { reason: DropReason::ForwardUnspecifiedSource };
4663    };
4664
4665    // If forwarding is enabled, allow local delivery if the packet is destined
4666    // for an IP assigned to a different interface.
4667    //
4668    // This enables a weak host model when the Netstack is configured as a
4669    // router. Conceptually, the netstack is forwarding the packet from the
4670    // input device, to the destination IP's device.
4671    if let Some(dst_ip) = NonMulticastAddr::new(dst_ip) {
4672        if let Some((outbound_device, address_status)) =
4673            get_device_with_assigned_address(core_ctx, IpDeviceAddr::new_from_witness(dst_ip))
4674        {
4675            return ReceivePacketAction::Deliver {
4676                address_status,
4677                internal_forwarding: InternalForwarding::Used(outbound_device),
4678            };
4679        }
4680    }
4681
4682    // For IPv4, RFC 3927 Section 2.7 states:
4683    //
4684    //   An IPv4 packet whose source and/or destination address is in the
4685    //   169.254/16 prefix MUST NOT be sent to any router for forwarding, and
4686    //   any network device receiving such a packet MUST NOT forward it,
4687    //   regardless of the TTL in the IPv4 header.
4688    //
4689    // However, to maintain behavioral similarity to both gVisor/Netstack2 and
4690    // Linux, we omit this check.
4691    //
4692    // For IPv6, RFC 4291 Section 2.5.6 states:
4693    //
4694    //   Routers must not forward any packets with Link-Local source or
4695    //   destination addresses to other links.
4696    if I::map_ip_in(
4697        &packet,
4698        |_| false,
4699        |packet| packet.src_ip().is_link_local() || packet.dst_ip().is_link_local(),
4700    ) {
4701        return ReceivePacketAction::Drop { reason: DropReason::ForwardLinkLocal };
4702    }
4703
4704    match lookup_route_table(
4705        core_ctx,
4706        dst_ip.get(),
4707        RuleInput {
4708            packet_origin: PacketOrigin::NonLocal { source_address, incoming_device: device_id },
4709            marks,
4710        },
4711    ) {
4712        Some(dst) => {
4713            core_ctx.increment_both(device_id, |c| &c.forward);
4714            ReceivePacketAction::Forward { original_dst: *dst_ip, dst }
4715        }
4716        None => {
4717            core_ctx.increment_both(device_id, |c| &c.no_route_to_host);
4718            ReceivePacketAction::SendNoRouteToDest { dst: dst_ip }
4719        }
4720    }
4721}
4722
4723// Look up the route to a host.
4724fn lookup_route_table<
4725    I: IpLayerIpExt,
4726    BC: IpLayerBindingsContext<I, CC::DeviceId>,
4727    CC: IpStateContext<I, BC>,
4728>(
4729    core_ctx: &mut CC,
4730    dst_ip: I::Addr,
4731    rule_input: RuleInput<'_, I, CC::DeviceId>,
4732) -> Option<Destination<I::Addr, CC::DeviceId>> {
4733    let bound_device = match rule_input.packet_origin {
4734        PacketOrigin::Local { bound_address: _, bound_device } => bound_device,
4735        PacketOrigin::NonLocal { source_address: _, incoming_device: _ } => None,
4736    };
4737    core_ctx.with_rules_table(|core_ctx, rules: &RulesTable<_, _, BC>| {
4738        match walk_rules(core_ctx, rules, (), &rule_input, |(), core_ctx, table| {
4739            match table.lookup(core_ctx, bound_device, dst_ip) {
4740                Some(dst) => ControlFlow::Break(Some(dst)),
4741                None => ControlFlow::Continue(()),
4742            }
4743        }) {
4744            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
4745                inner: dst,
4746                observed_source_address_matcher: _,
4747            })) => dst,
4748            ControlFlow::Break(RuleAction::Unreachable) => None,
4749            ControlFlow::Continue(RuleWalkInfo {
4750                inner: (),
4751                observed_source_address_matcher: _,
4752            }) => None,
4753        }
4754    })
4755}
4756
4757/// Packed destination passed to [`IpDeviceSendContext::send_ip_frame`].
4758#[derive(Debug, Derivative, Clone)]
4759#[derivative(Eq(bound = "D: Eq"), PartialEq(bound = "D: PartialEq"))]
4760pub enum IpPacketDestination<I: BroadcastIpExt, D> {
4761    /// Broadcast packet.
4762    Broadcast(I::BroadcastMarker),
4763
4764    /// Multicast packet to the specified IP.
4765    Multicast(MulticastAddr<I::Addr>),
4766
4767    /// Send packet to the neighbor with the specified IP (the receiving
4768    /// node is either a router or the final recipient of the packet).
4769    Neighbor(SpecifiedAddr<I::Addr>),
4770
4771    /// Loopback the packet to the specified device. Can be used only when
4772    /// sending to the loopback device.
4773    Loopback(D),
4774}
4775
4776impl<I: BroadcastIpExt, D> IpPacketDestination<I, D> {
4777    /// Creates `IpPacketDestination` for IP address.
4778    pub fn from_addr(addr: SpecifiedAddr<I::Addr>) -> Self {
4779        match MulticastAddr::new(addr.into_addr()) {
4780            Some(mc_addr) => Self::Multicast(mc_addr),
4781            None => Self::Neighbor(addr),
4782        }
4783    }
4784
4785    /// Create `IpPacketDestination` from `NextHop`.
4786    pub fn from_next_hop(next_hop: NextHop<I::Addr>, dst_ip: SpecifiedAddr<I::Addr>) -> Self {
4787        match next_hop {
4788            NextHop::RemoteAsNeighbor => Self::from_addr(dst_ip),
4789            NextHop::Gateway(gateway) => Self::Neighbor(gateway),
4790            NextHop::Broadcast(marker) => Self::Broadcast(marker),
4791        }
4792    }
4793}
4794
4795/// The metadata associated with an outgoing IP packet.
4796#[derive(Debug, Clone)]
4797pub struct SendIpPacketMeta<I: IpExt, D, Src> {
4798    /// The outgoing device.
4799    pub device: D,
4800
4801    /// The source address of the packet.
4802    pub src_ip: Src,
4803
4804    /// The destination address of the packet.
4805    pub dst_ip: SpecifiedAddr<I::Addr>,
4806
4807    /// The destination for the send operation.
4808    pub destination: IpPacketDestination<I, D>,
4809
4810    /// The upper-layer protocol held in the packet's payload.
4811    pub proto: I::Proto,
4812
4813    /// The time-to-live (IPv4) or hop limit (IPv6) for the packet.
4814    ///
4815    /// If not set, a default TTL may be used.
4816    pub ttl: Option<NonZeroU8>,
4817
4818    /// An MTU to artificially impose on the whole IP packet.
4819    ///
4820    /// Note that the device's and discovered path MTU may still be imposed on
4821    /// the packet.
4822    pub mtu: Mtu,
4823
4824    /// Traffic Class (IPv6) or Type of Service (IPv4) field for the packet.
4825    pub dscp_and_ecn: DscpAndEcn,
4826}
4827
4828impl<I: IpExt, D> From<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>>
4829    for SendIpPacketMeta<I, D, Option<SpecifiedAddr<I::Addr>>>
4830{
4831    fn from(
4832        SendIpPacketMeta { device, src_ip, dst_ip, destination, proto, ttl, mtu, dscp_and_ecn }: SendIpPacketMeta<
4833            I,
4834            D,
4835            SpecifiedAddr<I::Addr>,
4836        >,
4837    ) -> SendIpPacketMeta<I, D, Option<SpecifiedAddr<I::Addr>>> {
4838        SendIpPacketMeta {
4839            device,
4840            src_ip: Some(src_ip),
4841            dst_ip,
4842            destination,
4843            proto,
4844            ttl,
4845            mtu,
4846            dscp_and_ecn,
4847        }
4848    }
4849}
4850
4851/// Trait for abstracting the IP layer for locally-generated traffic.  That is,
4852/// traffic generated by the netstack itself (e.g. ICMP, IGMP, or MLD).
4853///
4854/// NOTE: Due to filtering rules, it is possible that the device provided in
4855/// `meta` will not be the device that final IP packet is actually sent from.
4856pub trait IpLayerHandler<I: IpExt + FragmentationIpExt + FilterIpExt, BC>:
4857    DeviceIdContext<AnyDevice>
4858{
4859    /// Encapsulate and send the provided transport packet and from the device
4860    /// provided in `meta`.
4861    fn send_ip_packet_from_device<S>(
4862        &mut self,
4863        bindings_ctx: &mut BC,
4864        meta: SendIpPacketMeta<I, &Self::DeviceId, Option<SpecifiedAddr<I::Addr>>>,
4865        body: S,
4866    ) -> Result<(), IpSendFrameError<S>>
4867    where
4868        S: TransportPacketSerializer<I>,
4869        S::Buffer: BufferMut;
4870
4871    /// Send an IP packet that doesn't require the encapsulation and other
4872    /// processing of [`send_ip_packet_from_device`] from the device specified
4873    /// in `meta`.
4874    // TODO(https://fxbug.dev/333908066): The packets going through this
4875    // function only hit the EGRESS filter hook, bypassing LOCAL_EGRESS.
4876    // Refactor callers and other functions to prevent this.
4877    fn send_ip_frame<S>(
4878        &mut self,
4879        bindings_ctx: &mut BC,
4880        device: &Self::DeviceId,
4881        destination: IpPacketDestination<I, &Self::DeviceId>,
4882        body: S,
4883    ) -> Result<(), IpSendFrameError<S>>
4884    where
4885        S: FragmentableIpSerializer<I, Buffer: BufferMut> + FilterIpPacket<I>;
4886}
4887
4888impl<
4889    I: IpLayerIpExt,
4890    BC: IpLayerBindingsContext<I, <CC as DeviceIdContext<AnyDevice>>::DeviceId>,
4891    CC: IpLayerEgressContext<I, BC> + IpDeviceEgressStateContext<I> + IpDeviceMtuContext<I>,
4892> IpLayerHandler<I, BC> for CC
4893{
4894    fn send_ip_packet_from_device<S>(
4895        &mut self,
4896        bindings_ctx: &mut BC,
4897        meta: SendIpPacketMeta<I, &CC::DeviceId, Option<SpecifiedAddr<I::Addr>>>,
4898        body: S,
4899    ) -> Result<(), IpSendFrameError<S>>
4900    where
4901        S: TransportPacketSerializer<I>,
4902        S::Buffer: BufferMut,
4903    {
4904        send_ip_packet_from_device(self, bindings_ctx, meta, body, IpLayerPacketMetadata::default())
4905    }
4906
4907    fn send_ip_frame<S>(
4908        &mut self,
4909        bindings_ctx: &mut BC,
4910        device: &Self::DeviceId,
4911        destination: IpPacketDestination<I, &Self::DeviceId>,
4912        body: S,
4913    ) -> Result<(), IpSendFrameError<S>>
4914    where
4915        S: FragmentableIpSerializer<I, Buffer: BufferMut> + FilterIpPacket<I>,
4916    {
4917        send_ip_frame(
4918            self,
4919            bindings_ctx,
4920            device,
4921            destination,
4922            body,
4923            IpLayerPacketMetadata::default(),
4924            Mtu::no_limit(),
4925        )
4926    }
4927}
4928
4929/// Sends an Ip packet with the specified metadata.
4930///
4931/// # Panics
4932///
4933/// Panics if either the source or destination address is the loopback address
4934/// and the device is a non-loopback device.
4935pub(crate) fn send_ip_packet_from_device<I, BC, CC, S>(
4936    core_ctx: &mut CC,
4937    bindings_ctx: &mut BC,
4938    meta: SendIpPacketMeta<
4939        I,
4940        &<CC as DeviceIdContext<AnyDevice>>::DeviceId,
4941        Option<SpecifiedAddr<I::Addr>>,
4942    >,
4943    body: S,
4944    packet_metadata: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
4945) -> Result<(), IpSendFrameError<S>>
4946where
4947    I: IpLayerIpExt,
4948    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes + MarksBindingsContext,
4949    CC: IpLayerEgressContext<I, BC> + IpDeviceEgressStateContext<I> + IpDeviceMtuContext<I>,
4950    S: TransportPacketSerializer<I>,
4951    S::Buffer: BufferMut,
4952{
4953    let SendIpPacketMeta { device, src_ip, dst_ip, destination, proto, ttl, mtu, dscp_and_ecn } =
4954        meta;
4955    core_ctx.increment_both(device, |c| &c.send_ip_packet);
4956    let next_packet_id = gen_ip_packet_id(core_ctx);
4957    let ttl = ttl.unwrap_or_else(|| core_ctx.get_hop_limit(device)).get();
4958    let src_ip = src_ip.map_or(I::UNSPECIFIED_ADDRESS, |a| a.get());
4959    let mut builder = I::PacketBuilder::new(src_ip, dst_ip.get(), ttl, proto);
4960
4961    #[derive(GenericOverIp)]
4962    #[generic_over_ip(I, Ip)]
4963    struct Wrap<'a, I: IpLayerIpExt> {
4964        builder: &'a mut I::PacketBuilder,
4965        next_packet_id: I::PacketId,
4966    }
4967
4968    I::map_ip::<_, ()>(
4969        Wrap { builder: &mut builder, next_packet_id },
4970        |Wrap { builder, next_packet_id }| {
4971            builder.id(next_packet_id);
4972        },
4973        |Wrap { builder: _, next_packet_id: () }| {
4974            // IPv6 doesn't have packet IDs.
4975        },
4976    );
4977
4978    builder.set_dscp_and_ecn(dscp_and_ecn);
4979
4980    let ip_frame = builder.wrap_body(body);
4981    send_ip_frame(core_ctx, bindings_ctx, device, destination, ip_frame, packet_metadata, mtu)
4982        .map_err(|ser| ser.map_serializer(|s| s.into_inner()))
4983}
4984
4985/// Abstracts access to a [`filter::FilterHandler`] for core contexts.
4986pub trait FilterHandlerProvider<I: FilterIpExt, BT: FilterBindingsTypes>:
4987    IpDeviceAddressIdContext<I, DeviceId: netstack3_base::InterfaceProperties<BT::DeviceClass>>
4988{
4989    /// The filter handler.
4990    type Handler<'a>: filter::FilterHandler<I, BT, DeviceId = Self::DeviceId, WeakAddressId = Self::WeakAddressId>
4991    where
4992        Self: 'a;
4993
4994    /// Gets the filter handler for this context.
4995    fn filter_handler(&mut self) -> Self::Handler<'_>;
4996}
4997
/// Fake-context glue for tests that exercise the IP send path.
#[cfg(any(test, feature = "testutils"))]
pub(crate) mod testutil {
    use super::*;

    use netstack3_base::testutil::{FakeBindingsCtx, FakeCoreCtx, FakeStrongDeviceId};
    use netstack3_base::{AssignedAddrIpExt, SendFrameContext, SendFrameError, SendableFrameMeta};
    use packet::Serializer;

    /// A [`SendIpPacketMeta`] for dual stack contexts.
    ///
    /// Holds either the IPv4 or the IPv6 flavor of the metadata.
    #[derive(Debug, GenericOverIp)]
    #[generic_over_ip()]
    #[allow(missing_docs)]
    pub enum DualStackSendIpPacketMeta<D> {
        V4(SendIpPacketMeta<Ipv4, D, SpecifiedAddr<Ipv4Addr>>),
        V6(SendIpPacketMeta<Ipv6, D, SpecifiedAddr<Ipv6Addr>>),
    }

    impl<I: IpExt, D> From<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>>
        for DualStackSendIpPacketMeta<D>
    {
        /// Wraps version-generic metadata in the variant matching `I`.
        fn from(meta: SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>) -> Self {
            #[derive(GenericOverIp)]
            #[generic_over_ip(I, Ip)]
            struct MetaOf<I: IpExt, D>(SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>);

            I::map_ip_in(MetaOf(meta), |MetaOf(v4)| Self::V4(v4), |MetaOf(v6)| Self::V6(v6))
        }
    }

    impl<I: IpExt, S, DeviceId, BC>
        SendableFrameMeta<FakeCoreCtx<S, DualStackSendIpPacketMeta<DeviceId>, DeviceId>, BC>
        for SendIpPacketMeta<I, DeviceId, SpecifiedAddr<I::Addr>>
    {
        /// Sends `frame` through the fake context's `frames`, tagged with the
        /// dual-stack form of this metadata.
        fn send_meta<SS>(
            self,
            core_ctx: &mut FakeCoreCtx<S, DualStackSendIpPacketMeta<DeviceId>, DeviceId>,
            bindings_ctx: &mut BC,
            frame: SS,
        ) -> Result<(), SendFrameError<SS>>
        where
            SS: Serializer,
            SS::Buffer: BufferMut,
        {
            let dual_stack_meta = DualStackSendIpPacketMeta::from(self);
            SendFrameContext::send_frame(&mut core_ctx.frames, bindings_ctx, dual_stack_meta, frame)
        }
    }

    /// Error returned when the IP version doesn't match.
    #[derive(Debug)]
    pub struct WrongIpVersion;

    impl<D> DualStackSendIpPacketMeta<D> {
        /// Borrows the inner [`SendIpPacketMeta`] if it is of IP version `I`.
        ///
        /// Returns [`WrongIpVersion`] when this value holds the other
        /// version's metadata.
        pub fn try_as<I: IpExt>(
            &self,
        ) -> Result<&SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>, WrongIpVersion> {
            #[derive(GenericOverIp)]
            #[generic_over_ip(I, Ip)]
            struct MaybeMeta<'a, I: IpExt, D>(
                Option<&'a SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>>,
            );

            let MaybeMeta(matching) = I::map_ip(
                self,
                |dual| {
                    MaybeMeta(match dual {
                        Self::V6(_) => None,
                        Self::V4(meta) => Some(meta),
                    })
                },
                |dual| {
                    MaybeMeta(match dual {
                        Self::V6(meta) => Some(meta),
                        Self::V4(_) => None,
                    })
                },
            );
            matching.ok_or(WrongIpVersion)
        }
    }

    impl<I, BC, S, Meta, DeviceId> FilterHandlerProvider<I, BC> for FakeCoreCtx<S, Meta, DeviceId>
    where
        I: AssignedAddrIpExt + FilterIpExt,
        BC: FilterBindingsContext<DeviceId>,
        DeviceId: FakeStrongDeviceId + netstack3_base::InterfaceProperties<BC::DeviceClass>,
    {
        type Handler<'a>
            = filter::testutil::NoopImpl<DeviceId>
        where
            Self: 'a;

        /// Hands out the filter test utilities' `NoopImpl` handler.
        fn filter_handler(&mut self) -> Self::Handler<'_> {
            filter::testutil::NoopImpl::default()
        }
    }

    impl<TimerId, Event: Debug, State, FrameMeta> MarksBindingsContext
        for FakeBindingsCtx<TimerId, Event, State, FrameMeta>
    {
        /// The fake bindings keep only `Mark1` on egress.
        fn marks_to_keep_on_egress() -> &'static [MarkDomain] {
            &[MarkDomain::Mark1]
        }

        /// The fake bindings set only `Mark2` on ingress.
        fn marks_to_set_on_ingress() -> &'static [MarkDomain] {
            &[MarkDomain::Mark2]
        }
    }
}
5115
#[cfg(test)]
mod test {
    use super::*;

    /// An assigned IPv4 address must be preferred over a tentative one.
    #[test]
    fn highest_priority_address_status_v4() {
        let candidates = [
            Ipv4PresentAddressStatus::UnicastAssigned,
            Ipv4PresentAddressStatus::UnicastTentative,
        ];
        let chosen = choose_highest_priority_address_status::<Ipv4>(candidates.into_iter());
        assert_eq!(chosen, Some(Ipv4PresentAddressStatus::UnicastAssigned));
    }

    /// An assigned IPv6 address must be preferred over a tentative one.
    #[test]
    fn highest_priority_address_status_v6() {
        let candidates = [
            Ipv6PresentAddressStatus::UnicastAssigned,
            Ipv6PresentAddressStatus::UnicastTentative,
        ];
        let chosen = choose_highest_priority_address_status::<Ipv6>(candidates.into_iter());
        assert_eq!(chosen, Some(Ipv6PresentAddressStatus::UnicastAssigned));
    }
}