Skip to main content

netstack3_ip/
base.rs

1// Copyright 2018 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use alloc::boxed::Box;
6use alloc::vec::Vec;
7use core::convert::Infallible as Never;
8use core::fmt::Debug;
9use core::hash::Hash;
10use core::marker::PhantomData;
11use core::num::NonZeroU8;
12use core::ops::ControlFlow;
13#[cfg(test)]
14use core::ops::DerefMut;
15use core::sync::atomic::{self, AtomicU16};
16
17use derivative::Derivative;
18use explicit::ResultExt as _;
19use lock_order::lock::{OrderedLockAccess, OrderedLockRef};
20use log::{debug, trace};
21use net_types::ip::{
22    GenericOverIp, Ip, Ipv4, Ipv4Addr, Ipv6, Ipv6Addr, Ipv6SourceAddr, Mtu, Subnet,
23};
24use net_types::{
25    LinkLocalAddress, MulticastAddr, MulticastAddress, NonMappedAddr, NonMulticastAddr,
26    SpecifiedAddr, SpecifiedAddress as _, Witness,
27};
28use netstack3_base::socket::{EitherStack, SocketIpAddr, SocketIpAddrExt as _};
29use netstack3_base::sync::{Mutex, PrimaryRc, RwLock, StrongRc, WeakRc};
30use netstack3_base::{
31    AnyDevice, BroadcastIpExt, CoreTimerContext, Counter, CounterCollectionSpec, CounterContext,
32    DeviceIdContext, DeviceIdentifier as _, ErrorAndSerializer, EventContext, HandleableTimer,
33    InstantContext, InterfaceProperties, IpAddressId, IpDeviceAddr, IpDeviceAddressIdContext,
34    IpExt, LocalFrameDestination, MarkDomain, Marks, Matcher as _, MatcherBindingsTypes,
35    NestedIntoCoreTimerCtx, NetworkParsingContext, NetworkSerializationContext, NotFoundError,
36    ResourceCounterContext, RngContext, SendFrameErrorReason, StrongDeviceIdentifier,
37    TimerBindingsTypes, TimerContext, TimerHandler, TxMetadata as _, TxMetadataBindingsTypes,
38    WeakIpAddressId, WrapBroadcastMarker,
39};
40use netstack3_filter::{
41    self as filter, ConnectionDirection, ConntrackConnection, FilterBindingsContext,
42    FilterBindingsTypes, FilterHandler as _, FilterIpContext, FilterIpExt, FilterIpMetadata,
43    FilterIpPacket, FilterPacketMetadata, FilterTimerId, ForwardedPacket, IpPacket, MarkAction,
44    MaybeTransportPacket as _, RejectType, SocketInfo, TransportPacketSerializer, Tuple,
45    WeakConnectionError, WeakConntrackConnection,
46};
47use netstack3_hashmap::HashMap;
48use packet::{
49    Buf, BufferMut, GrowBuffer, LayoutBufferAlloc, NestablePacketBuilder as _, PacketConstraints,
50    ParsablePacket as _, ParseBuffer, ParseBufferMut, ParseMetadata, SerializeError,
51    Serializer as _,
52};
53use packet_formats::error::{Ipv6ParseError, ParseError};
54use packet_formats::ip::{DscpAndEcn, IpPacket as _, IpPacketBuilder as _};
55use packet_formats::ipv4::{Ipv4FragmentType, Ipv4Packet};
56use packet_formats::ipv6::{Ipv6Packet, Ipv6PacketRaw};
57use thiserror::Error;
58use zerocopy::SplitByteSlice;
59
60use crate::internal::counters::{IpCounters, IpCountersIpExt};
61use crate::internal::device::opaque_iid::IidSecret;
62use crate::internal::device::slaac::SlaacCounters;
63use crate::internal::device::state::{
64    IpAddressData, IpAddressFlags, IpDeviceStateBindingsTypes, IpDeviceStateIpExt, WeakAddressId,
65};
66use crate::internal::device::{
67    self, IpDeviceAddressContext, IpDeviceBindingsContext, IpDeviceIpExt, IpDeviceSendContext,
68};
69use crate::internal::fragmentation::{FragmentableIpSerializer, FragmentationIpExt, IpFragmenter};
70use crate::internal::gmp::GmpQueryHandler;
71use crate::internal::gmp::igmp::IgmpCounters;
72use crate::internal::gmp::mld::MldCounters;
73use crate::internal::icmp::counters::IcmpCountersIpExt;
74use crate::internal::icmp::{
75    IcmpBindingsTypes, IcmpError, IcmpErrorHandler, IcmpHandlerIpExt, Icmpv4Error, Icmpv4State,
76    Icmpv4StateBuilder, Icmpv6Error, Icmpv6State, Icmpv6StateBuilder,
77};
78use crate::internal::ipv6::Ipv6PacketAction;
79use crate::internal::local_delivery::{
80    IpHeaderInfo, Ipv4HeaderInfo, Ipv6HeaderInfo, LocalDeliveryPacketInfo, ReceiveIpPacketMeta,
81    TransparentLocalDelivery,
82};
83use crate::internal::multicast_forwarding::counters::MulticastForwardingCounters;
84use crate::internal::multicast_forwarding::route::{
85    MulticastRouteIpExt, MulticastRouteTarget, MulticastRouteTargets,
86};
87use crate::internal::multicast_forwarding::state::{
88    MulticastForwardingState, MulticastForwardingStateContext,
89};
90use crate::internal::multicast_forwarding::{
91    MulticastForwardingBindingsTypes, MulticastForwardingDeviceContext, MulticastForwardingEvent,
92    MulticastForwardingTimerId,
93};
94use crate::internal::path_mtu::{PmtuBindingsTypes, PmtuCache, PmtuTimerId};
95use crate::internal::raw::counters::RawIpSocketCounters;
96use crate::internal::raw::{RawIpSocketHandler, RawIpSocketMap, RawIpSocketsBindingsTypes};
97use crate::internal::reassembly::{
98    FragmentBindingsTypes, FragmentHandler, FragmentProcessingState, FragmentTimerId,
99    FragmentablePacket, IpPacketFragmentCache, ReassemblyIpExt,
100};
101use crate::internal::routing::rules::{Rule, RuleAction, RuleInput, RulesTable};
102use crate::internal::routing::{
103    IpRoutingBindingsTypes, IpRoutingDeviceContext, NonLocalSrcAddrPolicy, PacketOrigin,
104    RoutingTable,
105};
106use crate::internal::socket::{IpSocketBindingsContext, IpSocketContext, IpSocketHandler};
107use crate::internal::types::{
108    self, Destination, InternalForwarding, NextHop, ResolvedRoute, RoutableIpAddr,
109};
110use crate::internal::{ipv6, multicast_forwarding};
111
// Unit tests for this module; only compiled into test builds.
#[cfg(test)]
mod tests;
114
/// The TTL used by default for IPv4 packets (64 hops).
pub const DEFAULT_TTL: NonZeroU8 = match NonZeroU8::new(64) {
    Some(ttl) => ttl,
    // 64 is a non-zero literal, so this arm can never be taken.
    None => unreachable!(),
};
117
/// Hop limits for packets sent to multicast and unicast destinations.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct HopLimits {
    /// Hop limit applied to packets sent to unicast destinations.
    pub unicast: NonZeroU8,
    /// Hop limit applied to packets sent to multicast destinations.
    pub multicast: NonZeroU8,
}
125
126/// Default hop limits for sockets.
127pub const DEFAULT_HOP_LIMITS: HopLimits =
128    HopLimits { unicast: DEFAULT_TTL, multicast: NonZeroU8::new(1).unwrap() };
129
/// The IPv6 subnet that contains all addresses; `::/0`.
pub const IPV6_DEFAULT_SUBNET: Subnet<Ipv6Addr> =
    // SAFETY: A prefix length of 0 is less than the number of IPv6 address
    // bits.
    unsafe { Subnet::new_unchecked(Ipv6::UNSPECIFIED_ADDRESS, 0) };
134
/// Sidecar metadata passed along with the packet.
///
/// Note: This metadata may be regenerated when packet handling requires
/// performing multiple actions (e.g. sending the packet out multiple interfaces
/// as part of multicast forwarding).
///
/// When compiled with debug assertions, a value of this type must be consumed
/// through `into_parts` or `acknowledge_drop`; dropping it any other way
/// panics.
#[derive(Derivative)]
#[derivative(Default(bound = ""))]
pub struct IpLayerPacketMetadata<
    I: packet_formats::ip::IpExt,
    A,
    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
> {
    /// The conntrack connection associated with this packet, together with
    /// its [`ConnectionDirection`], if any.
    conntrack_connection_and_direction:
        Option<(ConntrackConnection<I, A, BT>, ConnectionDirection)>,

    /// Tx metadata associated with this packet.
    ///
    /// This may be non-default even in the rx path for looped back packets that
    /// are still forcing tx frame ownership for sockets.
    tx_metadata: BT::TxMetadata,

    /// Marks attached to the packet that can be acted upon by routing/filtering.
    marks: Marks,

    /// Socket info of the associated socket, if any.
    socket_info: Option<SocketInfo>,

    /// Debug-only guard that panics if the metadata is dropped without
    /// acknowledgement.
    #[cfg(debug_assertions)]
    drop_check: IpLayerPacketMetadataDropCheck,
}
165
/// A type that asserts, on drop, that it was intentionally being dropped.
///
/// NOTE: Unfortunately, debugging this requires backtraces, since track_caller
/// won't do what we want (https://github.com/rust-lang/rust/issues/116942).
/// Since this is only enabled in debug, the assumption is that stacktraces are
/// enabled.
#[cfg(debug_assertions)]
#[derive(Default)]
struct IpLayerPacketMetadataDropCheck {
    /// Whether the drop has been acknowledged. Starts out `false` (the derived
    /// default); dropping the value while this is still `false` panics.
    okay_to_drop: bool,
}
177
/// Metadata that is produced and consumed by the IP layer for each packet, but
/// which also traverses the device layer.
#[derive(Derivative)]
#[derivative(Debug(bound = ""), Default(bound = ""))]
pub struct DeviceIpLayerMetadata<BT: TxMetadataBindingsTypes> {
    /// Weak reference to this packet's connection tracking entry, if the packet is
    /// tracked.
    ///
    /// This allows NAT to consistently associate locally-generated, looped-back
    /// packets with the same connection at every filtering hook even when NAT may
    /// have been performed on them, causing them to no longer match the original or
    /// reply tuples of the connection.
    conntrack_entry: Option<(WeakConntrackConnection, ConnectionDirection)>,
    /// Tx metadata associated with this packet.
    ///
    /// This may be non-default even in the rx path for looped back packets that
    /// are still forcing tx frame ownership for sockets.
    tx_metadata: BT::TxMetadata,
    /// Marks attached to this packet. For incoming packets the marks are `None`
    /// by default, but they can be changed by a filtering rule.
    ///
    /// Note: The marks will be preserved if the packet is being looped back, i.e.,
    /// the receiver will be able to observe the marks set by the sender. This is
    /// consistent with Linux behavior.
    marks: Marks,
}
204
205impl<BT: TxMetadataBindingsTypes> DeviceIpLayerMetadata<BT> {
206    /// Discards the remaining IP layer information and returns only the tx
207    /// metadata used for buffer ownership.
208    pub fn into_tx_metadata(self) -> BT::TxMetadata {
209        self.tx_metadata
210    }
211    /// Creates new IP layer metadata with the marks.
212    #[cfg(any(test, feature = "testutils"))]
213    pub fn with_marks(marks: Marks) -> Self {
214        Self { conntrack_entry: None, tx_metadata: Default::default(), marks }
215    }
216}
217
218impl<
219    I: IpLayerIpExt,
220    A: WeakIpAddressId<I::Addr>,
221    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
222> IpLayerPacketMetadata<I, A, BT>
223{
224    fn from_device_ip_layer_metadata<CC, D>(
225        core_ctx: &mut CC,
226        device: &D,
227        DeviceIpLayerMetadata { conntrack_entry, tx_metadata, marks }: DeviceIpLayerMetadata<BT>,
228    ) -> Self
229    where
230        CC: ResourceCounterContext<D, IpCounters<I>>,
231    {
232        let conntrack_connection_and_direction = match conntrack_entry
233            .map(|(conn, dir)| conn.into_inner().map(|conn| (conn, dir)))
234            .transpose()
235        {
236            // Either the packet was tracked and we've preserved its conntrack entry across
237            // loopback, or it was untracked and we just stash the `None`.
238            Ok(conn_and_dir) => conn_and_dir,
239            // Conntrack entry was removed from table after packet was enqueued in loopback.
240            Err(WeakConnectionError::EntryRemoved) => None,
241            // Conntrack entry no longer matches the packet (for example, it could be that
242            // this is an IPv6 packet that was modified at the device layer and therefore it
243            // no longer matches its IPv4 conntrack entry).
244            Err(WeakConnectionError::InvalidEntry) => {
245                core_ctx.increment_both(device, |c| &c.invalid_cached_conntrack_entry);
246                None
247            }
248        };
249
250        let socket_info = tx_metadata.socket_info();
251
252        Self {
253            conntrack_connection_and_direction,
254            tx_metadata,
255            marks,
256            socket_info,
257            #[cfg(debug_assertions)]
258            drop_check: Default::default(),
259        }
260    }
261}
262
impl<I: IpExt, A, BT: FilterBindingsTypes + TxMetadataBindingsTypes>
    IpLayerPacketMetadata<I, A, BT>
{
    /// Creates packet metadata from `tx_metadata` and `marks`, with no
    /// conntrack connection attached.
    ///
    /// The socket info is taken from `tx_metadata`.
    pub(crate) fn from_tx_metadata_and_marks(tx_metadata: BT::TxMetadata, marks: Marks) -> Self {
        let socket_info = tx_metadata.socket_info();
        Self {
            conntrack_connection_and_direction: None,
            tx_metadata,
            marks,
            socket_info,
            #[cfg(debug_assertions)]
            drop_check: Default::default(),
        }
    }

    /// Consumes the metadata and returns its parts in the order: conntrack
    /// connection and direction, tx metadata, marks, socket info.
    ///
    /// This counts as an acknowledged drop of the metadata.
    pub(crate) fn into_parts(
        self,
    ) -> (
        Option<(ConntrackConnection<I, A, BT>, ConnectionDirection)>,
        BT::TxMetadata,
        Marks,
        Option<SocketInfo>,
    ) {
        let Self {
            tx_metadata,
            marks,
            conntrack_connection_and_direction,
            socket_info,
            #[cfg(debug_assertions)]
            mut drop_check,
        } = self;
        // Mark the drop check as acknowledged so that it does not panic when it
        // goes out of scope at the end of this function.
        #[cfg(debug_assertions)]
        {
            drop_check.okay_to_drop = true;
        }
        (conntrack_connection_and_direction, tx_metadata, marks, socket_info)
    }

    /// Acknowledge that it's okay to drop this packet metadata.
    ///
    /// When compiled with debug assertions, dropping [`IpLayerPacketMetadata`]
    /// will panic if this method has not previously been called.
    pub(crate) fn acknowledge_drop(self) {
        #[cfg(debug_assertions)]
        {
            // Rebind mutably so the flag can be set before `this` is dropped at
            // the end of this block.
            let mut this = self;
            this.drop_check.okay_to_drop = true;
        }
    }

    /// Returns the tx metadata associated with this packet.
    pub(crate) fn tx_metadata(&self) -> &BT::TxMetadata {
        &self.tx_metadata
    }

    /// Returns the marks attached to this packet.
    pub(crate) fn marks(&self) -> &Marks {
        &self.marks
    }
}
323
324#[cfg(debug_assertions)]
325impl Drop for IpLayerPacketMetadataDropCheck {
326    fn drop(&mut self) {
327        if !self.okay_to_drop {
328            panic!(
329                "IpLayerPacketMetadata dropped without acknowledgement.  https://fxbug.dev/334127474"
330            );
331        }
332    }
333}
334
335impl<I: packet_formats::ip::IpExt, A, BT: FilterBindingsTypes + TxMetadataBindingsTypes>
336    FilterIpMetadata<I, A, BT> for IpLayerPacketMetadata<I, A, BT>
337{
338    fn take_connection_and_direction(
339        &mut self,
340    ) -> Option<(ConntrackConnection<I, A, BT>, ConnectionDirection)> {
341        self.conntrack_connection_and_direction.take()
342    }
343
344    fn replace_connection_and_direction(
345        &mut self,
346        conn: ConntrackConnection<I, A, BT>,
347        direction: ConnectionDirection,
348    ) -> Option<ConntrackConnection<I, A, BT>> {
349        self.conntrack_connection_and_direction.replace((conn, direction)).map(|(conn, _dir)| conn)
350    }
351}
352
353impl<I: packet_formats::ip::IpExt, A, BT: FilterBindingsTypes + TxMetadataBindingsTypes>
354    FilterPacketMetadata for IpLayerPacketMetadata<I, A, BT>
355{
356    fn apply_mark_action(&mut self, domain: MarkDomain, action: MarkAction) {
357        action.apply(self.marks.get_mut(domain))
358    }
359
360    fn socket_info(&self) -> Option<SocketInfo> {
361        self.socket_info.clone()
362    }
363
364    fn marks(&self) -> &Marks {
365        &self.marks
366    }
367}
368
/// Send errors observed at or above the IP layer that carry a serializer.
///
/// The error cause is an [`IpSendFrameErrorReason`]; `S` is the type of the
/// serializer carried alongside it.
pub type IpSendFrameError<S> = ErrorAndSerializer<IpSendFrameErrorReason, S>;
371
/// Send error cause for [`IpSendFrameError`].
#[derive(Debug, PartialEq)]
pub enum IpSendFrameErrorReason {
    /// Error comes from the device layer; the wrapped value is the device
    /// layer's own reason.
    Device(SendFrameErrorReason),
    /// The frame's source or destination address is in the loopback subnet, but
    /// the target device is not the loopback device.
    IllegalLoopbackAddress,
}
381
382impl From<SendFrameErrorReason> for IpSendFrameErrorReason {
383    fn from(value: SendFrameErrorReason) -> Self {
384        Self::Device(value)
385    }
386}
387
/// The execution context provided by a transport layer protocol to the IP
/// layer.
///
/// An implementation for `()` is provided which indicates that a particular
/// transport layer protocol is unsupported.
pub trait IpTransportContext<I, BC, CC>
where
    I: IpLayerIpExt,
    CC: DeviceIdContext<AnyDevice> + ?Sized,
{
    /// Type used to identify sockets for early demux.
    type EarlyDemuxSocket;

    /// Performs early demux.
    ///
    /// Tries to match the packet with a connected socket that will receive the
    /// packet. If a match is found, the socket information is passed to
    /// `LOCAL_INGRESS` filters. The socket is also passed to
    /// `receive_ip_packet` to avoid demuxing the packet twice.
    ///
    /// The socket may be invalidated if the source address is changed by SNAT.
    /// In that case, `receive_ip_packet` is called with `early_demux_socket`
    /// set to `None`.
    ///
    /// Returns the matched socket, if any.
    fn early_demux<B: ParseBuffer>(
        core_ctx: &mut CC,
        device: &CC::DeviceId,
        src_ip: I::Addr,
        dst_ip: I::Addr,
        buffer: B,
    ) -> Option<Self::EarlyDemuxSocket>;

    /// Receive an ICMP error message.
    ///
    /// All arguments beginning with `original_` are fields from the IP packet
    /// that triggered the error. The `original_body` is provided here so that
    /// the error can be associated with a transport-layer socket. `device`
    /// identifies the device that received the ICMP error message packet.
    ///
    /// While ICMPv4 error messages are supposed to contain the first 8 bytes of
    /// the body of the offending packet, and ICMPv6 error messages are supposed
    /// to contain as much of the offending packet as possible without violating
    /// the IPv6 minimum MTU, the caller does NOT guarantee that either of these
    /// hold. It is `receive_icmp_error`'s responsibility to handle any length
    /// of `original_body`, and to perform any necessary validation.
    fn receive_icmp_error(
        core_ctx: &mut CC,
        bindings_ctx: &mut BC,
        device: &CC::DeviceId,
        original_src_ip: Option<SpecifiedAddr<I::Addr>>,
        original_dst_ip: SpecifiedAddr<I::Addr>,
        original_body: &[u8],
        err: I::ErrorCode,
    );

    /// Receive a transport layer packet in an IP packet.
    ///
    /// `early_demux_socket` is the socket previously returned by
    /// [`IpTransportContext::early_demux`] for this packet, if any.
    ///
    /// In the event of an unreachable port, `receive_ip_packet` returns the
    /// buffer in its original state (with the transport packet un-parsed) in
    /// the `Err` variant.
    fn receive_ip_packet<B: BufferMut, H: IpHeaderInfo<I>>(
        core_ctx: &mut CC,
        bindings_ctx: &mut BC,
        device: &CC::DeviceId,
        src_ip: I::RecvSrcAddr,
        dst_ip: SpecifiedAddr<I::Addr>,
        buffer: B,
        info: &mut LocalDeliveryPacketInfo<I, H>,
        early_demux_socket: Option<Self::EarlyDemuxSocket>,
    ) -> Result<(), (B, I::IcmpError)>;
}
458
/// The base execution context provided by the IP layer to transport layer
/// protocols.
pub trait BaseTransportIpContext<I: IpExt, BC>: DeviceIdContext<AnyDevice> {
    /// The iterator given to
    /// [`BaseTransportIpContext::with_devices_with_assigned_addr`].
    type DevicesWithAddrIter<'s>: Iterator<Item = Self::DeviceId>;

    /// Is this one of our local addresses, and is it in the assigned state?
    ///
    /// Calls `cb` with an iterator over all the local interfaces for which
    /// `addr` is an associated address, and, for IPv6, for which it is in the
    /// "assigned" state.
    ///
    /// Returns whatever `cb` returns.
    fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
        &mut self,
        addr: SpecifiedAddr<I::Addr>,
        cb: F,
    ) -> O;

    /// Get default hop limits.
    ///
    /// If `device` is not `None` and exists, its hop limits will be returned.
    /// Otherwise the system defaults are returned.
    fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits;

    /// Gets the original destination for the tracked connection indexed by
    /// `tuple`, which includes the source and destination addresses and
    /// transport-layer ports as well as the transport protocol number.
    ///
    /// The result, when present, is the original destination address and the
    /// original destination port (or ID).
    fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)>;
}
488
/// A marker trait for the traits required by the transport layer from the IP
/// layer.
///
/// It bundles [`BaseTransportIpContext`] and [`IpSocketHandler`] and declares
/// no items of its own.
pub trait TransportIpContext<I: IpExt + FilterIpExt, BC: TxMetadataBindingsTypes>:
    BaseTransportIpContext<I, BC> + IpSocketHandler<I, BC>
{
}
495
// Blanket implementation: every context that provides both supertraits is a
// `TransportIpContext`.
impl<I, CC, BC> TransportIpContext<I, BC> for CC
where
    I: IpExt + FilterIpExt,
    CC: BaseTransportIpContext<I, BC> + IpSocketHandler<I, BC>,
    BC: TxMetadataBindingsTypes,
{
}
503
/// Abstraction over the ability to join and leave multicast groups.
pub trait MulticastMembershipHandler<I: Ip, BC>: DeviceIdContext<AnyDevice> {
    /// Requests that the specified device join the given multicast group.
    ///
    /// If this method is called multiple times with the same device and
    /// address, the device will remain joined to the multicast group until
    /// [`MulticastMembershipHandler::leave_multicast_group`] has been called
    /// the same number of times.
    fn join_multicast_group(
        &mut self,
        bindings_ctx: &mut BC,
        device: &Self::DeviceId,
        addr: MulticastAddr<I::Addr>,
    );

    /// Requests that the specified device leave the given multicast group.
    ///
    /// Each call to this method must correspond to an earlier call to
    /// [`MulticastMembershipHandler::join_multicast_group`]. The device
    /// remains a member of the multicast group so long as some call to
    /// `join_multicast_group` has been made without a corresponding call to
    /// `leave_multicast_group`.
    fn leave_multicast_group(
        &mut self,
        bindings_ctx: &mut BC,
        device: &Self::DeviceId,
        addr: MulticastAddr<I::Addr>,
    );

    /// Selects a default device with which to join the given multicast group.
    ///
    /// The selection is made by consulting the routing table; if there is no
    /// route available to the given address, an error is returned.
    fn select_device_for_multicast_group(
        &mut self,
        addr: MulticastAddr<I::Addr>,
        marks: &Marks,
    ) -> Result<Self::DeviceId, ResolveRouteError>;
}
543
544// TODO(joshlf): With all 256 protocol numbers (minus reserved ones) given their
545// own associated type in both traits, running `cargo check` on a 2018 MacBook
546// Pro takes over a minute. Eventually - and before we formally publish this as
547// a library - we should identify the bottleneck in the compiler and optimize
548// it. For the time being, however, we only support protocol numbers that we
549// actually use (TCP and UDP).
550
/// Enables a blanket implementation of [`TransportIpContext`].
///
/// Implementing this marker trait for a type opts it into the blanket
/// implementation of [`BaseTransportIpContext`] in this module, which in turn
/// makes `TransportIpContext` available given the other requirements are met.
pub trait UseTransportIpContextBlanket {}
556
/// An iterator supporting the blanket implementation of
/// [`BaseTransportIpContext::with_devices_with_assigned_addr`].
///
/// Wraps an iterator of `(device, address status)` pairs and yields only the
/// devices whose status passes `is_unicast_assigned`. The `PhantomData` ties
/// the wrapper to the IP version `I` and device type `D`.
pub struct AssignedAddressDeviceIterator<Iter, I, D>(Iter, PhantomData<(I, D)>);
560
561impl<Iter, I, D> Iterator for AssignedAddressDeviceIterator<Iter, I, D>
562where
563    Iter: Iterator<Item = (D, I::AddressStatus)>,
564    I: IpLayerIpExt,
565{
566    type Item = D;
567    fn next(&mut self) -> Option<D> {
568        let Self(iter, PhantomData) = self;
569        iter.by_ref().find_map(|(device, state)| is_unicast_assigned::<I>(&state).then_some(device))
570    }
571}
572
573impl<
574    I: IpLayerIpExt,
575    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes + IpRoutingBindingsTypes,
576    CC: IpDeviceContext<I>
577        + IpSocketHandler<I, BC>
578        + IpStateContext<I, BC>
579        + FilterIpContext<I, BC>
580        + UseTransportIpContextBlanket,
581> BaseTransportIpContext<I, BC> for CC
582{
583    type DevicesWithAddrIter<'s> =
584        AssignedAddressDeviceIterator<CC::DeviceAndAddressStatusIter<'s>, I, CC::DeviceId>;
585
586    fn with_devices_with_assigned_addr<O, F: FnOnce(Self::DevicesWithAddrIter<'_>) -> O>(
587        &mut self,
588        addr: SpecifiedAddr<I::Addr>,
589        cb: F,
590    ) -> O {
591        self.with_address_statuses(addr, |it| cb(AssignedAddressDeviceIterator(it, PhantomData)))
592    }
593
594    fn get_default_hop_limits(&mut self, device: Option<&Self::DeviceId>) -> HopLimits {
595        match device {
596            Some(device) => HopLimits {
597                unicast: IpDeviceEgressStateContext::<I>::get_hop_limit(self, device),
598                ..DEFAULT_HOP_LIMITS
599            },
600            None => DEFAULT_HOP_LIMITS,
601        }
602    }
603
604    fn get_original_destination(&mut self, tuple: &Tuple<I>) -> Option<(I::Addr, u16)> {
605        self.with_filter_state(|state| {
606            let conn = state.conntrack.get_connection(&tuple)?;
607
608            if !conn.destination_nat() {
609                return None;
610            }
611
612            // The tuple marking the original direction of the connection is
613            // never modified by NAT. This means it can be used to recover the
614            // destination before NAT was performed.
615            let original = conn.original_tuple();
616            Some((original.dst_addr, original.dst_port_or_id))
617        })
618    }
619}
620
/// The status of an IP address on an interface.
#[derive(Debug, PartialEq)]
#[allow(missing_docs)]
pub enum AddressStatus<S> {
    /// The address is present on the interface; `S` carries the
    /// IP-version-specific kind of presence.
    Present(S),
    /// No status could be established for the address on the interface.
    Unassigned,
}
628
629impl<S> AddressStatus<S> {
630    fn into_present(self) -> Option<S> {
631        match self {
632            Self::Present(s) => Some(s),
633            Self::Unassigned => None,
634        }
635    }
636}
637
638impl AddressStatus<Ipv4PresentAddressStatus> {
639    /// Creates an IPv4 `AddressStatus` for `addr` on `device`.
640    pub fn from_context_addr_v4<
641        BC: IpDeviceStateBindingsTypes,
642        CC: device::IpDeviceStateContext<Ipv4, BC> + GmpQueryHandler<Ipv4, BC>,
643    >(
644        core_ctx: &mut CC,
645        device: &CC::DeviceId,
646        addr: SpecifiedAddr<Ipv4Addr>,
647    ) -> AddressStatus<Ipv4PresentAddressStatus> {
648        if addr.is_limited_broadcast() {
649            return AddressStatus::Present(Ipv4PresentAddressStatus::LimitedBroadcast);
650        }
651
652        if MulticastAddr::new(addr.get())
653            .is_some_and(|addr| GmpQueryHandler::gmp_is_in_group(core_ctx, device, addr))
654        {
655            return AddressStatus::Present(Ipv4PresentAddressStatus::Multicast);
656        }
657
658        core_ctx.with_address_ids(device, |mut addrs, core_ctx| {
659            addrs
660                .find_map(|addr_id| {
661                    let dev_addr = addr_id.addr_sub();
662                    let (dev_addr, subnet) = dev_addr.addr_subnet();
663
664                    if **dev_addr == addr {
665                        let assigned = core_ctx.with_ip_address_data(
666                            device,
667                            &addr_id,
668                            |IpAddressData { flags: IpAddressFlags { assigned }, config: _ }| {
669                                *assigned
670                            },
671                        );
672
673                        if assigned {
674                            Some(AddressStatus::Present(Ipv4PresentAddressStatus::UnicastAssigned))
675                        } else {
676                            Some(AddressStatus::Present(Ipv4PresentAddressStatus::UnicastTentative))
677                        }
678                    } else if addr.get() == subnet.broadcast() {
679                        Some(AddressStatus::Present(Ipv4PresentAddressStatus::SubnetBroadcast))
680                    } else if device.is_loopback() && subnet.contains(addr.as_ref()) {
681                        Some(AddressStatus::Present(Ipv4PresentAddressStatus::LoopbackSubnet))
682                    } else {
683                        None
684                    }
685                })
686                .unwrap_or(AddressStatus::Unassigned)
687        })
688    }
689}
690
691impl AddressStatus<Ipv6PresentAddressStatus> {
692    /// /// Creates an IPv6 `AddressStatus` for `addr` on `device`.
693    pub fn from_context_addr_v6<
694        BC: IpDeviceBindingsContext<Ipv6, CC::DeviceId>,
695        CC: device::Ipv6DeviceContext<BC> + GmpQueryHandler<Ipv6, BC>,
696    >(
697        core_ctx: &mut CC,
698        device: &CC::DeviceId,
699        addr: SpecifiedAddr<Ipv6Addr>,
700    ) -> AddressStatus<Ipv6PresentAddressStatus> {
701        if MulticastAddr::new(addr.get())
702            .is_some_and(|addr| GmpQueryHandler::gmp_is_in_group(core_ctx, device, addr))
703        {
704            return AddressStatus::Present(Ipv6PresentAddressStatus::Multicast);
705        }
706
707        let addr_id = match core_ctx.get_address_id(device, addr) {
708            Ok(o) => o,
709            Err(NotFoundError) => return AddressStatus::Unassigned,
710        };
711
712        let assigned = core_ctx.with_ip_address_data(
713            device,
714            &addr_id,
715            |IpAddressData { flags: IpAddressFlags { assigned }, config: _ }| *assigned,
716        );
717
718        if assigned {
719            AddressStatus::Present(Ipv6PresentAddressStatus::UnicastAssigned)
720        } else {
721            AddressStatus::Present(Ipv6PresentAddressStatus::UnicastTentative)
722        }
723    }
724}
725
// `AddressStatus` is generic over the IP version whenever its inner status
// type is: the mapped type wraps the inner type's mapped type.
impl<S: GenericOverIp<I>, I: Ip> GenericOverIp<I> for AddressStatus<S> {
    type Type = AddressStatus<S::Type>;
}
729
/// The status of an IPv4 address.
#[derive(Debug, PartialEq)]
#[allow(missing_docs)]
pub enum Ipv4PresentAddressStatus {
    /// The address is the limited broadcast address.
    LimitedBroadcast,
    /// The address is the broadcast address of the subnet of one of the
    /// device's addresses.
    SubnetBroadcast,
    /// The address is a multicast group that the device is a member of.
    Multicast,
    /// The address matches one of the device's addresses, and that address is
    /// flagged as assigned.
    UnicastAssigned,
    /// The address matches one of the device's addresses, but that address is
    /// not flagged as assigned.
    UnicastTentative,
    /// This status indicates that the queried device was Loopback. The address
    /// belongs to a subnet that is assigned to the interface. This status
    /// takes lower precedence than the unicast statuses and `SubnetBroadcast`.
    /// E.g. if the loopback device is assigned `127.0.0.1/8`:
    ///   * address `127.0.0.1` -> `UnicastAssigned` or `UnicastTentative`
    ///   * address `127.0.0.2` -> `LoopbackSubnet`
    ///   * address `127.255.255.255` -> `SubnetBroadcast`
    /// This exists for Linux conformance, which on the Loopback device,
    /// considers an IPv4 address assigned if it belongs to one of the device's
    /// assigned subnets.
    LoopbackSubnet,
}
751
752impl Ipv4PresentAddressStatus {
753    fn to_broadcast_marker(&self) -> Option<<Ipv4 as BroadcastIpExt>::BroadcastMarker> {
754        match self {
755            Self::LimitedBroadcast | Self::SubnetBroadcast => Some(()),
756            Self::Multicast
757            | Self::UnicastAssigned
758            | Self::UnicastTentative
759            | Self::LoopbackSubnet => None,
760        }
761    }
762}
763
/// The status of an IPv6 address.
#[derive(Debug, PartialEq)]
#[allow(missing_docs)]
pub enum Ipv6PresentAddressStatus {
    /// The address is a multicast group that the device is a member of.
    Multicast,
    /// The address is one of the device's addresses and is flagged as
    /// assigned.
    UnicastAssigned,
    /// The address is one of the device's addresses but is not flagged as
    /// assigned.
    UnicastTentative,
}
772
/// An extension trait providing IP layer properties.
pub trait IpLayerIpExt:
    IpExt
    + MulticastRouteIpExt
    + IcmpHandlerIpExt
    + FilterIpExt
    + FragmentationIpExt
    + IpDeviceIpExt
    + IpCountersIpExt
    + IcmpCountersIpExt
    + ReassemblyIpExt
{
    /// IP Address status.
    type AddressStatus: Debug;
    /// IP layer state for this IP version; it gives access to the shared
    /// [`IpStateInner`] through `AsRef`.
    type State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>: AsRef<
        IpStateInner<Self, StrongDeviceId, BT>,
    >;
    /// State kept for packet identifiers.
    type PacketIdState;
    /// The type of a single packet identifier.
    type PacketId;
    /// Produces the next packet ID from the state.
    fn next_packet_id_from_state(state: &Self::PacketIdState) -> Self::PacketId;
}
798
799impl IpLayerIpExt for Ipv4 {
800    type AddressStatus = Ipv4PresentAddressStatus;
801    type State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> =
802        Ipv4State<StrongDeviceId, BT>;
803    type PacketIdState = AtomicU16;
804    type PacketId = u16;
805    fn next_packet_id_from_state(next_packet_id: &Self::PacketIdState) -> Self::PacketId {
806        // Relaxed ordering as we only need atomicity without synchronization. See
807        // https://en.cppreference.com/w/cpp/atomic/memory_order#Relaxed_ordering
808        // for more details.
809        next_packet_id.fetch_add(1, atomic::Ordering::Relaxed)
810    }
811}
812
813impl IpLayerIpExt for Ipv6 {
814    type AddressStatus = Ipv6PresentAddressStatus;
815    type State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> =
816        Ipv6State<StrongDeviceId, BT>;
817    type PacketIdState = ();
818    type PacketId = ();
819    fn next_packet_id_from_state((): &Self::PacketIdState) -> Self::PacketId {
820        ()
821    }
822}
823
824/// The state context provided to the IP layer.
825pub trait IpStateContext<I: IpLayerIpExt, BT: IpRoutingBindingsTypes + MatcherBindingsTypes>:
826    IpRouteTablesContext<I, BT, DeviceId: InterfaceProperties<BT::DeviceClass>>
827{
828    /// The context that provides access to the IP routing tables.
829    type IpRouteTablesCtx<'a>: IpRouteTablesContext<I, BT, DeviceId = Self::DeviceId>;
830
831    /// Gets an immutable reference to the rules table.
832    fn with_rules_table<
833        O,
834        F: FnOnce(&mut Self::IpRouteTablesCtx<'_>, &RulesTable<I, Self::DeviceId, BT>) -> O,
835    >(
836        &mut self,
837        cb: F,
838    ) -> O;
839
840    /// Gets a mutable reference to the rules table.
841    fn with_rules_table_mut<
842        O,
843        F: FnOnce(&mut Self::IpRouteTablesCtx<'_>, &mut RulesTable<I, Self::DeviceId, BT>) -> O,
844    >(
845        &mut self,
846        cb: F,
847    ) -> O;
848}
849
850/// The state context that gives access to routing tables provided to the IP layer.
851pub trait IpRouteTablesContext<I: IpLayerIpExt, BT: IpRoutingBindingsTypes>:
852    IpRouteTableContext<I, BT> + IpDeviceContext<I>
853{
854    /// The inner context that can provide access to individual routing tables.
855    type Ctx<'a>: IpRouteTableContext<I, BT, DeviceId = Self::DeviceId, WeakDeviceId = Self::WeakDeviceId>;
856
857    /// Gets the main table ID.
858    fn main_table_id(&self) -> RoutingTableId<I, Self::DeviceId, BT>;
859
860    /// Gets immutable access to all the routing tables that currently exist.
861    fn with_ip_routing_tables<
862        O,
863        F: FnOnce(
864            &mut Self::Ctx<'_>,
865            &HashMap<
866                RoutingTableId<I, Self::DeviceId, BT>,
867                PrimaryRc<BaseRoutingTableState<I, Self::DeviceId, BT>>,
868            >,
869        ) -> O,
870    >(
871        &mut self,
872        cb: F,
873    ) -> O;
874
875    /// Gets mutable access to all the routing tables that currently exist.
876    fn with_ip_routing_tables_mut<
877        O,
878        F: FnOnce(
879            &mut HashMap<
880                RoutingTableId<I, Self::DeviceId, BT>,
881                PrimaryRc<BaseRoutingTableState<I, Self::DeviceId, BT>>,
882            >,
883        ) -> O,
884    >(
885        &mut self,
886        cb: F,
887    ) -> O;
888
889    // TODO(https://fxbug.dev/354724171): Remove this function when we no longer
890    // make routing decisions starting from the main table.
891    /// Calls the function with an immutable reference to IP routing table.
892    fn with_main_ip_routing_table<
893        O,
894        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &RoutingTable<I, Self::DeviceId>) -> O,
895    >(
896        &mut self,
897        cb: F,
898    ) -> O {
899        let main_table_id = self.main_table_id();
900        self.with_ip_routing_table(&main_table_id, cb)
901    }
902
903    // TODO(https://fxbug.dev/341194323): Remove this function when we no longer
904    // only update the main routing table by default.
905    /// Calls the function with a mutable reference to IP routing table.
906    fn with_main_ip_routing_table_mut<
907        O,
908        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &mut RoutingTable<I, Self::DeviceId>) -> O,
909    >(
910        &mut self,
911        cb: F,
912    ) -> O {
913        let main_table_id = self.main_table_id();
914        self.with_ip_routing_table_mut(&main_table_id, cb)
915    }
916}
917
918/// The state context that gives access to a singular routing table.
919pub trait IpRouteTableContext<I: IpLayerIpExt, BT: IpRoutingBindingsTypes>:
920    IpDeviceContext<I>
921{
922    /// The inner device id context.
923    type IpDeviceIdCtx<'a>: DeviceIdContext<AnyDevice, DeviceId = Self::DeviceId, WeakDeviceId = Self::WeakDeviceId>
924        + IpRoutingDeviceContext<I>
925        + IpDeviceContext<I>;
926
927    /// Calls the function with an immutable reference to IP routing table.
928    fn with_ip_routing_table<
929        O,
930        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &RoutingTable<I, Self::DeviceId>) -> O,
931    >(
932        &mut self,
933        table_id: &RoutingTableId<I, Self::DeviceId, BT>,
934        cb: F,
935    ) -> O;
936
937    /// Calls the function with a mutable reference to IP routing table.
938    fn with_ip_routing_table_mut<
939        O,
940        F: FnOnce(&mut Self::IpDeviceIdCtx<'_>, &mut RoutingTable<I, Self::DeviceId>) -> O,
941    >(
942        &mut self,
943        table_id: &RoutingTableId<I, Self::DeviceId, BT>,
944        cb: F,
945    ) -> O;
946}
947
948/// Provides access to an IP device's state for IP layer egress.
949pub trait IpDeviceEgressStateContext<I: IpLayerIpExt>: DeviceIdContext<AnyDevice> {
950    /// Calls the callback with the next packet ID.
951    fn with_next_packet_id<O, F: FnOnce(&I::PacketIdState) -> O>(&self, cb: F) -> O;
952
953    /// Returns the best local address for communicating with the remote.
954    fn get_local_addr_for_remote(
955        &mut self,
956        device_id: &Self::DeviceId,
957        remote: Option<SpecifiedAddr<I::Addr>>,
958    ) -> Option<IpDeviceAddr<I::Addr>>;
959
960    /// Returns the hop limit.
961    fn get_hop_limit(&mut self, device_id: &Self::DeviceId) -> NonZeroU8;
962}
963
964/// Provides access to an IP device's state for IP layer ingress.
965pub trait IpDeviceIngressStateContext<I: IpLayerIpExt>: DeviceIdContext<AnyDevice> {
966    /// Gets the status of an address.
967    ///
968    /// Only the specified device will be checked for the address. Returns
969    /// [`AddressStatus::Unassigned`] if the address is not assigned to the
970    /// device.
971    fn address_status_for_device(
972        &mut self,
973        addr: SpecifiedAddr<I::Addr>,
974        device_id: &Self::DeviceId,
975    ) -> AddressStatus<I::AddressStatus>;
976}
977
978/// The IP device context provided to the IP layer.
979pub trait IpDeviceContext<I: IpLayerIpExt>:
980    IpDeviceEgressStateContext<I> + IpDeviceIngressStateContext<I>
981{
982    /// Is the device enabled?
983    fn is_ip_device_enabled(&mut self, device_id: &Self::DeviceId) -> bool;
984
985    /// The iterator provided to [`IpDeviceContext::with_address_statuses`].
986    type DeviceAndAddressStatusIter<'a>: Iterator<Item = (Self::DeviceId, I::AddressStatus)>;
987
988    /// Provides access to the status of an address.
989    ///
990    /// Calls the provided callback with an iterator over the devices for which
991    /// the address is assigned and the status of the assignment for each
992    /// device.
993    fn with_address_statuses<F: FnOnce(Self::DeviceAndAddressStatusIter<'_>) -> R, R>(
994        &mut self,
995        addr: SpecifiedAddr<I::Addr>,
996        cb: F,
997    ) -> R;
998
999    /// Returns true iff the device has unicast forwarding enabled.
1000    fn is_device_unicast_forwarding_enabled(&mut self, device_id: &Self::DeviceId) -> bool;
1001}
1002
1003/// Provides the ability to check neighbor reachability via a specific device.
1004pub trait IpDeviceConfirmReachableContext<I: IpLayerIpExt, BC>: DeviceIdContext<AnyDevice> {
1005    /// Confirm transport-layer forward reachability to the specified neighbor
1006    /// through the specified device.
1007    fn confirm_reachable(
1008        &mut self,
1009        bindings_ctx: &mut BC,
1010        device: &Self::DeviceId,
1011        neighbor: SpecifiedAddr<I::Addr>,
1012    );
1013}
1014
1015/// Provides access to an IP device's MTU for the IP layer.
1016pub trait IpDeviceMtuContext<I: Ip>: DeviceIdContext<AnyDevice> {
1017    /// Returns the MTU of the device.
1018    ///
1019    /// The MTU is the maximum size of an IP packet.
1020    fn get_mtu(&mut self, device_id: &Self::DeviceId) -> Mtu;
1021}
1022
/// Events observed at the IP layer.
///
/// `DeviceId` is the device identifier type carried by the events.
#[derive(Debug, Eq, Hash, PartialEq, GenericOverIp)]
#[generic_over_ip(I, Ip)]
pub enum IpLayerEvent<DeviceId, I: IpLayerIpExt> {
    /// A route needs to be added.
    AddRoute(types::AddableEntry<I::Addr, DeviceId>),
    /// Routes matching these specifiers need to be removed.
    RemoveRoutes {
        /// Destination subnet
        subnet: Subnet<I::Addr>,
        /// Outgoing interface
        device: DeviceId,
        /// Gateway/next-hop
        gateway: Option<SpecifiedAddr<I::Addr>>,
    },
    /// The multicast forwarding engine emitted an event.
    MulticastForwarding(MulticastForwardingEvent<I, DeviceId>),
}
1041
1042impl<DeviceId, I: IpLayerIpExt> From<MulticastForwardingEvent<I, DeviceId>>
1043    for IpLayerEvent<DeviceId, I>
1044{
1045    fn from(event: MulticastForwardingEvent<I, DeviceId>) -> IpLayerEvent<DeviceId, I> {
1046        IpLayerEvent::MulticastForwarding(event)
1047    }
1048}
1049
1050impl<DeviceId, I: IpLayerIpExt> IpLayerEvent<DeviceId, I> {
1051    /// Changes the device id type with `map`.
1052    pub fn map_device<N, F: Fn(DeviceId) -> N>(self, map: F) -> IpLayerEvent<N, I> {
1053        match self {
1054            IpLayerEvent::AddRoute(types::AddableEntry {
1055                subnet,
1056                device,
1057                gateway,
1058                metric,
1059                route_preference,
1060            }) => IpLayerEvent::AddRoute(types::AddableEntry {
1061                subnet,
1062                device: map(device),
1063                gateway,
1064                metric,
1065                route_preference,
1066            }),
1067            IpLayerEvent::RemoveRoutes { subnet, device, gateway } => {
1068                IpLayerEvent::RemoveRoutes { subnet, device: map(device), gateway }
1069            }
1070            IpLayerEvent::MulticastForwarding(e) => {
1071                IpLayerEvent::MulticastForwarding(e.map_device(map))
1072            }
1073        }
1074    }
1075}
1076
1077/// An event signifying a router advertisement has been received.
1078#[derive(Derivative, PartialEq, Eq, Clone, Hash)]
1079#[derivative(Debug)]
1080pub struct RouterAdvertisementEvent<D> {
1081    /// The raw bytes of the router advertisement message's options.
1082    // NB: avoid deriving Debug for this since it could contain PII.
1083    #[derivative(Debug = "ignore")]
1084    pub options_bytes: Box<[u8]>,
1085    /// The source address of the RA message.
1086    pub source: net_types::ip::Ipv6Addr,
1087    /// The device on which the message was received.
1088    pub device: D,
1089}
1090
1091impl<D> RouterAdvertisementEvent<D> {
1092    /// Maps the contained device ID type.
1093    pub fn map_device<N, F: Fn(D) -> N>(self, map: F) -> RouterAdvertisementEvent<N> {
1094        let Self { options_bytes, source, device } = self;
1095        RouterAdvertisementEvent { options_bytes, source, device: map(device) }
1096    }
1097}
1098
1099/// Ipv6-specific bindings execution context for the IP layer.
1100pub trait NdpBindingsContext<DeviceId>: EventContext<RouterAdvertisementEvent<DeviceId>> {}
1101impl<DeviceId, BC: EventContext<RouterAdvertisementEvent<DeviceId>>> NdpBindingsContext<DeviceId>
1102    for BC
1103{
1104}
1105
/// Defines how socket marks should be handled by the IP layer.
///
/// Both methods are associated functions (they take no `self`) and return
/// `'static` slices of mark domains.
pub trait MarksBindingsContext {
    /// Mark domains for marks that should be kept when an egress packet is
    /// passed from the IP layer to the device. For egress packets that are
    /// delivered locally through the loopback interface, these marks are
    /// passed to the ingress path and can be observed by ingress filter hooks.
    fn marks_to_keep_on_egress() -> &'static [MarkDomain];

    /// Mark domains for marks that should be copied to ingress packets. If
    /// early demux results in a socket then these marks are copied from the
    /// socket to the packet and can be observed in the `LOCAL_INGRESS` filter
    /// hook.
    fn marks_to_set_on_ingress() -> &'static [MarkDomain];
}
1120
1121/// The bindings execution context for the IP layer.
1122pub trait IpLayerBindingsContext<I: IpLayerIpExt, DeviceId>:
1123    InstantContext
1124    + EventContext<IpLayerEvent<DeviceId, I>>
1125    + FilterBindingsContext<DeviceId>
1126    + TxMetadataBindingsTypes
1127    + IpRoutingBindingsTypes
1128    + MarksBindingsContext
1129{
1130}
1131impl<
1132    I: IpLayerIpExt,
1133    DeviceId,
1134    BC: InstantContext
1135        + EventContext<IpLayerEvent<DeviceId, I>>
1136        + FilterBindingsContext<DeviceId>
1137        + TxMetadataBindingsTypes
1138        + IpRoutingBindingsTypes
1139        + MarksBindingsContext,
1140> IpLayerBindingsContext<I, DeviceId> for BC
1141{
1142}
1143
1144/// A marker trait for bindings types at the IP layer.
1145pub trait IpLayerBindingsTypes:
1146    IcmpBindingsTypes + IpStateBindingsTypes + IpRoutingBindingsTypes
1147{
1148}
1149impl<BT: IcmpBindingsTypes + IpStateBindingsTypes + IpRoutingBindingsTypes> IpLayerBindingsTypes
1150    for BT
1151{
1152}
1153
1154/// The execution context for the IP layer.
1155pub trait IpLayerContext<
1156    I: IpLayerIpExt,
1157    BC: IpLayerBindingsContext<I, <Self as DeviceIdContext<AnyDevice>>::DeviceId>,
1158>:
1159    IpStateContext<I, BC>
1160    + IpDeviceContext<I>
1161    + IpDeviceMtuContext<I>
1162    + IpDeviceSendContext<I, BC>
1163    + IcmpErrorHandler<I, BC>
1164    + MulticastForwardingStateContext<I, BC>
1165    + MulticastForwardingDeviceContext<I>
1166    + CounterContext<MulticastForwardingCounters<I>>
1167    + ResourceCounterContext<<Self as DeviceIdContext<AnyDevice>>::DeviceId, IpCounters<I>>
1168{
1169}
1170
1171impl<
1172    I: IpLayerIpExt,
1173    BC: IpLayerBindingsContext<I, <CC as DeviceIdContext<AnyDevice>>::DeviceId>,
1174    CC: IpStateContext<I, BC>
1175        + IpDeviceContext<I>
1176        + IpDeviceMtuContext<I>
1177        + IpDeviceSendContext<I, BC>
1178        + IcmpErrorHandler<I, BC>
1179        + MulticastForwardingStateContext<I, BC>
1180        + MulticastForwardingDeviceContext<I>
1181        + CounterContext<MulticastForwardingCounters<I>>
1182        + ResourceCounterContext<<Self as DeviceIdContext<AnyDevice>>::DeviceId, IpCounters<I>>,
1183> IpLayerContext<I, BC> for CC
1184{
1185}
1186
1187fn is_unicast_assigned<I: IpLayerIpExt>(status: &I::AddressStatus) -> bool {
1188    #[derive(GenericOverIp)]
1189    #[generic_over_ip(I, Ip)]
1190    struct WrapAddressStatus<'a, I: IpLayerIpExt>(&'a I::AddressStatus);
1191
1192    I::map_ip(
1193        WrapAddressStatus(status),
1194        |WrapAddressStatus(status)| match status {
1195            Ipv4PresentAddressStatus::UnicastAssigned
1196            | Ipv4PresentAddressStatus::LoopbackSubnet => true,
1197            Ipv4PresentAddressStatus::UnicastTentative
1198            | Ipv4PresentAddressStatus::LimitedBroadcast
1199            | Ipv4PresentAddressStatus::SubnetBroadcast
1200            | Ipv4PresentAddressStatus::Multicast => false,
1201        },
1202        |WrapAddressStatus(status)| match status {
1203            Ipv6PresentAddressStatus::UnicastAssigned => true,
1204            Ipv6PresentAddressStatus::Multicast | Ipv6PresentAddressStatus::UnicastTentative => {
1205                false
1206            }
1207        },
1208    )
1209}
1210
1211fn is_local_assigned_address<I: Ip + IpLayerIpExt, CC: IpDeviceIngressStateContext<I>>(
1212    core_ctx: &mut CC,
1213    device: &CC::DeviceId,
1214    addr: IpDeviceAddr<I::Addr>,
1215) -> bool {
1216    match core_ctx.address_status_for_device(addr.into(), device) {
1217        AddressStatus::Present(status) => is_unicast_assigned::<I>(&status),
1218        AddressStatus::Unassigned => false,
1219    }
1220}
1221
1222fn get_device_with_assigned_address<I, CC>(
1223    core_ctx: &mut CC,
1224    addr: IpDeviceAddr<I::Addr>,
1225) -> Option<(CC::DeviceId, I::AddressStatus)>
1226where
1227    I: IpLayerIpExt,
1228    CC: IpDeviceContext<I>,
1229{
1230    core_ctx.with_address_statuses(addr.into(), |mut it| {
1231        it.find_map(|(device, status)| {
1232            is_unicast_assigned::<I>(&status).then_some((device, status))
1233        })
1234    })
1235}
1236
1237// Returns the local IP address to use for sending packets from the
1238// given device to `addr`, restricting to `local_ip` if it is not
1239// `None`.
1240fn get_local_addr<I: Ip + IpLayerIpExt, CC: IpDeviceContext<I>>(
1241    core_ctx: &mut CC,
1242    local_ip_and_policy: Option<(IpDeviceAddr<I::Addr>, NonLocalSrcAddrPolicy)>,
1243    device: &CC::DeviceId,
1244    remote_addr: Option<RoutableIpAddr<I::Addr>>,
1245) -> Result<IpDeviceAddr<I::Addr>, ResolveRouteError> {
1246    match local_ip_and_policy {
1247        Some((local_ip, NonLocalSrcAddrPolicy::Allow)) => Ok(local_ip),
1248        Some((local_ip, NonLocalSrcAddrPolicy::Deny)) => {
1249            is_local_assigned_address(core_ctx, device, local_ip)
1250                .then_some(local_ip)
1251                .ok_or(ResolveRouteError::NoSrcAddr)
1252        }
1253        None => core_ctx
1254            .get_local_addr_for_remote(device, remote_addr.map(Into::into))
1255            .ok_or(ResolveRouteError::NoSrcAddr),
1256    }
1257}
1258
1259/// An error occurred while resolving the route to a destination
1260#[derive(Error, Copy, Clone, Debug, Eq, GenericOverIp, PartialEq)]
1261#[generic_over_ip()]
1262pub enum ResolveRouteError {
1263    /// A source address could not be selected.
1264    #[error("a source address could not be selected")]
1265    NoSrcAddr,
1266    /// The destination in unreachable.
1267    #[error("no route exists to the destination IP address")]
1268    Unreachable,
1269}
1270
1271/// Like [`get_local_addr`], but willing to forward internally as necessary.
1272fn get_local_addr_with_internal_forwarding<I, CC>(
1273    core_ctx: &mut CC,
1274    local_ip_and_policy: Option<(IpDeviceAddr<I::Addr>, NonLocalSrcAddrPolicy)>,
1275    device: &CC::DeviceId,
1276    remote_addr: Option<RoutableIpAddr<I::Addr>>,
1277) -> Result<(IpDeviceAddr<I::Addr>, InternalForwarding<CC::DeviceId>), ResolveRouteError>
1278where
1279    I: IpLayerIpExt,
1280    CC: IpDeviceContext<I>,
1281{
1282    match get_local_addr(core_ctx, local_ip_and_policy, device, remote_addr) {
1283        Ok(src_addr) => Ok((src_addr, InternalForwarding::NotUsed)),
1284        Err(e) => {
1285            // If a local_ip was specified, the local_ip is assigned to a
1286            // device, and that device has forwarding enabled, use internal
1287            // forwarding.
1288            //
1289            // This enables a weak host model when the Netstack is configured as
1290            // a router. Conceptually the netstack is forwarding the packet from
1291            // the local IP's device to the output device of the selected route.
1292            if let Some((local_ip, _policy)) = local_ip_and_policy {
1293                if let Some((device, _addr_status)) =
1294                    get_device_with_assigned_address(core_ctx, local_ip)
1295                {
1296                    if core_ctx.is_device_unicast_forwarding_enabled(&device) {
1297                        return Ok((local_ip, InternalForwarding::Used(device)));
1298                    }
1299                }
1300            }
1301            Err(e)
1302        }
1303    }
1304}
1305
/// Information gathered while walking the rules table, bundled with a
/// caller-defined payload.
///
/// The wrapper exists so that `walk_rules` can later report more about the
/// walk without changing the shape of its result.
#[derive(Debug, PartialEq, Eq)]
struct RuleWalkInfo<O> {
    /// Whether any rule seen during the walk had a source address matcher.
    observed_source_address_matcher: bool,
    /// The caller-defined payload, e.g. the lookup result produced by the
    /// user-provided function.
    inner: O,
}
1316
1317/// A helper function that traverses through the rules table.
1318///
1319/// To walk through the rules, you need to provide it with an initial value for the loop and a
1320/// callback function that yieds a [`ControlFlow`] result to indicate whether the traversal should
1321/// stop.
1322///
1323/// # Returns
1324///
1325/// - `ControlFlow::Break(RuleAction::Lookup(_))` if we hit a lookup rule and an output is
1326///   yielded from the route table.
1327/// - `ControlFlow::Break(RuleAction::Unreachable)` if we hit an unreachable rule.
1328/// - `ControlFlow::Continue(_)` if we finished walking the rules table without yielding any
1329///   result.
1330fn walk_rules<
1331    I: IpLayerIpExt,
1332    BT: IpRoutingBindingsTypes + MatcherBindingsTypes,
1333    CC: IpRouteTablesContext<I, BT, DeviceId: InterfaceProperties<BT::DeviceClass>>,
1334    O,
1335    State,
1336    F: FnMut(
1337        State,
1338        &mut CC::IpDeviceIdCtx<'_>,
1339        &RoutingTable<I, CC::DeviceId>,
1340    ) -> ControlFlow<O, State>,
1341>(
1342    core_ctx: &mut CC,
1343    rules: &RulesTable<I, CC::DeviceId, BT>,
1344    init: State,
1345    rule_input: &RuleInput<'_, I, CC::DeviceId>,
1346    mut lookup_table: F,
1347) -> ControlFlow<RuleAction<RuleWalkInfo<O>>, RuleWalkInfo<State>> {
1348    rules.iter().try_fold(
1349        RuleWalkInfo { inner: init, observed_source_address_matcher: false },
1350        |RuleWalkInfo { inner: state, observed_source_address_matcher },
1351         Rule { action, matcher }| {
1352            let observed_source_address_matcher =
1353                observed_source_address_matcher || matcher.source_address_matcher.is_some();
1354            if !matcher.matches(rule_input) {
1355                return ControlFlow::Continue(RuleWalkInfo {
1356                    inner: state,
1357                    observed_source_address_matcher,
1358                });
1359            }
1360            match action {
1361                RuleAction::Unreachable => return ControlFlow::Break(RuleAction::Unreachable),
1362                RuleAction::Lookup(table_id) => core_ctx.with_ip_routing_table(
1363                    &table_id,
1364                    |core_ctx, table| match lookup_table(state, core_ctx, table) {
1365                        ControlFlow::Break(out) => {
1366                            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
1367                                inner: out,
1368                                observed_source_address_matcher,
1369                            }))
1370                        }
1371                        ControlFlow::Continue(state) => ControlFlow::Continue(RuleWalkInfo {
1372                            inner: state,
1373                            observed_source_address_matcher,
1374                        }),
1375                    },
1376                ),
1377            }
1378        },
1379    )
1380}
1381
1382/// Returns the outgoing routing instructions for reaching the given destination.
1383///
1384/// If a `device` is specified, the resolved route is limited to those that
1385/// egress over the device.
1386///
1387/// If `src_ip` is specified the resolved route is limited to those that egress
1388/// over a device with the address assigned.
1389///
1390/// This function should only be used for calculating a route for an outgoing packet
1391/// that is generated by us.
1392pub fn resolve_output_route_to_destination<
1393    I: Ip + IpDeviceStateIpExt + IpDeviceIpExt + IpLayerIpExt,
1394    BC: IpDeviceBindingsContext<I, CC::DeviceId> + IpLayerBindingsContext<I, CC::DeviceId>,
1395    CC: IpStateContext<I, BC> + IpDeviceContext<I> + device::IpDeviceConfigurationContext<I, BC>,
1396>(
1397    core_ctx: &mut CC,
1398    device: Option<&CC::DeviceId>,
1399    src_ip_and_policy: Option<(IpDeviceAddr<I::Addr>, NonLocalSrcAddrPolicy)>,
1400    dst_ip: Option<RoutableIpAddr<I::Addr>>,
1401    marks: &Marks,
1402) -> Result<ResolvedRoute<I, CC::DeviceId>, ResolveRouteError> {
1403    enum LocalDelivery<A, D> {
1404        WeakLoopback { dst_ip: A, device: D },
1405        StrongForDevice(D),
1406    }
1407
1408    // Check if locally destined. If the destination is an address assigned on
1409    // an interface, and an egress interface wasn't specifically selected, route
1410    // via the loopback device. This lets us operate as a strong host when an
1411    // outgoing interface is explicitly requested while still enabling local
1412    // delivery via the loopback interface, which is acting as a weak host. Note
1413    // that if the loopback interface is requested as an outgoing interface,
1414    // route selection is still performed as a strong host! This makes the
1415    // loopback interface behave more like the other interfaces on the system.
1416    //
1417    // TODO(https://fxbug.dev/42065870): Encode the delivery of locally-
1418    // destined packets to loopback in the route table.
1419    //
1420    // TODO(https://fxbug.dev/322539434): Linux is more permissive about
1421    // allowing cross-device local delivery even when SO_BINDTODEVICE or
1422    // link-local addresses are involved, and this behavior may need to be
1423    // emulated.
1424    let local_delivery_instructions: Option<LocalDelivery<IpDeviceAddr<I::Addr>, CC::DeviceId>> = {
1425        let dst_ip = dst_ip.and_then(IpDeviceAddr::new_from_socket_ip_addr);
1426        match (device, dst_ip) {
1427            (Some(device), Some(dst_ip)) => is_local_assigned_address(core_ctx, device, dst_ip)
1428                .then_some(LocalDelivery::StrongForDevice(device.clone())),
1429            (None, Some(dst_ip)) => {
1430                get_device_with_assigned_address(core_ctx, dst_ip).map(
1431                    |(dst_device, _addr_status)| {
1432                        // If either the source or destination addresses needs
1433                        // a zone ID, then use strong host to enforce that the
1434                        // source and destination addresses are assigned to the
1435                        // same interface.
1436                        if src_ip_and_policy
1437                            .is_some_and(|(ip, _policy)| ip.as_ref().must_have_zone())
1438                            || dst_ip.as_ref().must_have_zone()
1439                        {
1440                            LocalDelivery::StrongForDevice(dst_device)
1441                        } else {
1442                            LocalDelivery::WeakLoopback { dst_ip, device: dst_device }
1443                        }
1444                    },
1445                )
1446            }
1447            (_, None) => None,
1448        }
1449    };
1450
1451    if let Some(local_delivery) = local_delivery_instructions {
1452        let loopback = core_ctx.loopback_id().ok_or(ResolveRouteError::Unreachable)?;
1453
1454        let (src_addr, dest_device) = match local_delivery {
1455            LocalDelivery::WeakLoopback { dst_ip, device } => {
1456                let src_ip = match src_ip_and_policy {
1457                    Some((src_ip, NonLocalSrcAddrPolicy::Deny)) => {
1458                        let _device = get_device_with_assigned_address(core_ctx, src_ip)
1459                            .ok_or(ResolveRouteError::NoSrcAddr)?;
1460                        src_ip
1461                    }
1462                    Some((src_ip, NonLocalSrcAddrPolicy::Allow)) => src_ip,
1463                    None => dst_ip,
1464                };
1465                (src_ip, device)
1466            }
1467            LocalDelivery::StrongForDevice(device) => {
1468                (get_local_addr(core_ctx, src_ip_and_policy, &device, dst_ip)?, device)
1469            }
1470        };
1471        return Ok(ResolvedRoute {
1472            src_addr,
1473            local_delivery_device: Some(dest_device),
1474            device: loopback,
1475            next_hop: NextHop::RemoteAsNeighbor,
1476            internal_forwarding: InternalForwarding::NotUsed,
1477        });
1478    }
1479    let bound_address = src_ip_and_policy.map(|(sock_addr, _policy)| sock_addr.into_inner().get());
1480    let rule_input = RuleInput {
1481        packet_origin: PacketOrigin::Local { bound_address, bound_device: device },
1482        marks,
1483    };
1484    core_ctx.with_rules_table(|core_ctx, rules: &RulesTable<_, _, BC>| {
1485        let mut walk_rules = |rule_input, src_ip_and_policy| {
1486            walk_rules(
1487                core_ctx,
1488                rules,
1489                None, /* first error encountered */
1490                rule_input,
1491                |first_error, core_ctx, table| {
1492                    let mut matching_with_addr = table.lookup_filter_map(
1493                        core_ctx,
1494                        device,
1495                        dst_ip.map_or(I::UNSPECIFIED_ADDRESS, |a| a.addr()),
1496                        |core_ctx, d| {
1497                            Some(get_local_addr_with_internal_forwarding(
1498                                core_ctx,
1499                                src_ip_and_policy,
1500                                d,
1501                                dst_ip,
1502                            ))
1503                        },
1504                    );
1505
1506                    let first_error_in_this_table = match matching_with_addr.next() {
1507                        Some((
1508                            Destination { device, next_hop },
1509                            Ok((local_addr, internal_forwarding)),
1510                        )) => {
1511                            return ControlFlow::Break(Ok((
1512                                Destination { device: device.clone(), next_hop },
1513                                local_addr,
1514                                internal_forwarding,
1515                            )));
1516                        }
1517                        Some((_, Err(e))) => e,
1518                        // Note: rule evaluation will continue on to the next rule, if the
1519                        // previous rule was `Lookup` but the table didn't have the route
1520                        // inside of it.
1521                        None => return ControlFlow::Continue(first_error),
1522                    };
1523
1524                    matching_with_addr
1525                        .filter_map(|(destination, local_addr)| {
1526                            // Select successful routes. We ignore later errors
1527                            // since we've already saved the first one.
1528                            local_addr.ok_checked::<ResolveRouteError>().map(
1529                                |(local_addr, internal_forwarding)| {
1530                                    (destination, local_addr, internal_forwarding)
1531                                },
1532                            )
1533                        })
1534                        .next()
1535                        .map_or(
1536                            ControlFlow::Continue(first_error.or(Some(first_error_in_this_table))),
1537                            |(
1538                                Destination { device, next_hop },
1539                                local_addr,
1540                                internal_forwarding,
1541                            )| {
1542                                ControlFlow::Break(Ok((
1543                                    Destination { device: device.clone(), next_hop },
1544                                    local_addr,
1545                                    internal_forwarding,
1546                                )))
1547                            },
1548                        )
1549                },
1550            )
1551        };
1552
1553        let result = match walk_rules(&rule_input, src_ip_and_policy) {
1554            // Only try to resolve a route again if all of the following are true:
1555            // 1. The source address is not provided by the caller.
1556            // 2. A route is successfully resolved so we selected a source address.
1557            // 3. There is a rule with a source address matcher during the resolution.
1558            // The rationale is to make sure the route resolution converges to a sensible route
1559            // after considering the source address we select.
1560            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
1561                inner: Ok((_dst, selected_src_addr, _internal_forwarding)),
1562                observed_source_address_matcher: true,
1563            })) if src_ip_and_policy.is_none() => walk_rules(
1564                &RuleInput {
1565                    packet_origin: PacketOrigin::Local {
1566                        bound_address: Some(selected_src_addr.into()),
1567                        bound_device: device,
1568                    },
1569                    marks,
1570                },
1571                Some((selected_src_addr, NonLocalSrcAddrPolicy::Deny)),
1572            ),
1573            result => result,
1574        };
1575
1576        match result {
1577            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
1578                inner: result,
1579                observed_source_address_matcher: _,
1580            })) => {
1581                result.map(|(Destination { device, next_hop }, src_addr, internal_forwarding)| {
1582                    ResolvedRoute {
1583                        src_addr,
1584                        device,
1585                        local_delivery_device: None,
1586                        next_hop,
1587                        internal_forwarding,
1588                    }
1589                })
1590            }
1591            ControlFlow::Break(RuleAction::Unreachable) => Err(ResolveRouteError::Unreachable),
1592            ControlFlow::Continue(RuleWalkInfo {
1593                inner: first_error,
1594                observed_source_address_matcher: _,
1595            }) => Err(first_error.unwrap_or(ResolveRouteError::Unreachable)),
1596        }
1597    })
1598}
1599
/// Marker trait opting a core context into the blanket [`IpSocketContext`]
/// implementation.
///
/// The blanket implementation only applies to types that implement this
/// marker and also satisfy that implementation's remaining bounds.
pub trait UseIpSocketContextBlanket {}
1605
1606impl<I, BC, CC> IpSocketContext<I, BC> for CC
1607where
1608    I: Ip + IpDeviceStateIpExt + IpDeviceIpExt + IpLayerIpExt,
1609    BC: IpDeviceBindingsContext<I, CC::DeviceId>
1610        + IpLayerBindingsContext<I, CC::DeviceId>
1611        + IpSocketBindingsContext<CC::DeviceId>,
1612    CC: IpLayerEgressContext<I, BC>
1613        + IpStateContext<I, BC>
1614        + IpDeviceContext<I>
1615        + IpDeviceConfirmReachableContext<I, BC>
1616        + IpDeviceMtuContext<I>
1617        + device::IpDeviceConfigurationContext<I, BC>
1618        + IcmpErrorHandler<I, BC>
1619        + UseIpSocketContextBlanket,
1620{
1621    fn lookup_route(
1622        &mut self,
1623        _bindings_ctx: &mut BC,
1624        device: Option<&CC::DeviceId>,
1625        local_ip: Option<IpDeviceAddr<I::Addr>>,
1626        addr: RoutableIpAddr<I::Addr>,
1627        transparent: bool,
1628        marks: &Marks,
1629    ) -> Result<ResolvedRoute<I, CC::DeviceId>, ResolveRouteError> {
1630        let src_ip_and_policy = local_ip.map(|local_ip| {
1631            (
1632                local_ip,
1633                if transparent {
1634                    NonLocalSrcAddrPolicy::Allow
1635                } else {
1636                    NonLocalSrcAddrPolicy::Deny
1637                },
1638            )
1639        });
1640        let res =
1641            resolve_output_route_to_destination(self, device, src_ip_and_policy, Some(addr), marks);
1642        trace!(
1643            "lookup_route(\
1644                device={device:?}, \
1645                local_ip={local_ip:?}, \
1646                addr={addr:?}, \
1647                transparent={transparent:?}, \
1648                marks={marks:?}) => {res:?}"
1649        );
1650        res
1651    }
1652
1653    fn send_ip_packet<S>(
1654        &mut self,
1655        bindings_ctx: &mut BC,
1656        meta: SendIpPacketMeta<
1657            I,
1658            &<CC as DeviceIdContext<AnyDevice>>::DeviceId,
1659            SpecifiedAddr<I::Addr>,
1660        >,
1661        body: S,
1662        packet_metadata: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
1663    ) -> Result<(), IpSendFrameError<S>>
1664    where
1665        S: TransportPacketSerializer<I>,
1666        S::Buffer: BufferMut,
1667    {
1668        send_ip_packet_from_device(self, bindings_ctx, meta.into(), body, packet_metadata)
1669    }
1670
1671    fn get_loopback_device(&mut self) -> Option<Self::DeviceId> {
1672        device::IpDeviceConfigurationContext::<I, _>::loopback_id(self)
1673    }
1674
1675    fn confirm_reachable(
1676        &mut self,
1677        bindings_ctx: &mut BC,
1678        dst: SpecifiedAddr<I::Addr>,
1679        input: RuleInput<'_, I, Self::DeviceId>,
1680    ) {
1681        match lookup_route_table(self, dst.get(), input) {
1682            Some(Destination { next_hop, device }) => {
1683                let neighbor = match next_hop {
1684                    NextHop::RemoteAsNeighbor => dst,
1685                    NextHop::Gateway(gateway) => gateway,
1686                    NextHop::Broadcast(marker) => {
1687                        I::map_ip::<_, ()>(
1688                            WrapBroadcastMarker(marker),
1689                            |WrapBroadcastMarker(())| {
1690                                debug!(
1691                                    "can't confirm {dst:?}@{device:?} as reachable: \
1692                                    dst is a broadcast address"
1693                                );
1694                            },
1695                            |WrapBroadcastMarker(never)| match never {},
1696                        );
1697                        return;
1698                    }
1699                };
1700                IpDeviceConfirmReachableContext::confirm_reachable(
1701                    self,
1702                    bindings_ctx,
1703                    &device,
1704                    neighbor,
1705                );
1706            }
1707            None => {
1708                debug!("can't confirm {dst:?} as reachable: no route");
1709            }
1710        }
1711    }
1712}
1713
1714/// Trait that provides basic socket information for types that carry a socket
1715/// ID.
1716pub trait SocketMetadata<CC>
1717where
1718    CC: ?Sized,
1719{
1720    /// Returns the SocketInfo for the socket.
1721    fn socket_info(&self, core_ctx: &mut CC) -> SocketInfo;
1722    /// Returns Socket Marks.
1723    fn marks(&self, _core_ctx: &mut CC) -> Marks;
1724}
1725
1726impl<T, O, CC> SocketMetadata<CC> for EitherStack<T, O>
1727where
1728    CC: ?Sized,
1729    T: SocketMetadata<CC>,
1730    O: SocketMetadata<CC>,
1731{
1732    fn socket_info(&self, core_ctx: &mut CC) -> SocketInfo {
1733        match self {
1734            Self::ThisStack(t) => t.socket_info(core_ctx),
1735            Self::OtherStack(o) => o.socket_info(core_ctx),
1736        }
1737    }
1738
1739    fn marks(&self, core_ctx: &mut CC) -> Marks {
1740        match self {
1741            Self::ThisStack(t) => t.marks(core_ctx),
1742            Self::OtherStack(o) => o.marks(core_ctx),
1743        }
1744    }
1745}
1746
1747/// The IP context providing dispatch to the available transport protocols.
1748///
1749/// This trait acts like a demux on the transport protocol for ingress IP
1750/// packets.
1751pub trait IpTransportDispatchContext<I: IpLayerIpExt, BC>: DeviceIdContext<AnyDevice> {
1752    /// Early Demux result.
1753    type EarlyDemuxSocket: SocketMetadata<Self>;
1754
1755    /// Performs early demux result.
1756    fn early_demux<B: ParseBuffer>(
1757        &mut self,
1758        device: &Self::DeviceId,
1759        frame_dst: Option<LocalFrameDestination>,
1760        src_ip: I::Addr,
1761        dst_ip: I::Addr,
1762        proto: I::Proto,
1763        body: B,
1764    ) -> Option<Self::EarlyDemuxSocket>;
1765
1766    /// Dispatches a received incoming IP packet to the appropriate protocol.
1767    /// In case of a failure returns the kind of the ICMP error that should be
1768    /// sent back to the source.
1769    fn dispatch_receive_ip_packet<B: BufferMut, H: IpHeaderInfo<I>>(
1770        &mut self,
1771        bindings_ctx: &mut BC,
1772        device: &Self::DeviceId,
1773        src_ip: I::RecvSrcAddr,
1774        dst_ip: SpecifiedAddr<I::Addr>,
1775        proto: I::Proto,
1776        body: B,
1777        info: &mut LocalDeliveryPacketInfo<I, H>,
1778        early_demux_socket: Option<Self::EarlyDemuxSocket>,
1779    ) -> Result<(), I::IcmpError>;
1780}
1781
1782/// A marker trait for all the contexts required for IP ingress.
1783pub trait IpLayerIngressContext<I: IpLayerIpExt, BC: IpLayerBindingsContext<I, Self::DeviceId>>:
1784    IpTransportDispatchContext<
1785        I,
1786        BC,
1787        DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
1788    > + IpDeviceIngressStateContext<I>
1789    + IpDeviceMtuContext<I>
1790    + IpDeviceSendContext<I, BC>
1791    + IcmpErrorHandler<I, BC>
1792    + IpLayerContext<I, BC>
1793    + FragmentHandler<I, BC>
1794    + FilterHandlerProvider<I, BC>
1795    + RawIpSocketHandler<I, BC>
1796{
1797}
1798
1799impl<
1800    I: IpLayerIpExt,
1801    BC: IpLayerBindingsContext<I, CC::DeviceId>,
1802    CC: IpTransportDispatchContext<
1803            I,
1804            BC,
1805            DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>,
1806        > + IpDeviceIngressStateContext<I>
1807        + IpDeviceMtuContext<I>
1808        + IpDeviceSendContext<I, BC>
1809        + IcmpErrorHandler<I, BC>
1810        + IpLayerContext<I, BC>
1811        + FragmentHandler<I, BC>
1812        + FilterHandlerProvider<I, BC>
1813        + RawIpSocketHandler<I, BC>,
1814> IpLayerIngressContext<I, BC> for CC
1815{
1816}
1817
1818/// A marker trait for all the contexts required for IP egress.
1819pub trait IpLayerEgressContext<I, BC>:
1820    IpDeviceSendContext<I, BC, DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>>
1821    + FilterHandlerProvider<I, BC>
1822    + ResourceCounterContext<Self::DeviceId, IpCounters<I>>
1823where
1824    I: IpLayerIpExt,
1825    BC: FilterBindingsContext<Self::DeviceId> + TxMetadataBindingsTypes,
1826{
1827}
1828
1829impl<I, BC, CC> IpLayerEgressContext<I, BC> for CC
1830where
1831    I: IpLayerIpExt,
1832    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes,
1833    CC: IpDeviceSendContext<I, BC, DeviceId: netstack3_base::InterfaceProperties<BC::DeviceClass>>
1834        + FilterHandlerProvider<I, BC>
1835        + ResourceCounterContext<Self::DeviceId, IpCounters<I>>,
1836{
1837}
1838
1839/// A marker trait for all the contexts required for IP forwarding.
1840pub trait IpLayerForwardingContext<I: IpLayerIpExt, BC: IpLayerBindingsContext<I, Self::DeviceId>>:
1841    IpLayerEgressContext<I, BC> + IcmpErrorHandler<I, BC> + IpDeviceMtuContext<I>
1842{
1843}
1844
1845impl<
1846    I: IpLayerIpExt,
1847    BC: IpLayerBindingsContext<I, CC::DeviceId>,
1848    CC: IpLayerEgressContext<I, BC> + IcmpErrorHandler<I, BC> + IpDeviceMtuContext<I>,
1849> IpLayerForwardingContext<I, BC> for CC
1850{
1851}
1852
1853/// A builder for IPv4 state.
1854#[derive(Copy, Clone, Default)]
1855pub struct Ipv4StateBuilder {
1856    icmp: Icmpv4StateBuilder,
1857}
1858
1859impl Ipv4StateBuilder {
1860    /// Get the builder for the ICMPv4 state.
1861    #[cfg(any(test, feature = "testutils"))]
1862    pub fn icmpv4_builder(&mut self) -> &mut Icmpv4StateBuilder {
1863        &mut self.icmp
1864    }
1865
1866    /// Builds the [`Ipv4State`].
1867    pub fn build<
1868        CC: CoreTimerContext<IpLayerTimerId, BC>,
1869        StrongDeviceId: StrongDeviceIdentifier,
1870        BC: TimerContext + RngContext + IpLayerBindingsTypes,
1871    >(
1872        self,
1873        bindings_ctx: &mut BC,
1874    ) -> Ipv4State<StrongDeviceId, BC> {
1875        let Ipv4StateBuilder { icmp } = self;
1876
1877        Ipv4State {
1878            inner: IpStateInner::new::<CC>(bindings_ctx),
1879            icmp: icmp.build(),
1880            next_packet_id: Default::default(),
1881        }
1882    }
1883}
1884
1885/// A builder for IPv6 state.
1886///
1887/// By default, opaque IIDs will not be used to generate stable SLAAC addresses.
1888#[derive(Copy, Clone)]
1889pub struct Ipv6StateBuilder {
1890    icmp: Icmpv6StateBuilder,
1891    slaac_stable_secret_key: Option<IidSecret>,
1892}
1893
1894impl Ipv6StateBuilder {
1895    /// Sets the secret key used to generate stable SLAAC addresses.
1896    ///
1897    /// If `slaac_stable_secret_key` is left unset, opaque IIDs will not be used to
1898    /// generate stable SLAAC addresses.
1899    pub fn slaac_stable_secret_key(&mut self, secret_key: IidSecret) -> &mut Self {
1900        self.slaac_stable_secret_key = Some(secret_key);
1901        self
1902    }
1903
1904    /// Builds the [`Ipv6State`].
1905    ///
1906    /// # Panics
1907    ///
1908    /// Panics if the `slaac_stable_secret_key` has not been set.
1909    pub fn build<
1910        CC: CoreTimerContext<IpLayerTimerId, BC>,
1911        StrongDeviceId: StrongDeviceIdentifier,
1912        BC: TimerContext + RngContext + IpLayerBindingsTypes,
1913    >(
1914        self,
1915        bindings_ctx: &mut BC,
1916    ) -> Ipv6State<StrongDeviceId, BC> {
1917        let Ipv6StateBuilder { icmp, slaac_stable_secret_key } = self;
1918
1919        let slaac_stable_secret_key = slaac_stable_secret_key
1920            .expect("stable SLAAC secret key was not provided to `Ipv6StateBuilder`");
1921
1922        Ipv6State {
1923            inner: IpStateInner::new::<CC>(bindings_ctx),
1924            icmp: icmp.build(),
1925            slaac_counters: Default::default(),
1926            slaac_temp_secret_key: IidSecret::new_random(&mut bindings_ctx.rng()),
1927            slaac_stable_secret_key,
1928        }
1929    }
1930}
1931
1932impl Default for Ipv6StateBuilder {
1933    fn default() -> Self {
1934        #[cfg(any(test, feature = "testutils"))]
1935        let slaac_stable_secret_key = Some(IidSecret::ALL_ONES);
1936
1937        #[cfg(not(any(test, feature = "testutils")))]
1938        let slaac_stable_secret_key = None;
1939
1940        Self { icmp: Icmpv6StateBuilder::default(), slaac_stable_secret_key }
1941    }
1942}
1943
1944/// The stack's IPv4 state.
1945pub struct Ipv4State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> {
1946    /// The common inner IP layer state.
1947    pub inner: IpStateInner<Ipv4, StrongDeviceId, BT>,
1948    /// The ICMP state.
1949    pub icmp: Icmpv4State<BT>,
1950    /// The atomic counter providing IPv4 packet identifiers.
1951    pub next_packet_id: AtomicU16,
1952}
1953
1954impl<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
1955    AsRef<IpStateInner<Ipv4, StrongDeviceId, BT>> for Ipv4State<StrongDeviceId, BT>
1956{
1957    fn as_ref(&self) -> &IpStateInner<Ipv4, StrongDeviceId, BT> {
1958        &self.inner
1959    }
1960}
1961
1962/// Generates an IP packet ID.
1963///
1964/// This is only meaningful for IPv4, see [`IpLayerIpExt`].
1965pub fn gen_ip_packet_id<I: IpLayerIpExt, CC: IpDeviceEgressStateContext<I>>(
1966    core_ctx: &mut CC,
1967) -> I::PacketId {
1968    core_ctx.with_next_packet_id(|state| I::next_packet_id_from_state(state))
1969}
1970
1971/// The stack's IPv6 state.
1972pub struct Ipv6State<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes> {
1973    /// The common inner IP layer state.
1974    pub inner: IpStateInner<Ipv6, StrongDeviceId, BT>,
1975    /// ICMPv6 state.
1976    pub icmp: Icmpv6State<BT>,
1977    /// Stateless address autoconfiguration counters.
1978    pub slaac_counters: SlaacCounters,
1979    /// Secret key used for generating SLAAC temporary addresses.
1980    pub slaac_temp_secret_key: IidSecret,
1981    /// Secret key used for generating SLAAC stable addresses.
1982    ///
1983    /// If `None`, opaque IIDs will not be used to generate stable SLAAC
1984    /// addresses.
1985    pub slaac_stable_secret_key: IidSecret,
1986}
1987
1988impl<StrongDeviceId: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
1989    AsRef<IpStateInner<Ipv6, StrongDeviceId, BT>> for Ipv6State<StrongDeviceId, BT>
1990{
1991    fn as_ref(&self) -> &IpStateInner<Ipv6, StrongDeviceId, BT> {
1992        &self.inner
1993    }
1994}
1995
1996impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
1997    OrderedLockAccess<IpPacketFragmentCache<I, BT>> for IpStateInner<I, D, BT>
1998{
1999    type Lock = Mutex<IpPacketFragmentCache<I, BT>>;
2000    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2001        OrderedLockRef::new(&self.fragment_cache)
2002    }
2003}
2004
2005impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2006    OrderedLockAccess<PmtuCache<I, BT>> for IpStateInner<I, D, BT>
2007{
2008    type Lock = Mutex<PmtuCache<I, BT>>;
2009    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2010        OrderedLockRef::new(&self.pmtu_cache)
2011    }
2012}
2013
2014impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2015    OrderedLockAccess<RulesTable<I, D, BT>> for IpStateInner<I, D, BT>
2016{
2017    type Lock = RwLock<RulesTable<I, D, BT>>;
2018    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2019        OrderedLockRef::new(&self.rules_table)
2020    }
2021}
2022
2023impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2024    OrderedLockAccess<HashMap<RoutingTableId<I, D, BT>, PrimaryRc<BaseRoutingTableState<I, D, BT>>>>
2025    for IpStateInner<I, D, BT>
2026{
2027    type Lock =
2028        Mutex<HashMap<RoutingTableId<I, D, BT>, PrimaryRc<BaseRoutingTableState<I, D, BT>>>>;
2029    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2030        OrderedLockRef::new(&self.tables)
2031    }
2032}
2033
2034impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpRoutingBindingsTypes>
2035    OrderedLockAccess<RoutingTable<I, D>> for RoutingTableId<I, D, BT>
2036{
2037    type Lock = RwLock<RoutingTable<I, D>>;
2038    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2039        let Self(inner) = self;
2040        OrderedLockRef::new(&inner.routing_table)
2041    }
2042}
2043
2044impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2045    OrderedLockAccess<MulticastForwardingState<I, D, BT>> for IpStateInner<I, D, BT>
2046{
2047    type Lock = RwLock<MulticastForwardingState<I, D, BT>>;
2048    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2049        OrderedLockRef::new(&self.multicast_forwarding)
2050    }
2051}
2052
2053impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2054    OrderedLockAccess<RawIpSocketMap<I, D::Weak, BT>> for IpStateInner<I, D, BT>
2055{
2056    type Lock = RwLock<RawIpSocketMap<I, D::Weak, BT>>;
2057    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2058        OrderedLockRef::new(&self.raw_sockets)
2059    }
2060}
2061
2062impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpLayerBindingsTypes>
2063    OrderedLockAccess<filter::State<I, WeakAddressId<I, BT>, BT>> for IpStateInner<I, D, BT>
2064{
2065    type Lock = RwLock<filter::State<I, WeakAddressId<I, BT>, BT>>;
2066    fn ordered_lock_access(&self) -> OrderedLockRef<'_, Self::Lock> {
2067        OrderedLockRef::new(&self.filter)
2068    }
2069}
2070
2071/// Marker trait for the bindings types required by the IP layer's inner state.
2072pub trait IpStateBindingsTypes:
2073    PmtuBindingsTypes
2074    + FragmentBindingsTypes
2075    + RawIpSocketsBindingsTypes
2076    + FilterBindingsTypes
2077    + MulticastForwardingBindingsTypes
2078    + IpDeviceStateBindingsTypes
2079    + IpRoutingBindingsTypes
2080{
2081}
2082impl<BT> IpStateBindingsTypes for BT where
2083    BT: PmtuBindingsTypes
2084        + FragmentBindingsTypes
2085        + RawIpSocketsBindingsTypes
2086        + FilterBindingsTypes
2087        + MulticastForwardingBindingsTypes
2088        + IpDeviceStateBindingsTypes
2089        + IpRoutingBindingsTypes
2090{
2091}
2092
2093/// Bindings ID for a routing table.
2094#[derive(Derivative)]
2095#[derivative(Debug(bound = ""))]
2096#[derivative(Clone(bound = "BT::RoutingTableId: Clone"))]
2097pub enum RoutingTableCookie<BT: IpRoutingBindingsTypes> {
2098    /// Main table.
2099    Main,
2100    /// A table added by user (Bindings).
2101    BindingsId(BT::RoutingTableId),
2102}
2103
2104/// State for a routing table.
2105#[derive(Derivative)]
2106#[derivative(Debug(bound = "D: Debug"))]
2107pub struct BaseRoutingTableState<I: Ip, D, BT: IpRoutingBindingsTypes> {
2108    routing_table: RwLock<RoutingTable<I, D>>,
2109    bindings_id: RoutingTableCookie<BT>,
2110}
2111
2112impl<I: Ip, D, BT: IpRoutingBindingsTypes> BaseRoutingTableState<I, D, BT> {
2113    pub(crate) fn with_bindings_id(bindings_id: RoutingTableCookie<BT>) -> Self {
2114        Self { bindings_id, routing_table: Default::default() }
2115    }
2116}
2117
2118/// Identifier to a routing table.
2119#[derive(Derivative)]
2120#[derivative(PartialEq(bound = ""))]
2121#[derivative(Eq(bound = ""))]
2122#[derivative(Hash(bound = ""))]
2123#[derivative(Clone(bound = ""))]
2124pub struct RoutingTableId<I: Ip, D, BT: IpRoutingBindingsTypes>(
2125    StrongRc<BaseRoutingTableState<I, D, BT>>,
2126);
2127
2128impl<I: Ip, D, BT: IpRoutingBindingsTypes> Debug for RoutingTableId<I, D, BT> {
2129    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2130        let Self(rc) = self;
2131        f.debug_tuple("RoutingTableId").field(&I::NAME).field(&rc.bindings_id).finish()
2132    }
2133}
2134
2135impl<I: Ip, D, BT: IpRoutingBindingsTypes> RoutingTableId<I, D, BT> {
2136    /// Creates a new table ID.
2137    pub(crate) fn new(rc: StrongRc<BaseRoutingTableState<I, D, BT>>) -> Self {
2138        Self(rc)
2139    }
2140
2141    /// Provides direct access to the forwarding table.
2142    #[cfg(any(test, feature = "testutils"))]
2143    pub fn table(&self) -> &RwLock<RoutingTable<I, D>> {
2144        let Self(inner) = self;
2145        &inner.routing_table
2146    }
2147
2148    /// Downgrades the strong ID into a weak one.
2149    pub fn downgrade(&self) -> WeakRoutingTableId<I, D, BT>
2150    where
2151        BT::RoutingTableId: Clone,
2152    {
2153        let Self(rc) = self;
2154        WeakRoutingTableId { rc: StrongRc::downgrade(rc), bindings_id: rc.bindings_id.clone() }
2155    }
2156
2157    #[cfg(test)]
2158    fn get_mut(&self) -> impl DerefMut<Target = RoutingTable<I, D>> + '_ {
2159        let Self(rc) = self;
2160        rc.routing_table.write()
2161    }
2162
2163    /// Gets the bindings cookie for this routing table.
2164    pub fn bindings_id(&self) -> &RoutingTableCookie<BT> {
2165        let Self(rc) = self;
2166        &rc.bindings_id
2167    }
2168}
2169
2170/// Weak Identifier to a routing table.
2171#[derive(Derivative)]
2172#[derivative(Clone(bound = "BT::RoutingTableId: Clone"))]
2173#[derivative(PartialEq, Eq, Hash)]
2174pub struct WeakRoutingTableId<I: Ip, D, BT: IpRoutingBindingsTypes> {
2175    rc: WeakRc<BaseRoutingTableState<I, D, BT>>,
2176    #[derivative(PartialEq = "ignore")]
2177    #[derivative(Hash = "ignore")]
2178    bindings_id: RoutingTableCookie<BT>,
2179}
2180
2181impl<I: Ip, D, BT: IpRoutingBindingsTypes> Debug for WeakRoutingTableId<I, D, BT> {
2182    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
2183        let Self { bindings_id, .. } = self;
2184        f.debug_tuple("WeakRoutingTableId").field(&I::NAME).field(bindings_id).finish()
2185    }
2186}
2187
2188/// The inner state for the IP layer for IP version `I`.
2189#[derive(GenericOverIp)]
2190#[generic_over_ip(I, Ip)]
2191pub struct IpStateInner<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpStateBindingsTypes> {
2192    rules_table: RwLock<RulesTable<I, D, BT>>,
2193    // TODO(https://fxbug.dev/355059838): Explore the option to let Bindings create the main table.
2194    main_table_id: RoutingTableId<I, D, BT>,
2195    multicast_forwarding: RwLock<MulticastForwardingState<I, D, BT>>,
2196    multicast_forwarding_counters: MulticastForwardingCounters<I>,
2197    fragment_cache: Mutex<IpPacketFragmentCache<I, BT>>,
2198    pmtu_cache: Mutex<PmtuCache<I, BT>>,
2199    counters: IpCounters<I>,
2200    raw_sockets: RwLock<RawIpSocketMap<I, D::Weak, BT>>,
2201    raw_socket_counters: RawIpSocketCounters<I>,
2202    filter: RwLock<filter::State<I, WeakAddressId<I, BT>, BT>>,
2203    // Make sure the primary IDs are dropped last. Also note that the following hash map also stores
2204    // the primary ID to the main table, and if the user (Bindings) attempts to remove the main
2205    // table without dropping `main_table_id` first, it will panic. This serves as an assertion
2206    // that the main table cannot be removed and Bindings must never attempt to remove the main
2207    // routing table.
2208    tables: Mutex<HashMap<RoutingTableId<I, D, BT>, PrimaryRc<BaseRoutingTableState<I, D, BT>>>>,
2209    igmp_counters: IgmpCounters,
2210    mld_counters: MldCounters,
2211}
2212
2213impl<I: IpLayerIpExt, D: StrongDeviceIdentifier, BT: IpStateBindingsTypes> IpStateInner<I, D, BT> {
2214    /// Gets the IP counters.
2215    pub fn counters(&self) -> &IpCounters<I> {
2216        &self.counters
2217    }
2218
2219    /// Gets the multicast forwarding counters.
2220    pub fn multicast_forwarding_counters(&self) -> &MulticastForwardingCounters<I> {
2221        &self.multicast_forwarding_counters
2222    }
2223
2224    /// Gets the aggregate raw IP socket counters.
2225    pub fn raw_ip_socket_counters(&self) -> &RawIpSocketCounters<I> {
2226        &self.raw_socket_counters
2227    }
2228
2229    /// Gets the main table ID.
2230    pub fn main_table_id(&self) -> &RoutingTableId<I, D, BT> {
2231        &self.main_table_id
2232    }
2233
2234    /// Provides direct access to the path MTU cache.
2235    #[cfg(any(test, feature = "testutils"))]
2236    pub fn pmtu_cache(&self) -> &Mutex<PmtuCache<I, BT>> {
2237        &self.pmtu_cache
2238    }
2239
2240    /// Provides direct access to the filtering state.
2241    #[cfg(any(test, feature = "testutils"))]
2242    pub fn filter(&self) -> &RwLock<filter::State<I, WeakAddressId<I, BT>, BT>> {
2243        &self.filter
2244    }
2245
2246    /// Gets the stack-wide IGMP counters.
2247    pub fn igmp_counters(&self) -> &IgmpCounters {
2248        &self.igmp_counters
2249    }
2250
2251    /// Gets the stack-wide MLD counters.
2252    pub fn mld_counters(&self) -> &MldCounters {
2253        &self.mld_counters
2254    }
2255}
2256
2257impl<
2258    I: IpLayerIpExt,
2259    D: StrongDeviceIdentifier,
2260    BC: TimerContext + RngContext + IpStateBindingsTypes + IpRoutingBindingsTypes,
2261> IpStateInner<I, D, BC>
2262{
2263    /// Creates a new inner IP layer state.
2264    fn new<CC: CoreTimerContext<IpLayerTimerId, BC>>(bindings_ctx: &mut BC) -> Self {
2265        let main_table: PrimaryRc<BaseRoutingTableState<I, D, BC>> =
2266            PrimaryRc::new(BaseRoutingTableState::with_bindings_id(RoutingTableCookie::Main));
2267        let main_table_id = RoutingTableId(PrimaryRc::clone_strong(&main_table));
2268        Self {
2269            rules_table: RwLock::new(RulesTable::new(main_table_id.clone())),
2270            tables: Mutex::new(HashMap::from_iter(core::iter::once((
2271                main_table_id.clone(),
2272                main_table,
2273            )))),
2274            main_table_id,
2275            multicast_forwarding: Default::default(),
2276            multicast_forwarding_counters: Default::default(),
2277            fragment_cache: Mutex::new(
2278                IpPacketFragmentCache::new::<NestedIntoCoreTimerCtx<CC, _>>(bindings_ctx),
2279            ),
2280            pmtu_cache: Mutex::new(PmtuCache::new::<NestedIntoCoreTimerCtx<CC, _>>(bindings_ctx)),
2281            counters: Default::default(),
2282            raw_sockets: Default::default(),
2283            raw_socket_counters: Default::default(),
2284            filter: RwLock::new(filter::State::new::<NestedIntoCoreTimerCtx<CC, _>>(bindings_ctx)),
2285            igmp_counters: Default::default(),
2286            mld_counters: Default::default(),
2287        }
2288    }
2289}
2290
2291/// The identifier for timer events in the IP layer.
2292#[derive(Debug, Clone, Eq, PartialEq, Hash, GenericOverIp)]
2293#[generic_over_ip()]
2294pub enum IpLayerTimerId {
2295    /// A timer event for IPv4 packet reassembly timers.
2296    ReassemblyTimeoutv4(FragmentTimerId<Ipv4>),
2297    /// A timer event for IPv6 packet reassembly timers.
2298    ReassemblyTimeoutv6(FragmentTimerId<Ipv6>),
2299    /// A timer event for IPv4 path MTU discovery.
2300    PmtuTimeoutv4(PmtuTimerId<Ipv4>),
2301    /// A timer event for IPv6 path MTU discovery.
2302    PmtuTimeoutv6(PmtuTimerId<Ipv6>),
2303    /// A timer event for IPv4 filtering timers.
2304    FilterTimerv4(FilterTimerId<Ipv4>),
2305    /// A timer event for IPv6 filtering timers.
2306    FilterTimerv6(FilterTimerId<Ipv6>),
2307    /// A timer event for IPv4 Multicast forwarding timers.
2308    MulticastForwardingTimerv4(MulticastForwardingTimerId<Ipv4>),
2309    /// A timer event for IPv6 Multicast forwarding timers.
2310    MulticastForwardingTimerv6(MulticastForwardingTimerId<Ipv6>),
2311}
2312
2313impl<I: Ip> From<FragmentTimerId<I>> for IpLayerTimerId {
2314    fn from(timer: FragmentTimerId<I>) -> IpLayerTimerId {
2315        I::map_ip(timer, IpLayerTimerId::ReassemblyTimeoutv4, IpLayerTimerId::ReassemblyTimeoutv6)
2316    }
2317}
2318
2319impl<I: Ip> From<PmtuTimerId<I>> for IpLayerTimerId {
2320    fn from(timer: PmtuTimerId<I>) -> IpLayerTimerId {
2321        I::map_ip(timer, IpLayerTimerId::PmtuTimeoutv4, IpLayerTimerId::PmtuTimeoutv6)
2322    }
2323}
2324
2325impl<I: Ip> From<FilterTimerId<I>> for IpLayerTimerId {
2326    fn from(timer: FilterTimerId<I>) -> IpLayerTimerId {
2327        I::map_ip(timer, IpLayerTimerId::FilterTimerv4, IpLayerTimerId::FilterTimerv6)
2328    }
2329}
2330
2331impl<I: Ip> From<MulticastForwardingTimerId<I>> for IpLayerTimerId {
2332    fn from(timer: MulticastForwardingTimerId<I>) -> IpLayerTimerId {
2333        I::map_ip(
2334            timer,
2335            IpLayerTimerId::MulticastForwardingTimerv4,
2336            IpLayerTimerId::MulticastForwardingTimerv6,
2337        )
2338    }
2339}
2340
2341impl<CC, BC> HandleableTimer<CC, BC> for IpLayerTimerId
2342where
2343    CC: TimerHandler<BC, FragmentTimerId<Ipv4>>
2344        + TimerHandler<BC, FragmentTimerId<Ipv6>>
2345        + TimerHandler<BC, PmtuTimerId<Ipv4>>
2346        + TimerHandler<BC, PmtuTimerId<Ipv6>>
2347        + TimerHandler<BC, FilterTimerId<Ipv4>>
2348        + TimerHandler<BC, FilterTimerId<Ipv6>>
2349        + TimerHandler<BC, MulticastForwardingTimerId<Ipv4>>
2350        + TimerHandler<BC, MulticastForwardingTimerId<Ipv6>>,
2351    BC: TimerBindingsTypes,
2352{
2353    fn handle(self, core_ctx: &mut CC, bindings_ctx: &mut BC, timer: BC::UniqueTimerId) {
2354        match self {
2355            IpLayerTimerId::ReassemblyTimeoutv4(id) => {
2356                core_ctx.handle_timer(bindings_ctx, id, timer)
2357            }
2358            IpLayerTimerId::ReassemblyTimeoutv6(id) => {
2359                core_ctx.handle_timer(bindings_ctx, id, timer)
2360            }
2361            IpLayerTimerId::PmtuTimeoutv4(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2362            IpLayerTimerId::PmtuTimeoutv6(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2363            IpLayerTimerId::FilterTimerv4(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2364            IpLayerTimerId::FilterTimerv6(id) => core_ctx.handle_timer(bindings_ctx, id, timer),
2365            IpLayerTimerId::MulticastForwardingTimerv4(id) => {
2366                core_ctx.handle_timer(bindings_ctx, id, timer)
2367            }
2368            IpLayerTimerId::MulticastForwardingTimerv6(id) => {
2369                core_ctx.handle_timer(bindings_ctx, id, timer)
2370            }
2371        }
2372    }
2373}
2374
2375/// An ICMP error, and the metadata required to send it.
2376///
2377/// This allows the sending of the ICMP error to be decoupled from the
2378/// generation of the error, which is advantageous because sending the error
2379/// requires the underlying packet buffer, which cannot be "moved" in certain
2380/// contexts.
2381pub(crate) struct IcmpErrorSender<'a, I: IcmpHandlerIpExt, D> {
2382    /// The ICMP error that should be sent.
2383    err: I::IcmpError,
2384    /// The original source IP address of the packet (before the local-ingress
2385    /// hook evaluation).
2386    src_ip: SocketIpAddr<I::Addr>,
2387    /// The original destination IP address of the packet (before the
2388    /// local-ingress hook evaluation).
2389    dst_ip: SocketIpAddr<I::Addr>,
2390    /// The frame destination of the packet.
2391    frame_dst: Option<LocalFrameDestination>,
2392    /// The device out which to send the error.
2393    device: &'a D,
2394    /// The metadata from the packet, allowing the packet's backing buffer to be
2395    /// returned to it's pre-IP-parse state with [`GrowBuffer::undo_parse`].
2396    meta: ParseMetadata,
2397    /// The marks used to send the ICMP error.
2398    marks: Marks,
2399    /// The protocol of the original packet.
2400    proto: I::Proto,
2401}
2402
2403impl<'a, I: IcmpHandlerIpExt, D> IcmpErrorSender<'a, I, D> {
2404    pub fn new<CC, B>(
2405        core_ctx: &mut CC,
2406        err: I::IcmpError,
2407        packet: &I::Packet<B>,
2408        frame_dst: Option<LocalFrameDestination>,
2409        device: &'a D,
2410        marks: Marks,
2411    ) -> Option<Self>
2412    where
2413        I: IpCountersIpExt,
2414        CC: ResourceCounterContext<D, IpCounters<I>>,
2415        B: SplitByteSlice,
2416    {
2417        let Some(src_ip) = SocketIpAddr::new(packet.src_ip()) else {
2418            core_ctx.increment_both(device, |c| &c.unspecified_source);
2419            return None;
2420        };
2421        let Some(dst_ip) = SocketIpAddr::new(packet.dst_ip()) else {
2422            return None;
2423        };
2424
2425        // In IPv4, don't respond to non-initial fragments.
2426        let is_ipv4_fragment = I::map_ip_in(
2427            packet,
2428            |p| {
2429                packet_formats::ipv4::Ipv4Header::fragment_type(p)
2430                    == Ipv4FragmentType::NonInitialFragment
2431            },
2432            |_| false,
2433        );
2434        if is_ipv4_fragment {
2435            return None;
2436        }
2437
2438        let meta = packet.parse_metadata();
2439        let proto = packet.proto();
2440        Some(Self { err, src_ip, dst_ip, frame_dst, device, meta, marks, proto })
2441    }
2442
2443    /// Generate an send an appropriate ICMP error in response to this error.
2444    ///
2445    /// The provided `body` must be the original buffer from which the IP
2446    /// packet responsible for this error was parsed. It is expected to be in a
2447    /// state that allows undoing the IP packet parse (e.g. unmodified after the
2448    /// IP packet was parsed).
2449    pub fn send<B, BC, CC>(self, core_ctx: &mut CC, bindings_ctx: &mut BC, mut body: B)
2450    where
2451        B: BufferMut,
2452        CC: IcmpErrorHandler<I, BC, DeviceId = D>,
2453    {
2454        let IcmpErrorSender { err, src_ip, dst_ip, frame_dst, device, meta, marks, proto } = self;
2455        let header_len = meta.header_len();
2456
2457        // Undo the parsing of the IP Packet, moving the buffer's cursor so that
2458        // it points at the start of the IP header. This way, the sent ICMP
2459        // error will contain the entire original IP packet.
2460        body.undo_parse(meta);
2461
2462        core_ctx.send_icmp_error_message(
2463            bindings_ctx,
2464            Some(device),
2465            frame_dst,
2466            src_ip,
2467            dst_ip,
2468            body,
2469            err,
2470            header_len,
2471            proto,
2472            &marks,
2473        );
2474    }
2475}
2476
2477// Early demux results may be invalidated by SNAT in the LOCAL_INGRESS hook.
2478// This struct is used to check if the early demux result is still valid.
2479//
2480// TODO(https://fxbug.dev/476507679): Add tests to ensure this works properly
2481// once SNAT is fully implemented.
2482#[derive(PartialEq, Eq)]
2483struct EarlyDemuxResult<I: Ip, S> {
2484    socket: S,
2485    src_addr: I::Addr,
2486    src_port: Option<u16>,
2487}
2488
2489impl<I: FilterIpExt, S> EarlyDemuxResult<I, S> {
2490    fn new<P: IpPacket<I>>(socket: S, packet: &P) -> Self {
2491        let src_port =
2492            packet.maybe_transport_packet().transport_packet_data().map(|t| t.src_port());
2493        Self { socket, src_addr: packet.src_addr(), src_port }
2494    }
2495
2496    // Returns the socket if it's still the right socket to handle the packet.
2497    fn take_socket<P: IpPacket<I>>(self, packet: &P) -> Option<S> {
2498        let src_port =
2499            packet.maybe_transport_packet().transport_packet_data().map(|t| t.src_port());
2500        (self.src_addr == packet.src_addr() && self.src_port == src_port).then_some(self.socket)
2501    }
2502
2503    fn update_packet_metadata<CC, BC>(
2504        &self,
2505        core_ctx: &mut CC,
2506        packet_metadata: &mut IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
2507    ) where
2508        I: IpLayerIpExt,
2509        S: SocketMetadata<CC>,
2510        BC: IpLayerBindingsContext<I, CC::DeviceId>,
2511        CC: IpLayerIngressContext<I, BC>,
2512    {
2513        packet_metadata.socket_info = Some(self.socket.socket_info(core_ctx));
2514        for mark in BC::marks_to_set_on_ingress() {
2515            *packet_metadata.marks.get_mut(*mark) = self.socket.marks(core_ctx).get(*mark).clone();
2516        }
2517    }
2518}
2519
2520pub(crate) fn reject_type_to_icmpv4_error(reject_type: RejectType) -> Option<Icmpv4Error> {
2521    let error = match reject_type {
2522        RejectType::NetUnreachable => Icmpv4Error::NetUnreachable,
2523        RejectType::ProtoUnreachable => Icmpv4Error::ProtocolUnreachable,
2524        RejectType::PortUnreachable => Icmpv4Error::PortUnreachable,
2525        RejectType::HostUnreachable => Icmpv4Error::HostUnreachable,
2526        RejectType::RoutePolicyFail => Icmpv4Error::NetworkProhibited,
2527        RejectType::RejectRoute => Icmpv4Error::HostProhibited,
2528        RejectType::AdminProhibited => Icmpv4Error::AdminProhibited,
2529        // TODO(https://fxbug.dev/488116504): Implement RejectType::TcpReset.
2530        RejectType::TcpReset => return None,
2531    };
2532    Some(error)
2533}
2534
2535pub(crate) fn reject_type_to_icmpv6_error(reject_type: RejectType) -> Option<Icmpv6Error> {
2536    let error = match reject_type {
2537        RejectType::NetUnreachable => Icmpv6Error::NetUnreachable,
2538        RejectType::PortUnreachable => Icmpv6Error::PortUnreachable,
2539        RejectType::HostUnreachable => Icmpv6Error::AddressUnreachable,
2540        RejectType::AdminProhibited => Icmpv6Error::AdminProhibited,
2541        RejectType::RoutePolicyFail => Icmpv6Error::SourceAddressPolicyFailed,
2542        RejectType::RejectRoute => Icmpv6Error::RejectRoute,
2543        // TODO(https://fxbug.dev/488116504): Implement ProtoUnreachable and TcpReset.
2544        RejectType::TcpReset | RejectType::ProtoUnreachable => return None,
2545    };
2546    Some(error)
2547}
2548// TODO(joshlf): Once we support multiple extension headers in IPv6, we will
2549// need to verify that the callers of this function are still sound. In
2550// particular, they may accidentally pass a parse_metadata argument which
2551// corresponds to a single extension header rather than all of the IPv6 headers.
2552
2553/// Dispatch a received IPv4 packet to the appropriate protocol.
2554///
2555/// `device` is the device the packet was received on. `parse_metadata` is the
2556/// parse metadata associated with parsing the IP headers. It is used to undo
2557/// that parsing. Both `device` and `parse_metadata` are required in order to
2558/// send ICMP messages in response to unrecognized protocols or ports. If either
2559/// of `device` or `parse_metadata` is `None`, the caller promises that the
2560/// protocol and port are recognized.
2561///
2562/// # Panics
2563///
2564/// `dispatch_receive_ipv4_packet` panics if the protocol is unrecognized and
2565/// `parse_metadata` is `None`. If an IGMP message is received but it is not
2566/// coming from a device, i.e., `device` given is `None`,
2567/// `dispatch_receive_ip_packet` will also panic.
2568fn dispatch_receive_ipv4_packet<
2569    'a,
2570    'b,
2571    BC: IpLayerBindingsContext<Ipv4, CC::DeviceId>,
2572    CC: IpLayerIngressContext<Ipv4, BC>,
2573>(
2574    core_ctx: &'a mut CC,
2575    bindings_ctx: &'a mut BC,
2576    device: &'b CC::DeviceId,
2577    frame_dst: Option<LocalFrameDestination>,
2578    mut packet: Ipv4Packet<&'a mut [u8]>,
2579    mut packet_metadata: IpLayerPacketMetadata<Ipv4, CC::WeakAddressId, BC>,
2580    receive_meta: ReceiveIpPacketMeta<Ipv4>,
2581) -> Result<(), IcmpErrorSender<'b, Ipv4, CC::DeviceId>> {
2582    core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet);
2583
2584    // Skip early demux if the packet was redirected to a TPROXY.
2585    // TODO(https://fxbug.dev/475851987): Handle TPROXY in early_demux.
2586    let early_demux_result = receive_meta
2587        .transparent_override
2588        .is_none()
2589        .then(|| {
2590            core_ctx.early_demux(
2591                device,
2592                frame_dst,
2593                packet.src_ip(),
2594                packet.dst_ip(),
2595                packet.proto(),
2596                packet.body(),
2597            )
2598        })
2599        .flatten()
2600        .map(|socket| {
2601            let early_demux_result = EarlyDemuxResult::new(socket, &packet);
2602            early_demux_result.update_packet_metadata(core_ctx, &mut packet_metadata);
2603            early_demux_result
2604        });
2605
2606    let filter_verdict = core_ctx.filter_handler().local_ingress_hook(
2607        bindings_ctx,
2608        &mut packet,
2609        device,
2610        &mut packet_metadata,
2611    );
2612
2613    let marks = packet_metadata.marks;
2614    packet_metadata.acknowledge_drop();
2615
2616    match filter_verdict {
2617        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
2618            return Ok(());
2619        }
2620        filter::Verdict::Stop(filter::DropOrReject::Reject(reject_type)) => {
2621            return match reject_type_to_icmpv4_error(reject_type) {
2622                Some(icmp_error) => {
2623                    match IcmpErrorSender::new(
2624                        core_ctx, icmp_error, &packet, frame_dst, device, marks,
2625                    ) {
2626                        Some(icmp_sender) => Err(icmp_sender),
2627                        None => Ok(()),
2628                    }
2629                }
2630                None => {
2631                    debug!("Unsupported reject type: {:?}", reject_type);
2632                    return Ok(());
2633                }
2634            };
2635        }
2636        filter::Verdict::Proceed(filter::Accept) => (),
2637    };
2638
2639    // These invariants are validated by the caller of this function, but it's
2640    // possible for the LOCAL_INGRESS hook to rewrite the packet, so we have to
2641    // check them again.
2642    let Some(src_ip) = packet.src_ipv4() else {
2643        debug!(
2644            "dispatch_receive_ipv4_packet: received packet from invalid source {} after the \
2645            LOCAL_INGRESS hook; dropping",
2646            packet.src_ip()
2647        );
2648        core_ctx.increment_both(device, |c| &c.invalid_source);
2649        return Ok(());
2650    };
2651    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
2652        core_ctx.increment_both(device, |c| &c.unspecified_destination);
2653        debug!(
2654            "dispatch_receive_ipv4_packet: Received packet with unspecified destination IP address \
2655            after the LOCAL_INGRESS hook; dropping"
2656        );
2657        return Ok(());
2658    };
2659
2660    core_ctx.deliver_packet_to_raw_ip_sockets(bindings_ctx, &packet, &device);
2661
2662    // Check if the early demux result is still valid.
2663    let early_demux_socket = early_demux_result.and_then(|result| result.take_socket(&packet));
2664
2665    let proto = packet.proto();
2666    let (prefix, options, body) = packet.parts_with_body_mut();
2667    let buffer = Buf::new(body, ..);
2668    let header_info = Ipv4HeaderInfo { prefix, options: options.as_ref() };
2669    let mut receive_info = LocalDeliveryPacketInfo { meta: receive_meta, header_info, marks };
2670
2671    core_ctx
2672        .dispatch_receive_ip_packet(
2673            bindings_ctx,
2674            device,
2675            src_ip,
2676            dst_ip,
2677            proto,
2678            buffer,
2679            &mut receive_info,
2680            early_demux_socket,
2681        )
2682        .or_else(|icmp_error| {
2683            match IcmpErrorSender::new(core_ctx, icmp_error, &packet, frame_dst, device, marks) {
2684                Some(icmp_sender) => Err(icmp_sender),
2685                None => Ok(()),
2686            }
2687        })
2688}
2689
2690/// Dispatch a received IPv6 packet to the appropriate protocol.
2691///
2692/// `dispatch_receive_ipv6_packet` has the same semantics as
2693/// `dispatch_receive_ipv4_packet`, but for IPv6.
2694fn dispatch_receive_ipv6_packet<
2695    'a,
2696    'b,
2697    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
2698    CC: IpLayerIngressContext<Ipv6, BC>,
2699>(
2700    core_ctx: &'a mut CC,
2701    bindings_ctx: &'a mut BC,
2702    device: &'b CC::DeviceId,
2703    frame_dst: Option<LocalFrameDestination>,
2704    mut packet: Ipv6Packet<&'a mut [u8]>,
2705    mut packet_metadata: IpLayerPacketMetadata<Ipv6, CC::WeakAddressId, BC>,
2706    meta: ReceiveIpPacketMeta<Ipv6>,
2707) -> Result<(), IcmpErrorSender<'b, Ipv6, CC::DeviceId>> {
2708    // TODO(https://fxbug.dev/42095067): Once we support multiple extension
2709    // headers in IPv6, we will need to verify that the callers of this
2710    // function are still sound. In particular, they may accidentally pass a
2711    // parse_metadata argument which corresponds to a single extension
2712    // header rather than all of the IPv6 headers.
2713
2714    core_ctx.increment_both(device, |c| &c.dispatch_receive_ip_packet);
2715
2716    // Skip early demux if the packet was redirected to a TPROXY.
2717    // TODO(https://fxbug.dev/475851987): Handle TPROXY in early_demux.
2718    let early_demux_result = meta
2719        .transparent_override
2720        .is_none()
2721        .then(|| {
2722            core_ctx.early_demux(
2723                device,
2724                frame_dst,
2725                packet.src_ip(),
2726                packet.dst_ip(),
2727                packet.proto(),
2728                packet.body(),
2729            )
2730        })
2731        .flatten()
2732        .map(|socket| {
2733            let early_demux_result = EarlyDemuxResult::new(socket, &packet);
2734            early_demux_result.update_packet_metadata(core_ctx, &mut packet_metadata);
2735            early_demux_result
2736        });
2737
2738    let filter_verdict = core_ctx.filter_handler().local_ingress_hook(
2739        bindings_ctx,
2740        &mut packet,
2741        device,
2742        &mut packet_metadata,
2743    );
2744
2745    let marks = packet_metadata.marks;
2746    packet_metadata.acknowledge_drop();
2747
2748    match filter_verdict {
2749        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
2750            return Ok(());
2751        }
2752        filter::Verdict::Stop(filter::DropOrReject::Reject(reject_type)) => {
2753            return match reject_type_to_icmpv6_error(reject_type) {
2754                Some(icmp_error) => {
2755                    match IcmpErrorSender::new(
2756                        core_ctx, icmp_error, &packet, frame_dst, device, marks,
2757                    ) {
2758                        Some(icmp_sender) => Err(icmp_sender),
2759                        None => Ok(()),
2760                    }
2761                }
2762                None => {
2763                    debug!("Unsupported reject type: {:?}", reject_type);
2764                    return Ok(());
2765                }
2766            };
2767        }
2768        filter::Verdict::Proceed(filter::Accept) => {}
2769    }
2770
2771    // These invariants are validated by the caller of this function, but it's
2772    // possible for the LOCAL_INGRESS hook to rewrite the packet, so we have to
2773    // check them again.
2774    let Some(src_ip) = packet.src_ipv6() else {
2775        debug!(
2776            "dispatch_receive_ipv6_packet: received packet from invalid source {} after the \
2777            LOCAL_INGRESS hook; dropping",
2778            packet.src_ip()
2779        );
2780
2781        core_ctx.increment_both(device, |c| &c.invalid_source);
2782        return Ok(());
2783    };
2784    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
2785        core_ctx.increment_both(device, |c| &c.unspecified_destination);
2786        debug!(
2787            "dispatch_receive_ipv6_packet: Received packet with unspecified destination IP address \
2788            after the LOCAL_INGRESS hook; dropping"
2789        );
2790        return Ok(());
2791    };
2792
2793    core_ctx.deliver_packet_to_raw_ip_sockets(bindings_ctx, &packet, &device);
2794
2795    // Check if the early demux result is still valid.
2796    let early_demux_socket = early_demux_result.and_then(|result| result.take_socket(&packet));
2797
2798    let proto = packet.proto();
2799    let (fixed, extension, body) = packet.parts_with_body_mut();
2800    let buffer = Buf::new(body, ..);
2801    let header_info = Ipv6HeaderInfo { fixed, extension };
2802    let mut receive_info = LocalDeliveryPacketInfo { meta, header_info, marks };
2803
2804    core_ctx
2805        .dispatch_receive_ip_packet(
2806            bindings_ctx,
2807            device,
2808            src_ip,
2809            dst_ip,
2810            proto,
2811            buffer,
2812            &mut receive_info,
2813            early_demux_socket,
2814        )
2815        .or_else(|icmp_error| {
2816            let marks = receive_info.marks;
2817            match IcmpErrorSender::new(core_ctx, icmp_error, &packet, frame_dst, device, marks) {
2818                Some(icmp_sender) => Err(icmp_sender),
2819                None => Ok(()),
2820            }
2821        })
2822}
2823
2824/// The metadata required to forward an IP Packet.
2825///
2826/// This allows the forwarding of the packet to be decoupled from the
2827/// determination of how to forward. This is advantageous because forwarding
2828/// requires the underlying packet buffer, which cannot be "moved" in certain
2829/// contexts.
2830pub(crate) struct IpPacketForwarder<
2831    'a,
2832    I: IpLayerIpExt,
2833    D,
2834    A,
2835    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
2836> {
2837    inbound_device: &'a D,
2838    outbound_device: &'a D,
2839    packet_meta: IpLayerPacketMetadata<I, A, BT>,
2840    src_ip: I::RecvSrcAddr,
2841    dst_ip: SpecifiedAddr<I::Addr>,
2842    destination: IpPacketDestination<I, &'a D>,
2843    proto: I::Proto,
2844    parse_meta: ParseMetadata,
2845    frame_dst: Option<LocalFrameDestination>,
2846}
2847
2848impl<'a, I, D, A, BC> IpPacketForwarder<'a, I, D, A, BC>
2849where
2850    I: IpLayerIpExt,
2851    BC: IpLayerBindingsContext<I, D>,
2852{
2853    // Forward the provided buffer as specified by this [`IpPacketForwarder`].
2854    fn forward_with_buffer<CC, B>(self, core_ctx: &mut CC, bindings_ctx: &mut BC, buffer: B)
2855    where
2856        B: BufferMut,
2857        CC: IpLayerForwardingContext<I, BC, DeviceId = D, WeakAddressId = A>,
2858    {
2859        let Self {
2860            inbound_device,
2861            outbound_device,
2862            packet_meta,
2863            src_ip,
2864            dst_ip,
2865            destination,
2866            proto,
2867            parse_meta,
2868            frame_dst,
2869        } = self;
2870
2871        let packet = ForwardedPacket::new(src_ip.get(), dst_ip.get(), proto, parse_meta, buffer);
2872
2873        trace!("forward_with_buffer: forwarding {} packet", I::NAME);
2874
2875        let marks = packet_meta.marks;
2876        match send_ip_frame(
2877            core_ctx,
2878            bindings_ctx,
2879            outbound_device,
2880            destination,
2881            packet,
2882            packet_meta,
2883            Mtu::no_limit(),
2884        ) {
2885            Ok(()) => (),
2886            Err(IpSendFrameError { serializer, error }) => {
2887                match error {
2888                    IpSendFrameErrorReason::Device(
2889                        SendFrameErrorReason::SizeConstraintsViolation,
2890                    ) => {
2891                        debug!("failed to forward {} packet: MTU exceeded", I::NAME);
2892                        core_ctx.increment_both(outbound_device, |c| &c.mtu_exceeded);
2893                        let mtu = core_ctx.get_mtu(inbound_device);
2894                        // NB: Ipv6 sends a PacketTooBig error. Ipv4 sends nothing.
2895                        let Some(err) = I::IcmpError::mtu_exceeded(mtu) else {
2896                            return;
2897                        };
2898                        // NB: Only send an ICMP error if the sender's src
2899                        // is specified.
2900                        let Some(src_ip) = I::received_source_as_icmp_source(src_ip) else {
2901                            return;
2902                        };
2903
2904                        let Some(dst_ip) = SocketIpAddr::new(dst_ip.get()) else {
2905                            return;
2906                        };
2907
2908                        // TODO(https://fxbug.dev/362489447): Increment the TTL since we
2909                        // just decremented it. The fact that we don't do this is
2910                        // technically a violation of the ICMP spec (we're not
2911                        // encapsulating the original packet that caused the
2912                        // issue, but a slightly modified version of it), but
2913                        // it's not that big of a deal because it won't affect
2914                        // the sender's ability to figure out the minimum path
2915                        // MTU. This may break other logic, though, so we should
2916                        // still fix it eventually.
2917                        core_ctx.send_icmp_error_message(
2918                            bindings_ctx,
2919                            Some(inbound_device),
2920                            frame_dst,
2921                            src_ip,
2922                            dst_ip,
2923                            serializer.into_buffer(),
2924                            err,
2925                            parse_meta.header_len(),
2926                            proto,
2927                            &marks,
2928                        );
2929                    }
2930                    IpSendFrameErrorReason::Device(SendFrameErrorReason::QueueFull)
2931                    | IpSendFrameErrorReason::Device(SendFrameErrorReason::Alloc)
2932                    | IpSendFrameErrorReason::IllegalLoopbackAddress => (),
2933                }
2934                debug!("failed to forward {} packet: {error:?}", I::NAME);
2935            }
2936        }
2937    }
2938}
2939
2940/// The action to take for a packet that was a candidate for forwarding.
2941pub(crate) enum ForwardingAction<
2942    'a,
2943    I: IpLayerIpExt,
2944    D,
2945    A,
2946    BT: FilterBindingsTypes + TxMetadataBindingsTypes,
2947> {
2948    /// Drop the packet without forwarding it or generating an ICMP error.
2949    SilentlyDrop,
2950    /// Forward the packet, as specified by the [`IpPacketForwarder`].
2951    Forward(IpPacketForwarder<'a, I, D, A, BT>),
2952    /// Drop the packet without forwarding, and generate an ICMP error as
2953    /// specified by the [`IcmpErrorSender`].
2954    DropWithIcmpError(IcmpErrorSender<'a, I, D>),
2955}
2956
2957impl<'a, I, D, A, BC> ForwardingAction<'a, I, D, A, BC>
2958where
2959    I: IpLayerIpExt,
2960    BC: IpLayerBindingsContext<I, D>,
2961{
2962    /// Perform the action prescribed by self, with the provided packet buffer.
2963    pub(crate) fn perform_action_with_buffer<CC, B>(
2964        self,
2965        core_ctx: &mut CC,
2966        bindings_ctx: &mut BC,
2967        buffer: B,
2968    ) where
2969        B: BufferMut,
2970        CC: IpLayerForwardingContext<I, BC, DeviceId = D, WeakAddressId = A>,
2971    {
2972        match self {
2973            ForwardingAction::SilentlyDrop => {}
2974            ForwardingAction::Forward(forwarder) => {
2975                forwarder.forward_with_buffer(core_ctx, bindings_ctx, buffer)
2976            }
2977            ForwardingAction::DropWithIcmpError(icmp_sender) => {
2978                icmp_sender.send(core_ctx, bindings_ctx, buffer)
2979            }
2980        }
2981    }
2982}
2983
2984/// Determine which [`ForwardingAction`] should be taken for an IP packet.
2985pub(crate) fn determine_ip_packet_forwarding_action<'a, 'b, I, BC, CC>(
2986    core_ctx: &'a mut CC,
2987    mut packet: I::Packet<&'a mut [u8]>,
2988    mut packet_meta: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
2989    minimum_ttl: Option<u8>,
2990    inbound_device: &'b CC::DeviceId,
2991    outbound_device: &'b CC::DeviceId,
2992    destination: IpPacketDestination<I, &'b CC::DeviceId>,
2993    frame_dst: Option<LocalFrameDestination>,
2994    src_ip: I::RecvSrcAddr,
2995    dst_ip: SpecifiedAddr<I::Addr>,
2996) -> ForwardingAction<'b, I, CC::DeviceId, CC::WeakAddressId, BC>
2997where
2998    I: IpLayerIpExt,
2999    BC: IpLayerBindingsContext<I, CC::DeviceId>,
3000    CC: IpLayerForwardingContext<I, BC>,
3001{
3002    // When forwarding, if a datagram's TTL is one or zero, discard it, as
3003    // decrementing the TTL would put it below the allowed minimum value.
3004    // For IPv4, see "TTL" section, https://tools.ietf.org/html/rfc791#page-14.
3005    // For IPv6, see "Hop Limit" section, https://datatracker.ietf.org/doc/html/rfc2460#page-5.
3006    const DEFAULT_MINIMUM_FORWARDING_TTL: u8 = 2;
3007    let minimum_ttl = minimum_ttl.unwrap_or(DEFAULT_MINIMUM_FORWARDING_TTL);
3008
3009    let ttl = packet.ttl();
3010    if ttl < minimum_ttl {
3011        debug!(
3012            "{} packet not forwarded due to inadequate TTL: got={ttl} minimum={minimum_ttl}",
3013            I::NAME
3014        );
3015        // As per RFC 792's specification of the Time Exceeded Message:
3016        //     If the gateway processing a datagram finds the time to live
3017        //     field is zero it must discard the datagram. The gateway may
3018        //     also notify the source host via the time exceeded message.
3019        // And RFC 4443 section 3.3:
3020        //    If a router receives a packet with a Hop Limit of zero, or if
3021        //    a router decrements a packet's Hop Limit to zero, it MUST
3022        //    discard the packet and originate an ICMPv6 Time Exceeded
3023        //    message with Code 0 to the source of the packet.
3024        // Don't send a Time Exceeded Message in cases where the netstack is
3025        // enforcing a higher minimum TTL (e.g. as part of a multicast route).
3026        if ttl > 1 {
3027            packet_meta.acknowledge_drop();
3028            return ForwardingAction::SilentlyDrop;
3029        }
3030
3031        core_ctx.increment_both(inbound_device, |c| &c.ttl_expired);
3032
3033        let marks = packet_meta.marks;
3034        packet_meta.acknowledge_drop();
3035
3036        // Construct and send the appropriate ICMP error for the IP version.
3037        match IcmpErrorSender::new(
3038            core_ctx,
3039            I::IcmpError::ttl_expired(),
3040            &packet,
3041            frame_dst,
3042            inbound_device,
3043            marks,
3044        ) {
3045            Some(icmp_sender) => return ForwardingAction::DropWithIcmpError(icmp_sender),
3046            None => return ForwardingAction::SilentlyDrop,
3047        }
3048    }
3049
3050    trace!("determine_ip_packet_forwarding_action: adequate TTL");
3051
3052    // For IPv6 packets, handle extension headers first.
3053    //
3054    // Any previous handling of extension headers was done under the
3055    // assumption that we are the final destination of the packet. Now that
3056    // we know we're forwarding, we need to re-examine them.
3057    let maybe_ipv6_packet_action = I::map_ip_in(
3058        &packet,
3059        |_packet| None,
3060        |packet| {
3061            Some(ipv6::handle_extension_headers(core_ctx, inbound_device, frame_dst, packet, false))
3062        },
3063    );
3064    match maybe_ipv6_packet_action {
3065        None => {} // NB: Ipv4 case.
3066        Some(Ipv6PacketAction::_Discard) => {
3067            core_ctx.increment_both(inbound_device, |c| {
3068                #[derive(GenericOverIp)]
3069                #[generic_over_ip(I, Ip)]
3070                struct InCounters<'a, I: IpLayerIpExt>(
3071                    &'a <I::RxCounters as CounterCollectionSpec>::CounterCollection<Counter>,
3072                );
3073                I::map_ip_in::<_, _>(
3074                    InCounters(&c.version_rx),
3075                    |_counters| {
3076                        unreachable!(
3077                            "`I` must be `Ipv6` because we're handling IPv6 extension headers"
3078                        )
3079                    },
3080                    |InCounters(counters)| &counters.extension_header_discard,
3081                )
3082            });
3083            trace!(
3084                "determine_ip_packet_forwarding_action: handled IPv6 extension headers: \
3085                discarding packet"
3086            );
3087            packet_meta.acknowledge_drop();
3088            return ForwardingAction::SilentlyDrop;
3089        }
3090        Some(Ipv6PacketAction::Continue) => {
3091            trace!(
3092                "determine_ip_packet_forwarding_action: handled IPv6 extension headers: \
3093                forwarding packet"
3094            );
3095        }
3096        Some(Ipv6PacketAction::ProcessFragment) => {
3097            unreachable!(
3098                "When forwarding packets, we should only ever look at the hop by hop \
3099                    options extension header (if present)"
3100            )
3101        }
3102    };
3103
3104    match core_ctx.filter_handler().forwarding_hook(
3105        I::as_filter_packet(&mut packet),
3106        inbound_device,
3107        outbound_device,
3108        &mut packet_meta,
3109    ) {
3110        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
3111            packet_meta.acknowledge_drop();
3112            trace!("determine_ip_packet_forwarding_action: filter verdict: Drop");
3113            return ForwardingAction::SilentlyDrop;
3114        }
3115        filter::Verdict::Stop(filter::DropOrReject::Reject(reject_type)) => {
3116            // TODO(https://fxbug.dev/466098884): Send reject packet.
3117            packet_meta.acknowledge_drop();
3118            trace!(
3119                "determine_ip_packet_forwarding_action: filter verdict: Reject({:?})",
3120                reject_type
3121            );
3122            return ForwardingAction::SilentlyDrop;
3123        }
3124        filter::Verdict::Proceed(filter::Accept) => {}
3125    }
3126
3127    packet.set_ttl(ttl - 1);
3128    let (_, _, proto, parse_meta): (I::Addr, I::Addr, _, _) = packet.into_metadata();
3129    ForwardingAction::Forward(IpPacketForwarder {
3130        inbound_device,
3131        outbound_device,
3132        packet_meta,
3133        src_ip,
3134        dst_ip,
3135        destination,
3136        proto,
3137        parse_meta,
3138        frame_dst,
3139    })
3140}
3141
3142pub(crate) fn send_ip_frame<I, CC, BC, S>(
3143    core_ctx: &mut CC,
3144    bindings_ctx: &mut BC,
3145    device: &CC::DeviceId,
3146    destination: IpPacketDestination<I, &CC::DeviceId>,
3147    mut body: S,
3148    mut packet_metadata: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
3149    limit_mtu: Mtu,
3150) -> Result<(), IpSendFrameError<S>>
3151where
3152    I: IpLayerIpExt,
3153    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes + MarksBindingsContext,
3154    CC: IpLayerEgressContext<I, BC> + IpDeviceMtuContext<I> + IpDeviceAddressIdContext<I>,
3155    S: FragmentableIpSerializer<I, Buffer: BufferMut> + FilterIpPacket<I>,
3156{
3157    let (verdict, proof) = core_ctx.filter_handler().egress_hook(
3158        bindings_ctx,
3159        &mut body,
3160        device,
3161        &mut packet_metadata,
3162    );
3163    match verdict {
3164        filter::Verdict::Stop(filter::DropPacket) => {
3165            packet_metadata.acknowledge_drop();
3166            return Ok(());
3167        }
3168        filter::Verdict::Proceed(filter::Accept) => {}
3169    }
3170
3171    // If the packet is leaving through the loopback device, attempt to extract a
3172    // weak reference to the packet's conntrack entry to plumb that through the
3173    // device layer so it can be reused on ingress to the IP layer.
3174    let (conntrack_connection_and_direction, tx_metadata, marks, _socket_cookie) =
3175        packet_metadata.into_parts();
3176    let conntrack_entry = if device.is_loopback() {
3177        conntrack_connection_and_direction
3178            .and_then(|(conn, dir)| WeakConntrackConnection::new(&conn).map(|conn| (conn, dir)))
3179    } else {
3180        None
3181    };
3182
3183    let mut device_layer_marks = Marks::default();
3184    for mark in BC::marks_to_keep_on_egress() {
3185        *device_layer_marks.get_mut(*mark) = *marks.get(*mark);
3186    }
3187
3188    let device_ip_layer_metadata =
3189        DeviceIpLayerMetadata { conntrack_entry, tx_metadata, marks: device_layer_marks };
3190
3191    // The filtering layer may have changed our address. Perform a last moment
3192    // check to protect against sending loopback addresses on the wire for
3193    // non-loopback devices, which is an RFC violation.
3194    if !device.is_loopback()
3195        && (I::LOOPBACK_SUBNET.contains(&body.src_addr())
3196            || I::LOOPBACK_SUBNET.contains(&body.dst_addr()))
3197    {
3198        core_ctx.increment_both(device, |c| &c.tx_illegal_loopback_address);
3199        return Err(IpSendFrameError {
3200            serializer: body,
3201            error: IpSendFrameErrorReason::IllegalLoopbackAddress,
3202        });
3203    }
3204
3205    // Use the minimum MTU between the target device and the requested mtu.
3206    let mtu = limit_mtu.min(core_ctx.get_mtu(device));
3207
3208    let body = body.with_size_limit(mtu.into());
3209
3210    let fits_mtu = match body.serialize_new_buf(
3211        &mut NetworkSerializationContext::default(),
3212        PacketConstraints::UNCONSTRAINED,
3213        AlwaysFailBufferAlloc,
3214    ) {
3215        // We hit the allocator that refused to allocate new data, which
3216        // means the MTU is respected.
3217        Err(SerializeError::Alloc(())) => true,
3218        // MTU failure, we should try to fragment.
3219        Err(SerializeError::SizeLimitExceeded) => false,
3220    };
3221
3222    if fits_mtu {
3223        return core_ctx
3224            .send_ip_frame(bindings_ctx, device, destination, device_ip_layer_metadata, body, proof)
3225            .map_err(|ErrorAndSerializer { serializer, error }| IpSendFrameError {
3226                serializer: serializer.into_inner(),
3227                error: error.into(),
3228            });
3229    }
3230
3231    // Body doesn't fit MTU, we must fragment this serializer in order to send
3232    // it out.
3233    core_ctx.increment_both(device, |c| &c.fragmentation.fragmentation_required);
3234
3235    // Taken on the last frame.
3236    let mut device_ip_layer_metadata = Some(device_ip_layer_metadata);
3237    let body = body.into_inner();
3238    let result = match IpFragmenter::new(bindings_ctx, &body, mtu) {
3239        Ok(mut fragmenter) => loop {
3240            let (fragment, has_more) = match fragmenter.next() {
3241                None => break Ok(()),
3242                Some(f) => f,
3243            };
3244
3245            // TODO(https://fxbug.dev/391953082): We should penalize sockets
3246            // via the tx metadata when we incur IP fragmentation instead of
3247            // just attaching the ownership to the last fragment. For now, we
3248            // attach the tx metadata to the last frame only.
3249            let device_ip_layer_metadata = if has_more {
3250                // Unwrap here because only the last frame can take it.
3251                let device_ip_layer_metadata = device_ip_layer_metadata.as_ref().unwrap();
3252                DeviceIpLayerMetadata {
3253                    conntrack_entry: device_ip_layer_metadata.conntrack_entry.clone(),
3254                    tx_metadata: Default::default(),
3255                    marks: device_ip_layer_metadata.marks,
3256                }
3257            } else {
3258                // Unwrap here because the last frame can only happen once.
3259                device_ip_layer_metadata.take().unwrap()
3260            };
3261
3262            match core_ctx.send_ip_frame(
3263                bindings_ctx,
3264                device,
3265                destination.clone(),
3266                device_ip_layer_metadata,
3267                fragment,
3268                proof.clone_for_fragmentation(),
3269            ) {
3270                Ok(()) => {
3271                    core_ctx.increment_both(device, |c| &c.fragmentation.fragments);
3272                }
3273                Err(ErrorAndSerializer { serializer: _, error }) => {
3274                    core_ctx
3275                        .increment_both(device, |c| &c.fragmentation.error_fragmented_serializer);
3276                    break Err(error);
3277                }
3278            }
3279        },
3280        Err(e) => {
3281            core_ctx.increment_both(device, |c| &c.fragmentation.error_counter(&e));
3282            Err(SendFrameErrorReason::SizeConstraintsViolation)
3283        }
3284    };
3285    result.map_err(|e| IpSendFrameError { serializer: body, error: e.into() })
3286}
3287
3288/// A buffer allocator that always fails to allocate a new buffer.
3289///
3290/// Can be used to check for packet size constraints in serializer without in
3291/// fact serializing the buffer.
3292struct AlwaysFailBufferAlloc;
3293
3294impl LayoutBufferAlloc<Never> for AlwaysFailBufferAlloc {
3295    type Error = ();
3296    fn layout_alloc(
3297        self,
3298        _prefix: usize,
3299        _body: usize,
3300        _suffix: usize,
3301    ) -> Result<Never, Self::Error> {
3302        Err(())
3303    }
3304}
3305
/// Consume a parsed packet and restore the buffer it was parsed from.
///
/// `drop_packet_and_undo_parse!` takes a `$packet` and the `$buffer` that the
/// packet was parsed from. The packet is consumed through `into_metadata()`,
/// which yields its source IP, destination IP, protocol and parse metadata.
/// The parse metadata is then passed to `$buffer.undo_parse(..)` to undo the
/// effects of parsing the packet. The macro evaluates to the tuple
/// `(source IP, destination IP, protocol, parse metadata)`.
macro_rules! drop_packet_and_undo_parse {
    ($packet:expr, $buffer:expr) => {{
        let (source, destination, protocol, parse_meta) = $packet.into_metadata();
        $buffer.undo_parse(parse_meta);
        (source, destination, protocol, parse_meta)
    }};
}
3321
3322/// The result of calling [`process_fragment`], depending on what action needs
3323/// to be taken by the caller.
3324enum ProcessFragmentResult<'a, I: IpLayerIpExt> {
3325    /// Processing of the packet is complete and no more action should be
3326    /// taken.
3327    Done,
3328
3329    /// Reassembly is not needed. The returned packet is the same one that was
3330    /// passed in the call to [`process_fragment`].
3331    NotNeeded(I::Packet<&'a mut [u8]>),
3332
3333    /// A packet was successfully reassembled into the provided buffer. If a
3334    /// parsed packet is needed, then the caller must perform that parsing.
3335    Reassembled(Vec<u8>),
3336}
3337
3338/// Process a fragment and reassemble if required.
3339///
3340/// Attempts to process a potential fragment packet and reassemble if we are
3341/// ready to do so. Returns an enum to the caller with the result of processing
3342/// the potential fragment.
3343fn process_fragment<'a, I, CC, BC>(
3344    core_ctx: &mut CC,
3345    bindings_ctx: &mut BC,
3346    device: &CC::DeviceId,
3347    packet: I::Packet<&'a mut [u8]>,
3348) -> ProcessFragmentResult<'a, I>
3349where
3350    I: IpLayerIpExt,
3351    for<'b> I::Packet<&'b mut [u8]>: FragmentablePacket,
3352    CC: IpLayerIngressContext<I, BC>,
3353    BC: IpLayerBindingsContext<I, CC::DeviceId>,
3354{
3355    match FragmentHandler::<I, _>::process_fragment::<&mut [u8]>(core_ctx, bindings_ctx, packet) {
3356        // Handle the packet right away since reassembly is not needed.
3357        FragmentProcessingState::NotNeeded(packet) => {
3358            trace!("receive_ip_packet: not fragmented");
3359            ProcessFragmentResult::NotNeeded(packet)
3360        }
3361        // Ready to reassemble a packet.
3362        FragmentProcessingState::Ready { key, packet_len } => {
3363            trace!("receive_ip_packet: fragmented, ready for reassembly");
3364            // Allocate a buffer of `packet_len` bytes.
3365            let mut buffer = Buf::new(alloc::vec![0; packet_len], ..);
3366
3367            // Attempt to reassemble the packet.
3368            let reassemble_result = match FragmentHandler::<I, _>::reassemble_packet(
3369                core_ctx,
3370                bindings_ctx,
3371                &key,
3372                buffer.buffer_view_mut(),
3373            ) {
3374                // Successfully reassembled the packet, handle it.
3375                Ok(()) => ProcessFragmentResult::Reassembled(buffer.into_inner()),
3376                Err(e) => {
3377                    core_ctx.increment_both(device, |c| &c.fragment_reassembly_error);
3378                    debug!("receive_ip_packet: fragmented, failed to reassemble: {:?}", e);
3379                    ProcessFragmentResult::Done
3380                }
3381            };
3382            reassemble_result
3383        }
3384        // Cannot proceed since we need more fragments before we
3385        // can reassemble a packet.
3386        FragmentProcessingState::NeedMoreFragments => {
3387            core_ctx.increment_both(device, |c| &c.need_more_fragments);
3388            trace!("receive_ip_packet: fragmented, need more before reassembly");
3389            ProcessFragmentResult::Done
3390        }
3391        // TODO(ghanan): Handle invalid fragments.
3392        FragmentProcessingState::InvalidFragment => {
3393            core_ctx.increment_both(device, |c| &c.invalid_fragment);
3394            trace!("receive_ip_packet: fragmented, invalid");
3395            ProcessFragmentResult::Done
3396        }
3397        FragmentProcessingState::OutOfMemory => {
3398            core_ctx.increment_both(device, |c| &c.fragment_cache_full);
3399            trace!("receive_ip_packet: fragmented, dropped because OOM");
3400            ProcessFragmentResult::Done
3401        }
3402    }
3403}
3404
// TODO(joshlf): Can we turn `try_parse_ip_packet` into a function? So far, I've
// been unable to get the borrow checker to accept it.

/// Attempt to parse an IP packet out of a buffer, rolling the buffer back on
/// failure.
///
/// When parsing fails, the buffer is grown back to the prefix and suffix
/// lengths it had before the attempt, so that its contents can still be used
/// to send an ICMP error message. The macro expands to an expression of type
/// `Result<P, P::Error>`, where `P` is the parsed packet type.
macro_rules! try_parse_ip_packet {
    ($buffer:expr) => {{
        // Remember the buffer's shape so a failed parse can be undone.
        let prefix_before = $buffer.prefix_len();
        let suffix_before = $buffer.suffix_len();

        let result = $buffer.parse_mut();

        match result {
            Err(err) => {
                // Revert `buffer` to its original state by giving back
                // whatever the failed parse moved into the prefix/suffix.
                let prefix_after = $buffer.prefix_len();
                let suffix_after = $buffer.suffix_len();

                if prefix_after > prefix_before {
                    $buffer.grow_front(prefix_after - prefix_before);
                }

                if suffix_after > suffix_before {
                    $buffer.grow_back(suffix_after - suffix_before);
                }

                Err(err)
            }
            parsed => parsed,
        }
    }};
}
3440
/// Make an independent copy of an IP packet for delivery to one multicast
/// route target.
///
/// The invocation `let (data, buffer, packet) = original` introduces three
/// bindings in the caller's scope: the copied bytes, a buffer over those
/// bytes, and the packet parsed from that buffer.
///
/// Note: The underlying data must be copied, because the filtering engine
/// may uniquely modify each instance as part of performing forwarding.
///
/// Possible future optimizations include:
///   * Copy-on-write semantics for the buffer/packet, so that the underlying
///     data is only copied when actually needed.
///   * Skipping the reparse of the IP packet. Since the bytes are an exact
///     copy of a known good packet, it would be safe to adopt them as an IP
///     packet without performing any validation.
///
/// # Panics
///
/// Panics if the copied bytes fail to parse.
// NB: This is a macro, not a function, because Rust's "move" semantics prevent
// us from returning both a buffer and a packet referencing that buffer.
macro_rules! clone_packet_for_mcast_forwarding {
    {let ($new_data:ident, $new_buffer:ident, $new_packet:ident) = $packet:ident} => {
        let mut $new_data = $packet.to_vec();
        let mut $new_buffer = Buf::<&mut [u8]>::new($new_data.as_mut(), ..);
        let $new_packet = try_parse_ip_packet!($new_buffer).unwrap();
    };
}
3465
3466/// Receive an IPv4 packet from a device.
3467///
3468/// `frame_dst` specifies how this packet was received; see [`FrameDestination`]
3469/// for options.
3470pub fn receive_ipv4_packet<
3471    BC: IpLayerBindingsContext<Ipv4, CC::DeviceId>,
3472    B: BufferMut,
3473    CC: IpLayerIngressContext<Ipv4, BC>,
3474>(
3475    core_ctx: &mut CC,
3476    bindings_ctx: &mut BC,
3477    device: &CC::DeviceId,
3478    frame_dst: Option<LocalFrameDestination>,
3479    device_ip_layer_metadata: DeviceIpLayerMetadata<BC>,
3480    parsing_context: NetworkParsingContext,
3481    buffer: B,
3482) {
3483    if !core_ctx.is_ip_device_enabled(&device) {
3484        return;
3485    }
3486
3487    // This is required because we may need to process the buffer that was
3488    // passed in or a reassembled one, which have different types.
3489    let mut buffer: packet::Either<B, Buf<Vec<u8>>> = packet::Either::A(buffer);
3490
3491    core_ctx.increment_both(device, |c| &c.receive_ip_packet);
3492    trace!("receive_ip_packet({device:?})");
3493
3494    let packet: Ipv4Packet<_> = match try_parse_ip_packet!(buffer) {
3495        Ok(packet) => packet,
3496        Err(ParseError::Format)
3497        | Err(ParseError::Checksum)
3498        | Err(ParseError::NotSupported)
3499        | Err(ParseError::NotExpected) => {
3500            core_ctx.increment_both(device, |c| &c.unparsable_packet);
3501            return;
3502        }
3503    };
3504
3505    // We verify these properties later by actually creating the corresponding
3506    // witness types after the INGRESS filtering hook, but we keep these checks
3507    // here as an optimization to return early and save some work.
3508    if packet.src_ipv4().is_none() {
3509        debug!(
3510            "receive_ipv4_packet: received packet from invalid source {}; dropping",
3511            packet.src_ip()
3512        );
3513        core_ctx.increment_both(device, |c| &c.invalid_source);
3514        return;
3515    };
3516    if !packet.dst_ip().is_specified() {
3517        core_ctx.increment_both(device, |c| &c.unspecified_destination);
3518        debug!("receive_ipv4_packet: Received packet with unspecified destination IP; dropping");
3519        return;
3520    };
3521
3522    // Per RFC 1122, Section 3.2.1.3:
3523    //   Internal host loopback address.  Addresses of this form
3524    //   MUST NOT appear outside a host.
3525    if !device.is_loopback()
3526        && (Ipv4::LOOPBACK_SUBNET.contains(&packet.src_ip())
3527            || Ipv4::LOOPBACK_SUBNET.contains(&packet.dst_ip()))
3528    {
3529        debug!(
3530            "receive_ipv4_packet: received loopback packet (src={}, dst={}) \
3531            on non-loopback interface; dropping",
3532            packet.src_ip(),
3533            packet.dst_ip(),
3534        );
3535        return;
3536    }
3537
3538    // Reassemble all packets before local delivery or forwarding. Reassembly
3539    // before forwarding is not RFC-compliant, but it's the easiest way to
3540    // ensure that fragments are filtered properly. Linux does this and it
3541    // doesn't seem to create major problems.
3542    //
3543    // TODO(https://fxbug.dev/345814518): Forward fragments without reassembly.
3544    //
3545    // Note, the `process_fragment` function could panic if the packet does not
3546    // have fragment data. However, we are guaranteed that it will not panic
3547    // because the fragment data is in the fixed header so it is always present
3548    // (even if the fragment data has values that implies that the packet is not
3549    // fragmented).
3550    let mut packet = match process_fragment(core_ctx, bindings_ctx, device, packet) {
3551        ProcessFragmentResult::Done => return,
3552        ProcessFragmentResult::NotNeeded(packet) => packet,
3553        ProcessFragmentResult::Reassembled(buf) => {
3554            let buf = Buf::new(buf, ..);
3555            buffer = packet::Either::B(buf);
3556
3557            match buffer.parse_mut() {
3558                Ok(packet) => packet,
3559                Err(err) => {
3560                    core_ctx.increment_both(device, |c| &c.fragment_reassembly_error);
3561                    debug!("receive_ip_packet: fragmented, failed to reassemble: {:?}", err);
3562                    return;
3563                }
3564            }
3565        }
3566    };
3567
3568    // TODO(ghanan): Act upon options.
3569
3570    let mut packet_metadata = IpLayerPacketMetadata::from_device_ip_layer_metadata(
3571        core_ctx,
3572        device,
3573        device_ip_layer_metadata,
3574    );
3575    let mut filter = core_ctx.filter_handler();
3576    match filter.ingress_hook(bindings_ctx, &mut packet, device, &mut packet_metadata) {
3577        filter::Verdict::Proceed(filter::Accept) => {}
3578        filter::Verdict::Stop(filter::IngressStopReason::Drop) => {
3579            packet_metadata.acknowledge_drop();
3580            return;
3581        }
3582        filter::Verdict::Stop(filter::IngressStopReason::TransparentLocalDelivery {
3583            addr,
3584            port,
3585        }) => {
3586            // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
3587            // we need to provide to the packet dispatch function.
3588            drop(filter);
3589
3590            let Some(addr) = SpecifiedAddr::new(addr) else {
3591                core_ctx.increment_both(device, |c| &c.unspecified_destination);
3592                debug!("cannot perform transparent delivery to unspecified destination; dropping");
3593                return;
3594            };
3595
3596            let receive_meta = ReceiveIpPacketMeta {
3597                // It's possible that the packet was actually sent to a
3598                // broadcast address, but it doesn't matter here since it's
3599                // being delivered to a transparent proxy.
3600                broadcast: None,
3601                transparent_override: Some(TransparentLocalDelivery { addr, port }),
3602                parsing_context,
3603            };
3604
3605            // Short-circuit the routing process and override local demux, providing a local
3606            // address and port to which the packet should be transparently delivered at the
3607            // transport layer.
3608            dispatch_receive_ipv4_packet(
3609                core_ctx,
3610                bindings_ctx,
3611                device,
3612                frame_dst,
3613                packet,
3614                packet_metadata,
3615                receive_meta,
3616            )
3617            .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
3618            return;
3619        }
3620    }
3621    // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
3622    // we need below.
3623    drop(filter);
3624
3625    let Some(src_ip) = packet.src_ipv4() else {
3626        core_ctx.increment_both(device, |c| &c.invalid_source);
3627        debug!(
3628            "receive_ipv4_packet: received packet from invalid source {}; dropping",
3629            packet.src_ip()
3630        );
3631        return;
3632    };
3633
3634    let action = receive_ipv4_packet_action(
3635        core_ctx,
3636        bindings_ctx,
3637        device,
3638        &packet,
3639        frame_dst,
3640        &packet_metadata.marks,
3641    );
3642    match action {
3643        ReceivePacketAction::MulticastForward { targets, address_status, dst_ip } => {
3644            // TOOD(https://fxbug.dev/364242513): Support connection tracking of
3645            // the multiplexed flows created by multicast forwarding. Here, we
3646            // use the existing metadata for the first action taken, and then
3647            // a default instance for each subsequent action. The first action
3648            // will populate the conntrack table with an entry, which will then
3649            // be used by all subsequent forwards.
3650            let mut packet_metadata = Some(packet_metadata);
3651            for MulticastRouteTarget { output_interface, min_ttl } in targets.as_ref() {
3652                clone_packet_for_mcast_forwarding! {
3653                    let (copy_of_data, copy_of_buffer, copy_of_packet) = packet
3654                };
3655                determine_ip_packet_forwarding_action::<Ipv4, _, _>(
3656                    core_ctx,
3657                    copy_of_packet,
3658                    packet_metadata.take().unwrap_or_default(),
3659                    Some(*min_ttl),
3660                    device,
3661                    &output_interface,
3662                    IpPacketDestination::from_addr(dst_ip),
3663                    frame_dst,
3664                    src_ip,
3665                    dst_ip,
3666                )
3667                .perform_action_with_buffer(core_ctx, bindings_ctx, copy_of_buffer);
3668            }
3669
3670            // If we also have an interest in the packet, deliver it locally.
3671            if let Some(address_status) = address_status {
3672                let receive_meta = ReceiveIpPacketMeta {
3673                    broadcast: address_status.to_broadcast_marker(),
3674                    transparent_override: None,
3675                    parsing_context,
3676                };
3677                dispatch_receive_ipv4_packet(
3678                    core_ctx,
3679                    bindings_ctx,
3680                    device,
3681                    frame_dst,
3682                    packet,
3683                    packet_metadata.take().unwrap_or_default(),
3684                    receive_meta,
3685                )
3686                .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
3687            }
3688        }
3689        ReceivePacketAction::Deliver { address_status, internal_forwarding } => {
3690            // NB: when performing internal forwarding, hit the
3691            // forwarding hook.
3692            match internal_forwarding {
3693                InternalForwarding::Used(outbound_device) => {
3694                    core_ctx.increment_both(device, |c| &c.forward);
3695                    match core_ctx.filter_handler().forwarding_hook(
3696                        &mut packet,
3697                        device,
3698                        &outbound_device,
3699                        &mut packet_metadata,
3700                    ) {
3701                        filter::Verdict::Stop(filter::DropOrReject::Drop) => {
3702                            packet_metadata.acknowledge_drop();
3703                            return;
3704                        }
3705                        filter::Verdict::Stop(filter::DropOrReject::Reject(_reject_type)) => {
3706                            // TODO(https://fxbug.dev/466098884): Send reject packet.
3707                            packet_metadata.acknowledge_drop();
3708                            return;
3709                        }
3710                        filter::Verdict::Proceed(filter::Accept) => {}
3711                    }
3712                }
3713                InternalForwarding::NotUsed => {}
3714            }
3715
3716            let receive_meta = ReceiveIpPacketMeta {
3717                broadcast: address_status.to_broadcast_marker(),
3718                transparent_override: None,
3719                parsing_context,
3720            };
3721            dispatch_receive_ipv4_packet(
3722                core_ctx,
3723                bindings_ctx,
3724                device,
3725                frame_dst,
3726                packet,
3727                packet_metadata,
3728                receive_meta,
3729            )
3730            .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
3731        }
3732        ReceivePacketAction::Forward {
3733            original_dst,
3734            dst: Destination { device: dst_device, next_hop },
3735        } => {
3736            determine_ip_packet_forwarding_action::<Ipv4, _, _>(
3737                core_ctx,
3738                packet,
3739                packet_metadata,
3740                None,
3741                device,
3742                &dst_device,
3743                IpPacketDestination::from_next_hop(next_hop, original_dst),
3744                frame_dst,
3745                src_ip,
3746                original_dst,
3747            )
3748            .perform_action_with_buffer(core_ctx, bindings_ctx, buffer);
3749        }
3750        ReceivePacketAction::SendNoRouteToDest { dst: dst_ip } => {
3751            debug!("received IPv4 packet with no known route to destination {}", dst_ip);
3752
3753            let marks = packet_metadata.marks;
3754            packet_metadata.acknowledge_drop();
3755
3756            if let Some(sender) = IcmpErrorSender::new(
3757                core_ctx,
3758                Icmpv4Error::NetUnreachable,
3759                &packet,
3760                frame_dst,
3761                device,
3762                marks,
3763            ) {
3764                sender.send(core_ctx, bindings_ctx, buffer);
3765            }
3766        }
3767        ReceivePacketAction::Drop { reason } => {
3768            let src_ip = packet.src_ip();
3769            let dst_ip = packet.dst_ip();
3770            packet_metadata.acknowledge_drop();
3771            core_ctx.increment_both(device, |c| &c.dropped);
3772            debug!(
3773                "receive_ipv4_packet: dropping packet from {src_ip} to {dst_ip} received on \
3774                {device:?}: {reason:?}",
3775            );
3776        }
3777    }
3778}
3779
3780fn handle_ipv6_parse_error<BC, B, CC>(
3781    core_ctx: &mut CC,
3782    bindings_ctx: &mut BC,
3783    device: &CC::DeviceId,
3784    frame_dst: Option<LocalFrameDestination>,
3785    device_ip_layer_metadata: DeviceIpLayerMetadata<BC>,
3786    mut buffer: B,
3787    error: Ipv6ParseError,
3788) where
3789    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
3790    B: BufferMut,
3791    CC: IpLayerIngressContext<Ipv6, BC>,
3792{
3793    // Conditionally send an ICMP response if we encountered a parameter
3794    // problem error when parsing an IPv6 packet. Note, we do not always
3795    // send back an ICMP response as it can be used as an attack vector for
3796    // DDoS attacks. We only send back an ICMP response if the RFC requires
3797    // that we MUST send one, as noted by `must_send_icmp` and `action`.
3798    let Ipv6ParseError::ParameterProblem { src_ip, dst_ip, code, pointer, must_send_icmp, action } =
3799        error
3800    else {
3801        core_ctx.increment_both(device, |c| &c.unparsable_packet);
3802        debug!("receive_ipv6_packet: Failed to parse IPv6 packet: {:?}", error);
3803        return;
3804    };
3805    if !must_send_icmp || !action.should_send_icmp(&dst_ip) {
3806        return;
3807    }
3808    core_ctx.increment_both(device, |c| &c.parameter_problem);
3809    let dst_ip = match SocketIpAddr::new(dst_ip) {
3810        Some(ip) => ip,
3811        None => {
3812            core_ctx.increment_both(device, |c| &c.unspecified_destination);
3813            debug!("receive_ipv6_packet: Dropping packet with unspecified destination IP");
3814            return;
3815        }
3816    };
3817
3818    let src_ip = match Ipv6SourceAddr::new(src_ip) {
3819        None => {
3820            core_ctx.increment_both(device, |c| &c.invalid_source);
3821            return;
3822        }
3823        Some(Ipv6SourceAddr::Unspecified) => {
3824            core_ctx.increment_both(device, |c| &c.unspecified_source);
3825            return;
3826        }
3827        Some(Ipv6SourceAddr::Unicast(src_ip)) => {
3828            SocketIpAddr::new_from_ipv6_non_mapped_unicast(src_ip)
3829        }
3830    };
3831
3832    // Try raw parser to find main packet protocol and body offset. If this
3833    // fails as well then we can't send an ICMP error message.
3834    let raw_packet: Ipv6PacketRaw<_> = match try_parse_ip_packet!(buffer) {
3835        Ok(packet) => packet,
3836        Err(error) => {
3837            core_ctx.increment_both(device, |c| &c.unparsable_packet);
3838            debug!("receive_ipv6_packet: Failed to parse IPv6 packet: {:?}", error);
3839            return;
3840        }
3841    };
3842    let proto = match raw_packet.proto() {
3843        Ok(proto) => proto,
3844        Err(error) => {
3845            core_ctx.increment_both(device, |c| &c.unparsable_packet);
3846            debug!("receive_ipv6_packet: Failed to get protocol from IPv6 packet: {:?}", error);
3847            return;
3848        }
3849    };
3850    let parse_metadata = raw_packet.parse_metadata();
3851    let header_len = parse_metadata.header_len();
3852    buffer.undo_parse(parse_metadata);
3853
3854    let err = Icmpv6Error::ParameterProblem {
3855        code,
3856        pointer,
3857        allow_dst_multicast: action.should_send_icmp_to_multicast(),
3858    };
3859
3860    IcmpErrorHandler::<Ipv6, _>::send_icmp_error_message(
3861        core_ctx,
3862        bindings_ctx,
3863        Some(device),
3864        frame_dst,
3865        src_ip,
3866        dst_ip,
3867        buffer,
3868        err,
3869        header_len,
3870        proto,
3871        &device_ip_layer_metadata.marks,
3872    );
3873}
3874
/// Receive an IPv6 packet from a device.
///
/// `frame_dst` specifies how this packet was received; see
/// [`LocalFrameDestination`] for options.
///
/// Processing proceeds in the following order:
///
/// 1. Return immediately if `core_ctx.is_ip_device_enabled` reports that IP
///    is not enabled on `device`.
/// 2. Parse `buffer` as an IPv6 packet. On failure, hand the error to
///    `handle_ipv6_parse_error` and return.
/// 3. Drop packets with an invalid source, an unspecified destination, or a
///    loopback source/destination received on a non-loopback device.
/// 4. Handle extension headers, reassembling fragments when required.
/// 5. Run the filter's ingress hook, which may drop the packet or request
///    transparent local delivery.
/// 6. Carry out the [`ReceivePacketAction`] computed by
///    [`receive_ipv6_packet_action`]: deliver locally, forward (unicast or
///    multicast), send an ICMPv6 "net unreachable" error, or drop.
///
/// `device_ip_layer_metadata` is converted into the packet's
/// `IpLayerPacketMetadata` (or, on a parse error, passed on to
/// `handle_ipv6_parse_error`). `parsing_context` is forwarded unchanged in the
/// `ReceiveIpPacketMeta` given to `dispatch_receive_ipv6_packet`.
pub fn receive_ipv6_packet<
    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
    B: BufferMut,
    CC: IpLayerIngressContext<Ipv6, BC>,
>(
    core_ctx: &mut CC,
    bindings_ctx: &mut BC,
    device: &CC::DeviceId,
    frame_dst: Option<LocalFrameDestination>,
    device_ip_layer_metadata: DeviceIpLayerMetadata<BC>,
    parsing_context: NetworkParsingContext,
    buffer: B,
) {
    // Bail out before touching any counters when the device is not enabled.
    if !core_ctx.is_ip_device_enabled(&device) {
        return;
    }

    // This is required because we may need to process the buffer that was
    // passed in or a reassembled one, which have different types.
    let mut buffer: packet::Either<B, Buf<Vec<u8>>> = packet::Either::A(buffer);

    core_ctx.increment_both(device, |c| &c.receive_ip_packet);
    trace!("receive_ipv6_packet({:?})", device);

    let packet: Ipv6Packet<_> = match try_parse_ip_packet!(buffer) {
        Ok(packet) => packet,
        Err(error) => {
            // The parse-error handler decides whether an ICMP Parameter
            // Problem response is warranted; either way we are done.
            handle_ipv6_parse_error(
                core_ctx,
                bindings_ctx,
                device,
                frame_dst,
                device_ip_layer_metadata,
                buffer,
                error,
            );
            return;
        }
    };

    trace!("receive_ipv6_packet: parsed packet: {:?}", packet);

    // TODO(ghanan): Act upon extension headers.

    // We verify these properties later by actually creating the corresponding
    // witness types after the INGRESS filtering hook, but we keep these checks
    // here as an optimization to return early and save some work.
    if packet.src_ipv6().is_none() {
        debug!(
            "receive_ipv6_packet: received packet from invalid source {}; dropping",
            packet.src_ip()
        );
        core_ctx.increment_both(device, |c| &c.invalid_source);
        return;
    };
    if !packet.dst_ip().is_specified() {
        core_ctx.increment_both(device, |c| &c.unspecified_destination);
        debug!("receive_ipv6_packet: Received packet with unspecified destination IP; dropping");
        return;
    };

    // Per RFC 4291, Section 2.5.3:
    //   The loopback address must not be used as the source address in IPv6
    //   packets that are sent outside of a single node.  An IPv6 packet with
    //   a destination address of loopback must never be sent outside of a
    //   single node and must never be forwarded by an IPv6 router.  A packet
    //   received on an interface with a destination address of loopback must
    //   be dropped.
    if !device.is_loopback()
        && (Ipv6::LOOPBACK_SUBNET.contains(&packet.src_ip())
            || Ipv6::LOOPBACK_SUBNET.contains(&packet.dst_ip()))
    {
        debug!(
            "receive_ipv6_packet: received loopback packet (src={}, dst={}) \
            on non-loopback interface; dropping",
            packet.src_ip(),
            packet.dst_ip(),
        );
        return;
    }

    // Reassemble all packets before local delivery or forwarding. Reassembly
    // before forwarding is not RFC-compliant, but it's the easiest way to
    // ensure that fragments are filtered properly. Linux does this and it
    // doesn't seem to create major problems.
    //
    // TODO(https://fxbug.dev/345814518): Forward fragments without reassembly.
    //
    // delivery_extension_header_action is used to prevent looking at the
    // extension headers twice when a non-fragmented packet is delivered
    // locally.
    let (mut packet, delivery_extension_header_action) =
        match ipv6::handle_extension_headers(core_ctx, device, frame_dst, &packet, true) {
            Ipv6PacketAction::_Discard => {
                core_ctx.increment_both(device, |c| &c.version_rx.extension_header_discard);
                trace!("receive_ipv6_packet: handled IPv6 extension headers: discarding packet");
                return;
            }
            Ipv6PacketAction::Continue => {
                trace!("receive_ipv6_packet: handled IPv6 extension headers: dispatching packet");
                (packet, Some(Ipv6PacketAction::Continue))
            }
            Ipv6PacketAction::ProcessFragment => {
                trace!(
                    "receive_ipv6_packet: handled IPv6 extension headers: handling \
                    fragmented packet"
                );

                // Note, `IpPacketFragmentCache::process_fragment`
                // could panic if the packet does not have fragment data.
                // However, we are guaranteed that it will not panic for an
                // IPv6 packet because the fragment data is in an (optional)
                // fragment extension header which we attempt to handle by
                // calling `ipv6::handle_extension_headers`. We will only
                // end up here if its return value is
                // `Ipv6PacketAction::ProcessFragment` which is only
                // possible when the packet has the fragment extension
                // header (even if the fragment data has values that implies
                // that the packet is not fragmented).
                match process_fragment(core_ctx, bindings_ctx, device, packet) {
                    ProcessFragmentResult::Done => return,
                    ProcessFragmentResult::NotNeeded(packet) => {
                        // While strange, it's possible for there to be a Fragment
                        // header that says the packet doesn't need defragmentation.
                        // As per RFC 8200 4.5:
                        //
                        //   If the fragment is a whole datagram (that is, both the
                        //   Fragment Offset field and the M flag are zero), then it
                        //   does not need any further reassembly and should be
                        //   processed as a fully reassembled packet (i.e., updating
                        //   Next Header, adjust Payload Length, removing the
                        //   Fragment header, etc.).
                        //
                        // In this case, we're not technically reassembling the
                        // packet, since, per the RFC, that would mean removing the
                        // Fragment header.
                        (packet, Some(Ipv6PacketAction::Continue))
                    }
                    ProcessFragmentResult::Reassembled(buf) => {
                        // From here on `buffer` refers to the reassembled
                        // packet rather than the buffer we were handed.
                        let buf = Buf::new(buf, ..);
                        buffer = packet::Either::B(buf);

                        match buffer.parse_mut() {
                            // `None`: the reassembled packet's extension
                            // headers have not been examined yet, so local
                            // delivery must process them below.
                            Ok(packet) => (packet, None),
                            Err(err) => {
                                core_ctx.increment_both(device, |c| &c.fragment_reassembly_error);
                                debug!(
                                    "receive_ip_packet: fragmented, failed to reassemble: {:?}",
                                    err
                                );
                                return;
                            }
                        }
                    }
                }
            }
        };

    let mut packet_metadata = IpLayerPacketMetadata::from_device_ip_layer_metadata(
        core_ctx,
        device,
        device_ip_layer_metadata,
    );
    let mut filter = core_ctx.filter_handler();

    match filter.ingress_hook(bindings_ctx, &mut packet, device, &mut packet_metadata) {
        filter::Verdict::Proceed(filter::Accept) => {}
        filter::Verdict::Stop(filter::IngressStopReason::Drop) => {
            packet_metadata.acknowledge_drop();
            return;
        }
        filter::Verdict::Stop(filter::IngressStopReason::TransparentLocalDelivery {
            addr,
            port,
        }) => {
            // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
            // we need to provide to the packet dispatch function.
            drop(filter);

            let Some(addr) = SpecifiedAddr::new(addr) else {
                core_ctx.increment_both(device, |c| &c.unspecified_destination);
                debug!("cannot perform transparent delivery to unspecified destination; dropping");
                return;
            };

            let receive_meta = ReceiveIpPacketMeta {
                broadcast: None,
                transparent_override: Some(TransparentLocalDelivery { addr, port }),
                parsing_context,
            };

            // Short-circuit the routing process and override local demux, providing a local
            // address and port to which the packet should be transparently delivered at the
            // transport layer.
            dispatch_receive_ipv6_packet(
                core_ctx,
                bindings_ctx,
                device,
                frame_dst,
                packet,
                packet_metadata,
                receive_meta,
            )
            .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
            return;
        }
    }
    // Drop the filter handler since it holds a mutable borrow of `core_ctx`, which
    // we need below.
    drop(filter);

    // Re-check the source address now that the ingress hook has run (see the
    // note on the early checks above).
    let Some(src_ip) = packet.src_ipv6() else {
        debug!(
            "receive_ipv6_packet: received packet from invalid source {}; dropping",
            packet.src_ip()
        );
        core_ctx.increment_both(device, |c| &c.invalid_source);
        return;
    };

    match receive_ipv6_packet_action(
        core_ctx,
        bindings_ctx,
        device,
        &packet,
        frame_dst,
        &packet_metadata.marks,
    ) {
        ReceivePacketAction::MulticastForward { targets, address_status, dst_ip } => {
            // TODO(https://fxbug.dev/364242513): Support connection tracking of
            // the multiplexed flows created by multicast forwarding. Here, we
            // use the existing metadata for the first action taken, and then
            // a default instance for each subsequent action. The first action
            // will populate the conntrack table with an entry, which will then
            // be used by all subsequent forwards.
            let mut packet_metadata = Some(packet_metadata);
            for MulticastRouteTarget { output_interface, min_ttl } in targets.as_ref() {
                clone_packet_for_mcast_forwarding! {
                    let (copy_of_data, copy_of_buffer, copy_of_packet) = packet
                };
                determine_ip_packet_forwarding_action::<Ipv6, _, _>(
                    core_ctx,
                    copy_of_packet,
                    packet_metadata.take().unwrap_or_default(),
                    Some(*min_ttl),
                    device,
                    &output_interface,
                    IpPacketDestination::from_addr(dst_ip),
                    frame_dst,
                    src_ip,
                    dst_ip,
                )
                .perform_action_with_buffer(core_ctx, bindings_ctx, copy_of_buffer);
            }

            // If we also have an interest in the packet, deliver it locally.
            if let Some(_) = address_status {
                let receive_meta = ReceiveIpPacketMeta {
                    broadcast: None,
                    transparent_override: None,
                    parsing_context,
                };

                // The original metadata is only still available here if no
                // forward above took it; otherwise a default instance is used.
                dispatch_receive_ipv6_packet(
                    core_ctx,
                    bindings_ctx,
                    device,
                    frame_dst,
                    packet,
                    packet_metadata.take().unwrap_or_default(),
                    receive_meta,
                )
                .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
            }
        }
        ReceivePacketAction::Deliver { address_status: _, internal_forwarding } => {
            trace!("receive_ipv6_packet: delivering locally");

            // Reuse the result from the first pass over the extension headers
            // when there is one; only reassembled packets need a fresh pass.
            let action = if let Some(action) = delivery_extension_header_action {
                action
            } else {
                ipv6::handle_extension_headers(core_ctx, device, frame_dst, &packet, true)
            };
            match action {
                Ipv6PacketAction::_Discard => {
                    core_ctx.increment_both(device, |c| &c.version_rx.extension_header_discard);
                    trace!(
                        "receive_ipv6_packet: handled IPv6 extension headers: discarding packet"
                    );
                    packet_metadata.acknowledge_drop();
                }
                Ipv6PacketAction::Continue => {
                    trace!(
                        "receive_ipv6_packet: handled IPv6 extension headers: dispatching packet"
                    );

                    // NB: when performing internal forwarding, hit the
                    // forwarding hook.
                    match internal_forwarding {
                        InternalForwarding::Used(outbound_device) => {
                            core_ctx.increment_both(device, |c| &c.forward);
                            match core_ctx.filter_handler().forwarding_hook(
                                &mut packet,
                                device,
                                &outbound_device,
                                &mut packet_metadata,
                            ) {
                                filter::Verdict::Stop(filter::DropOrReject::Drop) => {
                                    packet_metadata.acknowledge_drop();
                                    return;
                                }
                                filter::Verdict::Stop(filter::DropOrReject::Reject(
                                    _reject_type,
                                )) => {
                                    // TODO(https://fxbug.dev/466098884): Send reject packet.
                                    packet_metadata.acknowledge_drop();
                                    return;
                                }
                                filter::Verdict::Proceed(filter::Accept) => {}
                            }
                        }
                        InternalForwarding::NotUsed => {}
                    }

                    let meta = ReceiveIpPacketMeta {
                        broadcast: None,
                        transparent_override: None,
                        parsing_context,
                    };
                    dispatch_receive_ipv6_packet(
                        core_ctx,
                        bindings_ctx,
                        device,
                        frame_dst,
                        packet,
                        packet_metadata,
                        meta,
                    )
                    .unwrap_or_else(|icmp_sender| icmp_sender.send(core_ctx, bindings_ctx, buffer));
                }
                Ipv6PacketAction::ProcessFragment => {
                    debug!("receive_ipv6_packet: found fragment header after reassembly; dropping");
                    packet_metadata.acknowledge_drop();
                }
            }
        }
        ReceivePacketAction::Forward {
            original_dst,
            dst: Destination { device: dst_device, next_hop },
        } => {
            determine_ip_packet_forwarding_action::<Ipv6, _, _>(
                core_ctx,
                packet,
                packet_metadata,
                None,
                device,
                &dst_device,
                IpPacketDestination::from_next_hop(next_hop, original_dst),
                frame_dst,
                src_ip,
                original_dst,
            )
            .perform_action_with_buffer(core_ctx, bindings_ctx, buffer);
        }
        ReceivePacketAction::SendNoRouteToDest { dst: dst_ip } => {
            let (_, _, proto, meta): (Ipv6Addr, Ipv6Addr, _, _) =
                drop_packet_and_undo_parse!(packet, buffer);
            debug!("received IPv6 packet with no known route to destination {}", dst_ip);
            // Take the marks out before acknowledging the drop; they are
            // still needed for the ICMP error sent below.
            let marks = packet_metadata.marks;
            packet_metadata.acknowledge_drop();

            // The ICMP error is addressed to the packet's source, so there is
            // nobody to notify when the source is unspecified.
            let src_ip = match src_ip {
                Ipv6SourceAddr::Unspecified => {
                    core_ctx.increment_both(device, |c| &c.unspecified_source);
                    return;
                }
                Ipv6SourceAddr::Unicast(src_ip) => {
                    SocketIpAddr::new_from_ipv6_non_mapped_unicast(src_ip)
                }
            };

            IcmpErrorHandler::<Ipv6, _>::send_icmp_error_message(
                core_ctx,
                bindings_ctx,
                Some(device),
                frame_dst,
                src_ip,
                SocketIpAddr::new_from_witness(dst_ip),
                buffer,
                Icmpv6Error::NetUnreachable,
                meta.header_len(),
                proto,
                &marks,
            );
        }
        ReceivePacketAction::Drop { reason } => {
            core_ctx.increment_both(device, |c| &c.dropped);
            let src_ip = packet.src_ip();
            let dst_ip = packet.dst_ip();
            packet_metadata.acknowledge_drop();
            debug!(
                "receive_ipv6_packet: dropping packet from {src_ip} to {dst_ip} received on \
                {device:?}: {reason:?}",
            );
        }
    }
}
4286
4287/// The action to take in order to process a received IP packet.
4288#[derive(Debug, PartialEq)]
4289pub enum ReceivePacketAction<I: BroadcastIpExt + IpLayerIpExt, DeviceId: StrongDeviceIdentifier> {
4290    /// Deliver the packet locally.
4291    Deliver {
4292        /// Status of the receiving IP address.
4293        address_status: I::AddressStatus,
4294        /// `InternalForwarding::Used(d)` if we're delivering the packet as a
4295        /// Weak Host performing internal forwarding via output device `d`.
4296        internal_forwarding: InternalForwarding<DeviceId>,
4297    },
4298
4299    /// Forward the packet to the given destination.
4300    Forward {
4301        /// The original destination IP address of the packet.
4302        original_dst: SpecifiedAddr<I::Addr>,
4303        /// The destination that the packet should be forwarded to.
4304        dst: Destination<I::Addr, DeviceId>,
4305    },
4306
4307    /// A multicast packet that should be forwarded (& optional local delivery).
4308    ///
4309    /// The packet should be forwarded to each of the given targets. This case
4310    /// is only returned when the packet is eligible for multicast forwarding;
4311    /// `Self::Deliver` is used for packets that are ineligible (either because
4312    /// multicast forwarding is disabled, or because there are no applicable
4313    /// multicast routes with which to forward the packet).
4314    MulticastForward {
4315        /// The multicast targets to forward the packet via.
4316        targets: MulticastRouteTargets<DeviceId>,
4317        /// Some if the host is a member of the multicast group and the packet
4318        /// should be delivered locally (in addition to forwarding).
4319        address_status: Option<I::AddressStatus>,
4320        /// The multicast address the packet should be forwarded to.
4321        dst_ip: SpecifiedAddr<I::Addr>,
4322    },
4323
4324    /// Send a Destination Unreachable ICMP error message to the packet's sender
4325    /// and drop the packet.
4326    ///
4327    /// For ICMPv4, use the code "net unreachable". For ICMPv6, use the code "no
4328    /// route to destination".
4329    SendNoRouteToDest {
4330        /// The destination IP Address to which there was no route.
4331        dst: NonMappedAddr<SpecifiedAddr<I::Addr>>,
4332    },
4333
4334    /// Silently drop the packet.
4335    ///
4336    /// `reason` describes why the packet was dropped.
4337    #[allow(missing_docs)]
4338    Drop { reason: DropReason },
4339}
4340
4341// It's possible that there is more than one device with the address
4342// present. Prefer any address status over `UnicastTentative`.
4343fn choose_highest_priority_address_status<I: IpLayerIpExt>(
4344    address_statuses: impl Iterator<Item = I::AddressStatus>,
4345) -> Option<I::AddressStatus> {
4346    address_statuses.max_by_key(|status| {
4347        #[derive(GenericOverIp)]
4348        #[generic_over_ip(I, Ip)]
4349        struct Wrap<'a, I: IpLayerIpExt>(&'a I::AddressStatus);
4350        I::map_ip_in(
4351            Wrap(status),
4352            |Wrap(v4_status)| match v4_status {
4353                Ipv4PresentAddressStatus::UnicastTentative => 0,
4354                _ => 1,
4355            },
4356            |Wrap(v6_status)| match v6_status {
4357                Ipv6PresentAddressStatus::UnicastTentative => 0,
4358                _ => 1,
4359            },
4360        )
4361    })
4362}
4363
/// The reason a received IP packet is dropped.
///
/// Carried in the `reason` field of [`ReceivePacketAction::Drop`].
#[derive(Debug, PartialEq)]
pub enum DropReason {
    /// Remote packet destined to tentative address.
    ///
    /// Produced, for example, by [`receive_ipv4_packet_action`] when the
    /// destination's address status is `UnicastTentative`.
    Tentative,
    /// Remote packet destined to the unspecified address.
    ///
    /// Produced by [`receive_ipv4_packet_action`] and
    /// [`receive_ipv6_packet_action`] when the packet's destination cannot be
    /// turned into a `SpecifiedAddr`.
    UnspecifiedDestination,
    /// Remote packet with an invalid destination address.
    InvalidDestination,
    /// Cannot forward a packet with unspecified source address.
    ForwardUnspecifiedSource,
    /// Cannot forward a packet with link-local source or destination address.
    ForwardLinkLocal,
    /// Packet should be forwarded but packet's inbound interface has forwarding
    /// disabled.
    ForwardingDisabledInboundIface,
    /// Remote packet destined to a multicast address that could not be:
    /// * delivered locally (because we are not a member of the multicast
    ///   group), or
    /// * forwarded (either because multicast forwarding is disabled, or no
    ///   applicable multicast route has been installed).
    MulticastNoInterest,
}
4387
4388/// Computes the action to take in order to process a received IPv4 packet.
4389pub fn receive_ipv4_packet_action<BC, CC, B>(
4390    core_ctx: &mut CC,
4391    bindings_ctx: &mut BC,
4392    device: &CC::DeviceId,
4393    packet: &Ipv4Packet<B>,
4394    frame_dst: Option<LocalFrameDestination>,
4395    marks: &Marks,
4396) -> ReceivePacketAction<Ipv4, CC::DeviceId>
4397where
4398    BC: IpLayerBindingsContext<Ipv4, CC::DeviceId>,
4399    CC: IpLayerContext<Ipv4, BC>,
4400    B: SplitByteSlice,
4401{
4402    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
4403        core_ctx.increment_both(device, |c| &c.unspecified_destination);
4404        return ReceivePacketAction::Drop { reason: DropReason::UnspecifiedDestination };
4405    };
4406
4407    // If the packet arrived at the loopback interface, check if any local
4408    // interface has the destination address assigned. This effectively lets the
4409    // loopback interface operate as a weak host for incoming packets.
4410    //
4411    // Note that (as of writing) the stack sends all locally destined traffic to
4412    // the loopback interface so we need this hack to allow the stack to accept
4413    // packets that arrive at the loopback interface (after being looped back)
4414    // but destined to an address that is assigned to another local interface.
4415    //
4416    // TODO(https://fxbug.dev/42065870): This should instead be controlled by
4417    // the routing table.
4418
4419    let highest_priority = if device.is_loopback() {
4420        core_ctx.with_address_statuses(dst_ip, |it| {
4421            let it = it.map(|(_device, status)| status);
4422            choose_highest_priority_address_status::<Ipv4>(it)
4423        })
4424    } else {
4425        core_ctx.address_status_for_device(dst_ip, device).into_present()
4426    };
4427    match highest_priority {
4428        Some(
4429            address_status @ (Ipv4PresentAddressStatus::UnicastAssigned
4430            | Ipv4PresentAddressStatus::LoopbackSubnet),
4431        ) => {
4432            core_ctx.increment_both(device, |c| &c.deliver_unicast);
4433            ReceivePacketAction::Deliver {
4434                address_status,
4435                internal_forwarding: InternalForwarding::NotUsed,
4436            }
4437        }
4438        Some(Ipv4PresentAddressStatus::UnicastTentative) => {
4439            // If the destination address is tentative (which implies that
4440            // we are still performing Duplicate Address Detection on
4441            // it), then we don't consider the address "assigned to an
4442            // interface", and so we drop packets instead of delivering them
4443            // locally.
4444            core_ctx.increment_both(device, |c| &c.drop_for_tentative);
4445            ReceivePacketAction::Drop { reason: DropReason::Tentative }
4446        }
4447
4448        Some(address_status @ Ipv4PresentAddressStatus::Multicast) => {
4449            receive_ip_multicast_packet_action(
4450                core_ctx,
4451                bindings_ctx,
4452                device,
4453                packet,
4454                Some(address_status),
4455                dst_ip,
4456                frame_dst,
4457            )
4458        }
4459        Some(
4460            address_status @ (Ipv4PresentAddressStatus::LimitedBroadcast
4461            | Ipv4PresentAddressStatus::SubnetBroadcast),
4462        ) => {
4463            core_ctx.increment_both(device, |c| &c.version_rx.deliver_broadcast);
4464            ReceivePacketAction::Deliver {
4465                address_status,
4466                internal_forwarding: InternalForwarding::NotUsed,
4467            }
4468        }
4469        None => receive_ip_packet_action_common::<Ipv4, _, _, _>(
4470            core_ctx,
4471            bindings_ctx,
4472            dst_ip,
4473            device,
4474            packet,
4475            frame_dst,
4476            marks,
4477        ),
4478    }
4479}
4480
4481/// Computes the action to take in order to process a received IPv6 packet.
4482pub fn receive_ipv6_packet_action<BC, CC, B>(
4483    core_ctx: &mut CC,
4484    bindings_ctx: &mut BC,
4485    device: &CC::DeviceId,
4486    packet: &Ipv6Packet<B>,
4487    frame_dst: Option<LocalFrameDestination>,
4488    marks: &Marks,
4489) -> ReceivePacketAction<Ipv6, CC::DeviceId>
4490where
4491    BC: IpLayerBindingsContext<Ipv6, CC::DeviceId>,
4492    CC: IpLayerContext<Ipv6, BC>,
4493    B: SplitByteSlice,
4494{
4495    let Some(dst_ip) = SpecifiedAddr::new(packet.dst_ip()) else {
4496        core_ctx.increment_both(device, |c| &c.unspecified_destination);
4497        return ReceivePacketAction::Drop { reason: DropReason::UnspecifiedDestination };
4498    };
4499
4500    // If the packet arrived at the loopback interface, check if any local
4501    // interface has the destination address assigned. This effectively lets
4502    // the loopback interface operate as a weak host for incoming packets.
4503    //
4504    // Note that (as of writing) the stack sends all locally destined traffic to
4505    // the loopback interface so we need this hack to allow the stack to accept
4506    // packets that arrive at the loopback interface (after being looped back)
4507    // but destined to an address that is assigned to another local interface.
4508    //
4509    // TODO(https://fxbug.dev/42175703): This should instead be controlled by the
4510    // routing table.
4511
4512    let highest_priority = if device.is_loopback() {
4513        core_ctx.with_address_statuses(dst_ip, |it| {
4514            let it = it.map(|(_device, status)| status);
4515            choose_highest_priority_address_status::<Ipv6>(it)
4516        })
4517    } else {
4518        core_ctx.address_status_for_device(dst_ip, device).into_present()
4519    };
4520    match highest_priority {
4521        Some(address_status @ Ipv6PresentAddressStatus::Multicast) => {
4522            receive_ip_multicast_packet_action(
4523                core_ctx,
4524                bindings_ctx,
4525                device,
4526                packet,
4527                Some(address_status),
4528                dst_ip,
4529                frame_dst,
4530            )
4531        }
4532        Some(address_status @ Ipv6PresentAddressStatus::UnicastAssigned) => {
4533            core_ctx.increment_both(device, |c| &c.deliver_unicast);
4534            ReceivePacketAction::Deliver {
4535                address_status,
4536                internal_forwarding: InternalForwarding::NotUsed,
4537            }
4538        }
4539        Some(Ipv6PresentAddressStatus::UnicastTentative) => {
4540            // If the destination address is tentative (which implies that
4541            // we are still performing NDP's Duplicate Address Detection on
4542            // it), then we don't consider the address "assigned to an
4543            // interface", and so we drop packets instead of delivering them
4544            // locally.
4545            //
4546            // As per RFC 4862 section 5.4:
4547            //
4548            //   An address on which the Duplicate Address Detection
4549            //   procedure is applied is said to be tentative until the
4550            //   procedure has completed successfully. A tentative address
4551            //   is not considered "assigned to an interface" in the
4552            //   traditional sense.  That is, the interface must accept
4553            //   Neighbor Solicitation and Advertisement messages containing
4554            //   the tentative address in the Target Address field, but
4555            //   processes such packets differently from those whose Target
4556            //   Address matches an address assigned to the interface. Other
4557            //   packets addressed to the tentative address should be
4558            //   silently discarded. Note that the "other packets" include
4559            //   Neighbor Solicitation and Advertisement messages that have
4560            //   the tentative (i.e., unicast) address as the IP destination
4561            //   address and contain the tentative address in the Target
4562            //   Address field.  Such a case should not happen in normal
4563            //   operation, though, since these messages are multicasted in
4564            //   the Duplicate Address Detection procedure.
4565            //
4566            // That is, we accept no packets destined to a tentative
4567            // address. NS and NA packets should be addressed to a multicast
4568            // address that we would have joined during DAD so that we can
4569            // receive those packets.
4570            core_ctx.increment_both(device, |c| &c.drop_for_tentative);
4571            ReceivePacketAction::Drop { reason: DropReason::Tentative }
4572        }
4573        None => receive_ip_packet_action_common::<Ipv6, _, _, _>(
4574            core_ctx,
4575            bindings_ctx,
4576            dst_ip,
4577            device,
4578            packet,
4579            frame_dst,
4580            marks,
4581        ),
4582    }
4583}
4584
4585/// Computes the action to take for multicast packets on behalf of
4586/// [`receive_ipv4_packet_action`] and [`receive_ipv6_packet_action`].
4587fn receive_ip_multicast_packet_action<
4588    I: IpLayerIpExt,
4589    B: SplitByteSlice,
4590    BC: IpLayerBindingsContext<I, CC::DeviceId>,
4591    CC: IpLayerContext<I, BC>,
4592>(
4593    core_ctx: &mut CC,
4594    bindings_ctx: &mut BC,
4595    device: &CC::DeviceId,
4596    packet: &I::Packet<B>,
4597    address_status: Option<I::AddressStatus>,
4598    dst_ip: SpecifiedAddr<I::Addr>,
4599    frame_dst: Option<LocalFrameDestination>,
4600) -> ReceivePacketAction<I, CC::DeviceId> {
4601    let targets = multicast_forwarding::lookup_multicast_route_or_stash_packet(
4602        core_ctx,
4603        bindings_ctx,
4604        packet,
4605        device,
4606        frame_dst,
4607    );
4608    match (targets, address_status) {
4609        (Some(targets), address_status) => {
4610            if address_status.is_some() {
4611                core_ctx.increment_both(device, |c| &c.deliver_multicast);
4612            }
4613            ReceivePacketAction::MulticastForward { targets, address_status, dst_ip }
4614        }
4615        (None, Some(address_status)) => {
4616            // If the address was present on the device (e.g. the host is a
4617            // member of the multicast group), fallback to local delivery.
4618            core_ctx.increment_both(device, |c| &c.deliver_multicast);
4619            ReceivePacketAction::Deliver {
4620                address_status,
4621                internal_forwarding: InternalForwarding::NotUsed,
4622            }
4623        }
4624        (None, None) => {
4625            // As per RFC 1122 Section 3.2.2
4626            //   An ICMP error message MUST NOT be sent as the result of
4627            //   receiving:
4628            //   ...
4629            //   * a datagram destined to an IP broadcast or IP multicast
4630            //     address
4631            //
4632            // As such, drop the packet
4633            core_ctx.increment_both(device, |c| &c.multicast_no_interest);
4634            ReceivePacketAction::Drop { reason: DropReason::MulticastNoInterest }
4635        }
4636    }
4637}
4638
4639/// Computes the remaining protocol-agnostic actions on behalf of
4640/// [`receive_ipv4_packet_action`] and [`receive_ipv6_packet_action`].
4641fn receive_ip_packet_action_common<
4642    I: IpLayerIpExt,
4643    B: SplitByteSlice,
4644    BC: IpLayerBindingsContext<I, CC::DeviceId>,
4645    CC: IpLayerContext<I, BC>,
4646>(
4647    core_ctx: &mut CC,
4648    bindings_ctx: &mut BC,
4649    dst_ip: SpecifiedAddr<I::Addr>,
4650    device_id: &CC::DeviceId,
4651    packet: &I::Packet<B>,
4652    frame_dst: Option<LocalFrameDestination>,
4653    marks: &Marks,
4654) -> ReceivePacketAction<I, CC::DeviceId> {
4655    if dst_ip.is_multicast() {
4656        return receive_ip_multicast_packet_action(
4657            core_ctx,
4658            bindings_ctx,
4659            device_id,
4660            packet,
4661            None,
4662            dst_ip,
4663            frame_dst,
4664        );
4665    }
4666
4667    // Don't allow mapped IPv6 addresses.
4668    let Some(dst_ip) = NonMappedAddr::new(dst_ip) else {
4669        return ReceivePacketAction::Drop { reason: DropReason::InvalidDestination };
4670    };
4671
4672    // The packet is not destined locally, so we attempt to forward it.
4673    if !core_ctx.is_device_unicast_forwarding_enabled(device_id) {
4674        // Forwarding is disabled; we are operating only as a host.
4675        //
4676        // For IPv4, per RFC 1122 Section 3.2.1.3, "A host MUST silently discard
4677        // an incoming datagram that is not destined for the host."
4678        //
4679        // For IPv6, per RFC 4443 Section 3.1, the only instance in which a host
4680        // sends an ICMPv6 Destination Unreachable message is when a packet is
4681        // destined to that host but on an unreachable port (Code 4 - "Port
4682        // unreachable"). Since the only sensible error message to send in this
4683        // case is a Destination Unreachable message, we interpret the RFC text
4684        // to mean that, consistent with IPv4's behavior, we should silently
4685        // discard the packet in this case.
4686        core_ctx.increment_both(device_id, |c| &c.forwarding_disabled);
4687        return ReceivePacketAction::Drop { reason: DropReason::ForwardingDisabledInboundIface };
4688    }
4689    // Per https://www.rfc-editor.org/rfc/rfc4291.html#section-2.5.2:
4690    //   An IPv6 packet with a source address of unspecified must never be forwarded by an IPv6
4691    //   router.
4692    // Per https://datatracker.ietf.org/doc/html/rfc1812#section-5.3.7:
4693    //   A router SHOULD NOT forward any packet that has an invalid IP source address or a source
4694    //   address on network 0
4695    let Some(source_address) = SpecifiedAddr::new(packet.src_ip()) else {
4696        return ReceivePacketAction::Drop { reason: DropReason::ForwardUnspecifiedSource };
4697    };
4698
4699    // If forwarding is enabled, allow local delivery if the packet is destined
4700    // for an IP assigned to a different interface.
4701    //
4702    // This enables a weak host model when the Netstack is configured as a
4703    // router. Conceptually, the netstack is forwarding the packet from the
4704    // input device, to the destination IP's device.
4705    if let Some(dst_ip) = NonMulticastAddr::new(dst_ip) {
4706        if let Some((outbound_device, address_status)) =
4707            get_device_with_assigned_address(core_ctx, IpDeviceAddr::new_from_witness(dst_ip))
4708        {
4709            return ReceivePacketAction::Deliver {
4710                address_status,
4711                internal_forwarding: InternalForwarding::Used(outbound_device),
4712            };
4713        }
4714    }
4715
4716    // For IPv4, RFC 3927 Section 2.7 states:
4717    //
4718    //   An IPv4 packet whose source and/or destination address is in the
4719    //   169.254/16 prefix MUST NOT be sent to any router for forwarding, and
4720    //   any network device receiving such a packet MUST NOT forward it,
4721    //   regardless of the TTL in the IPv4 header.
4722    //
4723    // However, to maintain behavioral similarity to both gVisor/Netstack2 and
4724    // Linux, we omit this check.
4725    //
4726    // For IPv6, RFC 4291 Section 2.5.6 states:
4727    //
4728    //   Routers must not forward any packets with Link-Local source or
4729    //   destination addresses to other links.
4730    if I::map_ip_in(
4731        &packet,
4732        |_| false,
4733        |packet| packet.src_ip().is_link_local() || packet.dst_ip().is_link_local(),
4734    ) {
4735        return ReceivePacketAction::Drop { reason: DropReason::ForwardLinkLocal };
4736    }
4737
4738    match lookup_route_table(
4739        core_ctx,
4740        dst_ip.get(),
4741        RuleInput {
4742            packet_origin: PacketOrigin::NonLocal { source_address, incoming_device: device_id },
4743            marks,
4744        },
4745    ) {
4746        Some(dst) => {
4747            core_ctx.increment_both(device_id, |c| &c.forward);
4748            ReceivePacketAction::Forward { original_dst: *dst_ip, dst }
4749        }
4750        None => {
4751            core_ctx.increment_both(device_id, |c| &c.no_route_to_host);
4752            ReceivePacketAction::SendNoRouteToDest { dst: dst_ip }
4753        }
4754    }
4755}
4756
4757// Look up the route to a host.
4758fn lookup_route_table<
4759    I: IpLayerIpExt,
4760    BC: IpLayerBindingsContext<I, CC::DeviceId>,
4761    CC: IpStateContext<I, BC>,
4762>(
4763    core_ctx: &mut CC,
4764    dst_ip: I::Addr,
4765    rule_input: RuleInput<'_, I, CC::DeviceId>,
4766) -> Option<Destination<I::Addr, CC::DeviceId>> {
4767    let bound_device = match rule_input.packet_origin {
4768        PacketOrigin::Local { bound_address: _, bound_device } => bound_device,
4769        PacketOrigin::NonLocal { source_address: _, incoming_device: _ } => None,
4770    };
4771    core_ctx.with_rules_table(|core_ctx, rules: &RulesTable<_, _, BC>| {
4772        match walk_rules(core_ctx, rules, (), &rule_input, |(), core_ctx, table| {
4773            match table.lookup(core_ctx, bound_device, dst_ip) {
4774                Some(dst) => ControlFlow::Break(Some(dst)),
4775                None => ControlFlow::Continue(()),
4776            }
4777        }) {
4778            ControlFlow::Break(RuleAction::Lookup(RuleWalkInfo {
4779                inner: dst,
4780                observed_source_address_matcher: _,
4781            })) => dst,
4782            ControlFlow::Break(RuleAction::Unreachable) => None,
4783            ControlFlow::Continue(RuleWalkInfo {
4784                inner: (),
4785                observed_source_address_matcher: _,
4786            }) => None,
4787        }
4788    })
4789}
4790
/// Packed destination passed to [`IpDeviceSendContext::send_ip_frame`].
///
/// `D` is the device identifier type carried by the
/// [`IpPacketDestination::Loopback`] variant.
///
/// `Eq` and `PartialEq` are derived through `derivative` so that their bounds
/// are placed on `D` only.
#[derive(Debug, Derivative, Clone)]
#[derivative(Eq(bound = "D: Eq"), PartialEq(bound = "D: PartialEq"))]
pub enum IpPacketDestination<I: BroadcastIpExt, D> {
    /// Broadcast packet.
    ///
    /// Carries the IP-version-specific broadcast marker.
    Broadcast(I::BroadcastMarker),

    /// Multicast packet to the specified IP.
    Multicast(MulticastAddr<I::Addr>),

    /// Send packet to the neighbor with the specified IP (the receiving
    /// node is either a router or the final recipient of the packet).
    Neighbor(SpecifiedAddr<I::Addr>),

    /// Loopback the packet to the specified device. Can be used only when
    /// sending to the loopback device.
    Loopback(D),
}
4809
4810impl<I: BroadcastIpExt, D> IpPacketDestination<I, D> {
4811    /// Creates `IpPacketDestination` for IP address.
4812    pub fn from_addr(addr: SpecifiedAddr<I::Addr>) -> Self {
4813        match MulticastAddr::new(addr.into_addr()) {
4814            Some(mc_addr) => Self::Multicast(mc_addr),
4815            None => Self::Neighbor(addr),
4816        }
4817    }
4818
4819    /// Create `IpPacketDestination` from `NextHop`.
4820    pub fn from_next_hop(next_hop: NextHop<I::Addr>, dst_ip: SpecifiedAddr<I::Addr>) -> Self {
4821        match next_hop {
4822            NextHop::RemoteAsNeighbor => Self::from_addr(dst_ip),
4823            NextHop::Gateway(gateway) => Self::Neighbor(gateway),
4824            NextHop::Broadcast(marker) => Self::Broadcast(marker),
4825        }
4826    }
4827}
4828
/// The metadata associated with an outgoing IP packet.
///
/// `D` is the type used to identify the outgoing device. `Src` is the
/// representation of the source address; this file uses both
/// `SpecifiedAddr<I::Addr>` and `Option<SpecifiedAddr<I::Addr>>` for it.
#[derive(Debug, Clone)]
pub struct SendIpPacketMeta<I: IpExt, D, Src> {
    /// The outgoing device.
    pub device: D,

    /// The source address of the packet.
    ///
    /// When `Src` is an `Option`, `send_ip_packet_from_device` writes
    /// `I::UNSPECIFIED_ADDRESS` into the packet for `None`.
    pub src_ip: Src,

    /// The destination address of the packet.
    pub dst_ip: SpecifiedAddr<I::Addr>,

    /// The destination for the send operation.
    pub destination: IpPacketDestination<I, D>,

    /// The upper-layer protocol held in the packet's payload.
    pub proto: I::Proto,

    /// The time-to-live (IPv4) or hop limit (IPv6) for the packet.
    ///
    /// If not set, a default TTL may be used.
    pub ttl: Option<NonZeroU8>,

    /// An MTU to artificially impose on the whole IP packet.
    ///
    /// Note that the device's and discovered path MTU may still be imposed on
    /// the packet.
    pub mtu: Mtu,

    /// Traffic Class (IPv6) or Type of Service (IPv4) field for the packet.
    pub dscp_and_ecn: DscpAndEcn,
}
4861
4862impl<I: IpExt, D> From<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>>
4863    for SendIpPacketMeta<I, D, Option<SpecifiedAddr<I::Addr>>>
4864{
4865    fn from(
4866        SendIpPacketMeta { device, src_ip, dst_ip, destination, proto, ttl, mtu, dscp_and_ecn }: SendIpPacketMeta<
4867            I,
4868            D,
4869            SpecifiedAddr<I::Addr>,
4870        >,
4871    ) -> SendIpPacketMeta<I, D, Option<SpecifiedAddr<I::Addr>>> {
4872        SendIpPacketMeta {
4873            device,
4874            src_ip: Some(src_ip),
4875            dst_ip,
4876            destination,
4877            proto,
4878            ttl,
4879            mtu,
4880            dscp_and_ecn,
4881        }
4882    }
4883}
4884
/// Trait for abstracting the IP layer for locally-generated traffic.  That is,
/// traffic generated by the netstack itself (e.g. ICMP, IGMP, or MLD).
///
/// NOTE: Due to filtering rules, it is possible that the device provided in
/// `meta` will not be the device that the final IP packet is actually sent
/// from.
pub trait IpLayerHandler<I: IpExt + FragmentationIpExt + FilterIpExt, BC>:
    DeviceIdContext<AnyDevice>
{
    /// Encapsulate and send the provided transport packet from the device
    /// provided in `meta`.
    ///
    /// `body` is the transport-layer payload to be wrapped in an IP packet
    /// described by `meta`. On failure the error is an [`IpSendFrameError`]
    /// parameterized on the serializer type `S`.
    fn send_ip_packet_from_device<S>(
        &mut self,
        bindings_ctx: &mut BC,
        meta: SendIpPacketMeta<I, &Self::DeviceId, Option<SpecifiedAddr<I::Addr>>>,
        body: S,
    ) -> Result<(), IpSendFrameError<S>>
    where
        S: TransportPacketSerializer<I>,
        S::Buffer: BufferMut;

    /// Send an IP packet that doesn't require the encapsulation and other
    /// processing of [`send_ip_packet_from_device`] from the device specified
    /// in `meta`.
    ///
    /// NOTE(review): this method takes `device` and `destination` directly
    /// rather than a `meta` argument; "in `meta`" above presumably refers to
    /// those two parameters.
    // TODO(https://fxbug.dev/333908066): The packets going through this
    // function only hit the EGRESS filter hook, bypassing LOCAL_EGRESS.
    // Refactor callers and other functions to prevent this.
    fn send_ip_frame<S>(
        &mut self,
        bindings_ctx: &mut BC,
        device: &Self::DeviceId,
        destination: IpPacketDestination<I, &Self::DeviceId>,
        body: S,
    ) -> Result<(), IpSendFrameError<S>>
    where
        S: FragmentableIpSerializer<I, Buffer: BufferMut> + FilterIpPacket<I>;
}
4921
4922impl<
4923    I: IpLayerIpExt,
4924    BC: IpLayerBindingsContext<I, <CC as DeviceIdContext<AnyDevice>>::DeviceId>,
4925    CC: IpLayerEgressContext<I, BC> + IpDeviceEgressStateContext<I> + IpDeviceMtuContext<I>,
4926> IpLayerHandler<I, BC> for CC
4927{
4928    fn send_ip_packet_from_device<S>(
4929        &mut self,
4930        bindings_ctx: &mut BC,
4931        meta: SendIpPacketMeta<I, &CC::DeviceId, Option<SpecifiedAddr<I::Addr>>>,
4932        body: S,
4933    ) -> Result<(), IpSendFrameError<S>>
4934    where
4935        S: TransportPacketSerializer<I>,
4936        S::Buffer: BufferMut,
4937    {
4938        send_ip_packet_from_device(self, bindings_ctx, meta, body, IpLayerPacketMetadata::default())
4939    }
4940
4941    fn send_ip_frame<S>(
4942        &mut self,
4943        bindings_ctx: &mut BC,
4944        device: &Self::DeviceId,
4945        destination: IpPacketDestination<I, &Self::DeviceId>,
4946        body: S,
4947    ) -> Result<(), IpSendFrameError<S>>
4948    where
4949        S: FragmentableIpSerializer<I, Buffer: BufferMut> + FilterIpPacket<I>,
4950    {
4951        send_ip_frame(
4952            self,
4953            bindings_ctx,
4954            device,
4955            destination,
4956            body,
4957            IpLayerPacketMetadata::default(),
4958            Mtu::no_limit(),
4959        )
4960    }
4961}
4962
4963/// Sends an Ip packet with the specified metadata.
4964///
4965/// # Panics
4966///
4967/// Panics if either the source or destination address is the loopback address
4968/// and the device is a non-loopback device.
4969pub(crate) fn send_ip_packet_from_device<I, BC, CC, S>(
4970    core_ctx: &mut CC,
4971    bindings_ctx: &mut BC,
4972    meta: SendIpPacketMeta<
4973        I,
4974        &<CC as DeviceIdContext<AnyDevice>>::DeviceId,
4975        Option<SpecifiedAddr<I::Addr>>,
4976    >,
4977    body: S,
4978    packet_metadata: IpLayerPacketMetadata<I, CC::WeakAddressId, BC>,
4979) -> Result<(), IpSendFrameError<S>>
4980where
4981    I: IpLayerIpExt,
4982    BC: FilterBindingsContext<CC::DeviceId> + TxMetadataBindingsTypes + MarksBindingsContext,
4983    CC: IpLayerEgressContext<I, BC> + IpDeviceEgressStateContext<I> + IpDeviceMtuContext<I>,
4984    S: TransportPacketSerializer<I>,
4985    S::Buffer: BufferMut,
4986{
4987    let SendIpPacketMeta { device, src_ip, dst_ip, destination, proto, ttl, mtu, dscp_and_ecn } =
4988        meta;
4989    core_ctx.increment_both(device, |c| &c.send_ip_packet);
4990    let next_packet_id = gen_ip_packet_id(core_ctx);
4991    let ttl = ttl.unwrap_or_else(|| core_ctx.get_hop_limit(device)).get();
4992    let src_ip = src_ip.map_or(I::UNSPECIFIED_ADDRESS, |a| a.get());
4993    let mut builder = I::PacketBuilder::new(src_ip, dst_ip.get(), ttl, proto);
4994
4995    #[derive(GenericOverIp)]
4996    #[generic_over_ip(I, Ip)]
4997    struct Wrap<'a, I: IpLayerIpExt> {
4998        builder: &'a mut I::PacketBuilder<NetworkSerializationContext>,
4999        next_packet_id: I::PacketId,
5000    }
5001
5002    I::map_ip::<_, ()>(
5003        Wrap { builder: &mut builder, next_packet_id },
5004        |Wrap { builder, next_packet_id }| {
5005            builder.id(next_packet_id);
5006        },
5007        |Wrap { builder: _, next_packet_id: () }| {
5008            // IPv6 doesn't have packet IDs.
5009        },
5010    );
5011
5012    builder.set_dscp_and_ecn(dscp_and_ecn);
5013
5014    let ip_frame = builder.wrap_body(body);
5015    send_ip_frame(core_ctx, bindings_ctx, device, destination, ip_frame, packet_metadata, mtu)
5016        .map_err(|ser| ser.map_serializer(|s| s.into_inner()))
5017}
5018
/// Abstracts access to a [`filter::FilterHandler`] for core contexts.
///
/// The implementing context's device ID must expose
/// `InterfaceProperties` for the bindings' device class.
pub trait FilterHandlerProvider<I: FilterIpExt, BT: FilterBindingsTypes>:
    IpDeviceAddressIdContext<I, DeviceId: netstack3_base::InterfaceProperties<BT::DeviceClass>>
{
    /// The filter handler.
    ///
    /// It operates on the same `DeviceId` and `WeakAddressId` types as the
    /// context that provides it.
    type Handler<'a>: filter::FilterHandler<I, BT, DeviceId = Self::DeviceId, WeakAddressId = Self::WeakAddressId>
    where
        Self: 'a;

    /// Gets the filter handler for this context.
    fn filter_handler(&mut self) -> Self::Handler<'_>;
}
5031
#[cfg(any(test, feature = "testutils"))]
pub(crate) mod testutil {
    use super::*;

    use netstack3_base::testutil::{FakeBindingsCtx, FakeCoreCtx, FakeStrongDeviceId};
    use netstack3_base::{
        AssignedAddrIpExt, NetworkSerializer, SendFrameContext, SendFrameError, SendableFrameMeta,
    };

    /// A [`SendIpPacketMeta`] for dual stack contexts.
    ///
    /// Holds either the IPv4 or the IPv6 flavor of the metadata so that a
    /// single fake context can record frames of both versions.
    #[derive(Debug, GenericOverIp)]
    #[generic_over_ip()]
    #[allow(missing_docs)]
    pub enum DualStackSendIpPacketMeta<D> {
        V4(SendIpPacketMeta<Ipv4, D, SpecifiedAddr<Ipv4Addr>>),
        V6(SendIpPacketMeta<Ipv6, D, SpecifiedAddr<Ipv6Addr>>),
    }

    /// Wraps version-generic metadata in the variant matching `I`.
    impl<I: IpExt, D> From<SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>>
        for DualStackSendIpPacketMeta<D>
    {
        fn from(meta: SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>) -> Self {
            /// Makes the metadata usable as a `map_ip_in` input.
            #[derive(GenericOverIp)]
            #[generic_over_ip(I, Ip)]
            struct MetaIn<I: IpExt, D>(SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>);

            I::map_ip_in(
                MetaIn(meta),
                |MetaIn(v4_meta)| Self::V4(v4_meta),
                |MetaIn(v6_meta)| Self::V6(v6_meta),
            )
        }
    }

    /// Lets version-generic metadata be sent through a fake core context
    /// whose frame metadata type is [`DualStackSendIpPacketMeta`].
    impl<I: IpExt, S, DeviceId, BC>
        SendableFrameMeta<FakeCoreCtx<S, DualStackSendIpPacketMeta<DeviceId>, DeviceId>, BC>
        for SendIpPacketMeta<I, DeviceId, SpecifiedAddr<I::Addr>>
    {
        fn send_meta<SS>(
            self,
            core_ctx: &mut FakeCoreCtx<S, DualStackSendIpPacketMeta<DeviceId>, DeviceId>,
            bindings_ctx: &mut BC,
            frame: SS,
        ) -> Result<(), SendFrameError<SS>>
        where
            SS: NetworkSerializer,
            SS::Buffer: BufferMut,
        {
            // Convert to the dual-stack form, then send through the fake
            // context's frame store.
            let dual_stack_meta = DualStackSendIpPacketMeta::from(self);
            SendFrameContext::send_frame(
                &mut core_ctx.frames,
                bindings_ctx,
                dual_stack_meta,
                frame,
            )
        }
    }

    /// Error returned when the IP version doesn't match.
    #[derive(Debug)]
    pub struct WrongIpVersion;

    impl<D> DualStackSendIpPacketMeta<D> {
        /// Borrows the inner [`SendIpPacketMeta`] if it is of IP version `I`,
        /// and returns [`WrongIpVersion`] otherwise.
        pub fn try_as<I: IpExt>(
            &self,
        ) -> Result<&SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>, WrongIpVersion> {
            /// Makes the optional borrow usable as a `map_ip` output.
            #[derive(GenericOverIp)]
            #[generic_over_ip(I, Ip)]
            struct MetaOut<'a, I: IpExt, D>(
                Option<&'a SendIpPacketMeta<I, D, SpecifiedAddr<I::Addr>>>,
            );

            let selected = I::map_ip(
                self,
                |dual_stack| {
                    MetaOut(match dual_stack {
                        DualStackSendIpPacketMeta::V4(meta) => Some(meta),
                        DualStackSendIpPacketMeta::V6(_) => None,
                    })
                },
                |dual_stack| {
                    MetaOut(match dual_stack {
                        DualStackSendIpPacketMeta::V4(_) => None,
                        DualStackSendIpPacketMeta::V6(meta) => Some(meta),
                    })
                },
            );
            match selected {
                MetaOut(Some(meta)) => Ok(meta),
                MetaOut(None) => Err(WrongIpVersion),
            }
        }
    }

    /// Gives fake core contexts a no-op filter handler.
    impl<I, BC, S, Meta, DeviceId> FilterHandlerProvider<I, BC> for FakeCoreCtx<S, Meta, DeviceId>
    where
        I: AssignedAddrIpExt + FilterIpExt,
        BC: FilterBindingsContext<DeviceId>,
        DeviceId: FakeStrongDeviceId + netstack3_base::InterfaceProperties<BC::DeviceClass>,
    {
        type Handler<'a>
            = filter::testutil::NoopImpl<DeviceId>
        where
            Self: 'a;

        fn filter_handler(&mut self) -> Self::Handler<'_> {
            Default::default()
        }
    }

    /// Fixed mark domains for fake bindings: `Mark1` is kept on egress and
    /// `Mark2` is set on ingress.
    impl<TimerId, Event: Debug, State, FrameMeta> MarksBindingsContext
        for FakeBindingsCtx<TimerId, Event, State, FrameMeta>
    {
        fn marks_to_keep_on_egress() -> &'static [MarkDomain] {
            const KEPT_ON_EGRESS: &[MarkDomain] = &[MarkDomain::Mark1];
            KEPT_ON_EGRESS
        }

        fn marks_to_set_on_ingress() -> &'static [MarkDomain] {
            const SET_ON_INGRESS: &[MarkDomain] = &[MarkDomain::Mark2];
            SET_ON_INGRESS
        }
    }
}
5150
#[cfg(test)]
mod test {
    use super::*;

    /// For IPv4, an assigned address must be preferred over a tentative one.
    #[test]
    fn highest_priority_address_status_v4() {
        let candidates = [
            Ipv4PresentAddressStatus::UnicastAssigned,
            Ipv4PresentAddressStatus::UnicastTentative,
        ];
        let chosen = choose_highest_priority_address_status::<Ipv4>(candidates.into_iter());
        assert_eq!(chosen, Some(Ipv4PresentAddressStatus::UnicastAssigned));
    }

    /// For IPv6, an assigned address must be preferred over a tentative one.
    #[test]
    fn highest_priority_address_status_v6() {
        let candidates = [
            Ipv6PresentAddressStatus::UnicastAssigned,
            Ipv6PresentAddressStatus::UnicastTentative,
        ];
        let chosen = choose_highest_priority_address_status::<Ipv6>(candidates.into_iter());
        assert_eq!(chosen, Some(Ipv6PresentAddressStatus::UnicastAssigned));
    }
}