netstack3_filter/
conntrack.rs

1// Copyright 2024 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod tcp;
6
7use alloc::collections::HashMap;
8use alloc::fmt::Debug;
9use alloc::sync::{Arc, Weak};
10use alloc::vec::Vec;
11use assert_matches::assert_matches;
12use core::any::Any;
13use core::fmt::Display;
14use core::hash::Hash;
15use core::time::Duration;
16
17use derivative::Derivative;
18use net_types::ip::{GenericOverIp, Ip, IpVersionMarker};
19use packet_formats::ip::{IpExt, IpProto, Ipv4Proto, Ipv6Proto};
20
21use crate::context::{FilterBindingsContext, FilterBindingsTypes};
22use crate::logic::FilterTimerId;
23use crate::packets::{IpPacket, MaybeTransportPacket, TransportPacketData};
24use netstack3_base::sync::Mutex;
25use netstack3_base::{CoreTimerContext, Inspectable, Inspector, Instant, TimerContext};
26
/// The time from the end of one GC cycle to the beginning of the next.
///
/// This is the delay handed to the timer every time [`schedule_gc`] arms it.
const GC_INTERVAL: Duration = Duration::from_secs(10);

/// The time since the last seen packet after which an established UDP
/// connection will be considered expired and is eligible for garbage
/// collection.
///
/// This was taken from RFC 4787 REQ-5.
const CONNECTION_EXPIRY_TIME_UDP: Duration = Duration::from_secs(120);

/// The time since the last seen packet after which a generic connection will be
/// considered expired and is eligible for garbage collection.
const CONNECTION_EXPIRY_OTHER: Duration = Duration::from_secs(30);

/// The maximum number of entries in the conntrack table.
///
/// NOTE: This is subtly different from the number of connections in the table
/// because self-connected sockets only have a single entry instead of the
/// normal two.
///
/// Once the table holds at least this many entries,
/// [`Table::finalize_connection`] must evict an expired or not-yet-established
/// connection before inserting a new one, and fails with
/// [`FinalizeConnectionError::TableFull`] if no such connection exists.
pub(crate) const MAXIMUM_ENTRIES: usize = 100_000;
47
/// Implements a connection tracking subsystem.
///
/// The `E` parameter is for external data that is stored in the [`Connection`]
/// struct and can be extracted with the [`Connection::external_data()`]
/// function.
pub struct Table<I: IpExt, E, BT: FilterBindingsTypes> {
    /// All table state (the connection map, the GC timer and the limit
    /// counters), guarded by a single lock.
    inner: Mutex<TableInner<I, E, BT>>,
}
56
/// The state of a [`Table`] that lives behind its lock.
struct TableInner<I: IpExt, E, BT: FilterBindingsTypes> {
    /// A connection is inserted into the map twice: once for the original
    /// tuple, and once for the reply tuple.
    ///
    /// The exception is a connection whose original and reply tuples are
    /// equal, which is only inserted once.
    table: HashMap<Tuple<I>, Arc<ConnectionShared<I, E, BT>>>,
    /// A timer for triggering garbage collection events.
    gc_timer: BT::Timer,
    /// The number of times the table size limit was hit.
    ///
    /// Incremented with saturating arithmetic, so it stops at `u32::MAX`.
    table_limit_hits: u32,
    /// Of the times the table limit was hit, the number of times we had to drop
    /// a packet because we couldn't make space in the table.
    ///
    /// Incremented with saturating arithmetic, so it stops at `u32::MAX`.
    table_limit_drops: u32,
}
69
70impl<I: IpExt, E, BT: FilterBindingsTypes> Table<I, E, BT> {
71    /// Returns whether the table contains a connection for the specified tuple.
72    ///
73    /// This is for NAT to determine whether a generated tuple will clash with
74    /// one already in the map. While it might seem inefficient, to require
75    /// locking in a loop, taking an uncontested lock is going to be
76    /// significantly faster than the RNG used to allocate NAT parameters.
77    pub fn contains_tuple(&self, tuple: &Tuple<I>) -> bool {
78        self.inner.lock().table.contains_key(tuple)
79    }
80
81    /// Returns a [`Connection`] for the flow indexed by `tuple`, if one exists.
82    pub(crate) fn get_shared_connection(
83        &self,
84        tuple: &Tuple<I>,
85    ) -> Option<Arc<ConnectionShared<I, E, BT>>> {
86        let guard = self.inner.lock();
87        let conn = guard.table.get(&tuple)?;
88        Some(conn.clone())
89    }
90
91    /// Returns a [`Connection`] for the flow indexed by `tuple`, if one exists.
92    pub fn get_connection(&self, tuple: &Tuple<I>) -> Option<Connection<I, E, BT>> {
93        let guard = self.inner.lock();
94        let conn = guard.table.get(&tuple)?;
95        Some(Connection::Shared(conn.clone()))
96    }
97
98    /// Returns the number of entries in the table.
99    ///
100    /// NOTE: This is usually twice the number of connections, but self-connected sockets will only
101    /// have a single entry.
102    #[cfg(feature = "testutils")]
103    pub fn num_entries(&self) -> usize {
104        self.inner.lock().table.len()
105    }
106
107    /// Removes the [`Connection`] for the flow indexed by `tuple`, if one exists,
108    /// and returns it to the caller.
109    #[cfg(feature = "testutils")]
110    pub fn remove_connection(&mut self, tuple: &Tuple<I>) -> Option<Connection<I, E, BT>> {
111        let mut guard = self.inner.lock();
112
113        // Remove the entry indexed by the tuple.
114        let conn = guard.table.remove(&tuple)?;
115        let (original, reply) = (&conn.inner.original_tuple, &conn.inner.reply_tuple);
116
117        // If this is not a self-connected flow, we need to remove the other tuple on
118        // which the connection is indexed.
119        if original != reply {
120            if tuple == original {
121                assert!(guard.table.remove(reply).is_some());
122            } else {
123                assert!(guard.table.remove(original).is_some());
124            }
125        }
126
127        Some(Connection::Shared(conn))
128    }
129}
130
/// Schedules `timer` on `bindings_ctx` with a delay of [`GC_INTERVAL`].
///
/// The value returned by `schedule_timer` is intentionally discarded.
fn schedule_gc<BC>(bindings_ctx: &mut BC, timer: &mut BC::Timer)
where
    BC: TimerContext,
{
    let _ = bindings_ctx.schedule_timer(GC_INTERVAL, timer);
}
137
138impl<I: IpExt, E, BC: FilterBindingsContext> Table<I, E, BC> {
139    pub(crate) fn new<CC: CoreTimerContext<FilterTimerId<I>, BC>>(bindings_ctx: &mut BC) -> Self {
140        Self {
141            inner: Mutex::new(TableInner {
142                table: HashMap::new(),
143                gc_timer: CC::new_timer(
144                    bindings_ctx,
145                    FilterTimerId::ConntrackGc(IpVersionMarker::<I>::new()),
146                ),
147                table_limit_hits: 0,
148                table_limit_drops: 0,
149            }),
150        }
151    }
152}
153
154impl<
155        I: IpExt,
156        E: Default + Send + Sync + PartialEq + CompatibleWith + 'static,
157        BC: FilterBindingsContext,
158    > Table<I, E, BC>
159{
160    /// Attempts to insert the `Connection` into the table.
161    ///
162    /// To be called once a packet for the connection has passed all filtering.
163    /// The boolean return value represents whether the connection was newly
164    /// added to the connection tracking state.
165    ///
166    /// This is on [`Table`] instead of [`Connection`] because conntrack needs
167    /// to be able to manipulate its internal map.
168    pub(crate) fn finalize_connection(
169        &self,
170        bindings_ctx: &mut BC,
171        connection: Connection<I, E, BC>,
172    ) -> Result<(bool, Option<Arc<ConnectionShared<I, E, BC>>>), FinalizeConnectionError> {
173        let exclusive = match connection {
174            Connection::Exclusive(c) => c,
175            // Given that make_shared is private, the only way for us to receive
176            // a shared connection is if it was already present in the map. This
177            // is far and away the most common case under normal operation.
178            Connection::Shared(inner) => return Ok((false, Some(inner))),
179        };
180
181        if exclusive.do_not_insert {
182            return Ok((false, None));
183        }
184
185        let mut guard = self.inner.lock();
186
187        if guard.table.len() >= MAXIMUM_ENTRIES {
188            guard.table_limit_hits = guard.table_limit_hits.saturating_add(1);
189
190            struct Info<'a, I: IpExt, BT: FilterBindingsTypes> {
191                original_tuple: &'a Tuple<I>,
192                reply_tuple: &'a Tuple<I>,
193                lifecycle: EstablishmentLifecycle,
194                last_seen: BT::Instant,
195            }
196
197            let mut info: Option<Info<'_, I, BC>> = None;
198
199            let now = bindings_ctx.now();
200            // Find a non-established connection to evict.
201            //
202            // 1. If a connection is expired, immediately choose it.
203            // 2. Otherwise, pick the connection that is "least established".
204            //    - SeenOriginal is less established than SeenReply
205            //    - A connection is less established than another with the same
206            //      establishment lifecycle if it saw a packet less recently.
207            //
208            // If all connections are established, then we can't free any space
209            // and report the error to the caller.
210            for (_, conn) in &guard.table {
211                let state = conn.state.lock();
212                if state.is_expired(now) {
213                    info = Some(Info {
214                        original_tuple: &conn.inner.original_tuple,
215                        reply_tuple: &conn.inner.reply_tuple,
216                        lifecycle: state.establishment_lifecycle,
217                        last_seen: state.last_packet_time,
218                    });
219                    break;
220                }
221
222                match state.establishment_lifecycle {
223                    EstablishmentLifecycle::SeenOriginal | EstablishmentLifecycle::SeenReply => {
224                        match &info {
225                            None => {
226                                info = Some(Info {
227                                    original_tuple: &conn.inner.original_tuple,
228                                    reply_tuple: &conn.inner.reply_tuple,
229                                    lifecycle: state.establishment_lifecycle,
230                                    last_seen: state.last_packet_time,
231                                })
232                            }
233                            Some(existing) => {
234                                if state.establishment_lifecycle < existing.lifecycle
235                                    || (state.establishment_lifecycle == existing.lifecycle
236                                        && state.last_packet_time < existing.last_seen)
237                                {
238                                    info = Some(Info {
239                                        original_tuple: &conn.inner.original_tuple,
240                                        reply_tuple: &conn.inner.reply_tuple,
241                                        lifecycle: state.establishment_lifecycle,
242                                        last_seen: state.last_packet_time,
243                                    })
244                                }
245                            }
246                        }
247                    }
248                    EstablishmentLifecycle::Established => {}
249                }
250            }
251
252            if let Some(Info { original_tuple, reply_tuple, .. }) = info {
253                let original_tuple = original_tuple.clone();
254                let reply_tuple = reply_tuple.clone();
255
256                assert!(guard.table.remove(&original_tuple).is_some());
257                if original_tuple != reply_tuple {
258                    assert!(guard.table.remove(&reply_tuple).is_some());
259                }
260            } else {
261                guard.table_limit_drops = guard.table_limit_drops.saturating_add(1);
262                return Err(FinalizeConnectionError::TableFull);
263            }
264        }
265
266        // The expected case here is that there isn't a conflict.
267        //
268        // Normally, we'd want to use the entry API to reduce the number of map
269        // lookups, but this setup allows us to completely avoid any heap
270        // allocations until we're sure that the insertion will succeed. This
271        // wastes a little CPU in the common case to avoid pathological behavior
272        // in degenerate cases.
273        if guard.table.contains_key(&exclusive.inner.original_tuple)
274            || guard.table.contains_key(&exclusive.inner.reply_tuple)
275        {
276            // NOTE: It's possible for the first two packets (or more) in the
277            // same flow to create ExclusiveConnections. Typically packets for
278            // the same flow are handled sequentically, so each subsequent
279            // packet should see the connection created by the first one.
280            // However, it is possible (e.g. if these two packets arrive on
281            // different interfaces) for them to race.
282            //
283            // In this case, subsequent packets would be reported as conflicts.
284            // To avoid this race condition, we check whether the conflicting
285            // connection in the table is actually the same as the connection
286            // that we are attempting to finalize; if so, we can simply adopt
287            // the already-finalized connection.
288            let conn = if let Some(conn) = guard.table.get(&exclusive.inner.original_tuple) {
289                conn
290            } else {
291                guard
292                    .table
293                    .get(&exclusive.inner.reply_tuple)
294                    .expect("checked that tuple is in table and table is locked")
295            };
296            if conn.compatible_with(&exclusive) {
297                return Ok((false, Some(conn.clone())));
298            }
299
300            // TODO(https://fxbug.dev/372549231): add a counter for this error.
301            Err(FinalizeConnectionError::Conflict)
302        } else {
303            let shared = exclusive.make_shared();
304            let clone = Arc::clone(&shared);
305
306            let res = guard.table.insert(shared.inner.original_tuple.clone(), shared.clone());
307            debug_assert!(res.is_none());
308
309            if shared.inner.reply_tuple != shared.inner.original_tuple {
310                let res = guard.table.insert(shared.inner.reply_tuple.clone(), shared);
311                debug_assert!(res.is_none());
312            }
313
314            // For the most part, this will only schedule the timer once, when
315            // the first packet hits the netstack. However, since the GC timer
316            // is only rescheduled during GC when the table has entries, it's
317            // possible that this will be called again if the table ever becomes
318            // empty.
319            if bindings_ctx.scheduled_instant(&mut guard.gc_timer).is_none() {
320                schedule_gc(bindings_ctx, &mut guard.gc_timer);
321            }
322
323            Ok((true, Some(clone)))
324        }
325    }
326}
327
328impl<I: IpExt, E: Default, BC: FilterBindingsContext> Table<I, E, BC> {
329    /// Returns a [`Connection`] for the packet's flow. If a connection does not
330    /// currently exist, a new one is created.
331    ///
332    /// At the same time, process the packet for the connection, updating
333    /// internal connection state.
334    ///
335    /// After processing is complete, you must call
336    /// [`finalize_connection`](Table::finalize_connection) with this
337    /// connection.
338    pub(crate) fn get_connection_for_packet_and_update<P: IpPacket<I>>(
339        &self,
340        bindings_ctx: &BC,
341        packet: &P,
342    ) -> Result<Option<(Connection<I, E, BC>, ConnectionDirection)>, GetConnectionError<I, E, BC>>
343    {
344        let Some(packet) = PacketMetadata::new(packet) else {
345            return Ok(None);
346        };
347
348        let mut connection = match self.inner.lock().table.get(&packet.tuple) {
349            Some(connection) => Connection::Shared(connection.clone()),
350            None => match ConnectionExclusive::from_deconstructed_packet(bindings_ctx, &packet) {
351                None => return Ok(None),
352                Some(c) => Connection::Exclusive(c),
353            },
354        };
355
356        let direction = connection
357            .direction(&packet.tuple)
358            .expect("tuple must match connection as we just looked up connection by tuple");
359
360        match connection.update(bindings_ctx, &packet, direction) {
361            Ok(ConnectionUpdateAction::NoAction) => Ok(Some((connection, direction))),
362            Ok(ConnectionUpdateAction::RemoveEntry) => match connection {
363                Connection::Exclusive(mut conn) => {
364                    conn.do_not_insert = true;
365                    Ok(Some((Connection::Exclusive(conn), direction)))
366                }
367                Connection::Shared(conn) => {
368                    // RACE: It's possible that GC already removed the
369                    // connection from the table, since we released the table
370                    // lock while updating the connection.
371                    let mut guard = self.inner.lock();
372                    let _ = guard.table.remove(&conn.inner.original_tuple);
373                    let _ = guard.table.remove(&conn.inner.reply_tuple);
374
375                    Ok(Some((Connection::Shared(conn), direction)))
376                }
377            },
378            Err(ConnectionUpdateError::InvalidPacket) => {
379                Err(GetConnectionError::InvalidPacket(connection, direction))
380            }
381        }
382    }
383
384    pub(crate) fn perform_gc(&self, bindings_ctx: &mut BC) {
385        let now = bindings_ctx.now();
386        let mut guard = self.inner.lock();
387
388        // Sadly, we can't easily remove entries from the map in-place for two
389        // reasons:
390        // - HashMap::retain() will look at each connection twice, since it will
391        // be inserted under both tuples. If a packet updates last_packet_time
392        // between these two checks, we might remove one tuple of the connection
393        // but not the other, leaving a single tuple in the table, which breaks
394        // a core invariant.
395        // - You can't modify a std::HashMap while iterating over it.
396        let to_remove: Vec<_> = guard
397            .table
398            .iter()
399            .filter_map(|(tuple, conn)| {
400                if *tuple == conn.inner.original_tuple && conn.is_expired(now) {
401                    Some((conn.inner.original_tuple.clone(), conn.inner.reply_tuple.clone()))
402                } else {
403                    None
404                }
405            })
406            .collect();
407
408        for (original_tuple, reply_tuple) in to_remove {
409            assert!(guard.table.remove(&original_tuple).is_some());
410            if reply_tuple != original_tuple {
411                assert!(guard.table.remove(&reply_tuple).is_some());
412            }
413        }
414
415        // The table is only expected to be empty in exceptional cases, or
416        // during tests. The test case especially important, because some tests
417        // will wait for core to quiesce by waiting for timers to stop firing.
418        // By only rescheduling when there are still entries in the table, we
419        // ensure that we won't enter an infinite timer firing/scheduling loop.
420        if !guard.table.is_empty() {
421            schedule_gc(bindings_ctx, &mut guard.gc_timer);
422        }
423    }
424}
425
426impl<I: IpExt, E: Inspectable, BT: FilterBindingsTypes> Inspectable for Table<I, E, BT> {
427    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
428        let guard = self.inner.lock();
429
430        inspector.record_usize("num_entries", guard.table.len());
431        inspector.record_uint("table_limit_hits", guard.table_limit_hits);
432        inspector.record_uint("table_limit_drops", guard.table_limit_drops);
433
434        inspector.record_child("connections", |inspector| {
435            guard
436                .table
437                .iter()
438                .filter_map(|(tuple, connection)| {
439                    if *tuple == connection.inner.original_tuple {
440                        Some(connection)
441                    } else {
442                        None
443                    }
444                })
445                .for_each(|connection| {
446                    inspector.record_unnamed_child(|inspector| {
447                        inspector.delegate_inspectable(connection.as_ref())
448                    });
449                });
450        });
451    }
452}
453
/// A tuple for a flow in a single direction.
///
/// Tuples are the keys of the conntrack table's map.
#[derive(Debug, Clone, PartialEq, Eq, Hash, GenericOverIp)]
#[generic_over_ip(I, Ip)]
pub struct Tuple<I: IpExt> {
    /// The IP protocol number of the flow.
    pub protocol: TransportProtocol,
    /// The source IP address of the flow.
    pub src_addr: I::Addr,
    /// The destination IP address of the flow.
    pub dst_addr: I::Addr,
    /// The transport-layer source port or ID of the flow.
    pub src_port_or_id: u16,
    /// The transport-layer destination port or ID of the flow.
    pub dst_port_or_id: u16,
}
469
470impl<I: IpExt> Tuple<I> {
471    /// Creates a `Tuple` from an `IpPacket`, if possible.
472    ///
473    /// Returns `None` if the packet doesn't have an inner transport packet.
474    #[cfg(test)]
475    pub(crate) fn from_packet<'a, P: IpPacket<I>>(packet: &'a P) -> Option<Self> {
476        // Subtlety: For ICMP packets, only request/response messages will have
477        // a transport packet defined (and currently only ECHO messages do).
478        // This gets us basic tracking for free, and lets us implicitly ignore
479        // ICMP errors, which are not meant to be tracked.
480        //
481        // If other message types eventually have TransportPacket impls, then
482        // this would lead to confusing different message types that happen to
483        // have the same ID.
484        let transport_packet_data = packet.maybe_transport_packet().transport_packet_data()?;
485        Some(Self::from_packet_and_transport_data(packet, &transport_packet_data))
486    }
487
488    fn from_packet_and_transport_data<'a, P: IpPacket<I>>(
489        packet: &'a P,
490        transport_packet_data: &TransportPacketData,
491    ) -> Self {
492        let protocol = I::map_ip(packet.protocol(), |proto| proto.into(), |proto| proto.into());
493
494        let (src_port, dst_port) = match transport_packet_data {
495            TransportPacketData::Tcp { src_port, dst_port, .. }
496            | TransportPacketData::Generic { src_port, dst_port } => (*src_port, *dst_port),
497        };
498
499        Self {
500            protocol: protocol,
501            src_addr: packet.src_addr(),
502            dst_addr: packet.dst_addr(),
503            src_port_or_id: src_port,
504            dst_port_or_id: dst_port,
505        }
506    }
507
508    /// Returns the inverted version of the tuple.
509    ///
510    /// This means the src and dst addresses are swapped. For TCP and UDP, the
511    /// ports are reversed, but for ICMP, where the ports stand in for other
512    /// information, things are more complicated.
513    pub(crate) fn invert(self) -> Tuple<I> {
514        // TODO(https://fxbug.dev/328064082): Support tracking different ICMP
515        // request/response types.
516        Self {
517            protocol: self.protocol,
518            src_addr: self.dst_addr,
519            dst_addr: self.src_addr,
520            src_port_or_id: self.dst_port_or_id,
521            dst_port_or_id: self.src_port_or_id,
522        }
523    }
524}
525
526impl<I: IpExt> Inspectable for Tuple<I> {
527    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
528        inspector.record_debug("protocol", self.protocol);
529        inspector.record_ip_addr("src_addr", self.src_addr);
530        inspector.record_ip_addr("dst_addr", self.dst_addr);
531        inspector.record_usize("src_port_or_id", self.src_port_or_id);
532        inspector.record_usize("dst_port_or_id", self.dst_port_or_id);
533    }
534}
535
/// The direction of a packet when compared to a given connection.
///
/// Computed by [`Connection::direction`], which compares a tuple against the
/// connection's original and reply tuples.
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum ConnectionDirection {
    /// The packet is traveling in the same direction as the first packet seen
    /// for the [`Connection`].
    Original,

    /// The packet is traveling in the opposite direction from the first packet
    /// seen for the [`Connection`].
    ///
    /// This is also the direction reported when the connection's original and
    /// reply tuples are equal.
    Reply,
}
547
/// An error returned from [`Table::finalize_connection`].
#[derive(Debug)]
pub(crate) enum FinalizeConnectionError {
    /// There is a conflicting connection already tracked by conntrack. The
    /// to-be-finalized connection was not inserted into the table.
    Conflict,

    /// The table has reached the hard size cap and no room could be made.
    ///
    /// Room can only be made by evicting a connection that is expired or not
    /// yet established.
    TableFull,
}
558
/// Type to track additional processing required after updating a connection.
#[derive(Debug, PartialEq, Eq)]
enum ConnectionUpdateAction {
    /// Processing completed and no further action necessary.
    NoAction,

    /// The entry for this connection should be removed from the conntrack table.
    ///
    /// For a connection that is not in the table yet, this means it must not
    /// be inserted when it is finalized.
    RemoveEntry,
}
568
/// An error returned from [`Connection::update`].
///
/// [`Table::get_connection_for_packet_and_update`] surfaces this to its caller
/// as [`GetConnectionError::InvalidPacket`].
#[derive(Debug, PartialEq, Eq)]
enum ConnectionUpdateError {
    /// The packet was invalid. The caller may decide whether to drop this
    /// packet or not.
    InvalidPacket,
}
576
/// An error returned from [`Table::get_connection_for_packet_and_update`].
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"))]
pub(crate) enum GetConnectionError<I: IpExt, E, BT: FilterBindingsTypes> {
    /// The packet was invalid. The caller may decide whether to drop it or not.
    ///
    /// Carries the connection the packet was processed against and the
    /// direction of the packet relative to that connection.
    InvalidPacket(Connection<I, E, BT>, ConnectionDirection),
}
584
/// A `Connection` contains all of the information about a single connection
/// tracked by conntrack.
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"))]
pub enum Connection<I: IpExt, E, BT: FilterBindingsTypes> {
    /// A connection that is directly owned by the packet that originated the
    /// connection and no others. All fields are modifiable.
    ///
    /// It is not in the table yet; [`Table::finalize_connection`] is what
    /// turns it into a shared connection and inserts it.
    Exclusive(ConnectionExclusive<I, E, BT>),

    /// This is an existing connection, and there are possibly many other
    /// packets that are concurrently modifying it.
    Shared(Arc<ConnectionShared<I, E, BT>>),
}
598
/// An error when attempting to retrieve the underlying conntrack entry from a
/// weak handle to it.
///
/// Returned by [`WeakConnection::into_inner`].
#[derive(Debug)]
pub enum WeakConnectionError {
    /// The entry was removed from the table after the weak handle was created.
    ///
    /// Reported when the weak handle can no longer be upgraded.
    EntryRemoved,
    /// The entry does not match the type that is expected (due to an IP version
    /// mismatch, for example).
    ///
    /// Reported when the upgraded handle cannot be downcast to the requested
    /// concrete type.
    InvalidEntry,
}
609
/// A type-erased weak handle to a connection tracking entry.
///
/// We use type erasure here to get rid of the parameterization on IP version;
/// this handle is meant to be able to transit the device layer and at that
/// point things are not parameterized on IP version (IPv4 and IPv6 packets both
/// end up in the same device queue, for example).
///
/// When this is received for incoming packets, [`WeakConnection::into_inner`]
/// can be used to downcast to the expected concrete [`Connection`] type.
///
/// Being a weak reference, holding one of these does not by itself keep the
/// underlying entry alive.
#[derive(Debug, Clone)]
pub struct WeakConnection(pub(crate) Weak<dyn Any + Send + Sync>);
621
622impl WeakConnection {
623    /// Creates a new type-erased weak handle to the provided conntrack entry,
624    /// provided it is a shared entry.
625    pub fn new<I: IpExt, BT: FilterBindingsTypes + 'static, E: Send + Sync + 'static>(
626        conn: &Connection<I, E, BT>,
627    ) -> Option<Self> {
628        let shared = match conn {
629            Connection::Exclusive(_) => return None,
630            Connection::Shared(shared) => shared,
631        };
632        let weak = Arc::downgrade(shared);
633        Some(Self(weak))
634    }
635
636    /// Attempts to upgrade the provided weak handle to the conntrack entry and
637    /// downcast it to the specified concrete [`Connection`] type.
638    ///
639    /// Fails if either the weak handle cannot be upgraded (because the conntrack
640    /// entry has since been removed), or the type-erased handle cannot be downcast
641    /// to the concrete type (because the packet was modified after the creation of
642    /// this handle such that it no longer matches, e.g. the IP version of the
643    /// connection).
644    pub fn into_inner<I: IpExt, BT: FilterBindingsTypes + 'static, E: Send + Sync + 'static>(
645        self,
646    ) -> Result<Connection<I, E, BT>, WeakConnectionError> {
647        let Self(inner) = self;
648        let shared = inner
649            .upgrade()
650            .ok_or(WeakConnectionError::EntryRemoved)?
651            .downcast()
652            .map_err(|_err: Arc<_>| WeakConnectionError::InvalidEntry)?;
653        Ok(Connection::Shared(shared))
654    }
655}
656
657impl<I: IpExt, E, BT: FilterBindingsTypes> Connection<I, E, BT> {
658    /// Returns the tuple of the original direction of this connection.
659    pub fn original_tuple(&self) -> &Tuple<I> {
660        match self {
661            Connection::Exclusive(c) => &c.inner.original_tuple,
662            Connection::Shared(c) => &c.inner.original_tuple,
663        }
664    }
665
666    /// Returns the tuple of the reply direction of this connection.
667    pub(crate) fn reply_tuple(&self) -> &Tuple<I> {
668        match self {
669            Connection::Exclusive(c) => &c.inner.reply_tuple,
670            Connection::Shared(c) => &c.inner.reply_tuple,
671        }
672    }
673
674    /// Returns a reference to the [`Connection::external_data`] field.
675    pub fn external_data(&self) -> &E {
676        match self {
677            Connection::Exclusive(c) => &c.inner.external_data,
678            Connection::Shared(c) => &c.inner.external_data,
679        }
680    }
681
682    /// Returns the direction the tuple represents with respect to the
683    /// connection.
684    pub(crate) fn direction(&self, tuple: &Tuple<I>) -> Option<ConnectionDirection> {
685        let (original, reply) = match self {
686            Connection::Exclusive(c) => (&c.inner.original_tuple, &c.inner.reply_tuple),
687            Connection::Shared(c) => (&c.inner.original_tuple, &c.inner.reply_tuple),
688        };
689
690        // The ordering here is sadly mildly load-bearing. For self-connected
691        // sockets, the first comparison will be true, so having the original
692        // tuple first would mean that the connection is never marked
693        // established.
694        //
695        // This ordering means that all self-connected connections will be
696        // marked as established immediately upon receiving the first packet.
697        if tuple == reply {
698            Some(ConnectionDirection::Reply)
699        } else if tuple == original {
700            Some(ConnectionDirection::Original)
701        } else {
702            None
703        }
704    }
705
706    /// Returns a copy of the internal connection state
707    #[allow(dead_code)]
708    pub(crate) fn state(&self) -> ConnectionState<BT> {
709        match self {
710            Connection::Exclusive(c) => c.state.clone(),
711            Connection::Shared(c) => c.state.lock().clone(),
712        }
713    }
714}
715
716impl<I: IpExt, E, BC: FilterBindingsContext> Connection<I, E, BC> {
717    fn update(
718        &mut self,
719        bindings_ctx: &BC,
720        packet: &PacketMetadata<I>,
721        direction: ConnectionDirection,
722    ) -> Result<ConnectionUpdateAction, ConnectionUpdateError> {
723        let now = bindings_ctx.now();
724
725        match self {
726            Connection::Exclusive(c) => c.state.update(direction, &packet.transport_data, now),
727            Connection::Shared(c) => c.state.lock().update(direction, &packet.transport_data, now),
728        }
729    }
730}
731
/// Fields common to both [`ConnectionExclusive`] and [`ConnectionShared`].
///
/// This is the part of a connection that identifies it (the two tuples) plus
/// the caller-defined external data `E`. `Debug` and `PartialEq` are derived
/// and are only available when `E` implements the corresponding trait.
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"), PartialEq(bound = "E: PartialEq"))]
pub struct ConnectionCommon<I: IpExt, E> {
    /// The 5-tuple for the connection in the original direction. This is
    /// arbitrary, and is just the direction where a packet was first seen.
    pub(crate) original_tuple: Tuple<I>,

    /// The 5-tuple for the connection in the reply direction. This is what's
    /// used for packet rewriting for NAT.
    pub(crate) reply_tuple: Tuple<I>,

    /// Extra information that is not needed by the conntrack module itself. In
    /// the case of NAT, we expect this to contain things such as the kind of
    /// rewriting that will occur (e.g. SNAT vs DNAT).
    pub(crate) external_data: E,
}
749
750impl<I: IpExt, E: Inspectable> Inspectable for ConnectionCommon<I, E> {
751    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
752        inspector.record_child("original_tuple", |inspector| {
753            inspector.delegate_inspectable(&self.original_tuple);
754        });
755
756        inspector.record_child("reply_tuple", |inspector| {
757            inspector.delegate_inspectable(&self.reply_tuple);
758        });
759
760        // We record external_data as an inspectable because that allows us to
761        // prevent accidentally leaking data, which could happen if we just used
762        // the Debug impl.
763        inspector.record_child("external_data", |inspector| {
764            inspector.delegate_inspectable(&self.external_data);
765        });
766    }
767}
768
/// Protocol-specific tracking state stored inside a [`ConnectionState`].
#[derive(Debug, Clone)]
enum ProtocolState {
    /// TCP connections carry dedicated TCP tracking state.
    Tcp(tcp::Connection),
    /// UDP connections carry no protocol-specific state.
    Udp,
    /// Every other protocol; no protocol-specific state is kept.
    Other,
}
775
776impl ProtocolState {
777    fn update(
778        &mut self,
779        dir: ConnectionDirection,
780        transport_data: &TransportPacketData,
781    ) -> Result<ConnectionUpdateAction, ConnectionUpdateError> {
782        match self {
783            ProtocolState::Tcp(tcp_conn) => {
784                let (segment, payload_len) = assert_matches!(
785                    transport_data,
786                    TransportPacketData::Tcp { segment, payload_len, .. } => (segment, payload_len)
787                );
788                tcp_conn.update(&segment, *payload_len, dir)
789            }
790            ProtocolState::Udp | ProtocolState::Other => Ok(ConnectionUpdateAction::NoAction),
791        }
792    }
793}
794
/// The lifecycle of the connection getting to being established.
///
/// To mimic Linux behavior, we require seeing three packets in order to mark a
/// connection established.
/// 1. Original
/// 2. Reply
/// 3. Original
///
/// The first packet is implicit in the creation of the connection when the
/// first packet is seen.
///
/// `PartialOrd`/`Ord` are derived, so the declaration order of the variants
/// below defines their ordering; do not reorder them.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
enum EstablishmentLifecycle {
    /// Only original-direction packets have been seen so far.
    SeenOriginal,
    /// A reply-direction packet has been seen; the next original-direction
    /// packet completes establishment.
    SeenReply,
    /// The connection is established. No packet moves it out of this state.
    Established,
}
811
812impl EstablishmentLifecycle {
813    fn update(self, dir: ConnectionDirection) -> Self {
814        match self {
815            EstablishmentLifecycle::SeenOriginal => match dir {
816                ConnectionDirection::Original => self,
817                ConnectionDirection::Reply => EstablishmentLifecycle::SeenReply,
818            },
819            EstablishmentLifecycle::SeenReply => match dir {
820                ConnectionDirection::Original => EstablishmentLifecycle::Established,
821                ConnectionDirection::Reply => self,
822            },
823            EstablishmentLifecycle::Established => self,
824        }
825    }
826}
827
/// Dynamic per-connection state.
///
/// Unlike the tuples in [`ConnectionCommon`], these fields are rewritten as
/// packets for the connection are processed (see `ConnectionState::update`).
#[derive(Derivative)]
#[derivative(Clone(bound = ""), Debug(bound = ""))]
pub(crate) struct ConnectionState<BT: FilterBindingsTypes> {
    /// The time the last packet was seen for this connection (in either of the
    /// original or reply directions).
    last_packet_time: BT::Instant,

    /// Where in the generic establishment lifecycle the current connection is.
    establishment_lifecycle: EstablishmentLifecycle,

    /// State that is specific to a given protocol (e.g. TCP or UDP).
    protocol_state: ProtocolState,
}
842
843impl<BT: FilterBindingsTypes> ConnectionState<BT> {
844    fn update(
845        &mut self,
846        dir: ConnectionDirection,
847        transport_data: &TransportPacketData,
848        now: BT::Instant,
849    ) -> Result<ConnectionUpdateAction, ConnectionUpdateError> {
850        if self.last_packet_time < now {
851            self.last_packet_time = now;
852        }
853
854        self.establishment_lifecycle = self.establishment_lifecycle.update(dir);
855
856        self.protocol_state.update(dir, transport_data)
857    }
858
859    fn is_expired(&self, now: BT::Instant) -> bool {
860        let duration = now.saturating_duration_since(self.last_packet_time);
861
862        let expiry_duration = match &self.protocol_state {
863            ProtocolState::Tcp(tcp_conn) => tcp_conn.expiry_duration(self.establishment_lifecycle),
864            ProtocolState::Udp => CONNECTION_EXPIRY_TIME_UDP,
865            // ICMP ends up here. The ICMP messages we track are simple
866            // request/response protocols, so we always expect to get a response
867            // quickly (within 2 RTT). Any followup messages (e.g. if making
868            // periodic ECHO requests) should reuse this existing connection.
869            ProtocolState::Other => CONNECTION_EXPIRY_OTHER,
870        };
871
872        duration >= expiry_duration
873    }
874}
875
876impl<BT: FilterBindingsTypes> Inspectable for ConnectionState<BT> {
877    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
878        inspector.record_bool(
879            "established",
880            match self.establishment_lifecycle {
881                EstablishmentLifecycle::SeenOriginal | EstablishmentLifecycle::SeenReply => false,
882                EstablishmentLifecycle::Established => true,
883            },
884        );
885        inspector.record_inspectable_value("last_packet_time", &self.last_packet_time);
886    }
887}
888
/// A conntrack connection with single ownership.
///
/// Because of this, many fields may be updated without synchronization. There
/// is no chance of messing with other packets for this connection or ending up
/// out-of-sync with the table (e.g. by changing the tuples once the connection
/// has been inserted).
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"))]
pub struct ConnectionExclusive<I: IpExt, E, BT: FilterBindingsTypes> {
    /// The tuples and external data for this connection.
    pub(crate) inner: ConnectionCommon<I, E>,
    /// The dynamic state, held directly (no lock) while exclusively owned.
    pub(crate) state: ConnectionState<BT>,

    /// When true, do not insert the connection into the conntrack table.
    ///
    /// This allows the stack to still operate against the connection (e.g. for
    /// NAT), while guaranteeing that it won't make it into the table.
    do_not_insert: bool,
}
907
908impl<I: IpExt, E, BT: FilterBindingsTypes> ConnectionExclusive<I, E, BT> {
909    /// Turn this exclusive connection into a shared one. This is required in
910    /// order to insert into the [`Table`] table.
911    fn make_shared(self) -> Arc<ConnectionShared<I, E, BT>> {
912        Arc::new(ConnectionShared { inner: self.inner, state: Mutex::new(self.state) })
913    }
914
915    pub(crate) fn reply_tuple(&self) -> &Tuple<I> {
916        &self.inner.reply_tuple
917    }
918
919    pub(crate) fn rewrite_reply_dst_addr(&mut self, addr: I::Addr) {
920        self.inner.reply_tuple.dst_addr = addr;
921    }
922
923    pub(crate) fn rewrite_reply_src_addr(&mut self, addr: I::Addr) {
924        self.inner.reply_tuple.src_addr = addr;
925    }
926
927    pub(crate) fn rewrite_reply_src_port_or_id(&mut self, port_or_id: u16) {
928        self.inner.reply_tuple.src_port_or_id = port_or_id;
929        match self.inner.reply_tuple.protocol {
930            TransportProtocol::Icmp => {
931                // ICMP uses a single ID and conntrack keeps track of it in both
932                // ID fields. This makes it easier to keep a single logic to
933                // flip the direction. Hence we need to update the rest of the
934                // tuple.
935                //
936                // TODO(https://fxbug.dev/328064082): Probably needs revisiting
937                // as part of better support for ICMP request/response.
938                self.inner.reply_tuple.dst_port_or_id = port_or_id;
939            }
940            TransportProtocol::Tcp | TransportProtocol::Udp | TransportProtocol::Other(_) => {}
941        }
942    }
943
944    pub(crate) fn rewrite_reply_dst_port_or_id(&mut self, port_or_id: u16) {
945        self.inner.reply_tuple.dst_port_or_id = port_or_id;
946        match self.inner.reply_tuple.protocol {
947            TransportProtocol::Icmp => {
948                // ICMP uses a single ID and conntrack keeps track of it in both
949                // ID fields. This makes it easier to keep a single logic to
950                // flip the direction. Hence we need to update the rest of the
951                // tuple.
952                //
953                // TODO(https://fxbug.dev/328064082): Probably needs revisiting
954                // as part of better support for ICMP request/response.
955                self.inner.reply_tuple.src_port_or_id = port_or_id;
956            }
957            TransportProtocol::Tcp | TransportProtocol::Udp | TransportProtocol::Other(_) => {}
958        }
959    }
960}
961
962impl<I: IpExt, E: Default, BC: FilterBindingsContext> ConnectionExclusive<I, E, BC> {
963    pub(crate) fn from_deconstructed_packet(
964        bindings_ctx: &BC,
965        PacketMetadata { tuple, transport_data }: &PacketMetadata<I>,
966    ) -> Option<Self> {
967        let reply_tuple = tuple.clone().invert();
968        let self_connected = reply_tuple == *tuple;
969
970        Some(Self {
971            inner: ConnectionCommon {
972                original_tuple: tuple.clone(),
973                reply_tuple,
974                external_data: E::default(),
975            },
976            state: ConnectionState {
977                last_packet_time: bindings_ctx.now(),
978                establishment_lifecycle: EstablishmentLifecycle::SeenOriginal,
979                protocol_state: match tuple.protocol {
980                    TransportProtocol::Tcp => {
981                        let (segment, payload_len) = transport_data
982                            .tcp_segment_and_len()
983                            .expect("protocol was TCP, so transport data should have TCP info");
984
985                        ProtocolState::Tcp(tcp::Connection::new(
986                            segment,
987                            payload_len,
988                            self_connected,
989                        )?)
990                    }
991                    TransportProtocol::Udp => ProtocolState::Udp,
992                    TransportProtocol::Icmp | TransportProtocol::Other(_) => ProtocolState::Other,
993                },
994            },
995            do_not_insert: false,
996        })
997    }
998}
999
/// A conntrack connection with shared ownership.
///
/// All fields are private, because other packets, and the conntrack table
/// itself, will be depending on them not to change. Fields must be accessed
/// through the associated methods.
#[derive(Derivative)]
#[derivative(Debug(bound = "E: Debug"))]
pub struct ConnectionShared<I: IpExt, E, BT: FilterBindingsTypes> {
    /// The tuples and external data for this connection.
    inner: ConnectionCommon<I, E>,
    /// The dynamic state, guarded by a mutex.
    state: Mutex<ConnectionState<BT>>,
}
1011
/// The IP-agnostic transport protocol of a packet.
///
/// The `From` conversions in this module map both the IPv4 and the IPv6 ICMP
/// protocol values onto the single [`TransportProtocol::Icmp`] variant.
#[allow(missing_docs)]
#[derive(Copy, Clone, PartialEq, Eq, Hash, GenericOverIp)]
#[generic_over_ip()]
pub enum TransportProtocol {
    Tcp,
    Udp,
    Icmp,
    // Any other protocol, carrying the `u8` obtained by converting the
    // original protocol value.
    Other(u8),
}
1022
1023impl From<Ipv4Proto> for TransportProtocol {
1024    fn from(value: Ipv4Proto) -> Self {
1025        match value {
1026            Ipv4Proto::Proto(IpProto::Tcp) => TransportProtocol::Tcp,
1027            Ipv4Proto::Proto(IpProto::Udp) => TransportProtocol::Udp,
1028            Ipv4Proto::Icmp => TransportProtocol::Icmp,
1029            v => TransportProtocol::Other(v.into()),
1030        }
1031    }
1032}
1033
1034impl From<Ipv6Proto> for TransportProtocol {
1035    fn from(value: Ipv6Proto) -> Self {
1036        match value {
1037            Ipv6Proto::Proto(IpProto::Tcp) => TransportProtocol::Tcp,
1038            Ipv6Proto::Proto(IpProto::Udp) => TransportProtocol::Udp,
1039            Ipv6Proto::Icmpv6 => TransportProtocol::Icmp,
1040            v => TransportProtocol::Other(v.into()),
1041        }
1042    }
1043}
1044
1045impl From<IpProto> for TransportProtocol {
1046    fn from(value: IpProto) -> Self {
1047        match value {
1048            IpProto::Tcp => TransportProtocol::Tcp,
1049            IpProto::Udp => TransportProtocol::Udp,
1050            v @ IpProto::Reserved => TransportProtocol::Other(v.into()),
1051        }
1052    }
1053}
1054
1055impl Display for TransportProtocol {
1056    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1057        match self {
1058            TransportProtocol::Tcp => write!(f, "TCP"),
1059            TransportProtocol::Udp => write!(f, "UDP"),
1060            TransportProtocol::Icmp => write!(f, "ICMP"),
1061            TransportProtocol::Other(n) => write!(f, "Other({n})"),
1062        }
1063    }
1064}
1065
1066impl Debug for TransportProtocol {
1067    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1068        Display::fmt(&self, f)
1069    }
1070}
1071
1072impl<I: IpExt, E, BT: FilterBindingsTypes> ConnectionShared<I, E, BT> {
1073    fn is_expired(&self, now: BT::Instant) -> bool {
1074        self.state.lock().is_expired(now)
1075    }
1076}
1077
1078impl<I: IpExt, E: CompatibleWith, BT: FilterBindingsTypes> ConnectionShared<I, E, BT> {
1079    /// Returns whether the provided exclusive connection is compatible with this
1080    /// one, to the extent that a shared reference to this tracked connection could
1081    /// be adopted in place of the exclusive connection.
1082    pub(crate) fn compatible_with(&self, conn: &ConnectionExclusive<I, E, BT>) -> bool {
1083        self.inner.original_tuple == conn.inner.original_tuple
1084            && self.inner.reply_tuple == conn.inner.reply_tuple
1085            && self.inner.external_data.compatible_with(&conn.inner.external_data)
1086    }
1087}
1088
1089impl<I: IpExt, E: Inspectable, BT: FilterBindingsTypes> Inspectable for ConnectionShared<I, E, BT> {
1090    fn record<Inspector: netstack3_base::Inspector>(&self, inspector: &mut Inspector) {
1091        inspector.delegate_inspectable(&self.inner);
1092        inspector.delegate_inspectable(&*self.state.lock());
1093    }
1094}
1095
/// Allows a caller to check whether a given connection tracking entry (or some
/// configuration owned by that entry) is compatible with another.
///
/// `ConnectionShared::compatible_with` uses this on the external data of two
/// connections once it has established that their tuples are equal.
pub trait CompatibleWith {
    /// Returns whether the provided entity is compatible with this entity in the
    /// context of connection tracking.
    fn compatible_with(&self, other: &Self) -> bool;
}
1103
/// The relevant fields extracted from a packet's IP and transport headers,
/// gathered up front so that the incoming `IpPacket` only has to be touched
/// once.
///
/// Also acts as a witness type that the tuple and transport data have the
/// same transport protocol; `PacketMetadata::new` asserts this.
pub(crate) struct PacketMetadata<I: IpExt> {
    // The connection tuple derived from the packet.
    tuple: Tuple<I>,
    // The transport-layer data extracted from the packet.
    transport_data: TransportPacketData,
}
1112
1113impl<I: IpExt> PacketMetadata<I> {
1114    pub(crate) fn new<P: IpPacket<I>>(packet: &P) -> Option<Self> {
1115        let transport_packet_data = packet.maybe_transport_packet().transport_packet_data()?;
1116
1117        let tuple = Tuple::from_packet_and_transport_data(packet, &transport_packet_data);
1118
1119        match tuple.protocol {
1120            TransportProtocol::Tcp => {
1121                assert_matches!(transport_packet_data, TransportPacketData::Tcp { .. })
1122            }
1123            TransportProtocol::Udp | TransportProtocol::Icmp | TransportProtocol::Other(_) => {
1124                assert_matches!(transport_packet_data, TransportPacketData::Generic { .. })
1125            }
1126        }
1127
1128        Some(Self { tuple, transport_data: transport_packet_data })
1129    }
1130}
1131
#[cfg(test)]
pub(crate) mod testutils {
    use crate::packets::testutil::internal::{FakeIpPacket, FakeUdpPacket, TestIpExt};

    /// Create a pair of UDP packets that are inverses of one another. Uses `index` to create
    /// packets that are unique.
    ///
    /// The first packet goes from `I::SRC_IP` to `I::DST_IP`; the second is
    /// the same flow in the opposite direction (addresses and ports swapped).
    pub(crate) fn make_test_udp_packets<I: TestIpExt>(
        index: u32,
    ) -> (FakeIpPacket<I, FakeUdpPacket>, FakeIpPacket<I, FakeUdpPacket>) {
        // Use the low 16 bits of `index` as the source port and the high 16
        // bits as the destination port. This is a bijection between `u32`
        // and port pairs, so every index yields a distinct connection.
        //
        // Splitting on `u16::MAX` (65535) instead would not be: the quotient
        // reaches 65536 for indices >= 65535 * 65536 and would wrap back to 0
        // when narrowed to `u16`, colliding with small indices.
        let src_port = (index & u32::from(u16::MAX)) as u16;
        let dst_port = (index >> u16::BITS) as u16;

        let packet = FakeIpPacket::<I, _> {
            src_ip: I::SRC_IP,
            dst_ip: I::DST_IP,
            body: FakeUdpPacket { src_port, dst_port },
        };
        let reply_packet = FakeIpPacket::<I, _> {
            src_ip: I::DST_IP,
            dst_ip: I::SRC_IP,
            body: FakeUdpPacket { src_port: dst_port, dst_port: src_port },
        };

        (packet, reply_packet)
    }
}
1160
1161#[cfg(test)]
1162mod tests {
1163    use core::convert::Infallible as Never;
1164
1165    use assert_matches::assert_matches;
1166    use ip_test_macro::ip_test;
1167    use netstack3_base::testutil::FakeTimerCtxExt;
1168    use netstack3_base::{Control, IntoCoreTimerCtx, SegmentHeader, SeqNum, UnscaledWindowSize};
1169    use packet_formats::ip::IpProto;
1170    use test_case::test_case;
1171
1172    use super::testutils::make_test_udp_packets;
1173    use super::*;
1174    use crate::context::testutil::{FakeBindingsCtx, FakeCtx};
1175    use crate::packets::testutil::internal::{
1176        ArbitraryValue, FakeIpPacket, FakeTcpSegment, FakeUdpPacket, TransportPacketExt,
1177    };
1178    use crate::packets::MaybeTransportPacketMut;
1179    use crate::state::IpRoutines;
1180    use crate::testutil::TestIpExt;
1181
    // A packet body that never yields any transport-layer data, used to
    // exercise the "no transport packet" paths.
    struct NoTransportPacket;

    impl MaybeTransportPacket for &NoTransportPacket {
        // There is never any transport data to report.
        fn transport_packet_data(&self) -> Option<TransportPacketData> {
            None
        }
    }

    impl<I: IpExt> TransportPacketExt<I> for &NoTransportPacket {
        // Claims to be TCP at the IP layer even though no transport data is
        // available.
        fn proto() -> I::Proto {
            I::Proto::from(IpProto::Tcp)
        }
    }

    impl<I: IpExt> MaybeTransportPacketMut<I> for NoTransportPacket {
        // `Never` is uninhabited: no mutable transport packet can exist.
        type TransportPacketMut<'a> = Never;

        fn transport_packet_mut(&mut self) -> Option<Self::TransportPacketMut<'_>> {
            None
        }
    }
1203
    // Tests that use `()` as external data have nothing to compare, so any
    // two values are compatible.
    impl CompatibleWith for () {
        fn compatible_with(&self, (): &()) -> bool {
            true
        }
    }
1209
    // Exhaustively checks `EstablishmentLifecycle::update`: each case is
    // (starting stage, packet direction) => expected resulting stage.
    #[test_case(
        EstablishmentLifecycle::SeenOriginal,
        ConnectionDirection::Original
          => EstablishmentLifecycle::SeenOriginal
    )]
    #[test_case(
        EstablishmentLifecycle::SeenOriginal,
        ConnectionDirection::Reply
          => EstablishmentLifecycle::SeenReply
    )]
    #[test_case(
        EstablishmentLifecycle::SeenReply,
        ConnectionDirection::Original
          => EstablishmentLifecycle::Established
    )]
    #[test_case(
        EstablishmentLifecycle::SeenReply,
        ConnectionDirection::Reply
          => EstablishmentLifecycle::SeenReply
    )]
    #[test_case(
        EstablishmentLifecycle::Established,
        ConnectionDirection::Original
          => EstablishmentLifecycle::Established
    )]
    #[test_case(
        EstablishmentLifecycle::Established,
        ConnectionDirection::Reply
          => EstablishmentLifecycle::Established
    )]
    fn establishment_lifecycle_test(
        lifecycle: EstablishmentLifecycle,
        dir: ConnectionDirection,
    ) -> EstablishmentLifecycle {
        lifecycle.update(dir)
    }
1246
1247    #[ip_test(I)]
1248    #[test_case(TransportProtocol::Udp)]
1249    #[test_case(TransportProtocol::Tcp)]
1250    fn tuple_invert_udp_tcp<I: IpExt + TestIpExt>(protocol: TransportProtocol) {
1251        let orig_tuple = Tuple::<I> {
1252            protocol: protocol,
1253            src_addr: I::SRC_IP,
1254            dst_addr: I::DST_IP,
1255            src_port_or_id: I::SRC_PORT,
1256            dst_port_or_id: I::DST_PORT,
1257        };
1258
1259        let expected = Tuple::<I> {
1260            protocol: protocol,
1261            src_addr: I::DST_IP,
1262            dst_addr: I::SRC_IP,
1263            src_port_or_id: I::DST_PORT,
1264            dst_port_or_id: I::SRC_PORT,
1265        };
1266
1267        let inverted = orig_tuple.invert();
1268
1269        assert_eq!(inverted, expected);
1270    }
1271
1272    #[ip_test(I)]
1273    fn tuple_from_tcp_packet<I: IpExt + TestIpExt>() {
1274        let expected = Tuple::<I> {
1275            protocol: TransportProtocol::Tcp,
1276            src_addr: I::SRC_IP,
1277            dst_addr: I::DST_IP,
1278            src_port_or_id: I::SRC_PORT,
1279            dst_port_or_id: I::DST_PORT,
1280        };
1281
1282        let packet = FakeIpPacket::<I, _> {
1283            src_ip: I::SRC_IP,
1284            dst_ip: I::DST_IP,
1285            body: FakeTcpSegment {
1286                src_port: I::SRC_PORT,
1287                dst_port: I::DST_PORT,
1288                segment: SegmentHeader::arbitrary_value(),
1289                payload_len: 4,
1290            },
1291        };
1292
1293        let tuple = Tuple::from_packet(&packet).expect("valid TCP packet should return a tuple");
1294        assert_eq!(tuple, expected);
1295    }
1296
1297    #[ip_test(I)]
1298    fn tuple_from_packet_no_body<I: IpExt + TestIpExt>() {
1299        let packet = FakeIpPacket::<I, NoTransportPacket> {
1300            src_ip: I::SRC_IP,
1301            dst_ip: I::DST_IP,
1302            body: NoTransportPacket {},
1303        };
1304
1305        let tuple = Tuple::from_packet(&packet);
1306        assert_matches!(tuple, None);
1307    }
1308
1309    #[ip_test(I)]
1310    fn connection_from_tuple<I: IpExt + TestIpExt>() {
1311        let bindings_ctx = FakeBindingsCtx::<I>::new();
1312
1313        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1314            src_ip: I::SRC_IP,
1315            dst_ip: I::DST_IP,
1316            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1317        })
1318        .unwrap();
1319        let original_tuple = packet.tuple.clone();
1320        let reply_tuple = original_tuple.clone().invert();
1321
1322        let connection =
1323            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(&bindings_ctx, &packet)
1324                .unwrap();
1325
1326        assert_eq!(&connection.inner.original_tuple, &original_tuple);
1327        assert_eq!(&connection.inner.reply_tuple, &reply_tuple);
1328    }
1329
1330    #[ip_test(I)]
1331    fn connection_make_shared_has_same_underlying_info<I: IpExt + TestIpExt>() {
1332        let bindings_ctx = FakeBindingsCtx::<I>::new();
1333
1334        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1335            src_ip: I::SRC_IP,
1336            dst_ip: I::DST_IP,
1337            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1338        })
1339        .unwrap();
1340        let original_tuple = packet.tuple.clone();
1341        let reply_tuple = original_tuple.clone().invert();
1342
1343        let mut connection =
1344            ConnectionExclusive::from_deconstructed_packet(&bindings_ctx, &packet).unwrap();
1345        connection.inner.external_data = 1234;
1346        let shared = connection.make_shared();
1347
1348        assert_eq!(shared.inner.original_tuple, original_tuple);
1349        assert_eq!(shared.inner.reply_tuple, reply_tuple);
1350        assert_eq!(shared.inner.external_data, 1234);
1351    }
1352
    // Selects which `Connection` variant a parameterized test should run
    // against.
    enum ConnectionKind {
        // Exercise `Connection::Exclusive`.
        Exclusive,
        // Exercise `Connection::Shared` (built via `make_shared`).
        Shared,
    }
1357
1358    #[ip_test(I)]
1359    #[test_case(ConnectionKind::Exclusive)]
1360    #[test_case(ConnectionKind::Shared)]
1361    fn connection_getters<I: IpExt + TestIpExt>(connection_kind: ConnectionKind) {
1362        let bindings_ctx = FakeBindingsCtx::<I>::new();
1363
1364        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1365            src_ip: I::SRC_IP,
1366            dst_ip: I::DST_IP,
1367            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1368        })
1369        .unwrap();
1370        let original_tuple = packet.tuple.clone();
1371        let reply_tuple = original_tuple.clone().invert();
1372
1373        let mut connection =
1374            ConnectionExclusive::from_deconstructed_packet(&bindings_ctx, &packet).unwrap();
1375        connection.inner.external_data = 1234;
1376
1377        let connection = match connection_kind {
1378            ConnectionKind::Exclusive => Connection::Exclusive(connection),
1379            ConnectionKind::Shared => Connection::Shared(connection.make_shared()),
1380        };
1381
1382        assert_eq!(connection.original_tuple(), &original_tuple);
1383        assert_eq!(connection.reply_tuple(), &reply_tuple);
1384        assert_eq!(connection.external_data(), &1234);
1385    }
1386
1387    #[ip_test(I)]
1388    #[test_case(ConnectionKind::Exclusive)]
1389    #[test_case(ConnectionKind::Shared)]
1390    fn connection_direction<I: IpExt + TestIpExt>(connection_kind: ConnectionKind) {
1391        let bindings_ctx = FakeBindingsCtx::<I>::new();
1392
1393        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1394            src_ip: I::SRC_IP,
1395            dst_ip: I::DST_IP,
1396            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1397        })
1398        .unwrap();
1399        let original_tuple = packet.tuple.clone();
1400        let reply_tuple = original_tuple.clone().invert();
1401
1402        let mut other_tuple = original_tuple.clone();
1403        other_tuple.src_port_or_id += 1;
1404
1405        let connection: ConnectionExclusive<_, (), _> =
1406            ConnectionExclusive::from_deconstructed_packet(&bindings_ctx, &packet).unwrap();
1407        let connection = match connection_kind {
1408            ConnectionKind::Exclusive => Connection::Exclusive(connection),
1409            ConnectionKind::Shared => Connection::Shared(connection.make_shared()),
1410        };
1411
1412        assert_matches!(connection.direction(&original_tuple), Some(ConnectionDirection::Original));
1413        assert_matches!(connection.direction(&reply_tuple), Some(ConnectionDirection::Reply));
1414        assert_matches!(connection.direction(&other_tuple), None);
1415    }
1416
    // Checks that `Connection::update` advances both the establishment
    // lifecycle and the last-packet timestamp, for either connection variant.
    #[ip_test(I)]
    #[test_case(ConnectionKind::Exclusive)]
    #[test_case(ConnectionKind::Shared)]
    fn connection_update<I: IpExt + TestIpExt>(connection_kind: ConnectionKind) {
        let mut bindings_ctx = FakeBindingsCtx::<I>::new();
        // Move the fake clock off zero so timestamps are distinguishable.
        bindings_ctx.sleep(Duration::from_secs(1));

        let packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
            src_ip: I::SRC_IP,
            dst_ip: I::DST_IP,
            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
        })
        .unwrap();

        let reply_packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
            src_ip: I::DST_IP,
            dst_ip: I::SRC_IP,
            body: FakeUdpPacket { src_port: I::DST_PORT, dst_port: I::SRC_PORT },
        })
        .unwrap();

        let connection =
            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(&bindings_ctx, &packet)
                .unwrap();
        let mut connection = match connection_kind {
            ConnectionKind::Exclusive => Connection::Exclusive(connection),
            ConnectionKind::Shared => Connection::Shared(connection.make_shared()),
        };

        // Another packet in the original direction makes no lifecycle
        // progress.
        assert_matches!(
            connection.update(&bindings_ctx, &packet, ConnectionDirection::Original),
            Ok(ConnectionUpdateAction::NoAction)
        );
        let state = connection.state();
        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenOriginal);
        assert_eq!(state.last_packet_time.offset, Duration::from_secs(1));

        // A packet in the reply direction should advance the lifecycle to
        // `SeenReply` (not yet `Established`) and update the last packet
        // time.
        bindings_ctx.sleep(Duration::from_secs(1));
        assert_matches!(
            connection.update(&bindings_ctx, &reply_packet, ConnectionDirection::Reply),
            Ok(ConnectionUpdateAction::NoAction)
        );
        let state = connection.state();
        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenReply);
        assert_eq!(state.last_packet_time.offset, Duration::from_secs(2));
    }
1465
1466    #[ip_test(I)]
1467    fn table_get_exclusive_connection_and_finalize_shared<I: IpExt + TestIpExt>() {
1468        let mut bindings_ctx = FakeBindingsCtx::new();
1469        bindings_ctx.sleep(Duration::from_secs(1));
1470        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1471
1472        let packet = FakeIpPacket::<I, _> {
1473            src_ip: I::SRC_IP,
1474            dst_ip: I::DST_IP,
1475            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1476        };
1477
1478        let reply_packet = FakeIpPacket::<I, _> {
1479            src_ip: I::DST_IP,
1480            dst_ip: I::SRC_IP,
1481            body: FakeUdpPacket { src_port: I::DST_PORT, dst_port: I::SRC_PORT },
1482        };
1483
1484        let original_tuple = Tuple::from_packet(&packet).expect("packet should be valid");
1485        let reply_tuple = Tuple::from_packet(&reply_packet).expect("packet should be valid");
1486
1487        let (conn, dir) = table
1488            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1489            .expect("packet should be valid")
1490            .expect("connection should be present");
1491        let state = conn.state();
1492        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenOriginal);
1493        assert_eq!(state.last_packet_time.offset, Duration::from_secs(1));
1494
1495        // Since the connection isn't present in the map, we should get a
1496        // freshly-allocated exclusive connection and the map should not have
1497        // been touched.
1498        assert_matches!(conn, Connection::Exclusive(_));
1499        assert_eq!(dir, ConnectionDirection::Original);
1500        assert!(!table.contains_tuple(&original_tuple));
1501        assert!(!table.contains_tuple(&reply_tuple));
1502
1503        // Once we finalize the connection, it should be present in the map.
1504        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((true, Some(_))));
1505        assert!(table.contains_tuple(&original_tuple));
1506        assert!(table.contains_tuple(&reply_tuple));
1507
1508        // We should now get a shared connection back for packets in either
1509        // direction now that the connection is present in the table.
1510        bindings_ctx.sleep(Duration::from_secs(1));
1511        let (conn, dir) = table
1512            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1513            .expect("packet should be valid")
1514            .expect("connection should be present");
1515        assert_eq!(dir, ConnectionDirection::Original);
1516        let state = conn.state();
1517        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenOriginal);
1518        assert_eq!(state.last_packet_time.offset, Duration::from_secs(2));
1519        let conn = assert_matches!(conn, Connection::Shared(conn) => conn);
1520
1521        bindings_ctx.sleep(Duration::from_secs(1));
1522        let (reply_conn, dir) = table
1523            .get_connection_for_packet_and_update(&bindings_ctx, &reply_packet)
1524            .expect("packet should be valid")
1525            .expect("connection should be present");
1526        assert_eq!(dir, ConnectionDirection::Reply);
1527        let state = reply_conn.state();
1528        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenReply);
1529        assert_eq!(state.last_packet_time.offset, Duration::from_secs(3));
1530        let reply_conn = assert_matches!(reply_conn, Connection::Shared(conn) => conn);
1531
1532        // We should be getting the same connection in both directions.
1533        assert!(Arc::ptr_eq(&conn, &reply_conn));
1534
1535        // Inserting the connection a second time shouldn't change the map.
1536        let (conn, _dir) = table
1537            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1538            .expect("packet should be valid")
1539            .unwrap();
1540        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((false, Some(_))));
1541        assert!(table.contains_tuple(&original_tuple));
1542        assert!(table.contains_tuple(&reply_tuple));
1543    }
1544
1545    #[ip_test(I)]
1546    fn table_conflict<I: IpExt + TestIpExt>() {
1547        let mut bindings_ctx = FakeBindingsCtx::new();
1548        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1549
1550        let original_packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1551            src_ip: I::SRC_IP,
1552            dst_ip: I::DST_IP,
1553            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1554        })
1555        .unwrap();
1556
1557        let nated_original_packet = PacketMetadata::new(&FakeIpPacket::<I, _> {
1558            src_ip: I::SRC_IP,
1559            dst_ip: I::DST_IP,
1560            body: FakeUdpPacket { src_port: I::SRC_PORT + 1, dst_port: I::DST_PORT + 1 },
1561        })
1562        .unwrap();
1563
1564        let conn1 = Connection::Exclusive(
1565            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1566                &bindings_ctx,
1567                &original_packet,
1568            )
1569            .unwrap(),
1570        );
1571
1572        // Fake NAT that ends up allocating the same reply tuple as an existing
1573        // connection.
1574        let mut conn2 = ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1575            &bindings_ctx,
1576            &original_packet,
1577        )
1578        .unwrap();
1579        conn2.inner.original_tuple = nated_original_packet.tuple.clone();
1580        let conn2 = Connection::Exclusive(conn2);
1581
1582        // Fake NAT that ends up allocating the same original tuple as an
1583        // existing connection.
1584        let mut conn3 = ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1585            &bindings_ctx,
1586            &original_packet,
1587        )
1588        .unwrap();
1589        conn3.inner.reply_tuple = nated_original_packet.tuple.clone().invert();
1590        let conn3 = Connection::Exclusive(conn3);
1591
1592        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn1), Ok((true, Some(_))));
1593        assert_matches!(
1594            table.finalize_connection(&mut bindings_ctx, conn2),
1595            Err(FinalizeConnectionError::Conflict)
1596        );
1597        assert_matches!(
1598            table.finalize_connection(&mut bindings_ctx, conn3),
1599            Err(FinalizeConnectionError::Conflict)
1600        );
1601    }
1602
1603    #[ip_test(I)]
1604    fn table_conflict_identical_connection<
1605        I: IpExt + crate::packets::testutil::internal::TestIpExt,
1606    >() {
1607        let mut bindings_ctx = FakeBindingsCtx::new();
1608        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1609
1610        let original_packet =
1611            PacketMetadata::new(&FakeIpPacket::<I, FakeUdpPacket>::arbitrary_value()).unwrap();
1612
1613        // Simulate a race where two packets in the same flow both end up
1614        // creating identical exclusive connections.
1615
1616        let conn = Connection::Exclusive(
1617            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1618                &bindings_ctx,
1619                &original_packet,
1620            )
1621            .unwrap(),
1622        );
1623        let finalized = assert_matches!(
1624            table.finalize_connection(&mut bindings_ctx, conn),
1625            Ok((true, Some(conn))) => conn
1626        );
1627
1628        let conn = Connection::Exclusive(
1629            ConnectionExclusive::<_, (), _>::from_deconstructed_packet(
1630                &bindings_ctx,
1631                &original_packet,
1632            )
1633            .unwrap(),
1634        );
1635        let conn = assert_matches!(
1636            table.finalize_connection(&mut bindings_ctx, conn),
1637            Ok((false, Some(conn))) => conn
1638        );
1639        assert!(Arc::ptr_eq(&finalized, &conn));
1640    }
1641
/// Selects how a test kicks off a garbage-collection pass.
#[derive(Clone, Copy)]
enum GcTrigger {
    /// Invoke [`perform_gc`] directly, bypassing all timer logic.
    Direct,
    /// Fire a timer expiry, which calls into [`perform_gc`] indirectly.
    Timer,
}
1649
1650    #[ip_test(I)]
1651    #[test_case(GcTrigger::Direct)]
1652    #[test_case(GcTrigger::Timer)]
1653    fn garbage_collection<I: TestIpExt>(gc_trigger: GcTrigger) {
1654        fn perform_gc<I: TestIpExt>(
1655            core_ctx: &mut FakeCtx<I>,
1656            bindings_ctx: &mut FakeBindingsCtx<I>,
1657            gc_trigger: GcTrigger,
1658        ) {
1659            match gc_trigger {
1660                GcTrigger::Direct => core_ctx.conntrack().perform_gc(bindings_ctx),
1661                GcTrigger::Timer => {
1662                    for timer in bindings_ctx
1663                        .trigger_timers_until_instant(bindings_ctx.timer_ctx.instant.time, core_ctx)
1664                    {
1665                        assert_matches!(timer, FilterTimerId::ConntrackGc(_));
1666                    }
1667                }
1668            }
1669        }
1670
1671        let mut bindings_ctx = FakeBindingsCtx::new();
1672        let mut core_ctx = FakeCtx::with_ip_routines(&mut bindings_ctx, IpRoutines::default());
1673
1674        let first_packet = FakeIpPacket::<I, _> {
1675            src_ip: I::SRC_IP,
1676            dst_ip: I::DST_IP,
1677            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
1678        };
1679
1680        let second_packet = FakeIpPacket::<I, _> {
1681            src_ip: I::SRC_IP,
1682            dst_ip: I::DST_IP,
1683            body: FakeUdpPacket { src_port: I::SRC_PORT + 1, dst_port: I::DST_PORT },
1684        };
1685        let second_packet_reply = FakeIpPacket::<I, _> {
1686            src_ip: I::DST_IP,
1687            dst_ip: I::SRC_IP,
1688            body: FakeUdpPacket { src_port: I::DST_PORT, dst_port: I::SRC_PORT + 1 },
1689        };
1690
1691        let first_tuple = Tuple::from_packet(&first_packet).expect("packet should be valid");
1692        let first_tuple_reply = first_tuple.clone().invert();
1693        let second_tuple = Tuple::from_packet(&second_packet).expect("packet should be valid");
1694        let second_tuple_reply =
1695            Tuple::from_packet(&second_packet_reply).expect("packet should be valid");
1696
1697        // T=0: Packets for two connections come in.
1698        let (conn, _dir) = core_ctx
1699            .conntrack()
1700            .get_connection_for_packet_and_update(&bindings_ctx, &first_packet)
1701            .expect("packet should be valid")
1702            .expect("packet should be trackable");
1703        assert_matches!(
1704            core_ctx
1705                .conntrack()
1706                .finalize_connection(&mut bindings_ctx, conn)
1707                .expect("connection finalize should succeed"),
1708            (true, Some(_))
1709        );
1710        let (conn, _dir) = core_ctx
1711            .conntrack()
1712            .get_connection_for_packet_and_update(&bindings_ctx, &second_packet)
1713            .expect("packet should be valid")
1714            .expect("packet should be trackable");
1715        assert_matches!(
1716            core_ctx
1717                .conntrack()
1718                .finalize_connection(&mut bindings_ctx, conn)
1719                .expect("connection finalize should succeed"),
1720            (true, Some(_))
1721        );
1722        assert!(core_ctx.conntrack().contains_tuple(&first_tuple));
1723        assert!(core_ctx.conntrack().contains_tuple(&second_tuple));
1724        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 4);
1725
1726        // T=GC_INTERVAL: Triggering a GC does not clean up any connections,
1727        // because no connections are stale yet.
1728        bindings_ctx.sleep(GC_INTERVAL);
1729        perform_gc(&mut core_ctx, &mut bindings_ctx, gc_trigger);
1730        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), true);
1731        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), true);
1732        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), true);
1733        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), true);
1734        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 4);
1735
1736        // T=GC_INTERVAL a packet for just the second connection comes in.
1737        let (conn, _dir) = core_ctx
1738            .conntrack()
1739            .get_connection_for_packet_and_update(&bindings_ctx, &second_packet_reply)
1740            .expect("packet should be valid")
1741            .expect("packet should be trackable");
1742        assert_matches!(conn.state().establishment_lifecycle, EstablishmentLifecycle::SeenReply);
1743        assert_matches!(
1744            core_ctx
1745                .conntrack()
1746                .finalize_connection(&mut bindings_ctx, conn)
1747                .expect("connection finalize should succeed"),
1748            (false, Some(_))
1749        );
1750        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), true);
1751        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), true);
1752        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), true);
1753        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), true);
1754        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 4);
1755
1756        // The state in the table at this point is:
1757        // Connection 1:
1758        //   - Last packet seen at T=0
1759        //   - Expires after T=CONNECTION_EXPIRY_TIME_UDP
1760        // Connection 2:
1761        //   - Last packet seen at T=GC_INTERVAL
1762        //   - Expires after CONNECTION_EXPIRY_TIME_UDP + GC_INTERVAL
1763
1764        // T=2*GC_INTERVAL: Triggering a GC does not clean up any connections.
1765        bindings_ctx.sleep(GC_INTERVAL);
1766        perform_gc(&mut core_ctx, &mut bindings_ctx, gc_trigger);
1767        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), true);
1768        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), true);
1769        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), true);
1770        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), true);
1771        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 4);
1772
1773        // Time advances to expiry for the first packet
1774        // (T=CONNECTION_EXPIRY_TIME_UDP) trigger gc and note that the first
1775        // connection was cleaned up
1776        bindings_ctx.sleep(CONNECTION_EXPIRY_TIME_UDP - 2 * GC_INTERVAL);
1777        perform_gc(&mut core_ctx, &mut bindings_ctx, gc_trigger);
1778        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), false);
1779        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), false);
1780        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), true);
1781        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), true);
1782        assert_eq!(core_ctx.conntrack().inner.lock().table.len(), 2);
1783
1784        // Advance time past the expiry time for the second connection
1785        // (T=CONNECTION_EXPIRY_TIME_UDP + GC_INTERVAL) and see that it is
1786        // cleaned up.
1787        bindings_ctx.sleep(GC_INTERVAL);
1788        perform_gc(&mut core_ctx, &mut bindings_ctx, gc_trigger);
1789        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple), false);
1790        assert_eq!(core_ctx.conntrack().contains_tuple(&first_tuple_reply), false);
1791        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple), false);
1792        assert_eq!(core_ctx.conntrack().contains_tuple(&second_tuple_reply), false);
1793        assert!(core_ctx.conntrack().inner.lock().table.is_empty());
1794    }
1795
1796    fn fill_table<I, E, BC>(
1797        bindings_ctx: &mut BC,
1798        table: &Table<I, E, BC>,
1799        entries: impl Iterator<Item = u32>,
1800        establishment_lifecycle: EstablishmentLifecycle,
1801    ) where
1802        I: IpExt + TestIpExt,
1803        E: Debug + Default + Send + Sync + PartialEq + CompatibleWith + 'static,
1804        BC: FilterBindingsContext,
1805    {
1806        for i in entries {
1807            let (packet, reply_packet) = make_test_udp_packets(i);
1808            let (conn, _dir) = table
1809                .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1810                .expect("packet should be valid")
1811                .expect("packet should be trackable");
1812            assert_matches!(
1813                table
1814                    .finalize_connection(bindings_ctx, conn)
1815                    .expect("connection finalize should succeed"),
1816                (true, Some(_))
1817            );
1818
1819            if establishment_lifecycle >= EstablishmentLifecycle::SeenReply {
1820                let (conn, _dir) = table
1821                    .get_connection_for_packet_and_update(&bindings_ctx, &reply_packet)
1822                    .expect("packet should be valid")
1823                    .expect("packet should be trackable");
1824                assert_matches!(
1825                    table
1826                        .finalize_connection(bindings_ctx, conn)
1827                        .expect("connection finalize should succeed"),
1828                    (false, Some(_))
1829                );
1830
1831                if establishment_lifecycle >= EstablishmentLifecycle::Established {
1832                    let (conn, _dir) = table
1833                        .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1834                        .expect("packet should be valid")
1835                        .expect("packet should be trackable");
1836                    assert_matches!(
1837                        table
1838                            .finalize_connection(bindings_ctx, conn)
1839                            .expect("connection finalize should succeed"),
1840                        (false, Some(_))
1841                    );
1842                }
1843            }
1844        }
1845    }
1846
1847    #[ip_test(I)]
1848    #[test_case(EstablishmentLifecycle::SeenOriginal; "existing connections unestablished")]
1849    #[test_case(EstablishmentLifecycle::SeenReply; "existing connections partially established")]
1850    #[test_case(EstablishmentLifecycle::Established; "existing connections established")]
1851    fn table_size_limit_evict_less_established<I: IpExt + TestIpExt>(
1852        existing_lifecycle: EstablishmentLifecycle,
1853    ) {
1854        let mut bindings_ctx = FakeBindingsCtx::<I>::new();
1855        bindings_ctx.sleep(Duration::from_secs(1));
1856        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1857
1858        fill_table(
1859            &mut bindings_ctx,
1860            &table,
1861            0..(MAXIMUM_ENTRIES / 2).try_into().unwrap(),
1862            existing_lifecycle,
1863        );
1864
1865        // The table should be full whether or not the connections are
1866        // established since finalize_connection always inserts the connection
1867        // under the original and reply tuples.
1868        assert_eq!(table.inner.lock().table.len(), MAXIMUM_ENTRIES);
1869
1870        let (packet, _) = make_test_udp_packets((MAXIMUM_ENTRIES / 2).try_into().unwrap());
1871        let (conn, _dir) = table
1872            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1873            .expect("packet should be valid")
1874            .expect("packet should be trackable");
1875        if existing_lifecycle == EstablishmentLifecycle::Established {
1876            // Inserting a new connection should fail because it would grow the
1877            // table.
1878            assert_matches!(
1879                table.finalize_connection(&mut bindings_ctx, conn),
1880                Err(FinalizeConnectionError::TableFull)
1881            );
1882
1883            // Inserting an existing connection again should succeed because
1884            // it's not growing the table.
1885            let (packet, _) = make_test_udp_packets((MAXIMUM_ENTRIES / 2 - 1).try_into().unwrap());
1886            let (conn, _dir) = table
1887                .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1888                .expect("packet should be valid")
1889                .expect("packet should be trackable");
1890            assert_matches!(
1891                table
1892                    .finalize_connection(&mut bindings_ctx, conn)
1893                    .expect("connection finalize should succeed"),
1894                (false, Some(_))
1895            );
1896        } else {
1897            assert_matches!(
1898                table
1899                    .finalize_connection(&mut bindings_ctx, conn)
1900                    .expect("connection finalize should succeed"),
1901                (true, Some(_))
1902            );
1903        }
1904    }
1905
1906    #[ip_test(I)]
1907    fn table_size_limit_evict_expired<I: IpExt + TestIpExt>() {
1908        let mut bindings_ctx = FakeBindingsCtx::<I>::new();
1909        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1910
1911        // Add one connection that expires a second sooner than the others.
1912        let evicted_tuple = {
1913            let (packet, _) = make_test_udp_packets(0);
1914            Tuple::from_packet(&packet).unwrap()
1915        };
1916        fill_table(&mut bindings_ctx, &table, 0..=0, EstablishmentLifecycle::Established);
1917        bindings_ctx.sleep(Duration::from_secs(1));
1918        fill_table(
1919            &mut bindings_ctx,
1920            &table,
1921            1..(MAXIMUM_ENTRIES / 2).try_into().unwrap(),
1922            EstablishmentLifecycle::Established,
1923        );
1924
1925        assert_eq!(table.inner.lock().table.len(), MAXIMUM_ENTRIES);
1926        assert!(table.contains_tuple(&evicted_tuple));
1927
1928        let (packet, _) = make_test_udp_packets((MAXIMUM_ENTRIES / 2).try_into().unwrap());
1929        // The table is full, and no connections can be evicted (they're all
1930        // established and unexpired), so we can't insert a new connection.
1931        let (conn, _dir) = table
1932            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1933            .expect("packet should be valid")
1934            .expect("packet should be trackable");
1935        assert_matches!(
1936            table.finalize_connection(&mut bindings_ctx, conn),
1937            Err(FinalizeConnectionError::TableFull)
1938        );
1939
1940        // Now the first connection can be evicted because it's expired, and we
1941        // see that we're able to insert a new connection.
1942        bindings_ctx.sleep(CONNECTION_EXPIRY_TIME_UDP - Duration::from_secs(1));
1943        let (conn, _dir) = table
1944            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1945            .expect("packet should be valid")
1946            .expect("packet should be trackable");
1947        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok(_));
1948        assert!(!table.contains_tuple(&evicted_tuple));
1949    }
1950
1951    #[ip_test(I)]
1952    fn table_size_limit_less_established<I: IpExt + TestIpExt>() {
1953        let mut bindings_ctx = FakeBindingsCtx::<I>::new();
1954        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
1955
1956        let evicted_tuple = {
1957            let (packet, _) = make_test_udp_packets(0);
1958            Tuple::from_packet(&packet).unwrap()
1959        };
1960        // Add one connection that expires a second sooner than the others.
1961        fill_table(&mut bindings_ctx, &table, 0..=0, EstablishmentLifecycle::SeenOriginal);
1962        bindings_ctx.sleep(Duration::from_secs(1));
1963        fill_table(&mut bindings_ctx, &table, 1..=1, EstablishmentLifecycle::SeenOriginal);
1964        fill_table(
1965            &mut bindings_ctx,
1966            &table,
1967            2..(MAXIMUM_ENTRIES / 2).try_into().unwrap(),
1968            EstablishmentLifecycle::SeenReply,
1969        );
1970
1971        assert_eq!(table.inner.lock().table.len(), MAXIMUM_ENTRIES);
1972        assert!(table.contains_tuple(&evicted_tuple));
1973
1974        // We can insert since all connections in the table are eligible for
1975        // eviction, but we want to be sure that the least established
1976        // connection was the one that's actually evicted.
1977        let (packet, _) = make_test_udp_packets((MAXIMUM_ENTRIES / 2).try_into().unwrap());
1978        let (conn, _dir) = table
1979            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
1980            .expect("packet should be valid")
1981            .expect("packet should be trackable");
1982        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok(_));
1983        assert!(!table.contains_tuple(&evicted_tuple));
1984    }
1985
// Checks the inspect data exported by the table at three points: when it is
// empty, after a single unestablished connection has been inserted, and after
// the table limit has been hit twice, once with a successful eviction and
// once with a dropped connection.
#[cfg(target_os = "fuchsia")]
#[ip_test(I)]
fn inspect<I: IpExt + TestIpExt>() {
    use alloc::boxed::Box;
    use alloc::string::ToString;
    use diagnostics_assertions::assert_data_tree;
    use diagnostics_traits::FuchsiaInspector;
    use fuchsia_inspect::Inspector;

    let mut bindings_ctx = FakeBindingsCtx::<I>::new();
    // The "last_packet_time" asserted below relies on the fake clock reading
    // T=1s when the first packet is seen.
    bindings_ctx.sleep(Duration::from_secs(1));
    let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);

    let half_capacity: u32 = (MAXIMUM_ENTRIES / 2).try_into().unwrap();

    // Empty table.
    {
        let inspector = Inspector::new(Default::default());
        let mut bindings_inspector = FuchsiaInspector::<()>::new(inspector.root());
        bindings_inspector.delegate_inspectable(&table);

        assert_data_tree!(inspector, "root": {
            "table_limit_drops": 0u64,
            "table_limit_hits": 0u64,
            "num_entries": 0u64,
            "connections": {},
        });
    }

    // Insert the first connection into the table in an unestablished state.
    // This will later be evicted when the table fills up.
    let (first_packet, _) = make_test_udp_packets::<I>(0);
    let (first_conn, _dir) = table
        .get_connection_for_packet_and_update(&bindings_ctx, &first_packet)
        .expect("packet should be valid")
        .expect("packet should be trackable");
    assert_matches!(
        first_conn.state().establishment_lifecycle,
        EstablishmentLifecycle::SeenOriginal
    );
    assert_matches!(
        table
            .finalize_connection(&mut bindings_ctx, first_conn)
            .expect("connection finalize should succeed"),
        (true, Some(_))
    );

    // One connection, stored under two entries.
    {
        let inspector = Inspector::new(Default::default());
        let mut bindings_inspector = FuchsiaInspector::<()>::new(inspector.root());
        bindings_inspector.delegate_inspectable(&table);

        assert_data_tree!(inspector, "root": {
            "table_limit_drops": 0u64,
            "table_limit_hits": 0u64,
            "num_entries": 2u64,
            "connections": {
                "0": {
                    "original_tuple": {
                        "protocol": "UDP",
                        "src_addr": I::SRC_IP.to_string(),
                        "dst_addr": I::DST_IP.to_string(),
                        "src_port_or_id": 0u64,
                        "dst_port_or_id": 0u64,
                    },
                    "reply_tuple": {
                        "protocol": "UDP",
                        "src_addr": I::DST_IP.to_string(),
                        "dst_addr": I::SRC_IP.to_string(),
                        "src_port_or_id": 0u64,
                        "dst_port_or_id": 0u64,
                    },
                    "external_data": {},
                    "established": false,
                    "last_packet_time": 1_000_000_000u64,
                }
            },
        });
    }

    // Fill the table up the rest of the way.
    fill_table(
        &mut bindings_ctx,
        &table,
        1..half_capacity,
        EstablishmentLifecycle::Established,
    );

    assert_eq!(table.inner.lock().table.len(), MAXIMUM_ENTRIES);

    // This first one should succeed because it can evict the
    // non-established connection.
    let (extra_packet, extra_reply_packet) = make_test_udp_packets(half_capacity);
    let (extra_conn, _dir) = table
        .get_connection_for_packet_and_update(&bindings_ctx, &extra_packet)
        .expect("packet should be valid")
        .expect("packet should be trackable");
    assert_matches!(
        table
            .finalize_connection(&mut bindings_ctx, extra_conn)
            .expect("connection finalize should succeed"),
        (true, Some(_))
    );

    // Send a reply and then another original-direction packet for the new
    // connection; neither may insert anything.
    let (extra_conn, _dir) = table
        .get_connection_for_packet_and_update(&bindings_ctx, &extra_reply_packet)
        .expect("packet should be valid")
        .expect("packet should be trackable");
    assert_matches!(
        table
            .finalize_connection(&mut bindings_ctx, extra_conn)
            .expect("connection finalize should succeed"),
        (false, Some(_))
    );
    let (extra_conn, _dir) = table
        .get_connection_for_packet_and_update(&bindings_ctx, &extra_packet)
        .expect("packet should be valid")
        .expect("packet should be trackable");
    assert_matches!(
        table
            .finalize_connection(&mut bindings_ctx, extra_conn)
            .expect("connection finalize should succeed"),
        (false, Some(_))
    );

    // This next one should fail because there are no connections left to
    // evict.
    let (dropped_packet, _) = make_test_udp_packets(half_capacity + 1);
    let (dropped_conn, _dir) = table
        .get_connection_for_packet_and_update(&bindings_ctx, &dropped_packet)
        .expect("packet should be valid")
        .expect("packet should be trackable");
    assert_matches!(
        table.finalize_connection(&mut bindings_ctx, dropped_conn),
        Err(FinalizeConnectionError::TableFull)
    );

    // The limit was hit twice, and one of those hits dropped a connection.
    {
        let inspector = Inspector::new(Default::default());
        let mut bindings_inspector = FuchsiaInspector::<()>::new(inspector.root());
        bindings_inspector.delegate_inspectable(&table);

        assert_data_tree!(inspector, "root": contains {
            "table_limit_drops": 1u64,
            "table_limit_hits": 2u64,
            "num_entries": MAXIMUM_ENTRIES as u64,
        });
    }
}
2129
2130    #[ip_test(I)]
2131    fn self_connected_socket<I: IpExt + TestIpExt>() {
2132        let mut bindings_ctx = FakeBindingsCtx::new();
2133        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
2134
2135        let packet = FakeIpPacket::<I, _> {
2136            src_ip: I::SRC_IP,
2137            dst_ip: I::SRC_IP,
2138            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::SRC_PORT },
2139        };
2140
2141        let tuple = Tuple::from_packet(&packet).expect("packet should be valid");
2142        let reply_tuple = tuple.clone().invert();
2143
2144        assert_eq!(tuple, reply_tuple);
2145
2146        let (conn, _dir) = table
2147            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
2148            .expect("packet should be valid")
2149            .expect("packet should be trackable");
2150        let state = conn.state();
2151        // Since we can't differentiate between the original and reply tuple,
2152        // the connection ends up being marked established immediately.
2153        assert_matches!(state.establishment_lifecycle, EstablishmentLifecycle::SeenReply);
2154
2155        assert_matches!(conn, Connection::Exclusive(_));
2156        assert!(!table.contains_tuple(&tuple));
2157
2158        // Once we finalize the connection, it should be present in the map.
2159        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((true, Some(_))));
2160        assert!(table.contains_tuple(&tuple));
2161
2162        // There should be a single connection in the table, despite there only
2163        // being a single tuple.
2164        assert_eq!(table.inner.lock().table.len(), 1);
2165
2166        bindings_ctx.sleep(CONNECTION_EXPIRY_TIME_UDP);
2167        table.perform_gc(&mut bindings_ctx);
2168
2169        assert!(table.inner.lock().table.is_empty());
2170    }
2171
2172    #[ip_test(I)]
2173    fn remove_entry_on_update<I: IpExt + TestIpExt>() {
2174        let mut bindings_ctx = FakeBindingsCtx::new();
2175        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
2176
2177        let original_packet = FakeIpPacket::<I, _> {
2178            src_ip: I::SRC_IP,
2179            dst_ip: I::DST_IP,
2180            body: FakeTcpSegment {
2181                src_port: I::SRC_PORT,
2182                dst_port: I::DST_PORT,
2183                segment: SegmentHeader {
2184                    seq: SeqNum::new(1024),
2185                    wnd: UnscaledWindowSize::from(16u16),
2186                    control: Some(Control::SYN),
2187                    ..Default::default()
2188                },
2189                payload_len: 0,
2190            },
2191        };
2192
2193        let reply_packet = FakeIpPacket::<I, _> {
2194            src_ip: I::DST_IP,
2195            dst_ip: I::SRC_IP,
2196            body: FakeTcpSegment {
2197                src_port: I::DST_PORT,
2198                dst_port: I::SRC_PORT,
2199                segment: SegmentHeader {
2200                    seq: SeqNum::new(0),
2201                    ack: Some(SeqNum::new(1025)),
2202                    wnd: UnscaledWindowSize::from(16u16),
2203                    control: Some(Control::RST),
2204                    ..Default::default()
2205                },
2206                payload_len: 0,
2207            },
2208        };
2209
2210        let tuple = Tuple::from_packet(&original_packet).expect("packet should be valid");
2211        let reply_tuple = tuple.clone().invert();
2212
2213        let (conn, _dir) = table
2214            .get_connection_for_packet_and_update(&bindings_ctx, &original_packet)
2215            .expect("packet should be valid")
2216            .expect("packet should be trackable");
2217        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((true, Some(_))));
2218
2219        assert!(table.contains_tuple(&tuple));
2220        assert!(table.contains_tuple(&reply_tuple));
2221
2222        // Sending the reply RST through should result in the connection being
2223        // removed from the table.
2224        let (conn, _dir) = table
2225            .get_connection_for_packet_and_update(&bindings_ctx, &reply_packet)
2226            .expect("packet should be valid")
2227            .expect("packet should be trackable");
2228
2229        assert!(!table.contains_tuple(&tuple));
2230        assert!(!table.contains_tuple(&reply_tuple));
2231        assert!(table.inner.lock().table.is_empty());
2232
2233        // The connection should not added back on finalization.
2234        assert_matches!(table.finalize_connection(&mut bindings_ctx, conn), Ok((false, Some(_))));
2235
2236        assert!(!table.contains_tuple(&tuple));
2237        assert!(!table.contains_tuple(&reply_tuple));
2238        assert!(table.inner.lock().table.is_empty());
2239
2240        // GC should complete successfully.
2241        bindings_ctx.sleep(Duration::from_secs(60 * 60 * 24 * 6));
2242        table.perform_gc(&mut bindings_ctx);
2243    }
2244
2245    #[ip_test(I)]
2246    fn do_not_insert<I: IpExt + TestIpExt>() {
2247        let mut bindings_ctx = FakeBindingsCtx::new();
2248        let table = Table::<_, (), _>::new::<IntoCoreTimerCtx>(&mut bindings_ctx);
2249
2250        let packet = FakeIpPacket::<I, _> {
2251            src_ip: I::SRC_IP,
2252            dst_ip: I::DST_IP,
2253            body: FakeUdpPacket { src_port: I::SRC_PORT, dst_port: I::DST_PORT },
2254        };
2255
2256        let tuple = Tuple::from_packet(&packet).expect("packet should be valid");
2257        let reply_tuple = tuple.clone().invert();
2258
2259        let (conn, _dir) = table
2260            .get_connection_for_packet_and_update(&bindings_ctx, &packet)
2261            .expect("packet should be valid")
2262            .expect("packet should be trackable");
2263        let mut conn = assert_matches!(conn, Connection::Exclusive(conn) => conn);
2264        conn.do_not_insert = true;
2265        assert_matches!(
2266            table.finalize_connection(&mut bindings_ctx, Connection::Exclusive(conn)),
2267            Ok((false, None))
2268        );
2269
2270        assert!(!table.contains_tuple(&tuple));
2271        assert!(!table.contains_tuple(&reply_tuple));
2272    }
2273}