netstack3_tcp/
base.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! The Transmission Control Protocol (TCP).
6
7use core::num::NonZeroU8;
8use core::time::Duration;
9
10use derivative::Derivative;
11use net_types::SpecifiedAddr;
12use net_types::ip::{GenericOverIp, Ip, Ipv4, Ipv6, Mtu};
13use netstack3_base::{
14    IcmpErrorCode, Icmpv4ErrorCode, Icmpv6ErrorCode, IpExt, Marks, Mms, UnscaledWindowSize,
15    WeakDeviceIdentifier, WindowSize,
16};
17use netstack3_ip::socket::{RouteResolutionOptions, SendOptions};
18use packet_formats::icmp::{
19    Icmpv4DestUnreachableCode, Icmpv4TimeExceededCode, Icmpv6DestUnreachableCode,
20};
21use packet_formats::ip::DscpAndEcn;
22use packet_formats::utils::NonZeroDuration;
23use rand::Rng;
24use thiserror::Error;
25
26use crate::internal::buffer::BufferLimits;
27use crate::internal::counters::{TcpCountersWithSocket, TcpCountersWithoutSocket};
28use crate::internal::socket::generators::{IsnGenerator, TimestampOffsetGenerator};
29use crate::internal::socket::{DualStackIpExt, Sockets, TcpBindingsTypes, WeakTcpSocketId};
30use crate::internal::state::DEFAULT_MAX_SYN_RETRIES;
31
/// Default lifetime for an orphaned connection in FIN_WAIT2.
///
/// This is the value [`SocketOptions::default`] uses for
/// [`SocketOptions::fin_wait2_timeout`].
pub const DEFAULT_FIN_WAIT2_TIMEOUT: Duration = Duration::from_secs(60);
34
35/// Errors surfaced to the user.
36#[derive(Copy, Clone, Debug, PartialEq, Eq, Error)]
37pub enum ConnectionError {
38    /// The connection was refused, RST segment received while in SYN_SENT state.
39    #[error("connection refused (RST segment received while in SYN_SENT state")]
40    ConnectionRefused,
41    /// The connection was reset because of a RST segment.
42    #[error("connection was reset because of a RST segment")]
43    ConnectionReset,
44    /// The connection was closed because the network is unreachable.
45    #[error("connection was closed because the network is unreachable")]
46    NetworkUnreachable,
47    /// The connection was closed because the host is unreachable.
48    #[error("connection was closed because the host is unreachable")]
49    HostUnreachable,
50    /// The connection was closed because the protocol is unreachable.
51    #[error("connection was closed because the protocol is unreachable")]
52    ProtocolUnreachable,
53    /// The connection was closed because the port is unreachable.
54    #[error("connection was closed because the port is unreachable")]
55    PortUnreachable,
56    /// The connection was closed because the host is down.
57    #[error("connection was closed because the host is down")]
58    DestinationHostDown,
59    /// The connection was closed because the source route failed.
60    #[error("connection was closed because the source route failed")]
61    SourceRouteFailed,
62    /// The connection was closed because the source host is isolated.
63    #[error("connection was closed because the source host is isolated")]
64    SourceHostIsolated,
65    /// The connection was closed because of a time out.
66    #[error("connection was closed because of a time out")]
67    TimedOut,
68    /// The connection was closed because of a lack of required permissions.
69    #[error("connection was closed because of a lack of required permissions")]
70    PermissionDenied,
71    /// The connection was closed because there was a protocol error.
72    #[error("connection was closed because there was a protocol error")]
73    ProtocolError,
74}
75
76/// The meaning of a particular ICMP error to a TCP socket.
77pub(crate) enum IcmpErrorResult {
78    /// There has been an error on the connection that must be handled.
79    ConnectionError(ConnectionError),
80    /// The PMTU used by the connection has been updated.
81    PmtuUpdate(Mms),
82}
83
84impl IcmpErrorResult {
85    // Notes: the following mappings are guided by the packetimpact test here:
86    // https://cs.opensource.google/gvisor/gvisor/+/master:test/packetimpact/tests/tcp_network_unreachable_test.go;drc=611e6e1247a0691f5fd198f411c68b3bc79d90af
87    pub(crate) fn try_from_icmp_error(err: IcmpErrorCode) -> Option<IcmpErrorResult> {
88        match err {
89            IcmpErrorCode::V4(Icmpv4ErrorCode::DestUnreachable(code, message)) => {
90                match code {
91                    Icmpv4DestUnreachableCode::DestNetworkUnreachable => {
92                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
93                    }
94                    Icmpv4DestUnreachableCode::DestHostUnreachable => {
95                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
96                    }
97                    Icmpv4DestUnreachableCode::DestProtocolUnreachable => {
98                        Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolUnreachable))
99                    }
100                    Icmpv4DestUnreachableCode::DestPortUnreachable => {
101                        Some(IcmpErrorResult::ConnectionError(ConnectionError::PortUnreachable))
102                    }
103                    Icmpv4DestUnreachableCode::SourceRouteFailed => {
104                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceRouteFailed))
105                    }
106                    Icmpv4DestUnreachableCode::DestNetworkUnknown => {
107                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
108                    }
109                    Icmpv4DestUnreachableCode::DestHostUnknown => {
110                        Some(IcmpErrorResult::ConnectionError(ConnectionError::DestinationHostDown))
111                    }
112                    Icmpv4DestUnreachableCode::SourceHostIsolated => {
113                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceHostIsolated))
114                    }
115                    Icmpv4DestUnreachableCode::NetworkAdministrativelyProhibited => {
116                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
117                    }
118                    Icmpv4DestUnreachableCode::HostAdministrativelyProhibited => {
119                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
120                    }
121                    Icmpv4DestUnreachableCode::NetworkUnreachableForToS => {
122                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
123                    }
124                    Icmpv4DestUnreachableCode::HostUnreachableForToS => {
125                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
126                    }
127                    Icmpv4DestUnreachableCode::CommAdministrativelyProhibited => {
128                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
129                    }
130                    Icmpv4DestUnreachableCode::HostPrecedenceViolation => {
131                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
132                    }
133                    Icmpv4DestUnreachableCode::PrecedenceCutoffInEffect => {
134                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
135                    }
136                    Icmpv4DestUnreachableCode::FragmentationRequired => {
137                        let mtu = message.next_hop_mtu().expect("stack should always fill in MTU");
138                        let mtu = Mtu::new(mtu.get().into());
139                        let mms = Mms::from_mtu::<Ipv4>(mtu, 0 /* no IP options used */)?;
140                        Some(IcmpErrorResult::PmtuUpdate(mms))
141                    }
142                }
143            }
144            IcmpErrorCode::V4(Icmpv4ErrorCode::ParameterProblem(_)) => {
145                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
146            }
147            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
148                Icmpv4TimeExceededCode::TtlExpired,
149            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable)),
150            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
151                Icmpv4TimeExceededCode::FragmentReassemblyTimeExceeded,
152            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::TimedOut)),
153            IcmpErrorCode::V4(Icmpv4ErrorCode::Redirect(_)) => None,
154            IcmpErrorCode::V6(Icmpv6ErrorCode::DestUnreachable(code)) => {
155                Some(IcmpErrorResult::ConnectionError(match code {
156                    Icmpv6DestUnreachableCode::NoRoute => ConnectionError::NetworkUnreachable,
157                    Icmpv6DestUnreachableCode::CommAdministrativelyProhibited => {
158                        ConnectionError::PermissionDenied
159                    }
160                    Icmpv6DestUnreachableCode::BeyondScope => ConnectionError::HostUnreachable,
161                    Icmpv6DestUnreachableCode::AddrUnreachable => ConnectionError::HostUnreachable,
162                    Icmpv6DestUnreachableCode::PortUnreachable => ConnectionError::PortUnreachable,
163                    Icmpv6DestUnreachableCode::SrcAddrFailedPolicy => {
164                        ConnectionError::PermissionDenied
165                    }
166                    Icmpv6DestUnreachableCode::RejectRoute => ConnectionError::PermissionDenied,
167                }))
168            }
169            IcmpErrorCode::V6(Icmpv6ErrorCode::ParameterProblem(_)) => {
170                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
171            }
172            IcmpErrorCode::V6(Icmpv6ErrorCode::TimeExceeded(_)) => {
173                Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
174            }
175            IcmpErrorCode::V6(Icmpv6ErrorCode::PacketTooBig(mtu)) => {
176                let mms = Mms::from_mtu::<Ipv6>(mtu, 0 /* no IP options used */)?;
177                Some(IcmpErrorResult::PmtuUpdate(mms))
178            }
179        }
180    }
181}
182
183/// Metadata associated with an outgoing TCP packet.
184#[derive(Derivative, GenericOverIp)]
185#[generic_over_ip(I, Ip)]
186#[derivative(Debug(bound = ""))]
187#[cfg_attr(any(test, feature = "testutils"), derivative(PartialEq(bound = "")))]
188pub struct TcpSocketTxMetadata<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> {
189    /// The socket from which the packet originates.
190    socket: WeakTcpSocketId<I, D, BT>,
191}
192
193impl<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes>
194    TcpSocketTxMetadata<I, D, BT>
195{
196    /// Creates a new `TcpSocketTxMetadata`.
197    pub(crate) fn new(socket: WeakTcpSocketId<I, D, BT>) -> Self {
198        Self { socket }
199    }
200
201    /// Gets the socket from which the packet originates.
202    pub fn socket(&self) -> &WeakTcpSocketId<I, D, BT> {
203        &self.socket
204    }
205}
206
207/// Stack wide state supporting TCP.
208#[derive(GenericOverIp)]
209#[generic_over_ip(I, Ip)]
210pub struct TcpState<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> {
211    /// The initial sequence number generator.
212    pub isn_generator: IsnGenerator<BT::Instant>,
213    /// The timestamp offset generator.
214    pub timestamp_offset_generator: TimestampOffsetGenerator<BT::Instant>,
215    /// TCP sockets state.
216    pub sockets: Sockets<I, D, BT>,
217    /// TCP counters that cannot be attributed to a specific socket.
218    pub counters_without_socket: TcpCountersWithoutSocket<I>,
219    /// TCP counters that can be attributed to a specific socket.
220    pub counters_with_socket: TcpCountersWithSocket<I>,
221}
222
223impl<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> TcpState<I, D, BT> {
224    /// Creates a new TCP stack state.
225    pub fn new(now: BT::Instant, rng: &mut impl Rng) -> Self {
226        Self {
227            isn_generator: IsnGenerator::new(now, rng),
228            timestamp_offset_generator: TimestampOffsetGenerator::new(now, rng),
229            sockets: Sockets::new(),
230            counters_without_socket: Default::default(),
231            counters_with_socket: Default::default(),
232        }
233    }
234}
235
236/// Named tuple for holding sizes of buffers for a socket.
237#[derive(Copy, Clone, Debug)]
238#[cfg_attr(test, derive(Eq, PartialEq))]
239pub struct BufferSizes {
240    /// The size of the send buffer.
241    pub send: usize,
242    /// The size of the receive buffer.
243    pub receive: usize,
244}
245/// Sensible defaults only for testing.
246#[cfg(any(test, feature = "testutils"))]
247impl Default for BufferSizes {
248    fn default() -> Self {
249        BufferSizes { send: WindowSize::DEFAULT.into(), receive: WindowSize::DEFAULT.into() }
250    }
251}
252
253impl BufferSizes {
254    pub(crate) fn rcv_limits(&self) -> BufferLimits {
255        let Self { send: _, receive } = self;
256        BufferLimits { capacity: *receive, len: 0 }
257    }
258
259    pub(crate) fn rwnd(&self) -> WindowSize {
260        let Self { send: _, receive } = *self;
261        WindowSize::new(receive).unwrap_or(WindowSize::MAX)
262    }
263
264    pub(crate) fn rwnd_unscaled(&self) -> UnscaledWindowSize {
265        let Self { send: _, receive } = *self;
266        UnscaledWindowSize::from(u16::try_from(receive).unwrap_or(u16::MAX))
267    }
268}
269
270/// A mutable reference to buffer configuration.
271pub(crate) enum BuffersRefMut<'a, R, S> {
272    /// All buffers are dropped.
273    NoBuffers,
274    /// Buffer sizes are configured but not instantiated yet.
275    Sizes(&'a mut BufferSizes),
276    /// Buffers are instantiated and mutable references are provided.
277    Both { send: &'a mut S, recv: &'a mut R },
278    /// Only the send buffer is still instantiated, which happens in Closing
279    /// states.
280    SendOnly(&'a mut S),
281    /// Only the receive buffer is still instantiated, which happens in Finwait
282    /// states.
283    RecvOnly(&'a mut R),
284}
285
286impl<'a, R, S> BuffersRefMut<'a, R, S> {
287    pub(crate) fn into_send_buffer(self) -> Option<&'a mut S> {
288        match self {
289            Self::NoBuffers | Self::Sizes(_) | Self::RecvOnly(_) => None,
290            Self::Both { send, recv: _ } | Self::SendOnly(send) => Some(send),
291        }
292    }
293
294    pub(crate) fn into_receive_buffer(self) -> Option<&'a mut R> {
295        match self {
296            Self::NoBuffers | Self::Sizes(_) | Self::SendOnly(_) => None,
297            Self::Both { send: _, recv } | Self::RecvOnly(recv) => Some(recv),
298        }
299    }
300}
301
302/// The IP sock options used by TCP.
303#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
304pub struct TcpIpSockOptions {
305    /// Socket marks used for routing.
306    pub marks: Marks,
307}
308
309impl<I: Ip> RouteResolutionOptions<I> for TcpIpSockOptions {
310    fn marks(&self) -> &Marks {
311        &self.marks
312    }
313
314    fn transparent(&self) -> bool {
315        false
316    }
317}
318
319impl<I: IpExt> SendOptions<I> for TcpIpSockOptions {
320    fn hop_limit(&self, _destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
321        None
322    }
323
324    fn multicast_loop(&self) -> bool {
325        false
326    }
327
328    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
329        None
330    }
331
332    fn dscp_and_ecn(&self) -> DscpAndEcn {
333        DscpAndEcn::default()
334    }
335
336    fn mtu(&self) -> Mtu {
337        Mtu::no_limit()
338    }
339}
340
341/// TCP socket options.
342///
343/// This only stores options that are trivial to get and set.
344#[derive(Clone, Copy, Debug, PartialEq, Eq)]
345pub struct SocketOptions {
346    /// Socket options that control TCP keep-alive mechanism, see [`KeepAlive`].
347    pub keep_alive: KeepAlive,
348    /// Switch to turn nagle algorithm on/off.
349    pub nagle_enabled: bool,
350    /// The period of time after which the connection should be aborted if no
351    /// ACK is received.
352    pub user_timeout: Option<NonZeroDuration>,
353    /// Switch to turn delayed ACK on/off.
354    pub delayed_ack: bool,
355    /// The period of time after with a dangling FIN_WAIT2 state should be
356    /// reclaimed.
357    pub fin_wait2_timeout: Option<Duration>,
358    /// The maximum SYN retransmissions before aborting a connection.
359    pub max_syn_retries: NonZeroU8,
360    /// Ip socket options.
361    pub ip_options: TcpIpSockOptions,
362}
363
364impl Default for SocketOptions {
365    fn default() -> Self {
366        Self {
367            keep_alive: KeepAlive::default(),
368            // RFC 9293 Section 3.7.4:
369            //   A TCP implementation SHOULD implement the Nagle algorithm to
370            //   coalesce short segments
371            nagle_enabled: true,
372            user_timeout: None,
373            delayed_ack: true,
374            fin_wait2_timeout: Some(DEFAULT_FIN_WAIT2_TIMEOUT),
375            max_syn_retries: DEFAULT_MAX_SYN_RETRIES,
376            ip_options: TcpIpSockOptions::default(),
377        }
378    }
379}
380
381/// Options that are related to TCP keep-alive.
382#[derive(Clone, Copy, Debug, PartialEq, Eq)]
383pub struct KeepAlive {
384    /// The amount of time for an idle connection to wait before sending out
385    /// probes.
386    pub idle: NonZeroDuration,
387    /// Interval between consecutive probes.
388    pub interval: NonZeroDuration,
389    /// Maximum number of probes we send before considering the connection dead.
390    ///
391    /// `u8` is enough because if a connection doesn't hear back from the peer
392    /// after 256 probes, then chances are that the connection is already dead.
393    pub count: NonZeroU8,
394    /// Only send probes if keep-alive is enabled.
395    pub enabled: bool,
396}
397
398impl Default for KeepAlive {
399    fn default() -> Self {
400        // Default values inspired by Linux's TCP implementation:
401        // https://github.com/torvalds/linux/blob/0326074ff4652329f2a1a9c8685104576bd8d131/include/net/tcp.h#L155-L157
402        const DEFAULT_IDLE_DURATION: NonZeroDuration =
403            NonZeroDuration::from_secs(2 * 60 * 60).unwrap();
404        const DEFAULT_INTERVAL: NonZeroDuration = NonZeroDuration::from_secs(75).unwrap();
405        const DEFAULT_COUNT: NonZeroU8 = NonZeroU8::new(9).unwrap();
406
407        Self {
408            idle: DEFAULT_IDLE_DURATION,
409            interval: DEFAULT_INTERVAL,
410            count: DEFAULT_COUNT,
411            // Per RFC 9293(https://datatracker.ietf.org/doc/html/rfc9293#section-3.8.4):
412            //   ... they MUST default to off.
413            enabled: false,
414        }
415    }
416}