netstack3_tcp/
base.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! The Transmission Control Protocol (TCP).
6
7use core::num::NonZeroU8;
8use core::time::Duration;
9
10use derivative::Derivative;
11use net_types::SpecifiedAddr;
12use net_types::ip::{GenericOverIp, Ip, Ipv4, Ipv6, Mtu};
13use netstack3_base::{
14    IcmpErrorCode, Icmpv4ErrorCode, Icmpv6ErrorCode, IpExt, Marks, Mms, UnscaledWindowSize,
15    WeakDeviceIdentifier, WindowSize,
16};
17use netstack3_ip::socket::{RouteResolutionOptions, SendOptions};
18use packet_formats::icmp::{
19    Icmpv4DestUnreachableCode, Icmpv4TimeExceededCode, Icmpv6DestUnreachableCode,
20};
21use packet_formats::ip::DscpAndEcn;
22use packet_formats::utils::NonZeroDuration;
23use rand::Rng;
24use thiserror::Error;
25
26use crate::internal::buffer::BufferLimits;
27use crate::internal::counters::{TcpCountersWithSocket, TcpCountersWithoutSocket};
28use crate::internal::socket::generators::{IsnGenerator, TimestampOffsetGenerator};
29use crate::internal::socket::{DualStackIpExt, Sockets, TcpBindingsTypes, WeakTcpSocketId};
30use crate::internal::state::DEFAULT_MAX_SYN_RETRIES;
31
/// Default lifetime for an orphaned connection in FIN_WAIT2.
///
/// [`SocketOptions::default`] uses this value for `fin_wait2_timeout`.
pub const DEFAULT_FIN_WAIT2_TIMEOUT: Duration = Duration::from_secs(60);
34
35/// Errors surfaced to the user.
36#[derive(Copy, Clone, Debug, PartialEq, Eq, Error)]
37pub enum ConnectionError {
38    /// The connection was refused, RST segment received while in SYN_SENT state.
39    #[error("connection refused (RST segment received while in SYN_SENT state")]
40    ConnectionRefused,
41    /// The connection was reset because of a RST segment.
42    #[error("connection was reset because of a RST segment")]
43    ConnectionReset,
44    /// The connection was closed because the network is unreachable.
45    #[error("connection was closed because the network is unreachable")]
46    NetworkUnreachable,
47    /// The connection was closed because the host is unreachable.
48    #[error("connection was closed because the host is unreachable")]
49    HostUnreachable,
50    /// The connection was closed because the protocol is unreachable.
51    #[error("connection was closed because the protocol is unreachable")]
52    ProtocolUnreachable,
53    /// The connection was closed because the port is unreachable.
54    #[error("connection was closed because the port is unreachable")]
55    PortUnreachable,
56    /// The connection was closed because the host is down.
57    #[error("connection was closed because the host is down")]
58    DestinationHostDown,
59    /// The connection was closed because the source route failed.
60    #[error("connection was closed because the source route failed")]
61    SourceRouteFailed,
62    /// The connection was closed because the source host is isolated.
63    #[error("connection was closed because the source host is isolated")]
64    SourceHostIsolated,
65    /// The connection was closed because of a time out.
66    #[error("connection was closed because of a time out")]
67    TimedOut,
68    /// The connection was closed because of a lack of required permissions.
69    #[error("connection was closed because of a lack of required permissions")]
70    PermissionDenied,
71    /// The connection was closed because there was a protocol error.
72    #[error("connection was closed because there was a protocol error")]
73    ProtocolError,
74    /// The connection was aborted by the system.
75    #[error("connection was aborted by the system")]
76    Aborted,
77}
78
79/// The meaning of a particular ICMP error to a TCP socket.
80pub(crate) enum IcmpErrorResult {
81    /// There has been an error on the connection that must be handled.
82    ConnectionError(ConnectionError),
83    /// The PMTU used by the connection has been updated.
84    PmtuUpdate(Mms),
85}
86
87impl IcmpErrorResult {
88    // Notes: the following mappings are guided by the packetimpact test here:
89    // https://cs.opensource.google/gvisor/gvisor/+/master:test/packetimpact/tests/tcp_network_unreachable_test.go;drc=611e6e1247a0691f5fd198f411c68b3bc79d90af
90    pub(crate) fn try_from_icmp_error(err: IcmpErrorCode) -> Option<IcmpErrorResult> {
91        match err {
92            IcmpErrorCode::V4(Icmpv4ErrorCode::DestUnreachable(code, message)) => {
93                match code {
94                    Icmpv4DestUnreachableCode::DestNetworkUnreachable => {
95                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
96                    }
97                    Icmpv4DestUnreachableCode::DestHostUnreachable => {
98                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
99                    }
100                    Icmpv4DestUnreachableCode::DestProtocolUnreachable => {
101                        Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolUnreachable))
102                    }
103                    Icmpv4DestUnreachableCode::DestPortUnreachable => {
104                        Some(IcmpErrorResult::ConnectionError(ConnectionError::PortUnreachable))
105                    }
106                    Icmpv4DestUnreachableCode::SourceRouteFailed => {
107                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceRouteFailed))
108                    }
109                    Icmpv4DestUnreachableCode::DestNetworkUnknown => {
110                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
111                    }
112                    Icmpv4DestUnreachableCode::DestHostUnknown => {
113                        Some(IcmpErrorResult::ConnectionError(ConnectionError::DestinationHostDown))
114                    }
115                    Icmpv4DestUnreachableCode::SourceHostIsolated => {
116                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceHostIsolated))
117                    }
118                    Icmpv4DestUnreachableCode::NetworkAdministrativelyProhibited => {
119                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
120                    }
121                    Icmpv4DestUnreachableCode::HostAdministrativelyProhibited => {
122                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
123                    }
124                    Icmpv4DestUnreachableCode::NetworkUnreachableForToS => {
125                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
126                    }
127                    Icmpv4DestUnreachableCode::HostUnreachableForToS => {
128                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
129                    }
130                    Icmpv4DestUnreachableCode::CommAdministrativelyProhibited => {
131                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
132                    }
133                    Icmpv4DestUnreachableCode::HostPrecedenceViolation => {
134                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
135                    }
136                    Icmpv4DestUnreachableCode::PrecedenceCutoffInEffect => {
137                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
138                    }
139                    Icmpv4DestUnreachableCode::FragmentationRequired => {
140                        let mtu = message.next_hop_mtu().expect("stack should always fill in MTU");
141                        let mtu = Mtu::new(mtu.get().into());
142                        let mms = Mms::from_mtu::<Ipv4>(mtu, 0 /* no IP options used */)?;
143                        Some(IcmpErrorResult::PmtuUpdate(mms))
144                    }
145                }
146            }
147            IcmpErrorCode::V4(Icmpv4ErrorCode::ParameterProblem(_)) => {
148                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
149            }
150            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
151                Icmpv4TimeExceededCode::TtlExpired,
152            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable)),
153            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
154                Icmpv4TimeExceededCode::FragmentReassemblyTimeExceeded,
155            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::TimedOut)),
156            IcmpErrorCode::V4(Icmpv4ErrorCode::Redirect(_)) => None,
157            IcmpErrorCode::V6(Icmpv6ErrorCode::DestUnreachable(code)) => {
158                Some(IcmpErrorResult::ConnectionError(match code {
159                    Icmpv6DestUnreachableCode::NoRoute => ConnectionError::NetworkUnreachable,
160                    Icmpv6DestUnreachableCode::CommAdministrativelyProhibited => {
161                        ConnectionError::PermissionDenied
162                    }
163                    Icmpv6DestUnreachableCode::BeyondScope => ConnectionError::HostUnreachable,
164                    Icmpv6DestUnreachableCode::AddrUnreachable => ConnectionError::HostUnreachable,
165                    Icmpv6DestUnreachableCode::PortUnreachable => ConnectionError::PortUnreachable,
166                    Icmpv6DestUnreachableCode::SrcAddrFailedPolicy => {
167                        ConnectionError::PermissionDenied
168                    }
169                    Icmpv6DestUnreachableCode::RejectRoute => ConnectionError::PermissionDenied,
170                }))
171            }
172            IcmpErrorCode::V6(Icmpv6ErrorCode::ParameterProblem(_)) => {
173                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
174            }
175            IcmpErrorCode::V6(Icmpv6ErrorCode::TimeExceeded(_)) => {
176                Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
177            }
178            IcmpErrorCode::V6(Icmpv6ErrorCode::PacketTooBig(mtu)) => {
179                let mms = Mms::from_mtu::<Ipv6>(mtu, 0 /* no IP options used */)?;
180                Some(IcmpErrorResult::PmtuUpdate(mms))
181            }
182        }
183    }
184}
185
186/// Metadata associated with an outgoing TCP packet.
187#[derive(Derivative, GenericOverIp)]
188#[generic_over_ip(I, Ip)]
189#[derivative(Debug(bound = ""))]
190#[cfg_attr(any(test, feature = "testutils"), derivative(PartialEq(bound = "")))]
191pub struct TcpSocketTxMetadata<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> {
192    /// The socket from which the packet originates.
193    socket: WeakTcpSocketId<I, D, BT>,
194}
195
196impl<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes>
197    TcpSocketTxMetadata<I, D, BT>
198{
199    /// Creates a new `TcpSocketTxMetadata`.
200    pub(crate) fn new(socket: WeakTcpSocketId<I, D, BT>) -> Self {
201        Self { socket }
202    }
203
204    /// Gets the socket from which the packet originates.
205    pub fn socket(&self) -> &WeakTcpSocketId<I, D, BT> {
206        &self.socket
207    }
208}
209
210/// Stack wide state supporting TCP.
211#[derive(GenericOverIp)]
212#[generic_over_ip(I, Ip)]
213pub struct TcpState<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> {
214    /// The initial sequence number generator.
215    pub isn_generator: IsnGenerator<BT::Instant>,
216    /// The timestamp offset generator.
217    pub timestamp_offset_generator: TimestampOffsetGenerator<BT::Instant>,
218    /// TCP sockets state.
219    pub sockets: Sockets<I, D, BT>,
220    /// TCP counters that cannot be attributed to a specific socket.
221    pub counters_without_socket: TcpCountersWithoutSocket<I>,
222    /// TCP counters that can be attributed to a specific socket.
223    pub counters_with_socket: TcpCountersWithSocket<I>,
224}
225
226impl<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> TcpState<I, D, BT> {
227    /// Creates a new TCP stack state.
228    pub fn new(now: BT::Instant, rng: &mut impl Rng) -> Self {
229        Self {
230            isn_generator: IsnGenerator::new(now, rng),
231            timestamp_offset_generator: TimestampOffsetGenerator::new(now, rng),
232            sockets: Sockets::new(),
233            counters_without_socket: Default::default(),
234            counters_with_socket: Default::default(),
235        }
236    }
237}
238
239/// Named tuple for holding sizes of buffers for a socket.
240#[derive(Copy, Clone, Debug)]
241#[cfg_attr(test, derive(Eq, PartialEq))]
242pub struct BufferSizes {
243    /// The size of the send buffer.
244    pub send: usize,
245    /// The size of the receive buffer.
246    pub receive: usize,
247}
248/// Sensible defaults only for testing.
249#[cfg(any(test, feature = "testutils"))]
250impl Default for BufferSizes {
251    fn default() -> Self {
252        BufferSizes { send: WindowSize::DEFAULT.into(), receive: WindowSize::DEFAULT.into() }
253    }
254}
255
256impl BufferSizes {
257    pub(crate) fn rcv_limits(&self) -> BufferLimits {
258        let Self { send: _, receive } = self;
259        BufferLimits { capacity: *receive, len: 0 }
260    }
261
262    pub(crate) fn rwnd(&self) -> WindowSize {
263        let Self { send: _, receive } = *self;
264        WindowSize::new(receive).unwrap_or(WindowSize::MAX)
265    }
266
267    pub(crate) fn rwnd_unscaled(&self) -> UnscaledWindowSize {
268        let Self { send: _, receive } = *self;
269        UnscaledWindowSize::from(u16::try_from(receive).unwrap_or(u16::MAX))
270    }
271}
272
273/// A mutable reference to buffer configuration.
274pub(crate) enum BuffersRefMut<'a, R, S> {
275    /// All buffers are dropped.
276    NoBuffers,
277    /// Buffer sizes are configured but not instantiated yet.
278    Sizes(&'a mut BufferSizes),
279    /// Buffers are instantiated and mutable references are provided.
280    Both { send: &'a mut S, recv: &'a mut R },
281    /// Only the send buffer is still instantiated, which happens in Closing
282    /// states.
283    SendOnly(&'a mut S),
284    /// Only the receive buffer is still instantiated, which happens in Finwait
285    /// states.
286    RecvOnly(&'a mut R),
287}
288
289impl<'a, R, S> BuffersRefMut<'a, R, S> {
290    pub(crate) fn into_send_buffer(self) -> Option<&'a mut S> {
291        match self {
292            Self::NoBuffers | Self::Sizes(_) | Self::RecvOnly(_) => None,
293            Self::Both { send, recv: _ } | Self::SendOnly(send) => Some(send),
294        }
295    }
296
297    pub(crate) fn into_receive_buffer(self) -> Option<&'a mut R> {
298        match self {
299            Self::NoBuffers | Self::Sizes(_) | Self::SendOnly(_) => None,
300            Self::Both { send: _, recv } | Self::RecvOnly(recv) => Some(recv),
301        }
302    }
303}
304
305/// The IP sock options used by TCP.
306#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
307pub struct TcpIpSockOptions {
308    /// Socket marks used for routing.
309    pub marks: Marks,
310}
311
312impl<I: Ip> RouteResolutionOptions<I> for TcpIpSockOptions {
313    fn marks(&self) -> &Marks {
314        &self.marks
315    }
316
317    fn transparent(&self) -> bool {
318        false
319    }
320}
321
322impl<I: IpExt> SendOptions<I> for TcpIpSockOptions {
323    fn hop_limit(&self, _destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
324        None
325    }
326
327    fn multicast_loop(&self) -> bool {
328        false
329    }
330
331    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
332        None
333    }
334
335    fn dscp_and_ecn(&self) -> DscpAndEcn {
336        DscpAndEcn::default()
337    }
338
339    fn mtu(&self) -> Mtu {
340        Mtu::no_limit()
341    }
342}
343
344/// TCP socket options.
345///
346/// This only stores options that are trivial to get and set.
347#[derive(Clone, Copy, Debug, PartialEq, Eq)]
348pub struct SocketOptions {
349    /// Socket options that control TCP keep-alive mechanism, see [`KeepAlive`].
350    pub keep_alive: KeepAlive,
351    /// Switch to turn nagle algorithm on/off.
352    pub nagle_enabled: bool,
353    /// The period of time after which the connection should be aborted if no
354    /// ACK is received.
355    pub user_timeout: Option<NonZeroDuration>,
356    /// Switch to turn delayed ACK on/off.
357    pub delayed_ack: bool,
358    /// The period of time after with a dangling FIN_WAIT2 state should be
359    /// reclaimed.
360    pub fin_wait2_timeout: Option<Duration>,
361    /// The maximum SYN retransmissions before aborting a connection.
362    pub max_syn_retries: NonZeroU8,
363    /// Ip socket options.
364    pub ip_options: TcpIpSockOptions,
365}
366
367impl Default for SocketOptions {
368    fn default() -> Self {
369        Self {
370            keep_alive: KeepAlive::default(),
371            // RFC 9293 Section 3.7.4:
372            //   A TCP implementation SHOULD implement the Nagle algorithm to
373            //   coalesce short segments
374            nagle_enabled: true,
375            user_timeout: None,
376            delayed_ack: true,
377            fin_wait2_timeout: Some(DEFAULT_FIN_WAIT2_TIMEOUT),
378            max_syn_retries: DEFAULT_MAX_SYN_RETRIES,
379            ip_options: TcpIpSockOptions::default(),
380        }
381    }
382}
383
384/// Options that are related to TCP keep-alive.
385#[derive(Clone, Copy, Debug, PartialEq, Eq)]
386pub struct KeepAlive {
387    /// The amount of time for an idle connection to wait before sending out
388    /// probes.
389    pub idle: NonZeroDuration,
390    /// Interval between consecutive probes.
391    pub interval: NonZeroDuration,
392    /// Maximum number of probes we send before considering the connection dead.
393    ///
394    /// `u8` is enough because if a connection doesn't hear back from the peer
395    /// after 256 probes, then chances are that the connection is already dead.
396    pub count: NonZeroU8,
397    /// Only send probes if keep-alive is enabled.
398    pub enabled: bool,
399}
400
401impl Default for KeepAlive {
402    fn default() -> Self {
403        // Default values inspired by Linux's TCP implementation:
404        // https://github.com/torvalds/linux/blob/0326074ff4652329f2a1a9c8685104576bd8d131/include/net/tcp.h#L155-L157
405        const DEFAULT_IDLE_DURATION: NonZeroDuration =
406            NonZeroDuration::from_secs(2 * 60 * 60).unwrap();
407        const DEFAULT_INTERVAL: NonZeroDuration = NonZeroDuration::from_secs(75).unwrap();
408        const DEFAULT_COUNT: NonZeroU8 = NonZeroU8::new(9).unwrap();
409
410        Self {
411            idle: DEFAULT_IDLE_DURATION,
412            interval: DEFAULT_INTERVAL,
413            count: DEFAULT_COUNT,
414            // Per RFC 9293(https://datatracker.ietf.org/doc/html/rfc9293#section-3.8.4):
415            //   ... they MUST default to off.
416            enabled: false,
417        }
418    }
419}