Skip to main content

netstack3_tcp/
base.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! The Transmission Control Protocol (TCP).
6
7use core::num::NonZeroU8;
8use core::time::Duration;
9
10use derivative::Derivative;
11use net_types::SpecifiedAddr;
12use net_types::ip::{GenericOverIp, Ip, Ipv4, Ipv6, Mtu};
13use netstack3_base::{
14    ChecksumOffloadResult, IcmpErrorCode, Icmpv4ErrorCode, Icmpv6ErrorCode, IpExt, Marks, Mms,
15    UnscaledWindowSize, WeakDeviceIdentifier, WindowSize,
16};
17use netstack3_ip::socket::{RouteResolutionOptions, SendOptions};
18use packet_formats::icmp::{
19    Icmpv4DestUnreachableCode, Icmpv4TimeExceededCode, Icmpv6DestUnreachableCode,
20};
21use packet_formats::ip::DscpAndEcn;
22use packet_formats::utils::NonZeroDuration;
23use rand::Rng;
24use thiserror::Error;
25
26use crate::internal::buffer::BufferLimits;
27use crate::internal::counters::{TcpCountersWithSocket, TcpCountersWithoutSocket};
28use crate::internal::socket::generators::{IsnGenerator, TimestampOffsetGenerator};
29use crate::internal::socket::{DualStackIpExt, Sockets, TcpBindingsTypes, WeakTcpSocketId};
30use crate::internal::state::DEFAULT_MAX_SYN_RETRIES;
31
/// Default lifetime for an orphaned connection in FIN_WAIT2.
pub const DEFAULT_FIN_WAIT2_TIMEOUT: Duration = Duration::from_secs(60);
34
35/// Errors surfaced to the user.
36#[derive(Copy, Clone, Debug, PartialEq, Eq, Error)]
37pub enum ConnectionError {
38    /// The connection was refused, RST segment received while in SYN_SENT state.
39    #[error("connection refused (RST segment received while in SYN_SENT state")]
40    ConnectionRefused,
41    /// The connection was reset because of a RST segment.
42    #[error("connection was reset because of a RST segment")]
43    ConnectionReset,
44    /// The connection was closed because the network is unreachable.
45    #[error("connection was closed because the network is unreachable")]
46    NetworkUnreachable,
47    /// The connection was closed because the host is unreachable.
48    #[error("connection was closed because the host is unreachable")]
49    HostUnreachable,
50    /// The connection was closed because the protocol is unreachable.
51    #[error("connection was closed because the protocol is unreachable")]
52    ProtocolUnreachable,
53    /// The connection was closed because the port is unreachable.
54    #[error("connection was closed because the port is unreachable")]
55    PortUnreachable,
56    /// The connection was closed because the host is down.
57    #[error("connection was closed because the host is down")]
58    DestinationHostDown,
59    /// The connection was closed because the source route failed.
60    #[error("connection was closed because the source route failed")]
61    SourceRouteFailed,
62    /// The connection was closed because the source host is isolated.
63    #[error("connection was closed because the source host is isolated")]
64    SourceHostIsolated,
65    /// The connection was closed because of a time out.
66    #[error("connection was closed because of a time out")]
67    TimedOut,
68    /// The connection was closed because of a lack of required permissions.
69    #[error("connection was closed because of a lack of required permissions")]
70    PermissionDenied,
71    /// The connection was closed because there was a protocol error.
72    #[error("connection was closed because there was a protocol error")]
73    ProtocolError,
74    /// The connection was aborted by the system.
75    #[error("connection was aborted by the system")]
76    Aborted,
77}
78
79/// The meaning of a particular ICMP error to a TCP socket.
80pub(crate) enum IcmpErrorResult {
81    /// There has been an error on the connection that must be handled.
82    ConnectionError(ConnectionError),
83    /// The PMTU used by the connection has been updated.
84    PmtuUpdate(Mms),
85}
86
87impl IcmpErrorResult {
88    // Notes: the following mappings are guided by the packetimpact test here:
89    // https://cs.opensource.google/gvisor/gvisor/+/master:test/packetimpact/tests/tcp_network_unreachable_test.go;drc=611e6e1247a0691f5fd198f411c68b3bc79d90af
90    pub(crate) fn try_from_icmp_error(err: IcmpErrorCode) -> Option<IcmpErrorResult> {
91        match err {
92            IcmpErrorCode::V4(Icmpv4ErrorCode::DestUnreachable(code, message)) => {
93                match code {
94                    Icmpv4DestUnreachableCode::DestNetworkUnreachable => {
95                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
96                    }
97                    Icmpv4DestUnreachableCode::DestHostUnreachable => {
98                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
99                    }
100                    Icmpv4DestUnreachableCode::DestProtocolUnreachable => {
101                        Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolUnreachable))
102                    }
103                    Icmpv4DestUnreachableCode::DestPortUnreachable => {
104                        Some(IcmpErrorResult::ConnectionError(ConnectionError::PortUnreachable))
105                    }
106                    Icmpv4DestUnreachableCode::SourceRouteFailed => {
107                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceRouteFailed))
108                    }
109                    Icmpv4DestUnreachableCode::DestNetworkUnknown => {
110                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
111                    }
112                    Icmpv4DestUnreachableCode::DestHostUnknown => {
113                        Some(IcmpErrorResult::ConnectionError(ConnectionError::DestinationHostDown))
114                    }
115                    Icmpv4DestUnreachableCode::SourceHostIsolated => {
116                        Some(IcmpErrorResult::ConnectionError(ConnectionError::SourceHostIsolated))
117                    }
118                    Icmpv4DestUnreachableCode::NetworkAdministrativelyProhibited => {
119                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
120                    }
121                    Icmpv4DestUnreachableCode::HostAdministrativelyProhibited => {
122                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
123                    }
124                    Icmpv4DestUnreachableCode::NetworkUnreachableForToS => {
125                        Some(IcmpErrorResult::ConnectionError(ConnectionError::NetworkUnreachable))
126                    }
127                    Icmpv4DestUnreachableCode::HostUnreachableForToS => {
128                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
129                    }
130                    Icmpv4DestUnreachableCode::CommAdministrativelyProhibited => {
131                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
132                    }
133                    Icmpv4DestUnreachableCode::HostPrecedenceViolation => {
134                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
135                    }
136                    Icmpv4DestUnreachableCode::PrecedenceCutoffInEffect => {
137                        Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
138                    }
139                    Icmpv4DestUnreachableCode::FragmentationRequired => {
140                        let mtu = message.next_hop_mtu().expect("stack should always fill in MTU");
141                        let mtu = Mtu::new(mtu.get().into());
142                        let mms = Mms::from_mtu::<Ipv4>(mtu, 0 /* no IP options used */)?;
143                        Some(IcmpErrorResult::PmtuUpdate(mms))
144                    }
145                }
146            }
147            IcmpErrorCode::V4(Icmpv4ErrorCode::ParameterProblem(_)) => {
148                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
149            }
150            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
151                Icmpv4TimeExceededCode::TtlExpired,
152            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable)),
153            IcmpErrorCode::V4(Icmpv4ErrorCode::TimeExceeded(
154                Icmpv4TimeExceededCode::FragmentReassemblyTimeExceeded,
155            )) => Some(IcmpErrorResult::ConnectionError(ConnectionError::TimedOut)),
156            IcmpErrorCode::V4(Icmpv4ErrorCode::Redirect(_)) => None,
157            IcmpErrorCode::V6(Icmpv6ErrorCode::DestUnreachable(code)) => {
158                Some(IcmpErrorResult::ConnectionError(match code {
159                    Icmpv6DestUnreachableCode::NoRoute => ConnectionError::NetworkUnreachable,
160                    Icmpv6DestUnreachableCode::CommAdministrativelyProhibited => {
161                        ConnectionError::PermissionDenied
162                    }
163                    Icmpv6DestUnreachableCode::BeyondScope => ConnectionError::HostUnreachable,
164                    Icmpv6DestUnreachableCode::AddrUnreachable => ConnectionError::HostUnreachable,
165                    Icmpv6DestUnreachableCode::PortUnreachable => ConnectionError::PortUnreachable,
166                    Icmpv6DestUnreachableCode::SrcAddrFailedPolicy => {
167                        ConnectionError::PermissionDenied
168                    }
169                    Icmpv6DestUnreachableCode::RejectRoute => ConnectionError::PermissionDenied,
170                }))
171            }
172            IcmpErrorCode::V6(Icmpv6ErrorCode::ParameterProblem(_)) => {
173                Some(IcmpErrorResult::ConnectionError(ConnectionError::ProtocolError))
174            }
175            IcmpErrorCode::V6(Icmpv6ErrorCode::TimeExceeded(_)) => {
176                Some(IcmpErrorResult::ConnectionError(ConnectionError::HostUnreachable))
177            }
178            IcmpErrorCode::V6(Icmpv6ErrorCode::PacketTooBig(mtu)) => {
179                let mms = Mms::from_mtu::<Ipv6>(mtu, 0 /* no IP options used */)?;
180                Some(IcmpErrorResult::PmtuUpdate(mms))
181            }
182        }
183    }
184}
185
186/// Metadata associated with an outgoing TCP packet.
187#[derive(Derivative, GenericOverIp)]
188#[generic_over_ip(I, Ip)]
189#[derivative(Debug(bound = ""))]
190#[cfg_attr(any(test, feature = "testutils"), derivative(PartialEq(bound = "")))]
191pub struct TcpSocketTxMetadata<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> {
192    /// The socket from which the packet originates.
193    socket: WeakTcpSocketId<I, D, BT>,
194    checksum_offload_result: Option<ChecksumOffloadResult>,
195}
196
197impl<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes>
198    TcpSocketTxMetadata<I, D, BT>
199{
200    /// Creates a new `TcpSocketTxMetadata`.
201    pub(crate) fn new(socket: WeakTcpSocketId<I, D, BT>) -> Self {
202        Self { socket, checksum_offload_result: None }
203    }
204
205    /// Gets the socket from which the packet originates.
206    pub fn socket(&self) -> &WeakTcpSocketId<I, D, BT> {
207        &self.socket
208    }
209
210    /// Returns the checksum offload result.
211    pub fn checksum_offload_result(&self) -> Option<ChecksumOffloadResult> {
212        self.checksum_offload_result.clone()
213    }
214
215    /// Sets the checksum offload result.
216    pub fn set_checksum_offload_result(&mut self, result: Option<ChecksumOffloadResult>) {
217        self.checksum_offload_result = result;
218    }
219}
220
221/// Stack wide state supporting TCP.
222#[derive(GenericOverIp)]
223#[generic_over_ip(I, Ip)]
224pub struct TcpState<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> {
225    /// The initial sequence number generator.
226    pub isn_generator: IsnGenerator<BT::Instant>,
227    /// The timestamp offset generator.
228    pub timestamp_offset_generator: TimestampOffsetGenerator<BT::Instant>,
229    /// TCP sockets state.
230    pub sockets: Sockets<I, D, BT>,
231    /// TCP counters that cannot be attributed to a specific socket.
232    pub counters_without_socket: TcpCountersWithoutSocket<I>,
233    /// TCP counters that can be attributed to a specific socket.
234    pub counters_with_socket: TcpCountersWithSocket<I>,
235}
236
237impl<I: DualStackIpExt, D: WeakDeviceIdentifier, BT: TcpBindingsTypes> TcpState<I, D, BT> {
238    /// Creates a new TCP stack state.
239    pub fn new(now: BT::Instant, rng: &mut impl Rng) -> Self {
240        Self {
241            isn_generator: IsnGenerator::new(now, rng),
242            timestamp_offset_generator: TimestampOffsetGenerator::new(now, rng),
243            sockets: Sockets::new(),
244            counters_without_socket: Default::default(),
245            counters_with_socket: Default::default(),
246        }
247    }
248}
249
250/// Named tuple for holding sizes of buffers for a socket.
251#[derive(Copy, Clone, Debug)]
252#[cfg_attr(test, derive(Eq, PartialEq))]
253pub struct BufferSizes {
254    /// The size of the send buffer.
255    pub send: usize,
256    /// The size of the receive buffer.
257    pub receive: usize,
258}
259/// Sensible defaults only for testing.
260#[cfg(any(test, feature = "testutils"))]
261impl Default for BufferSizes {
262    fn default() -> Self {
263        BufferSizes { send: WindowSize::DEFAULT.into(), receive: WindowSize::DEFAULT.into() }
264    }
265}
266
267impl BufferSizes {
268    pub(crate) fn rcv_limits(&self) -> BufferLimits {
269        let Self { send: _, receive } = self;
270        BufferLimits { capacity: *receive, len: 0 }
271    }
272
273    pub(crate) fn rwnd(&self) -> WindowSize {
274        let Self { send: _, receive } = *self;
275        WindowSize::new(receive).unwrap_or(WindowSize::MAX)
276    }
277
278    pub(crate) fn rwnd_unscaled(&self) -> UnscaledWindowSize {
279        let Self { send: _, receive } = *self;
280        UnscaledWindowSize::from(u16::try_from(receive).unwrap_or(u16::MAX))
281    }
282}
283
284/// A mutable reference to buffer configuration.
285pub(crate) enum BuffersRefMut<'a, R, S> {
286    /// All buffers are dropped.
287    NoBuffers,
288    /// Buffer sizes are configured but not instantiated yet.
289    Sizes(&'a mut BufferSizes),
290    /// Buffers are instantiated and mutable references are provided.
291    Both { send: &'a mut S, recv: &'a mut R },
292    /// Only the send buffer is still instantiated, which happens in Closing
293    /// states.
294    SendOnly(&'a mut S),
295    /// Only the receive buffer is still instantiated, which happens in Finwait
296    /// states.
297    RecvOnly(&'a mut R),
298}
299
300impl<'a, R, S> BuffersRefMut<'a, R, S> {
301    pub(crate) fn into_send_buffer(self) -> Option<&'a mut S> {
302        match self {
303            Self::NoBuffers | Self::Sizes(_) | Self::RecvOnly(_) => None,
304            Self::Both { send, recv: _ } | Self::SendOnly(send) => Some(send),
305        }
306    }
307
308    pub(crate) fn into_receive_buffer(self) -> Option<&'a mut R> {
309        match self {
310            Self::NoBuffers | Self::Sizes(_) | Self::SendOnly(_) => None,
311            Self::Both { send: _, recv } | Self::RecvOnly(recv) => Some(recv),
312        }
313    }
314}
315
316/// The IP sock options used by TCP.
317#[derive(Clone, Copy, Default, Debug, PartialEq, Eq)]
318pub struct TcpIpSockOptions {
319    /// Socket marks used for routing.
320    pub marks: Marks,
321}
322
323impl<I: Ip> RouteResolutionOptions<I> for TcpIpSockOptions {
324    fn marks(&self) -> &Marks {
325        &self.marks
326    }
327
328    fn transparent(&self) -> bool {
329        false
330    }
331}
332
333impl<I: IpExt> SendOptions<I> for TcpIpSockOptions {
334    fn hop_limit(&self, _destination: &SpecifiedAddr<I::Addr>) -> Option<NonZeroU8> {
335        None
336    }
337
338    fn multicast_loop(&self) -> bool {
339        false
340    }
341
342    fn allow_broadcast(&self) -> Option<I::BroadcastMarker> {
343        None
344    }
345
346    fn dscp_and_ecn(&self) -> DscpAndEcn {
347        DscpAndEcn::default()
348    }
349
350    fn mtu(&self) -> Mtu {
351        Mtu::no_limit()
352    }
353}
354
355/// TCP socket options.
356///
357/// This only stores options that are trivial to get and set.
358#[derive(Clone, Copy, Debug, PartialEq, Eq)]
359pub struct SocketOptions {
360    /// Socket options that control TCP keep-alive mechanism, see [`KeepAlive`].
361    pub keep_alive: KeepAlive,
362    /// Switch to turn nagle algorithm on/off.
363    pub nagle_enabled: bool,
364    /// The period of time after which the connection should be aborted if no
365    /// ACK is received.
366    pub user_timeout: Option<NonZeroDuration>,
367    /// Switch to turn delayed ACK on/off.
368    pub delayed_ack: bool,
369    /// The period of time after with a dangling FIN_WAIT2 state should be
370    /// reclaimed.
371    pub fin_wait2_timeout: Option<Duration>,
372    /// The maximum SYN retransmissions before aborting a connection.
373    pub max_syn_retries: NonZeroU8,
374    /// Ip socket options.
375    pub ip_options: TcpIpSockOptions,
376}
377
378impl Default for SocketOptions {
379    fn default() -> Self {
380        Self {
381            keep_alive: KeepAlive::default(),
382            // RFC 9293 Section 3.7.4:
383            //   A TCP implementation SHOULD implement the Nagle algorithm to
384            //   coalesce short segments
385            nagle_enabled: true,
386            user_timeout: None,
387            delayed_ack: true,
388            fin_wait2_timeout: Some(DEFAULT_FIN_WAIT2_TIMEOUT),
389            max_syn_retries: DEFAULT_MAX_SYN_RETRIES,
390            ip_options: TcpIpSockOptions::default(),
391        }
392    }
393}
394
395/// Options that are related to TCP keep-alive.
396#[derive(Clone, Copy, Debug, PartialEq, Eq)]
397pub struct KeepAlive {
398    /// The amount of time for an idle connection to wait before sending out
399    /// probes.
400    pub idle: NonZeroDuration,
401    /// Interval between consecutive probes.
402    pub interval: NonZeroDuration,
403    /// Maximum number of probes we send before considering the connection dead.
404    ///
405    /// `u8` is enough because if a connection doesn't hear back from the peer
406    /// after 256 probes, then chances are that the connection is already dead.
407    pub count: NonZeroU8,
408    /// Only send probes if keep-alive is enabled.
409    pub enabled: bool,
410}
411
412impl Default for KeepAlive {
413    fn default() -> Self {
414        // Default values inspired by Linux's TCP implementation:
415        // https://github.com/torvalds/linux/blob/0326074ff4652329f2a1a9c8685104576bd8d131/include/net/tcp.h#L155-L157
416        const DEFAULT_IDLE_DURATION: NonZeroDuration =
417            NonZeroDuration::from_secs(2 * 60 * 60).unwrap();
418        const DEFAULT_INTERVAL: NonZeroDuration = NonZeroDuration::from_secs(75).unwrap();
419        const DEFAULT_COUNT: NonZeroU8 = NonZeroU8::new(9).unwrap();
420
421        Self {
422            idle: DEFAULT_IDLE_DURATION,
423            interval: DEFAULT_INTERVAL,
424            count: DEFAULT_COUNT,
425            // Per RFC 9293(https://datatracker.ietf.org/doc/html/rfc9293#section-3.8.4):
426            //   ... they MUST default to off.
427            enabled: false,
428        }
429    }
430}