netstack3_tcp/
sack_scoreboard.rs

1// Copyright 2025 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Implements the selective acknowledgement scoreboard data structure as
6//! described in [RFC 6675].
7//!
8//! [RFC 6675]: https://datatracker.ietf.org/doc/html/rfc6675
9
10use netstack3_base::{Mss, SackBlocks, SeqNum};
11
12use crate::internal::congestion::DUP_ACK_THRESHOLD;
13use crate::internal::seq_ranges::{SeqRange, SeqRanges};
14
15#[derive(Debug, Default)]
16#[cfg_attr(test, derive(PartialEq, Eq))]
17pub(crate) struct SackScoreboard {
18    /// The ranges for which we've received selective acknowledgements.
19    ///
20    /// Each range is tagged with a boolean that caches the result of [IsLost] for
21    /// the un-sacked range before it.
22    ///
23    /// [IsLost]: https://datatracker.ietf.org/doc/html/rfc6675#section-4
24    acked_ranges: SeqRanges<bool>,
25    /// Stores the number of bytes assumed to be in transit according to the definition of Pipe
26    /// defined in [RFC 6675 section 4].
27    ///
28    /// [RFC 6675 section 4]: https://datatracker.ietf.org/doc/html/rfc6675#section-4
29    pipe: u32,
30}
31
32impl SackScoreboard {
33    /// Processes an incoming ACK and updates the scoreboard.
34    ///
35    /// - `ack` is the cumulative acknowledgement in the received segment.
36    /// - `snd_nxt` is the value of SND.NXT or the highest sequence number that
37    ///   has been sent out.
38    /// - `high_rxt` is the equivalent to the HighRxt value defined in [RFC 6675
39    ///   section 2], but we define it as the next available sequence number for
40    ///   retransmission (off by one from the RFC definition). It is expected to
41    ///   only be available if loss recovery is initiated.
42    /// - `sack_blocks` are the selective ack blocks informed by the peer in the
43    ///   segment.
44    /// - `smss` is the send size mss.
45    ///
46    /// Any blocks referencing data before `ack` or after `snd_nxt` are
47    /// *ignored* as bad data. We chose to ignore any blocks after `snd_nxt`
48    /// here so the SACK recovery algorithm works as described in [RFC 6675].
49    /// Note that [RFC 6675 section 5.1] calls out that the algorithm described
50    /// in the RFC is not suited to deal with a retransmit timeout, so to avoid
51    /// an improper pipe calculation we ignore any blocks past SND.NXT.
52    ///
53    /// Returns `true` if this segment should be considered a duplicate as per
54    /// the definition in [RFC 6675 section 2].
55    ///
56    /// [RFC 6675]: https://datatracker.ietf.org/doc/html/rfc6675
57    /// [RFC 6675 section 5.1]:
58    ///     https://datatracker.ietf.org/doc/html/rfc6675#section-5.1
59    /// [RFC 6675 Section 2]:
60    ///     https://datatracker.ietf.org/doc/html/rfc6675#section-2
61    pub(crate) fn process_ack(
62        &mut self,
63        ack: SeqNum,
64        snd_nxt: SeqNum,
65        high_rxt: Option<SeqNum>,
66        sack_blocks: &SackBlocks,
67        smss: Mss,
68    ) -> bool {
69        let Self { acked_ranges, pipe } = self;
70
71        // If we receive an ACK that is after SND.NXT, this must be a very
72        // delayed acknowledgement post a retransmission event. The state
73        // machine will eventually move SND.NXT to account for this, but this
74        // violates the scoreboard's expectations.
75        //
76        // Because we need to ensure the pipe is updated accordingly and any
77        // previous SACK ranges are cleared, process this as if it was a full
78        // cumulative ack.
79        let snd_nxt = snd_nxt.latest(ack);
80
81        // Fast exit if there's nothing interesting to do.
82        if acked_ranges.is_empty() && sack_blocks.is_empty() {
83            // Ack must not be after snd_nxt.
84            *pipe = u32::try_from(snd_nxt - ack).unwrap();
85            return false;
86        }
87
88        // Update the scoreboard with the cumulative acknowledgement.
89        //
90        // A note here: we discard all the sacked ranges that start at or after
91        // the acknowledged number. If there is intersection we must assume that
92        // the peer reneged the block.
93        acked_ranges.discard_starting_at_or_before(ack);
94
95        // Insert each valid block in acked ranges.
96        let new = sack_blocks.iter_skip_invalid().fold(false, |new, sack_block| {
97            // NB: SackBlock type here ensures this is a valid non-empty range.
98            let (start, end) = sack_block.into_parts();
99            if start.before_or_eq(ack) || end.after(snd_nxt) {
100                // Ignore block that is not in the expected range [ack, snd_nxt].
101                return new;
102            }
103            // Insert with an arbitrary metadata value, we'll update it with the
104            // IsLost value afterwards.
105            let is_lost = false;
106            let changed = acked_ranges.insert(start..end, is_lost);
107
108            new || changed
109        });
110
111        let sacked_byte_threshold = sacked_bytes_threshold(smss);
112        let high_rxt = high_rxt.unwrap_or(ack);
113        let get_pipe_increment = |hole: SeqRange<bool>| {
114            // From RFC 6675, where S1 is a single
115            // sequence number:
116            //
117            // (a) If IsLost (S1) returns false: Pipe is incremented by 1
118            // octet.
119            // (b) If S1 <= HighRxt: Pipe is incremented by 1 octet.
120            let mut pipe = 0u32;
121            let is_lost = *(hole.meta());
122            if !is_lost {
123                pipe = pipe.saturating_add(hole.len());
124            }
125
126            if let Some(hole) = hole.cap_right(high_rxt) {
127                pipe = pipe.saturating_add(hole.len());
128            }
129            pipe
130        };
131
132        // Recalculate pipe and update IsLost in the collection.
133        //
134        // We iterate acked_ranges in reverse order so we can fold over the
135        // total number of ranges and SACKed bytes that come *after* the range we
136        // operate on at each point.
137        let (new_pipe, _sacked, _count, later_start, later_is_lost) =
138            acked_ranges.iter_mut().rev().fold(
139                (0u32, 0, 0, snd_nxt, false),
140                |(pipe, sacked, count, later_start, later_is_lost), acked_range| {
141                    // IsLost is kept for the hole to the left of this block. So the
142                    // block we're currently iterating on counts as part of is_lost,
143                    // as well as the the total number of sacked bytes.
144                    let count = count + 1;
145                    let sacked = sacked + acked_range.len();
146                    // From RFC 6675, IsLost is defined as:
147                    //
148                    //  The routine returns true when either DupThresh discontiguous
149                    //  SACKed sequences have arrived above 'SeqNum' or more than
150                    //  (DupThresh - 1) * SMSS bytes with sequence numbers greater
151                    //  than 'SeqNum' have been SACKed
152                    let is_lost = count >= DUP_ACK_THRESHOLD || sacked > sacked_byte_threshold;
153                    acked_range.set_meta(is_lost);
154
155                    // Increment pipe. From RFC 6675:
156                    //
157                    //  After initializing pipe to zero, the following steps are
158                    //  taken for each octet 'S1' in the sequence space between
159                    //  HighACK and HighData that has not been SACKed[...]
160                    //
161                    // So pipe is only calculated for the gaps between the acked
162                    // ranges, i.e., from the current end to the start of the
163                    // later block.
164                    let pipe = if let Some(hole) =
165                        SeqRange::new(acked_range.end()..later_start, later_is_lost)
166                    {
167                        pipe.saturating_add(get_pipe_increment(hole))
168                    } else {
169                        // An empty hole can only happen in the first iteration,
170                        // when the right edge is SND.NXT.
171                        assert_eq!(later_start, snd_nxt);
172                        pipe
173                    };
174
175                    (pipe, sacked, count, acked_range.start(), is_lost)
176                },
177            );
178        // Add the final hole between cumulative ack and first sack block
179        // and finalize the pipe value.
180        *pipe = match SeqRange::new(ack..later_start, later_is_lost) {
181            Some(first_hole) => new_pipe.saturating_add(get_pipe_increment(first_hole)),
182            None => {
183                // An empty first hole can only happen if we don't have any
184                // sack blocks, and ACK is equal to SND.NXT.
185                assert_eq!(ack, snd_nxt);
186                new_pipe
187            }
188        };
189
190        new
191    }
192
193    pub(crate) fn pipe(&self) -> u32 {
194        self.pipe
195    }
196
197    /// Increments the pipe value kept by the scoreboard by `value`.
198    ///
199    /// Note that [`SackScoreboard::process_ack`] always updates the pipe value
200    /// based on the scoreboard. Whenever a segment is sent, we must increment
201    /// the pipe value so the estimate of total bytes in transit is always up to
202    /// date until the next ACK arrives.
203    pub(crate) fn increment_pipe(&mut self, value: u32) {
204        self.pipe = self.pipe.saturating_add(value);
205    }
206
207    pub(crate) fn on_retransmission_timeout(&mut self) {
208        let Self { acked_ranges, pipe } = self;
209        // RFC 2018 says that we MUST clear all SACK information on a
210        // retransmission timeout.
211        //
212        // RFC 6675 changes that to a SHOULD keep SACK information on a
213        // retransmission timeout, but doesn't quite specify how to deal with
214        // the SACKed ranges post the timeout. Notably, the pipe estimate is
215        // very clearly off post an RTO.
216        //
217        // Given that, the conservative thing to do here is to clear the
218        // scoreboard and reset the pipe so estimates can be based again on the
219        // rewound value of SND.NXT and the eventually retransmitted SACK blocks
220        // that we may get post the RTO event. Note that `process_ack` ignores
221        // any SACK blocks post SND.NXT in order to maintain the pipe variable
222        // sensible as well.
223        //
224        // See:
225        // - https://datatracker.ietf.org/doc/html/rfc2018
226        // - https://datatracker.ietf.org/doc/html/rfc6675
227        *pipe = 0;
228        acked_ranges.clear();
229    }
230
231    pub(crate) fn on_mss_update(
232        &mut self,
233        snd_una: SeqNum,
234        snd_nxt: SeqNum,
235        high_rxt: Option<SeqNum>,
236        mss: Mss,
237    ) {
238        // When MSS updates, we must recalculate so we know what frames are
239        // considered lost or not.
240        //
241        // Notably, this will also update the pipe variable so we have a new
242        // estimate of bytes in flight with a new value for snd_nxt.
243        //
244        // Given we don't detect renegging, this is equivalent to processing an
245        // ACK at the given parameters and without any SACK blocks.
246        let _: bool = self.process_ack(snd_una, snd_nxt, high_rxt, &SackBlocks::EMPTY, mss);
247    }
248}
249
250/// Returns the threshold over which a sequence number is considered lost per
251/// the definition of `IsLost` in [RFC 6675 section 4].
252///
253/// [RFC 6675 section 4]:
254///     https://datatracker.ietf.org/doc/html/rfc6675#section-4
255fn sacked_bytes_threshold(mss: Mss) -> u32 {
256    u32::from(DUP_ACK_THRESHOLD - 1) * u32::from(mss)
257}
258
259#[cfg(test)]
260mod test {
261    use core::num::NonZeroU16;
262    use core::ops::Range;
263
264    use super::*;
265    use crate::internal::seq_ranges::SeqRange;
266    use crate::internal::testutil;
267
268    const TEST_MSS: Mss = Mss(NonZeroU16::new(50).unwrap());
269
270    fn seq_ranges(iter: impl IntoIterator<Item = (Range<u32>, bool)>) -> SeqRanges<bool> {
271        iter.into_iter()
272            .map(|(Range { start, end }, is_lost)| {
273                SeqRange::new(SeqNum::new(start)..SeqNum::new(end), is_lost).unwrap()
274            })
275            .collect()
276    }
277
278    impl SackScoreboard {
279        fn sacked_bytes(&self) -> u32 {
280            self.acked_ranges.iter().map(|seq_range| seq_range.len()).sum()
281        }
282    }
283
284    #[test]
285    fn process_ack_noop_if_empty() {
286        let mut sb = SackScoreboard::default();
287        let ack = SeqNum::new(1);
288        let snd_nxt = SeqNum::new(100);
289        let high_rxt = None;
290        assert!(!sb.process_ack(ack, snd_nxt, high_rxt, &SackBlocks::default(), TEST_MSS));
291        assert!(sb.acked_ranges.is_empty());
292        assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap());
293    }
294
295    #[test]
296    fn process_ack_ignores_bad_blocks() {
297        let mut sb = SackScoreboard::default();
298        let ack = SeqNum::new(5);
299        let snd_nxt = SeqNum::new(100);
300        let high_rxt = None;
301        // Ignores everything that doesn't match the cumulative ack.
302        assert!(!sb.process_ack(
303            ack,
304            snd_nxt,
305            high_rxt,
306            &testutil::sack_blocks([0..1, 4..6, 5..10]),
307            TEST_MSS
308        ));
309        assert!(sb.acked_ranges.is_empty());
310
311        // Ignores everything past snd_nxt.
312        assert!(!sb.process_ack(
313            ack,
314            snd_nxt,
315            high_rxt,
316            &testutil::sack_blocks([100..200, 50..150]),
317            TEST_MSS
318        ));
319        assert!(sb.acked_ranges.is_empty());
320    }
321
322    #[test]
323    fn process_ack_cumulative_ack() {
324        let mut sb = SackScoreboard::default();
325        let ack = SeqNum::new(5);
326        let snd_nxt = SeqNum::new(100);
327        let high_rxt = None;
328        let blocks = testutil::sack_blocks([20..30]);
329        assert!(sb.process_ack(ack, snd_nxt, high_rxt, &blocks, TEST_MSS));
330        let expect_ranges = seq_ranges([(20..30, false)]);
331        assert_eq!(sb.acked_ranges, expect_ranges);
332        assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap() - sb.sacked_bytes());
333
334        let ack = SeqNum::new(10);
335        assert!(!sb.process_ack(ack, snd_nxt, high_rxt, &blocks, TEST_MSS));
336        assert_eq!(sb.acked_ranges, expect_ranges);
337        assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap() - sb.sacked_bytes());
338    }
339
340    #[test]
341    fn process_ack_is_lost_dup_thresh() {
342        let mut sb = SackScoreboard::default();
343        let ack = SeqNum::new(5);
344        let snd_nxt = SeqNum::new(100);
345        let high_rxt = None;
346
347        let block1 = 20..30;
348        let block2 = 35..40;
349        let block3 = 45..50;
350
351        assert!(sb.process_ack(
352            ack,
353            snd_nxt,
354            high_rxt,
355            &testutil::sack_blocks([block1.clone(), block2.clone(), block3.clone()]),
356            TEST_MSS
357        ));
358        assert_eq!(
359            sb.acked_ranges,
360            seq_ranges([(block1.clone(), true), (block2, false), (block3, false)])
361        );
362        assert_eq!(
363            sb.pipe,
364            u32::try_from(snd_nxt - ack).unwrap()
365                - sb.sacked_bytes()
366                - (block1.start - u32::from(ack))
367        );
368    }
369
370    #[test]
371    fn process_ack_pipe_rule_a() {
372        let mut sb = SackScoreboard::default();
373        let ack = SeqNum::new(5);
374        let snd_nxt = SeqNum::new(500);
375        let high_rxt = None;
376        let small_block = 20..30;
377        let large_block_start = 35;
378        let large_block = large_block_start..(large_block_start + sacked_bytes_threshold(TEST_MSS));
379
380        assert!(sb.process_ack(
381            ack,
382            snd_nxt,
383            high_rxt,
384            &testutil::sack_blocks([small_block.clone(), large_block.clone()]),
385            TEST_MSS
386        ));
387        // Large block is exactly at the limit of the hole to its left being
388        // considered lost as well.
389        assert_eq!(
390            sb.acked_ranges,
391            seq_ranges([(small_block.clone(), true), (large_block.clone(), false)])
392        );
393        assert_eq!(
394            sb.pipe,
395            u32::try_from(snd_nxt - ack).unwrap()
396                - sb.sacked_bytes()
397                - (small_block.start - u32::from(ack))
398        );
399
400        // Now increase the large block by one.
401        let large_block = large_block.start..(large_block.end + 1);
402        assert!(sb.process_ack(
403            ack,
404            snd_nxt,
405            high_rxt,
406            &testutil::sack_blocks([small_block.clone(), large_block.clone()]),
407            TEST_MSS
408        ));
409        // Now the hole to the left of large block is also considered lost.
410        assert_eq!(
411            sb.acked_ranges,
412            seq_ranges([(small_block.clone(), true), (large_block.clone(), true)])
413        );
414        assert_eq!(
415            sb.pipe,
416            u32::try_from(snd_nxt - ack).unwrap()
417                - sb.sacked_bytes()
418                - (small_block.start - u32::from(ack))
419                - (large_block.start - small_block.end)
420        );
421    }
422
423    #[test]
424    fn process_ack_pipe_rule_b() {
425        let ack = SeqNum::new(5);
426        let snd_nxt = SeqNum::new(500);
427        let first_block = 20..30;
428        let second_block = 40..50;
429
430        let blocks = testutil::sack_blocks([first_block.clone(), second_block.clone()]);
431
432        // Extract the baseline pipe that if we receive an ACK with the
433        // parameters above but without a HighRxt value.
434        let baseline = {
435            let mut sb = SackScoreboard::default();
436            assert!(sb.process_ack(ack, snd_nxt, None, &blocks, TEST_MSS));
437            sb.pipe
438        };
439
440        // Drive HighRxt across the entire possible sequence number range that
441        // we expect to see it and check the pipe value is changing accordingly.
442        let hole1 = (u32::from(ack)..first_block.start).map(|seq| (seq, true));
443        let block1 = first_block.clone().map(|seq| (seq, false));
444        let hole2 = (first_block.end..second_block.start).map(|seq| (seq, true));
445        let block2 = second_block.map(|seq| (seq, false));
446        // Shift expecting an increment one over because HighRxt starting at
447        // HighAck is expected to be a zero contribution. This aligns the
448        // off-by-one in the expectations.
449        let iter =
450            hole1.chain(block1).chain(hole2).chain(block2).scan(false, |prev, (seq, sacked)| {
451                let expect_increment = core::mem::replace(prev, sacked);
452                Some((seq, expect_increment))
453            });
454
455        let _: u32 = iter.fold(0u32, |total, (seq, expect_increment)| {
456            let total = total + u32::from(expect_increment);
457            let mut sb = SackScoreboard::default();
458            assert!(sb.process_ack(ack, snd_nxt, Some(SeqNum::new(seq)), &blocks, TEST_MSS));
459            assert_eq!(sb.pipe - baseline, total, "at {seq}");
460            total
461        });
462    }
463
464    #[test]
465    fn process_ack_simple() {
466        let mut sb = SackScoreboard::default();
467        let ack = SeqNum::new(5);
468        let snd_nxt = SeqNum::new(500);
469        let high_rxt = None;
470
471        // Receive a single cumulative ack up to ack.
472        assert!(!sb.process_ack(ack, snd_nxt, high_rxt, &SackBlocks::default(), TEST_MSS));
473        assert_eq!(sb.acked_ranges, SeqRanges::default(),);
474        assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap());
475
476        // Cumulative ack doesn't move, 1 SACK range signaling loss is received.
477        let sack1 = 10..(10 + sacked_bytes_threshold(TEST_MSS) + 1);
478        assert!(sb.process_ack(
479            ack,
480            snd_nxt,
481            high_rxt,
482            &testutil::sack_blocks([sack1.clone()]),
483            TEST_MSS
484        ));
485        assert_eq!(sb.acked_ranges, seq_ranges([(sack1.clone(), true)]),);
486        assert_eq!(
487            sb.pipe,
488            u32::try_from(snd_nxt - ack).unwrap()
489                - sb.sacked_bytes()
490                - (sack1.start - u32::from(ack))
491        );
492
493        // Another SACK range comes in, at the end of this transmission block.
494        let sack2 = (u32::from(snd_nxt) - u32::from(TEST_MSS))..u32::from(snd_nxt);
495        assert!(sb.process_ack(
496            ack,
497            snd_nxt,
498            high_rxt,
499            &testutil::sack_blocks([sack1.clone(), sack2.clone()]),
500            TEST_MSS
501        ));
502        assert_eq!(sb.acked_ranges, seq_ranges([(sack1.clone(), true), (sack2.clone(), false)]));
503        assert_eq!(
504            sb.pipe,
505            u32::try_from(snd_nxt - ack).unwrap()
506                - sb.sacked_bytes()
507                - (sack1.start - u32::from(ack))
508        );
509
510        // Cumulative acknowledge the first SACK range.
511        let ack = SeqNum::new(sack1.end);
512        assert!(!sb.process_ack(
513            ack,
514            snd_nxt,
515            high_rxt,
516            &testutil::sack_blocks([sack2.clone()]),
517            TEST_MSS
518        ));
519        assert_eq!(sb.acked_ranges, seq_ranges([(sack2, false)]));
520        assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap() - sb.sacked_bytes());
521
522        // Cumulative acknowledge all the transmission.
523        assert!(!sb.process_ack(snd_nxt, snd_nxt, high_rxt, &SackBlocks::default(), TEST_MSS));
524        assert_eq!(sb.acked_ranges, SeqRanges::default());
525        assert_eq!(sb.pipe, 0);
526    }
527
528    #[test]
529    fn ack_after_snd_nxt() {
530        let mut sb = SackScoreboard::default();
531        let ack = SeqNum::new(5);
532        let snd_nxt = SeqNum::new(500);
533        let high_rxt = None;
534        let block = 10..20;
535        assert!(sb.process_ack(
536            ack,
537            snd_nxt,
538            high_rxt,
539            &testutil::sack_blocks([block.clone()]),
540            TEST_MSS
541        ));
542        assert_eq!(sb.acked_ranges, seq_ranges([(block.clone(), false)]));
543        assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap() - sb.sacked_bytes());
544
545        // SND.NXT rewinds after RTO.
546        let snd_nxt = ack;
547        // But we receive an ACK post the kept block.
548        let ack = SeqNum::new(block.end);
549        assert!(ack.after(snd_nxt));
550        assert!(!sb.process_ack(ack, snd_nxt, high_rxt, &SackBlocks::default(), TEST_MSS));
551        assert_eq!(sb.acked_ranges, SeqRanges::default());
552        assert_eq!(sb.pipe, 0);
553    }
554}