netstack3_tcp/sack_scoreboard.rs
1// Copyright 2025 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Implements the selective acknowledgement scoreboard data structure as
6//! described in [RFC 6675].
7//!
8//! [RFC 6675]: https://datatracker.ietf.org/doc/html/rfc6675
9
10use netstack3_base::{Mss, SackBlocks, SeqNum};
11
12use crate::internal::congestion::DUP_ACK_THRESHOLD;
13use crate::internal::seq_ranges::{SeqRange, SeqRanges};
14
15#[derive(Debug, Default)]
16#[cfg_attr(test, derive(PartialEq, Eq))]
17pub(crate) struct SackScoreboard {
18 /// The ranges for which we've received selective acknowledgements.
19 ///
20 /// Each range is tagged with a boolean that caches the result of [IsLost] for
21 /// the un-sacked range before it.
22 ///
23 /// [IsLost]: https://datatracker.ietf.org/doc/html/rfc6675#section-4
24 acked_ranges: SeqRanges<bool>,
25 /// Stores the number of bytes assumed to be in transit according to the definition of Pipe
26 /// defined in [RFC 6675 section 4].
27 ///
28 /// [RFC 6675 section 4]: https://datatracker.ietf.org/doc/html/rfc6675#section-4
29 pipe: u32,
30}
31
32impl SackScoreboard {
33 /// Processes an incoming ACK and updates the scoreboard.
34 ///
35 /// - `ack` is the cumulative acknowledgement in the received segment.
36 /// - `snd_nxt` is the value of SND.NXT or the highest sequence number that
37 /// has been sent out.
38 /// - `high_rxt` is the equivalent to the HighRxt value defined in [RFC 6675
39 /// section 2], but we define it as the next available sequence number for
40 /// retransmission (off by one from the RFC definition). It is expected to
41 /// only be available if loss recovery is initiated.
42 /// - `sack_blocks` are the selective ack blocks informed by the peer in the
43 /// segment.
44 /// - `smss` is the send size mss.
45 ///
46 /// Any blocks referencing data before `ack` or after `snd_nxt` are
47 /// *ignored* as bad data. We chose to ignore any blocks after `snd_nxt`
48 /// here so the SACK recovery algorithm works as described in [RFC 6675].
49 /// Note that [RFC 6675 section 5.1] calls out that the algorithm described
50 /// in the RFC is not suited to deal with a retransmit timeout, so to avoid
51 /// an improper pipe calculation we ignore any blocks past SND.NXT.
52 ///
53 /// Returns `true` if this segment should be considered a duplicate as per
54 /// the definition in [RFC 6675 section 2].
55 ///
56 /// [RFC 6675]: https://datatracker.ietf.org/doc/html/rfc6675
57 /// [RFC 6675 section 5.1]:
58 /// https://datatracker.ietf.org/doc/html/rfc6675#section-5.1
59 /// [RFC 6675 Section 2]:
60 /// https://datatracker.ietf.org/doc/html/rfc6675#section-2
61 pub(crate) fn process_ack(
62 &mut self,
63 ack: SeqNum,
64 snd_nxt: SeqNum,
65 high_rxt: Option<SeqNum>,
66 sack_blocks: &SackBlocks,
67 smss: Mss,
68 ) -> bool {
69 let Self { acked_ranges, pipe } = self;
70
71 // If we receive an ACK that is after SND.NXT, this must be a very
72 // delayed acknowledgement post a retransmission event. The state
73 // machine will eventually move SND.NXT to account for this, but this
74 // violates the scoreboard's expectations.
75 //
76 // Because we need to ensure the pipe is updated accordingly and any
77 // previous SACK ranges are cleared, process this as if it was a full
78 // cumulative ack.
79 let snd_nxt = snd_nxt.latest(ack);
80
81 // Fast exit if there's nothing interesting to do.
82 if acked_ranges.is_empty() && sack_blocks.is_empty() {
83 // Ack must not be after snd_nxt.
84 *pipe = u32::try_from(snd_nxt - ack).unwrap();
85 return false;
86 }
87
88 // Update the scoreboard with the cumulative acknowledgement.
89 //
90 // A note here: we discard all the sacked ranges that start at or after
91 // the acknowledged number. If there is intersection we must assume that
92 // the peer reneged the block.
93 acked_ranges.discard_starting_at_or_before(ack);
94
95 // Insert each valid block in acked ranges.
96 let new = sack_blocks.iter_skip_invalid().fold(false, |new, sack_block| {
97 // NB: SackBlock type here ensures this is a valid non-empty range.
98 let (start, end) = sack_block.into_parts();
99 if start.before_or_eq(ack) || end.after(snd_nxt) {
100 // Ignore block that is not in the expected range [ack, snd_nxt].
101 return new;
102 }
103 // Insert with an arbitrary metadata value, we'll update it with the
104 // IsLost value afterwards.
105 let is_lost = false;
106 let changed = acked_ranges.insert(start..end, is_lost);
107
108 new || changed
109 });
110
111 let sacked_byte_threshold = sacked_bytes_threshold(smss);
112 let high_rxt = high_rxt.unwrap_or(ack);
113 let get_pipe_increment = |hole: SeqRange<bool>| {
114 // From RFC 6675, where S1 is a single
115 // sequence number:
116 //
117 // (a) If IsLost (S1) returns false: Pipe is incremented by 1
118 // octet.
119 // (b) If S1 <= HighRxt: Pipe is incremented by 1 octet.
120 let mut pipe = 0u32;
121 let is_lost = *(hole.meta());
122 if !is_lost {
123 pipe = pipe.saturating_add(hole.len());
124 }
125
126 if let Some(hole) = hole.cap_right(high_rxt) {
127 pipe = pipe.saturating_add(hole.len());
128 }
129 pipe
130 };
131
132 // Recalculate pipe and update IsLost in the collection.
133 //
134 // We iterate acked_ranges in reverse order so we can fold over the
135 // total number of ranges and SACKed bytes that come *after* the range we
136 // operate on at each point.
137 let (new_pipe, _sacked, _count, later_start, later_is_lost) =
138 acked_ranges.iter_mut().rev().fold(
139 (0u32, 0, 0, snd_nxt, false),
140 |(pipe, sacked, count, later_start, later_is_lost), acked_range| {
141 // IsLost is kept for the hole to the left of this block. So the
142 // block we're currently iterating on counts as part of is_lost,
143 // as well as the the total number of sacked bytes.
144 let count = count + 1;
145 let sacked = sacked + acked_range.len();
146 // From RFC 6675, IsLost is defined as:
147 //
148 // The routine returns true when either DupThresh discontiguous
149 // SACKed sequences have arrived above 'SeqNum' or more than
150 // (DupThresh - 1) * SMSS bytes with sequence numbers greater
151 // than 'SeqNum' have been SACKed
152 let is_lost = count >= DUP_ACK_THRESHOLD || sacked > sacked_byte_threshold;
153 acked_range.set_meta(is_lost);
154
155 // Increment pipe. From RFC 6675:
156 //
157 // After initializing pipe to zero, the following steps are
158 // taken for each octet 'S1' in the sequence space between
159 // HighACK and HighData that has not been SACKed[...]
160 //
161 // So pipe is only calculated for the gaps between the acked
162 // ranges, i.e., from the current end to the start of the
163 // later block.
164 let pipe = if let Some(hole) =
165 SeqRange::new(acked_range.end()..later_start, later_is_lost)
166 {
167 pipe.saturating_add(get_pipe_increment(hole))
168 } else {
169 // An empty hole can only happen in the first iteration,
170 // when the right edge is SND.NXT.
171 assert_eq!(later_start, snd_nxt);
172 pipe
173 };
174
175 (pipe, sacked, count, acked_range.start(), is_lost)
176 },
177 );
178 // Add the final hole between cumulative ack and first sack block
179 // and finalize the pipe value.
180 *pipe = match SeqRange::new(ack..later_start, later_is_lost) {
181 Some(first_hole) => new_pipe.saturating_add(get_pipe_increment(first_hole)),
182 None => {
183 // An empty first hole can only happen if we don't have any
184 // sack blocks, and ACK is equal to SND.NXT.
185 assert_eq!(ack, snd_nxt);
186 new_pipe
187 }
188 };
189
190 new
191 }
192
193 pub(crate) fn pipe(&self) -> u32 {
194 self.pipe
195 }
196
197 /// Increments the pipe value kept by the scoreboard by `value`.
198 ///
199 /// Note that [`SackScoreboard::process_ack`] always updates the pipe value
200 /// based on the scoreboard. Whenever a segment is sent, we must increment
201 /// the pipe value so the estimate of total bytes in transit is always up to
202 /// date until the next ACK arrives.
203 pub(crate) fn increment_pipe(&mut self, value: u32) {
204 self.pipe = self.pipe.saturating_add(value);
205 }
206
207 pub(crate) fn on_retransmission_timeout(&mut self) {
208 let Self { acked_ranges, pipe } = self;
209 // RFC 2018 says that we MUST clear all SACK information on a
210 // retransmission timeout.
211 //
212 // RFC 6675 changes that to a SHOULD keep SACK information on a
213 // retransmission timeout, but doesn't quite specify how to deal with
214 // the SACKed ranges post the timeout. Notably, the pipe estimate is
215 // very clearly off post an RTO.
216 //
217 // Given that, the conservative thing to do here is to clear the
218 // scoreboard and reset the pipe so estimates can be based again on the
219 // rewound value of SND.NXT and the eventually retransmitted SACK blocks
220 // that we may get post the RTO event. Note that `process_ack` ignores
221 // any SACK blocks post SND.NXT in order to maintain the pipe variable
222 // sensible as well.
223 //
224 // See:
225 // - https://datatracker.ietf.org/doc/html/rfc2018
226 // - https://datatracker.ietf.org/doc/html/rfc6675
227 *pipe = 0;
228 acked_ranges.clear();
229 }
230
231 pub(crate) fn on_mss_update(
232 &mut self,
233 snd_una: SeqNum,
234 snd_nxt: SeqNum,
235 high_rxt: Option<SeqNum>,
236 mss: Mss,
237 ) {
238 // When MSS updates, we must recalculate so we know what frames are
239 // considered lost or not.
240 //
241 // Notably, this will also update the pipe variable so we have a new
242 // estimate of bytes in flight with a new value for snd_nxt.
243 //
244 // Given we don't detect renegging, this is equivalent to processing an
245 // ACK at the given parameters and without any SACK blocks.
246 let _: bool = self.process_ack(snd_una, snd_nxt, high_rxt, &SackBlocks::EMPTY, mss);
247 }
248}
249
250/// Returns the threshold over which a sequence number is considered lost per
251/// the definition of `IsLost` in [RFC 6675 section 4].
252///
253/// [RFC 6675 section 4]:
254/// https://datatracker.ietf.org/doc/html/rfc6675#section-4
255fn sacked_bytes_threshold(mss: Mss) -> u32 {
256 u32::from(DUP_ACK_THRESHOLD - 1) * u32::from(mss)
257}
258
259#[cfg(test)]
260mod test {
261 use core::num::NonZeroU16;
262 use core::ops::Range;
263
264 use super::*;
265 use crate::internal::seq_ranges::SeqRange;
266 use crate::internal::testutil;
267
268 const TEST_MSS: Mss = Mss(NonZeroU16::new(50).unwrap());
269
270 fn seq_ranges(iter: impl IntoIterator<Item = (Range<u32>, bool)>) -> SeqRanges<bool> {
271 iter.into_iter()
272 .map(|(Range { start, end }, is_lost)| {
273 SeqRange::new(SeqNum::new(start)..SeqNum::new(end), is_lost).unwrap()
274 })
275 .collect()
276 }
277
278 impl SackScoreboard {
279 fn sacked_bytes(&self) -> u32 {
280 self.acked_ranges.iter().map(|seq_range| seq_range.len()).sum()
281 }
282 }
283
284 #[test]
285 fn process_ack_noop_if_empty() {
286 let mut sb = SackScoreboard::default();
287 let ack = SeqNum::new(1);
288 let snd_nxt = SeqNum::new(100);
289 let high_rxt = None;
290 assert!(!sb.process_ack(ack, snd_nxt, high_rxt, &SackBlocks::default(), TEST_MSS));
291 assert!(sb.acked_ranges.is_empty());
292 assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap());
293 }
294
295 #[test]
296 fn process_ack_ignores_bad_blocks() {
297 let mut sb = SackScoreboard::default();
298 let ack = SeqNum::new(5);
299 let snd_nxt = SeqNum::new(100);
300 let high_rxt = None;
301 // Ignores everything that doesn't match the cumulative ack.
302 assert!(!sb.process_ack(
303 ack,
304 snd_nxt,
305 high_rxt,
306 &testutil::sack_blocks([0..1, 4..6, 5..10]),
307 TEST_MSS
308 ));
309 assert!(sb.acked_ranges.is_empty());
310
311 // Ignores everything past snd_nxt.
312 assert!(!sb.process_ack(
313 ack,
314 snd_nxt,
315 high_rxt,
316 &testutil::sack_blocks([100..200, 50..150]),
317 TEST_MSS
318 ));
319 assert!(sb.acked_ranges.is_empty());
320 }
321
322 #[test]
323 fn process_ack_cumulative_ack() {
324 let mut sb = SackScoreboard::default();
325 let ack = SeqNum::new(5);
326 let snd_nxt = SeqNum::new(100);
327 let high_rxt = None;
328 let blocks = testutil::sack_blocks([20..30]);
329 assert!(sb.process_ack(ack, snd_nxt, high_rxt, &blocks, TEST_MSS));
330 let expect_ranges = seq_ranges([(20..30, false)]);
331 assert_eq!(sb.acked_ranges, expect_ranges);
332 assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap() - sb.sacked_bytes());
333
334 let ack = SeqNum::new(10);
335 assert!(!sb.process_ack(ack, snd_nxt, high_rxt, &blocks, TEST_MSS));
336 assert_eq!(sb.acked_ranges, expect_ranges);
337 assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap() - sb.sacked_bytes());
338 }
339
340 #[test]
341 fn process_ack_is_lost_dup_thresh() {
342 let mut sb = SackScoreboard::default();
343 let ack = SeqNum::new(5);
344 let snd_nxt = SeqNum::new(100);
345 let high_rxt = None;
346
347 let block1 = 20..30;
348 let block2 = 35..40;
349 let block3 = 45..50;
350
351 assert!(sb.process_ack(
352 ack,
353 snd_nxt,
354 high_rxt,
355 &testutil::sack_blocks([block1.clone(), block2.clone(), block3.clone()]),
356 TEST_MSS
357 ));
358 assert_eq!(
359 sb.acked_ranges,
360 seq_ranges([(block1.clone(), true), (block2, false), (block3, false)])
361 );
362 assert_eq!(
363 sb.pipe,
364 u32::try_from(snd_nxt - ack).unwrap()
365 - sb.sacked_bytes()
366 - (block1.start - u32::from(ack))
367 );
368 }
369
370 #[test]
371 fn process_ack_pipe_rule_a() {
372 let mut sb = SackScoreboard::default();
373 let ack = SeqNum::new(5);
374 let snd_nxt = SeqNum::new(500);
375 let high_rxt = None;
376 let small_block = 20..30;
377 let large_block_start = 35;
378 let large_block = large_block_start..(large_block_start + sacked_bytes_threshold(TEST_MSS));
379
380 assert!(sb.process_ack(
381 ack,
382 snd_nxt,
383 high_rxt,
384 &testutil::sack_blocks([small_block.clone(), large_block.clone()]),
385 TEST_MSS
386 ));
387 // Large block is exactly at the limit of the hole to its left being
388 // considered lost as well.
389 assert_eq!(
390 sb.acked_ranges,
391 seq_ranges([(small_block.clone(), true), (large_block.clone(), false)])
392 );
393 assert_eq!(
394 sb.pipe,
395 u32::try_from(snd_nxt - ack).unwrap()
396 - sb.sacked_bytes()
397 - (small_block.start - u32::from(ack))
398 );
399
400 // Now increase the large block by one.
401 let large_block = large_block.start..(large_block.end + 1);
402 assert!(sb.process_ack(
403 ack,
404 snd_nxt,
405 high_rxt,
406 &testutil::sack_blocks([small_block.clone(), large_block.clone()]),
407 TEST_MSS
408 ));
409 // Now the hole to the left of large block is also considered lost.
410 assert_eq!(
411 sb.acked_ranges,
412 seq_ranges([(small_block.clone(), true), (large_block.clone(), true)])
413 );
414 assert_eq!(
415 sb.pipe,
416 u32::try_from(snd_nxt - ack).unwrap()
417 - sb.sacked_bytes()
418 - (small_block.start - u32::from(ack))
419 - (large_block.start - small_block.end)
420 );
421 }
422
423 #[test]
424 fn process_ack_pipe_rule_b() {
425 let ack = SeqNum::new(5);
426 let snd_nxt = SeqNum::new(500);
427 let first_block = 20..30;
428 let second_block = 40..50;
429
430 let blocks = testutil::sack_blocks([first_block.clone(), second_block.clone()]);
431
432 // Extract the baseline pipe that if we receive an ACK with the
433 // parameters above but without a HighRxt value.
434 let baseline = {
435 let mut sb = SackScoreboard::default();
436 assert!(sb.process_ack(ack, snd_nxt, None, &blocks, TEST_MSS));
437 sb.pipe
438 };
439
440 // Drive HighRxt across the entire possible sequence number range that
441 // we expect to see it and check the pipe value is changing accordingly.
442 let hole1 = (u32::from(ack)..first_block.start).map(|seq| (seq, true));
443 let block1 = first_block.clone().map(|seq| (seq, false));
444 let hole2 = (first_block.end..second_block.start).map(|seq| (seq, true));
445 let block2 = second_block.map(|seq| (seq, false));
446 // Shift expecting an increment one over because HighRxt starting at
447 // HighAck is expected to be a zero contribution. This aligns the
448 // off-by-one in the expectations.
449 let iter =
450 hole1.chain(block1).chain(hole2).chain(block2).scan(false, |prev, (seq, sacked)| {
451 let expect_increment = core::mem::replace(prev, sacked);
452 Some((seq, expect_increment))
453 });
454
455 let _: u32 = iter.fold(0u32, |total, (seq, expect_increment)| {
456 let total = total + u32::from(expect_increment);
457 let mut sb = SackScoreboard::default();
458 assert!(sb.process_ack(ack, snd_nxt, Some(SeqNum::new(seq)), &blocks, TEST_MSS));
459 assert_eq!(sb.pipe - baseline, total, "at {seq}");
460 total
461 });
462 }
463
464 #[test]
465 fn process_ack_simple() {
466 let mut sb = SackScoreboard::default();
467 let ack = SeqNum::new(5);
468 let snd_nxt = SeqNum::new(500);
469 let high_rxt = None;
470
471 // Receive a single cumulative ack up to ack.
472 assert!(!sb.process_ack(ack, snd_nxt, high_rxt, &SackBlocks::default(), TEST_MSS));
473 assert_eq!(sb.acked_ranges, SeqRanges::default(),);
474 assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap());
475
476 // Cumulative ack doesn't move, 1 SACK range signaling loss is received.
477 let sack1 = 10..(10 + sacked_bytes_threshold(TEST_MSS) + 1);
478 assert!(sb.process_ack(
479 ack,
480 snd_nxt,
481 high_rxt,
482 &testutil::sack_blocks([sack1.clone()]),
483 TEST_MSS
484 ));
485 assert_eq!(sb.acked_ranges, seq_ranges([(sack1.clone(), true)]),);
486 assert_eq!(
487 sb.pipe,
488 u32::try_from(snd_nxt - ack).unwrap()
489 - sb.sacked_bytes()
490 - (sack1.start - u32::from(ack))
491 );
492
493 // Another SACK range comes in, at the end of this transmission block.
494 let sack2 = (u32::from(snd_nxt) - u32::from(TEST_MSS))..u32::from(snd_nxt);
495 assert!(sb.process_ack(
496 ack,
497 snd_nxt,
498 high_rxt,
499 &testutil::sack_blocks([sack1.clone(), sack2.clone()]),
500 TEST_MSS
501 ));
502 assert_eq!(sb.acked_ranges, seq_ranges([(sack1.clone(), true), (sack2.clone(), false)]));
503 assert_eq!(
504 sb.pipe,
505 u32::try_from(snd_nxt - ack).unwrap()
506 - sb.sacked_bytes()
507 - (sack1.start - u32::from(ack))
508 );
509
510 // Cumulative acknowledge the first SACK range.
511 let ack = SeqNum::new(sack1.end);
512 assert!(!sb.process_ack(
513 ack,
514 snd_nxt,
515 high_rxt,
516 &testutil::sack_blocks([sack2.clone()]),
517 TEST_MSS
518 ));
519 assert_eq!(sb.acked_ranges, seq_ranges([(sack2, false)]));
520 assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap() - sb.sacked_bytes());
521
522 // Cumulative acknowledge all the transmission.
523 assert!(!sb.process_ack(snd_nxt, snd_nxt, high_rxt, &SackBlocks::default(), TEST_MSS));
524 assert_eq!(sb.acked_ranges, SeqRanges::default());
525 assert_eq!(sb.pipe, 0);
526 }
527
528 #[test]
529 fn ack_after_snd_nxt() {
530 let mut sb = SackScoreboard::default();
531 let ack = SeqNum::new(5);
532 let snd_nxt = SeqNum::new(500);
533 let high_rxt = None;
534 let block = 10..20;
535 assert!(sb.process_ack(
536 ack,
537 snd_nxt,
538 high_rxt,
539 &testutil::sack_blocks([block.clone()]),
540 TEST_MSS
541 ));
542 assert_eq!(sb.acked_ranges, seq_ranges([(block.clone(), false)]));
543 assert_eq!(sb.pipe, u32::try_from(snd_nxt - ack).unwrap() - sb.sacked_bytes());
544
545 // SND.NXT rewinds after RTO.
546 let snd_nxt = ack;
547 // But we receive an ACK post the kept block.
548 let ack = SeqNum::new(block.end);
549 assert!(ack.after(snd_nxt));
550 assert!(!sb.process_ack(ack, snd_nxt, high_rxt, &SackBlocks::default(), TEST_MSS));
551 assert_eq!(sb.acked_ranges, SeqRanges::default());
552 assert_eq!(sb.pipe, 0);
553 }
554}