Skip to main content

fxfs/serialized_types/
varint.rs

1// Copyright 2026 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//! Lexicographically ordered variable-length integer (varint) encoding.
5//!
6//! This encoding ensures that the byte-wise (lexicographical) comparison of encoded values
7//! matches the numerical comparison of the original values.
8//!
9//! This is achieved by:
10//! 1. Dividing the value space into distinct length categories.
11//! 2. Assigning each category a non-overlapping range of first-byte values, ordered by length.
12//! 3. Encoding the remaining bits in Big-Endian order (which naturally matches numerical order).
13//!
14//! Encoding rules (bit-level):
15//! - `v < 0xc0` (0..191): 1 byte
16//!   - Pattern: `0b0xxxxxxx` or `0b10xxxxxx`
17//!   - First byte range: `0x00..=0xbf`
18//! - `v < 0x2000` (192..8191): 2 bytes
19//!   - Encoded as `v | 0xc000` (Big Endian)
20//!   - Pattern: `0b110xxxxx xxxxxxxx`
21//!   - First byte range: `0xc0..=0xdf`
22//! - `v < 0x1000_0000` (8192..268435455): 4 bytes
23//!   - Encoded as `v | 0xe000_0000` (Big Endian)
24//!   - Pattern: `0b1110xxxx xxxxxxxx ...`
25//!   - First byte range: `0xe0..=0xef`
26//! - `v < 0x0f00_0000_0000_0000`: 8 bytes
27//!   - Encoded as `v | 0xf000_0000_0000_0000` (Big Endian)
28//!   - Pattern: `0b1111xxxx xxxxxxxx ...`
29//!   - First byte range: `0xf0..=0xfe`
30//! - Else: 9 bytes
31//!   - Encoded as `0xff` followed by `v.to_be_bytes()`
32//!   - Pattern: `0b11111111 xxxxxxxx ...`
33//!   - First byte: `0xff`
34
35use anyhow::{Error, ensure};
36use std::ops::IndexMut;
37
38pub trait Buffer: AsRef<[u8]> + IndexMut<usize, Output = u8> + Send {
39    fn put(&mut self, data: &[u8]);
40}
41
42impl Buffer for Vec<u8> {
43    fn put(&mut self, data: &[u8]) {
44        self.extend_from_slice(data);
45    }
46}
47
48pub fn encode_varint(v: u64, buf: &mut impl Buffer) -> usize {
49    if v < 0xc0 {
50        buf.put(&[v as u8]);
51        1
52    } else if v < 0x2000 {
53        buf.put(&(v as u16 | 0xc000).to_be_bytes());
54        2
55    } else if v < 0x1000_0000 {
56        buf.put(&(v as u32 | 0xe000_0000).to_be_bytes());
57        4
58    } else if v < 0x0f00_0000_0000_0000 {
59        buf.put(&(v | 0xf000_0000_0000_0000).to_be_bytes());
60        8
61    } else {
62        buf.put(&[0xff]);
63        buf.put(&v.to_be_bytes());
64        9
65    }
66}
67
68pub fn decode_varint(data: &[u8]) -> Result<(u64, usize), Error> {
69    ensure!(!data.is_empty(), "Data too short");
70    let b = data[0];
71    if b < 0xc0 {
72        Ok((b as u64, 1))
73    } else if b < 0xe0 {
74        ensure!(data.len() >= 2, "Data too short");
75        let v = u16::from_be_bytes(data[..2].try_into().unwrap());
76        Ok(((v & !0xc000) as u64, 2))
77    } else if b < 0xf0 {
78        ensure!(data.len() >= 4, "Data too short");
79        let v = u32::from_be_bytes(data[..4].try_into().unwrap());
80        Ok(((v & !0xe000_0000) as u64, 4))
81    } else if b < 0xff {
82        ensure!(data.len() >= 8, "Data too short");
83        let v = u64::from_be_bytes(data[..8].try_into().unwrap());
84        Ok((v & !0xf000_0000_0000_0000, 8))
85    } else {
86        ensure!(data.len() >= 9, "Data too short");
87        let v = u64::from_be_bytes(data[1..9].try_into().unwrap());
88        Ok((v, 9))
89    }
90}
91
92#[cfg(test)]
93mod tests {
94    use super::*;
95
96    #[test]
97    fn test_correctness() {
98        let test_cases = vec![
99            0,
100            1,
101            0xbe,
102            0xbf,
103            0xc0,
104            0x1ffe,
105            0x1fff,
106            0x2000,
107            0x0fff_ffff,
108            0x1000_0000,
109            0x0eff_ffff_ffff_ffff,
110            0x0f00_0000_0000_0000,
111            u64::MAX,
112        ];
113        for v in test_cases {
114            let mut buf = Vec::new();
115            let len = encode_varint(v, &mut buf);
116            let (decoded, decoded_len) = decode_varint(&buf).unwrap();
117            assert_eq!(v, decoded);
118            assert_eq!(len, decoded_len);
119            assert_eq!(buf.len(), len);
120        }
121    }
122
123    #[test]
124    fn test_ordering_correctness() {
125        let test_cases = vec![
126            0,
127            1,
128            0xbe,
129            0xbf,
130            0xc0,
131            0x1ffe,
132            0x1fff,
133            0x2000,
134            0x0fff_ffff,
135            0x1000_0000,
136            0x0eff_ffff_ffff_ffff,
137            0x0f00_0000_0000_0000,
138            u64::MAX,
139        ];
140        for i in 0..test_cases.len() {
141            for j in 0..test_cases.len() {
142                let a = test_cases[i];
143                let b = test_cases[j];
144
145                let mut buf_a = Vec::new();
146                let mut buf_b = Vec::new();
147
148                encode_varint(a, &mut buf_a);
149                encode_varint(b, &mut buf_b);
150
151                let ord_cmp = a.cmp(&b);
152                let ser_cmp = buf_a.cmp(&buf_b);
153                assert_eq!(ord_cmp, ser_cmp, "Mismatch for {} and {}", a, b);
154            }
155        }
156    }
157}