Skip to main content

fxfs/serialized_types/
serialized_key.rs

1// Copyright 2026 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! This module implements a zero-copy/optimized byte representation for keys in Fxfs.
6//!
7//! Its goal is to facilitate lexicographically comparable key serialization, permitting optimal
8//! byte representations when performing operations across persistent layer structures in LSM trees.
9
10use crate::serialized_types::varint::{self, Buffer};
11use anyhow::{Error, ensure};
12use std::cmp;
13
14/// Evaluates comparison ordering natively between two serialized byte arrays.
15pub fn compare_keys(a: &[u8], b: &[u8]) -> Result<cmp::Ordering, Error> {
16    ensure!(a.len() >= 2, "Key data too short");
17    let len_a = u16::from_be_bytes(a[..2].try_into().unwrap()) as usize;
18    ensure!(b.len() >= 2, "Key data too short");
19    let len_b = u16::from_be_bytes(b[..2].try_into().unwrap()) as usize;
20    ensure!(2 + len_a <= a.len(), "Key length exceeds buffer");
21    let data_a = &a[2..2 + len_a];
22    ensure!(2 + len_b <= b.len(), "Key length exceeds buffer");
23    let data_b = &b[2..2 + len_b];
24    Ok(data_a.cmp(data_b))
25}
26
27/// Serializes keys sequentially into binary format suitable for lexicographical comparisons.
28///
29/// The key layout contains:
30/// - A 2-byte Big-Endian length prefix specifying the size of the serialized data in bytes.
31/// - The encoded variable-length binary key payload.
32pub struct KeySerializer<'a, B: Buffer> {
33    buffer: &'a mut B,
34    done: bool,
35    base: Option<u64>,
36    start_pos: usize,
37}
38
39impl<'a, B: Buffer> KeySerializer<'a, B> {
40    /// Creates a new `KeySerializer` attached to a persistent storage buffer.
41    pub fn new(buffer: &'a mut B, base: Option<u64>) -> Self {
42        let start_pos = buffer.as_ref().len();
43        // Placeholder for the two-byte length prefix.
44        buffer.put(&[0, 0]);
45        Self { buffer, done: false, base, start_pos }
46    }
47
48    /// Writes an order-preserving varint encoded 64-bit payload to buffer.
49    /// If a base was provided and this is the first u64, it writes the delta.
50    pub fn write_u64(&mut self, v: u64) {
51        debug_assert!(!self.done);
52        if let Some(base) = self.base.take() {
53            assert_eq!(
54                self.buffer.as_ref().len(),
55                self.start_pos + 2,
56                "write_u64 with base must be the first item"
57            );
58            assert!(v >= base, "Delta encoding underflow: v ({}) < base ({})", v, base);
59            varint::encode_varint(v - base, self.buffer);
60        } else {
61            varint::encode_varint(v, self.buffer);
62        }
63    }
64
65    /// Writes fixed-length raw byte payloads into serialization buffer.
66    pub fn write_bytes(&mut self, bytes: &[u8]) {
67        debug_assert!(!self.done);
68        self.buffer.put(bytes);
69    }
70
71    /// Resolves and writes the real key byte payload length to the two-byte prefix placeholder.
72    pub fn finalize(&mut self) {
73        if self.done {
74            return;
75        }
76        let len = self.buffer.as_ref().len() - self.start_pos - 2;
77        let len_u16 = u16::try_from(len).expect("Key size exceeds 16-bit unsigned boundary");
78        let bytes = len_u16.to_be_bytes();
79        self.buffer[self.start_pos] = bytes[0];
80        self.buffer[self.start_pos + 1] = bytes[1];
81        self.done = true;
82    }
83
84    /// Writes an order-preserving varint encoded 64-bit payload to buffer.
85    pub fn write_varint(&mut self, v: u64) {
86        debug_assert!(!self.done);
87        varint::encode_varint(v, self.buffer);
88    }
89
90    /// Writes trailing variable-length dynamic strings or vectors to serialization buffer.
91    pub fn write_last(&mut self, bytes: &[u8]) {
92        debug_assert!(!self.done);
93        self.buffer.put(bytes);
94        self.finalize();
95    }
96}
97
98impl<B: Buffer> Drop for KeySerializer<'_, B> {
99    fn drop(&mut self) {
100        // Validates proper structure usage by requiring explicit finalization invocation.
101        if !std::thread::panicking() {
102            debug_assert!(self.done);
103        }
104    }
105}
106
107/// Handles decoding of sequential serialized key payloads.
108pub struct KeyDeserializer<'a> {
109    pub data: &'a [u8],
110    base: Option<u64>,
111    is_first: bool,
112}
113
114impl<'a> KeyDeserializer<'a> {
115    /// Parses serialized key with a 2-byte length prefix from the front of payload data.
116    pub fn new(data: &'a [u8], base: Option<u64>) -> Result<(Self, usize), Error> {
117        ensure!(data.len() >= 2, "Invalid key format: Too short for length header");
118        let length = u16::from_be_bytes(data[..2].try_into().unwrap()) as usize;
119        ensure!(data.len() >= 2 + length, "Invalid key data payload size");
120        Ok((Self { data: &data[2..2 + length], base, is_first: true }, 2 + length))
121    }
122
123    /// Constructs deserializer without expecting standard 2-byte size prefix.
124    pub fn new_without_prefix(data: &'a [u8]) -> Self {
125        Self { data, base: None, is_first: true }
126    }
127
128    /// Reads a u64. If a base was provided and this is the first u64, it applies the base.
129    pub fn read_u64(&mut self) -> Result<u64, Error> {
130        let is_first = self.is_first;
131        let v = self.read_varint()?;
132        if let Some(base) = self.base.take() {
133            assert!(is_first, "read_u64 with base must be the first item");
134            Ok(v.checked_add(base).ok_or_else(|| {
135                anyhow::anyhow!("Delta decoding overflow: v ({}) + base ({})", v, base)
136            })?)
137        } else {
138            Ok(v)
139        }
140    }
141
142    /// Extracts an order-preserving decoded 64-bit variable length integer from stream.
143    pub fn read_varint(&mut self) -> Result<u64, Error> {
144        self.is_first = false;
145        let (v, len) = varint::decode_varint(self.data)?;
146        self.data = &self.data[len..];
147        Ok(v)
148    }
149
150    fn read_bytes(&mut self, len: usize) -> Result<&'a [u8], Error> {
151        self.is_first = false;
152        if self.data.len() < len {
153            anyhow::bail!("Data array boundary overrun");
154        }
155        let (head, tail) = self.data.split_at(len);
156        self.data = tail;
157        Ok(head)
158    }
159
160    fn read_last(&mut self) -> Vec<u8> {
161        self.is_first = false;
162        let data = self.data;
163        self.data = &[];
164        data.to_vec()
165    }
166}
167
168/// Trait defining the translation logic from Fxfs types into order-consistent binaries.
169pub trait SerializeKey: Sized {
170    /// Encodes key representation sequentially into serialization stream.
171    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>);
172
173    /// Decodes serializations sequentially from underlying raw bytes.
174    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error>;
175}
176
177impl SerializeKey for u8 {
178    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
179        serializer.write_bytes(std::slice::from_ref(self));
180    }
181    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
182        Ok(deserializer.read_bytes(1)?[0])
183    }
184}
185
186impl SerializeKey for u32 {
187    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
188        serializer.write_varint(*self as u64)
189    }
190    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
191        Ok(deserializer.read_varint()?.try_into()?)
192    }
193}
194
195impl SerializeKey for u64 {
196    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
197        serializer.write_u64(*self);
198    }
199    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
200        deserializer.read_u64()
201    }
202}
203
204impl SerializeKey for String {
205    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
206        serializer.write_last(self.as_bytes());
207    }
208    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
209        Ok(String::from_utf8(deserializer.read_last())?)
210    }
211}
212
213impl SerializeKey for fxfs_unicode::CasefoldString {
214    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
215        let s: &str = self.as_ref();
216        serializer.write_last(s.as_bytes());
217    }
218    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
219        Ok(Self::new(String::deserialize_key_from(deserializer)?))
220    }
221}
222
223impl SerializeKey for Vec<u8> {
224    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
225        serializer.write_last(self);
226    }
227    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
228        Ok(deserializer.read_last())
229    }
230}
231
232impl SerializeKey for std::ops::Range<u64> {
233    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
234        // Range upper-bounds are typically critical when evaluating extent allocations
235        // in tree merges, so we write end values before start values.
236        self.end.serialize_key_to(serializer);
237        self.start.serialize_key_to(serializer);
238    }
239    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
240        let end = u64::deserialize_key_from(deserializer)?;
241        let start = u64::deserialize_key_from(deserializer)?;
242        Ok(start..end)
243    }
244}
245
246#[cfg(test)]
247mod tests {
248    use super::*;
249    use crate::lsm_tree::types::OrdUpperBound;
250    use crate::object_store::ExtentKey;
251    use crate::object_store::allocator::AllocatorKey;
252    use crate::object_store::object_record::{ObjectKey, ObjectKeyData};
253
254    #[test]
255    fn test_object_key_order_matches_cmp_upper_bound() {
256        let mut keys = Vec::new();
257        // Varint edge cases
258        keys.push(ObjectKey::object(0));
259        keys.push(ObjectKey::object(1));
260        keys.push(ObjectKey::object(0xbe));
261        keys.push(ObjectKey::object(0xbf));
262        keys.push(ObjectKey::object(0xc0));
263        keys.push(ObjectKey::object(0x1ffe));
264        keys.push(ObjectKey::object(0x1fff));
265        keys.push(ObjectKey::object(0x2000));
266        keys.push(ObjectKey::object(0x0fff_ffff));
267        keys.push(ObjectKey::object(0x1000_0000));
268
269        // String edge cases
270        keys.push(ObjectKey { object_id: 1, data: ObjectKeyData::Child { name: "".to_string() } });
271        keys.push(ObjectKey { object_id: 1, data: ObjectKeyData::Child { name: "a".to_string() } });
272        keys.push(ObjectKey { object_id: 1, data: ObjectKeyData::Child { name: "b".to_string() } });
273        keys.push(ObjectKey {
274            object_id: 1,
275            data: ObjectKeyData::Child { name: "aa".to_string() },
276        });
277        keys.push(ObjectKey { object_id: 1, data: ObjectKeyData::Child { name: "a".repeat(300) } });
278
279        // Extent edge cases
280        keys.push(ObjectKey::extent(1, 0, 100 * 512..200 * 512));
281        keys.push(ObjectKey::extent(1, 0, 100 * 512..150 * 512));
282        keys.push(ObjectKey::extent(1, 0, 50 * 512..150 * 512));
283        keys.push(ObjectKey::extent(1, 0, 150 * 512..200 * 512));
284        keys.push(ObjectKey::extent(2, 0, 100 * 512..200 * 512));
285        keys.push(ObjectKey::extent(1, 0, 0..100 * 512));
286        keys.push(ObjectKey::extent(1, 0, 50 * 512..100 * 512));
287
288        // Compare all pairs. We compare against `cmp_upper_bound` which is now a total order
289        // for ranges (comparing end then start), matching serialization order.
290        for i in 0..keys.len() {
291            for j in 0..keys.len() {
292                let mut buf_a = Vec::new();
293                let mut ser_a = KeySerializer::new(&mut buf_a, Some(0));
294                keys[i].serialize_key_to(&mut ser_a);
295                ser_a.finalize();
296
297                let mut buf_b = Vec::new();
298                let mut ser_b = KeySerializer::new(&mut buf_b, Some(0));
299                keys[j].serialize_key_to(&mut ser_b);
300                ser_b.finalize();
301
302                std::mem::drop(ser_a);
303                std::mem::drop(ser_b);
304
305                let cmp = keys[i].cmp_upper_bound(&keys[j]);
306                let ser_cmp = compare_keys(&buf_a, &buf_b).unwrap();
307                assert_eq!(cmp, ser_cmp, "Mismatch for keys {:?} and {:?}", keys[i], keys[j]);
308            }
309        }
310    }
311
312    #[test]
313    fn test_allocator_key_order_matches_cmp_upper_bound() {
314        let mut keys = Vec::new();
315        keys.push(AllocatorKey { device_range: ExtentKey::new(0..100 * 512) });
316        keys.push(AllocatorKey { device_range: ExtentKey::new(0..200 * 512) });
317        keys.push(AllocatorKey { device_range: ExtentKey::new(100 * 512..200 * 512) });
318        keys.push(AllocatorKey { device_range: ExtentKey::new(100 * 512..150 * 512) });
319        keys.push(AllocatorKey { device_range: ExtentKey::new(50 * 512..150 * 512) });
320        keys.push(AllocatorKey { device_range: ExtentKey::new(0..50 * 512) });
321        keys.push(AllocatorKey { device_range: ExtentKey::new(50 * 512..100 * 512) });
322
323        // Compare all pairs. We compare against `cmp_upper_bound` which is now a total order
324        // for ranges, matching serialization order.
325        for i in 0..keys.len() {
326            for j in 0..keys.len() {
327                let mut buf_a = Vec::new();
328                let mut ser_a = KeySerializer::new(&mut buf_a, Some(0));
329                keys[i].serialize_key_to(&mut ser_a);
330                ser_a.finalize();
331
332                let mut buf_b = Vec::new();
333                let mut ser_b = KeySerializer::new(&mut buf_b, Some(0));
334                keys[j].serialize_key_to(&mut ser_b);
335                ser_b.finalize();
336
337                std::mem::drop(ser_a);
338                std::mem::drop(ser_b);
339
340                let cmp = keys[i].cmp_upper_bound(&keys[j]);
341                let ser_cmp = compare_keys(&buf_a, &buf_b).unwrap();
342
343                assert_eq!(cmp, ser_cmp, "Mismatch for keys {:?} and {:?}", keys[i], keys[j]);
344            }
345        }
346    }
347
348    #[test]
349    fn test_delta_encoding() {
350        let mut buf = Vec::new();
351        let base = 100;
352        let val = 150;
353
354        // Serialize
355        {
356            let mut ser = KeySerializer::new(&mut buf, Some(base));
357            ser.write_u64(val);
358            ser.finalize();
359        }
360
361        // Deserialize
362        let (mut deser, length) = KeyDeserializer::new(&buf, Some(base)).unwrap();
363        assert_eq!(length, buf.len());
364        let decoded_val = deser.read_u64().unwrap();
365
366        assert_eq!(val, decoded_val);
367
368        // Verify bytes (val - base = 50)
369        assert_eq!(buf, vec![0, 1, 50]);
370    }
371
372    #[test]
373    #[should_panic(expected = "Delta encoding underflow")]
374    fn test_delta_encoding_underflow_panics() {
375        let mut buf = Vec::new();
376        let base = 100;
377        let val = 50;
378
379        let mut ser = KeySerializer::new(&mut buf, Some(base));
380        ser.write_u64(val);
381    }
382
383    #[test]
384    fn test_delta_encoding_only_first() {
385        let mut buf = Vec::new();
386        let base = 100;
387        let val1 = 150;
388        let val2 = 200;
389
390        // Serialize
391        {
392            let mut ser = KeySerializer::new(&mut buf, Some(base));
393            ser.write_u64(val1);
394            ser.write_u64(val2);
395            ser.finalize();
396        }
397
398        // Deserialize
399        let (mut deser, length) = KeyDeserializer::new(&buf, Some(base)).unwrap();
400        assert_eq!(length, buf.len());
401        let decoded_val1 = deser.read_u64().unwrap();
402        let decoded_val2 = deser.read_u64().unwrap();
403
404        assert_eq!(val1, decoded_val1);
405        assert_eq!(val2, decoded_val2);
406    }
407
408    #[test]
409    fn test_delta_encoding_none() {
410        let mut buf = Vec::new();
411        let val = 150;
412
413        // Serialize
414        {
415            let mut ser = KeySerializer::new(&mut buf, None);
416            ser.write_u64(val);
417            ser.finalize();
418        }
419
420        // Deserialize
421        let (mut deser, length) = KeyDeserializer::new(&buf, None).unwrap();
422        assert_eq!(length, buf.len());
423        let decoded_val = deser.read_u64().unwrap();
424
425        assert_eq!(val, decoded_val);
426
427        // Verify bytes (should be regular varint of 150, which fits in 1 byte in this encoding)
428        assert_eq!(buf, vec![0, 1, 150]);
429    }
430
431    #[test]
432    fn test_delta_encoding_overflow_on_read_returns_error() {
433        // Forge a buffer with a large varint.
434        // Varint of u64::MAX is 9 bytes of 0xff.
435        let buf = vec![0, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff];
436
437        // Deserialize with base = Some(1).
438        // read_u64 should try to add 1 to u64::MAX and return error.
439        let (mut deser, length) = KeyDeserializer::new(&buf, Some(1)).unwrap();
440        assert_eq!(length, buf.len());
441        assert!(deser.read_u64().is_err());
442    }
443
444    #[test]
445    #[should_panic(expected = "write_u64 with base must be the first item")]
446    fn test_write_u64_with_base_not_first_panics() {
447        let mut buf = Vec::new();
448        let base = 100;
449        let val = 150;
450
451        let mut ser = KeySerializer::new(&mut buf, Some(base));
452        ser.write_varint(5); // Write something else first
453        ser.write_u64(val); // Should panic here
454    }
455
456    #[test]
457    #[should_panic(expected = "read_u64 with base must be the first item")]
458    fn test_read_u64_with_base_not_first_panics() {
459        let mut buf = Vec::new();
460        let base = 100;
461        let val1 = 150;
462        let val2 = 200;
463
464        {
465            let mut ser = KeySerializer::new(&mut buf, Some(base));
466            ser.write_u64(val1);
467            ser.write_u64(val2);
468            ser.finalize();
469        }
470
471        let (mut deser, length) = KeyDeserializer::new(&buf, Some(base)).unwrap();
472        assert_eq!(length, buf.len());
473        deser.read_varint().unwrap(); // Reads val1 (delta)
474        deser.read_u64().unwrap(); // Tries to read val2 as u64.
475    }
476}