Skip to main content

fxfs/serialized_types/
serialized_key.rs

1// Copyright 2026 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! This module implements a zero-copy/optimized byte representation for keys in Fxfs.
6//!
7//! Its goal is to facilitate lexicographically comparable key serialization, permitting optimal
8//! byte representations when performing operations across persistent layer structures in LSM trees.
9
10use crate::serialized_types::varint::{self, Buffer};
11use anyhow::{Error, ensure};
12use std::cmp;
13
14/// Evaluates comparison ordering natively between two serialized byte arrays.
15pub fn compare_keys(a: &[u8], b: &[u8]) -> Result<cmp::Ordering, Error> {
16    ensure!(a.len() >= 2, "Key data too short");
17    let len_a = u16::from_be_bytes(a[..2].try_into().unwrap()) as usize;
18    ensure!(b.len() >= 2, "Key data too short");
19    let len_b = u16::from_be_bytes(b[..2].try_into().unwrap()) as usize;
20    ensure!(2 + len_a <= a.len(), "Key length exceeds buffer");
21    let data_a = &a[2..2 + len_a];
22    ensure!(2 + len_b <= b.len(), "Key length exceeds buffer");
23    let data_b = &b[2..2 + len_b];
24    Ok(data_a.cmp(data_b))
25}
26
27/// Serializes keys sequentially into binary format suitable for lexicographical comparisons.
28///
29/// The key layout contains:
30/// - A 2-byte Big-Endian length prefix specifying the size of the serialized data in bytes.
31/// - The encoded variable-length binary key payload.
32pub struct KeySerializer<'a, B: Buffer> {
33    buffer: &'a mut B,
34    done: bool,
35    base: Option<u64>,
36    start_pos: usize,
37}
38
39impl<'a, B: Buffer> KeySerializer<'a, B> {
40    /// Creates a new `KeySerializer` attached to a persistent storage buffer.
41    pub fn new(buffer: &'a mut B, base: Option<u64>) -> Self {
42        let start_pos = buffer.as_ref().len();
43        // Placeholder for the two-byte length prefix.
44        buffer.put(&[0, 0]);
45        Self { buffer, done: false, base, start_pos }
46    }
47
48    /// Writes an order-preserving varint encoded 64-bit payload to buffer.
49    /// If a base was provided and this is the first u64, it writes the delta.
50    pub fn write_u64(&mut self, v: u64) {
51        debug_assert!(!self.done);
52        if let Some(base) = self.base.take() {
53            assert_eq!(
54                self.buffer.as_ref().len(),
55                self.start_pos + 2,
56                "write_u64 with base must be the first item"
57            );
58            assert!(v >= base, "Delta encoding underflow: v ({}) < base ({})", v, base);
59            varint::encode_varint(v - base, self.buffer);
60        } else {
61            varint::encode_varint(v, self.buffer);
62        }
63    }
64
65    /// Writes fixed-length raw byte payloads into serialization buffer.
66    pub fn write_bytes(&mut self, bytes: &[u8]) {
67        debug_assert!(!self.done);
68        self.buffer.put(bytes);
69    }
70
71    /// Resolves and writes the real key byte payload length to the two-byte prefix placeholder.
72    pub fn finalize(&mut self) {
73        if self.done {
74            return;
75        }
76        let len = self.buffer.as_ref().len() - self.start_pos - 2;
77        let len_u16 = u16::try_from(len).expect("Key size exceeds 16-bit unsigned boundary");
78        let bytes = len_u16.to_be_bytes();
79        self.buffer[self.start_pos] = bytes[0];
80        self.buffer[self.start_pos + 1] = bytes[1];
81        self.done = true;
82    }
83
84    /// Writes an order-preserving varint encoded 64-bit payload to buffer.
85    pub fn write_varint(&mut self, v: u64) {
86        debug_assert!(!self.done);
87        varint::encode_varint(v, self.buffer);
88    }
89
90    /// Writes trailing variable-length dynamic strings or vectors to serialization buffer.
91    pub fn write_last(&mut self, bytes: &[u8]) {
92        debug_assert!(!self.done);
93        self.buffer.put(bytes);
94        self.finalize();
95    }
96}
97
98impl<B: Buffer> Drop for KeySerializer<'_, B> {
99    fn drop(&mut self) {
100        // Validates proper structure usage by requiring explicit finalization invocation.
101        if !std::thread::panicking() {
102            debug_assert!(self.done);
103        }
104    }
105}
106
107/// Handles decoding of sequential serialized key payloads.
108pub struct KeyDeserializer<'a> {
109    pub data: &'a [u8],
110    base: Option<u64>,
111    is_first: bool,
112}
113
114impl<'a> KeyDeserializer<'a> {
115    /// Parses serialized key with a 2-byte length prefix from the front of payload data.
116    pub fn new(data: &'a [u8], base: Option<u64>) -> Result<(Self, usize), Error> {
117        ensure!(data.len() >= 2, "Invalid key format: Too short for length header");
118        let length = u16::from_be_bytes(data[..2].try_into().unwrap()) as usize;
119        ensure!(data.len() >= 2 + length, "Invalid key data payload size");
120        Ok((Self { data: &data[2..2 + length], base, is_first: true }, 2 + length))
121    }
122
123    /// Constructs deserializer without expecting standard 2-byte size prefix.
124    pub fn new_without_prefix(data: &'a [u8]) -> Self {
125        Self { data, base: None, is_first: true }
126    }
127
128    /// Reads a u64. If a base was provided and this is the first u64, it applies the base.
129    pub fn read_u64(&mut self) -> Result<u64, Error> {
130        let is_first = self.is_first;
131        let v = self.read_varint()?;
132        if let Some(base) = self.base.take() {
133            assert!(is_first, "read_u64 with base must be the first item");
134            Ok(v.checked_add(base).ok_or_else(|| {
135                anyhow::anyhow!("Delta decoding overflow: v ({}) + base ({})", v, base)
136            })?)
137        } else {
138            Ok(v)
139        }
140    }
141
142    /// Extracts an order-preserving decoded 64-bit variable length integer from stream.
143    pub fn read_varint(&mut self) -> Result<u64, Error> {
144        self.is_first = false;
145        let (v, len) = varint::decode_varint(self.data)?;
146        self.data = &self.data[len..];
147        Ok(v)
148    }
149
150    fn read_bytes(&mut self, len: usize) -> Result<&'a [u8], Error> {
151        self.is_first = false;
152        if self.data.len() < len {
153            anyhow::bail!("Data array boundary overrun");
154        }
155        let (head, tail) = self.data.split_at(len);
156        self.data = tail;
157        Ok(head)
158    }
159
160    fn read_last(&mut self) -> Vec<u8> {
161        self.is_first = false;
162        let data = self.data;
163        self.data = &[];
164        data.to_vec()
165    }
166}
167
168/// Trait defining the translation logic from Fxfs types into order-consistent binaries.
169pub trait SerializeKey: Sized {
170    /// Encodes key representation sequentially into serialization stream.
171    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>);
172
173    /// Decodes serializations sequentially from underlying raw bytes.
174    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error>;
175}
176
177impl SerializeKey for u8 {
178    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
179        serializer.write_bytes(std::slice::from_ref(self));
180    }
181    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
182        Ok(deserializer.read_bytes(1)?[0])
183    }
184}
185
186impl SerializeKey for u32 {
187    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
188        serializer.write_varint(*self as u64)
189    }
190    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
191        Ok(deserializer.read_varint()?.try_into()?)
192    }
193}
194
195impl SerializeKey for u64 {
196    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
197        serializer.write_u64(*self);
198    }
199    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
200        deserializer.read_u64()
201    }
202}
203
204impl SerializeKey for String {
205    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
206        serializer.write_last(self.as_bytes());
207    }
208    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
209        Ok(String::from_utf8(deserializer.read_last())?)
210    }
211}
212
213impl SerializeKey for fxfs_unicode::CasefoldString {
214    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
215        let s: &str = self.as_ref();
216        serializer.write_last(s.as_bytes());
217    }
218    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
219        Ok(Self::new(String::deserialize_key_from(deserializer)?))
220    }
221}
222
223impl SerializeKey for Vec<u8> {
224    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
225        serializer.write_last(self);
226    }
227    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
228        Ok(deserializer.read_last())
229    }
230}
231
232impl SerializeKey for std::ops::Range<u64> {
233    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
234        // Range upper-bounds are typically critical when evaluating extent allocations
235        // in tree merges, so we write end values before start values.
236        self.end.serialize_key_to(serializer);
237        self.start.serialize_key_to(serializer);
238    }
239    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
240        let end = u64::deserialize_key_from(deserializer)?;
241        let start = u64::deserialize_key_from(deserializer)?;
242        Ok(start..end)
243    }
244}
245
246impl SerializeKey for std::num::NonZeroU64 {
247    fn serialize_key_to<B: Buffer>(&self, serializer: &mut KeySerializer<'_, B>) {
248        self.get().serialize_key_to(serializer);
249    }
250    fn deserialize_key_from(deserializer: &mut KeyDeserializer<'_>) -> Result<Self, Error> {
251        let raw = u64::deserialize_key_from(deserializer)?;
252        Self::new(raw).ok_or_else(|| anyhow::anyhow!("Expected non-zero value"))
253    }
254}
255
256#[cfg(test)]
257mod tests {
258    use super::*;
259    use crate::lsm_tree::types::OrdUpperBound;
260    use crate::object_store::allocator::AllocatorKey;
261    use crate::object_store::object_record::{ObjectKey, ObjectKeyData};
262    use crate::object_store::{AttributeId, Extent};
263
264    #[test]
265    fn test_object_key_order_matches_cmp_upper_bound() {
266        let mut keys = Vec::new();
267        // Varint edge cases
268        keys.push(ObjectKey::object(0));
269        keys.push(ObjectKey::object(1));
270        keys.push(ObjectKey::object(0xbe));
271        keys.push(ObjectKey::object(0xbf));
272        keys.push(ObjectKey::object(0xc0));
273        keys.push(ObjectKey::object(0x1ffe));
274        keys.push(ObjectKey::object(0x1fff));
275        keys.push(ObjectKey::object(0x2000));
276        keys.push(ObjectKey::object(0x0fff_ffff));
277        keys.push(ObjectKey::object(0x1000_0000));
278
279        // String edge cases
280        keys.push(ObjectKey { object_id: 1, data: ObjectKeyData::Child { name: "".to_string() } });
281        keys.push(ObjectKey { object_id: 1, data: ObjectKeyData::Child { name: "a".to_string() } });
282        keys.push(ObjectKey { object_id: 1, data: ObjectKeyData::Child { name: "b".to_string() } });
283        keys.push(ObjectKey {
284            object_id: 1,
285            data: ObjectKeyData::Child { name: "aa".to_string() },
286        });
287        keys.push(ObjectKey { object_id: 1, data: ObjectKeyData::Child { name: "a".repeat(300) } });
288
289        // Extent edge cases
290        keys.push(ObjectKey::extent(1, AttributeId::TEST_ID, 100 * 512..200 * 512));
291        keys.push(ObjectKey::extent(1, AttributeId::TEST_ID, 100 * 512..150 * 512));
292        keys.push(ObjectKey::extent(1, AttributeId::TEST_ID, 50 * 512..150 * 512));
293        keys.push(ObjectKey::extent(1, AttributeId::TEST_ID, 150 * 512..200 * 512));
294        keys.push(ObjectKey::extent(2, AttributeId::TEST_ID, 100 * 512..200 * 512));
295        keys.push(ObjectKey::extent(1, AttributeId::TEST_ID, 0..100 * 512));
296        keys.push(ObjectKey::extent(1, AttributeId::TEST_ID, 50 * 512..100 * 512));
297
298        // Compare all pairs. We compare against `cmp_upper_bound` which is now a total order
299        // for ranges (comparing end then start), matching serialization order.
300        for i in 0..keys.len() {
301            for j in 0..keys.len() {
302                let mut buf_a = Vec::new();
303                let mut ser_a = KeySerializer::new(&mut buf_a, Some(0));
304                keys[i].serialize_key_to(&mut ser_a);
305                ser_a.finalize();
306
307                let mut buf_b = Vec::new();
308                let mut ser_b = KeySerializer::new(&mut buf_b, Some(0));
309                keys[j].serialize_key_to(&mut ser_b);
310                ser_b.finalize();
311
312                std::mem::drop(ser_a);
313                std::mem::drop(ser_b);
314
315                let cmp = keys[i].cmp_upper_bound(&keys[j]);
316                let ser_cmp = compare_keys(&buf_a, &buf_b).unwrap();
317                assert_eq!(cmp, ser_cmp, "Mismatch for keys {:?} and {:?}", keys[i], keys[j]);
318            }
319        }
320    }
321
322    #[test]
323    fn test_allocator_key_order_matches_cmp_upper_bound() {
324        let mut keys = Vec::new();
325        keys.push(AllocatorKey { device_range: Extent(0..100 * 512) });
326        keys.push(AllocatorKey { device_range: Extent(0..200 * 512) });
327        keys.push(AllocatorKey { device_range: Extent(100 * 512..200 * 512) });
328        keys.push(AllocatorKey { device_range: Extent(100 * 512..150 * 512) });
329        keys.push(AllocatorKey { device_range: Extent(50 * 512..150 * 512) });
330        keys.push(AllocatorKey { device_range: Extent(0..50 * 512) });
331        keys.push(AllocatorKey { device_range: Extent(50 * 512..100 * 512) });
332
333        // Compare all pairs. We compare against `cmp_upper_bound` which is now a total order
334        // for ranges, matching serialization order.
335        for i in 0..keys.len() {
336            for j in 0..keys.len() {
337                let mut buf_a = Vec::new();
338                let mut ser_a = KeySerializer::new(&mut buf_a, Some(0));
339                keys[i].serialize_key_to(&mut ser_a);
340                ser_a.finalize();
341
342                let mut buf_b = Vec::new();
343                let mut ser_b = KeySerializer::new(&mut buf_b, Some(0));
344                keys[j].serialize_key_to(&mut ser_b);
345                ser_b.finalize();
346
347                std::mem::drop(ser_a);
348                std::mem::drop(ser_b);
349
350                let cmp = keys[i].cmp_upper_bound(&keys[j]);
351                let ser_cmp = compare_keys(&buf_a, &buf_b).unwrap();
352
353                assert_eq!(cmp, ser_cmp, "Mismatch for keys {:?} and {:?}", keys[i], keys[j]);
354            }
355        }
356    }
357
358    #[test]
359    fn test_delta_encoding() {
360        let mut buf = Vec::new();
361        let base = 100;
362        let val = 150;
363
364        // Serialize
365        {
366            let mut ser = KeySerializer::new(&mut buf, Some(base));
367            ser.write_u64(val);
368            ser.finalize();
369        }
370
371        // Deserialize
372        let (mut deser, length) = KeyDeserializer::new(&buf, Some(base)).unwrap();
373        assert_eq!(length, buf.len());
374        let decoded_val = deser.read_u64().unwrap();
375
376        assert_eq!(val, decoded_val);
377
378        // Verify bytes (val - base = 50)
379        assert_eq!(buf, vec![0, 1, 50]);
380    }
381
382    #[test]
383    #[should_panic(expected = "Delta encoding underflow")]
384    fn test_delta_encoding_underflow_panics() {
385        let mut buf = Vec::new();
386        let base = 100;
387        let val = 50;
388
389        let mut ser = KeySerializer::new(&mut buf, Some(base));
390        ser.write_u64(val);
391    }
392
393    #[test]
394    fn test_delta_encoding_only_first() {
395        let mut buf = Vec::new();
396        let base = 100;
397        let val1 = 150;
398        let val2 = 200;
399
400        // Serialize
401        {
402            let mut ser = KeySerializer::new(&mut buf, Some(base));
403            ser.write_u64(val1);
404            ser.write_u64(val2);
405            ser.finalize();
406        }
407
408        // Deserialize
409        let (mut deser, length) = KeyDeserializer::new(&buf, Some(base)).unwrap();
410        assert_eq!(length, buf.len());
411        let decoded_val1 = deser.read_u64().unwrap();
412        let decoded_val2 = deser.read_u64().unwrap();
413
414        assert_eq!(val1, decoded_val1);
415        assert_eq!(val2, decoded_val2);
416    }
417
418    #[test]
419    fn test_delta_encoding_none() {
420        let mut buf = Vec::new();
421        let val = 150;
422
423        // Serialize
424        {
425            let mut ser = KeySerializer::new(&mut buf, None);
426            ser.write_u64(val);
427            ser.finalize();
428        }
429
430        // Deserialize
431        let (mut deser, length) = KeyDeserializer::new(&buf, None).unwrap();
432        assert_eq!(length, buf.len());
433        let decoded_val = deser.read_u64().unwrap();
434
435        assert_eq!(val, decoded_val);
436
437        // Verify bytes (should be regular varint of 150, which fits in 1 byte in this encoding)
438        assert_eq!(buf, vec![0, 1, 150]);
439    }
440
441    #[test]
442    fn test_delta_encoding_overflow_on_read_returns_error() {
443        // Forge a buffer with a large varint.
444        // Varint of u64::MAX is 9 bytes of 0xff.
445        let buf = vec![0, 9, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff];
446
447        // Deserialize with base = Some(1).
448        // read_u64 should try to add 1 to u64::MAX and return error.
449        let (mut deser, length) = KeyDeserializer::new(&buf, Some(1)).unwrap();
450        assert_eq!(length, buf.len());
451        assert!(deser.read_u64().is_err());
452    }
453
454    #[test]
455    #[should_panic(expected = "write_u64 with base must be the first item")]
456    fn test_write_u64_with_base_not_first_panics() {
457        let mut buf = Vec::new();
458        let base = 100;
459        let val = 150;
460
461        let mut ser = KeySerializer::new(&mut buf, Some(base));
462        ser.write_varint(5); // Write something else first
463        ser.write_u64(val); // Should panic here
464    }
465
466    #[test]
467    #[should_panic(expected = "read_u64 with base must be the first item")]
468    fn test_read_u64_with_base_not_first_panics() {
469        let mut buf = Vec::new();
470        let base = 100;
471        let val1 = 150;
472        let val2 = 200;
473
474        {
475            let mut ser = KeySerializer::new(&mut buf, Some(base));
476            ser.write_u64(val1);
477            ser.write_u64(val2);
478            ser.finalize();
479        }
480
481        let (mut deser, length) = KeyDeserializer::new(&buf, Some(base)).unwrap();
482        assert_eq!(length, buf.len());
483        deser.read_varint().unwrap(); // Reads val1 (delta)
484        deser.read_u64().unwrap(); // Tries to read val2 as u64.
485    }
486}