Skip to main content

fxfs/
zerocopy_serialization.rs

// Copyright 2026 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

//! Efficient serialization and deserialization for `Vec<T>` and `Box<[T]>` where `T` supports
//! zerocopy.
7
8use fprint::TypeFingerprint;
9use serde::{Deserializer, Serializer};
10use std::marker::PhantomData;
11use zerocopy::{FromBytes, Immutable, IntoBytes};
12
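// Only little-endian targets are supported: containers are serialized by copying the in-memory
// bytes of their elements, so the serialized form follows the host byte order.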
14static_assertions::assert_cfg!(target_endian = "little");
15/// A trait for container types that can be efficiently serialized as a byte slice and
16/// reconstructed from a `Vec` of their elements.
17pub trait SerializeAsBytes {
18    type Inner: FromBytes + Immutable + Copy;
19
20    /// Returns a byte slice that represents the container.
21    fn as_bytes(&self) -> &[u8];
22
23    /// Constructs a container from a `Vec` of its elements.
24    fn from_vec(slice: Vec<Self::Inner>) -> Self;
25}
26
27impl<T: FromBytes + IntoBytes + Immutable + Copy> SerializeAsBytes for Vec<T> {
28    type Inner = T;
29
30    fn as_bytes(&self) -> &[u8] {
31        self.as_slice().as_bytes()
32    }
33
34    fn from_vec(slice: Vec<T>) -> Self {
35        slice
36    }
37}
38
39impl<T: FromBytes + IntoBytes + Immutable + Copy> SerializeAsBytes for Box<[T]> {
40    type Inner = T;
41
42    fn as_bytes(&self) -> &[u8] {
43        (&**self).as_bytes()
44    }
45
46    fn from_vec(slice: Vec<T>) -> Self {
47        slice.into_boxed_slice()
48    }
49}
50
51/// Serializes a container type as a byte slice.
52pub fn serialize<T, S>(value: &T, serializer: S) -> Result<S::Ok, S::Error>
53where
54    T: SerializeAsBytes,
55    S: Serializer,
56{
57    serializer.serialize_bytes(value.as_bytes())
58}
59
60/// Deserializes a container type from a byte slice.
61pub fn deserialize<'de, T, D>(deserializer: D) -> Result<T, D::Error>
62where
63    T: SerializeAsBytes,
64    D: Deserializer<'de>,
65{
66    // We shouldn't be deserializing zero-sized types.
67    debug_assert!(size_of::<T::Inner>() > 0);
68
69    // Bincode reads the bytes into a `Vec<u8>`. If the type we're deserializing has the same
70    // alignment as `u8` then it's possible to take the `Vec<u8>` instead of copying the data.
71    if align_of::<T::Inner>() == align_of::<u8>() {
72        // Calls `visit_byte_buf`.
73        deserializer.deserialize_byte_buf(Visitor(PhantomData::<T>))
74    } else {
75        // Calls `visit_bytes`.
76        deserializer.deserialize_bytes(Visitor(PhantomData::<T>))
77    }
78}
79
80/// A visitor for deserializing a container type from a byte slice.
81struct Visitor<T>(PhantomData<T>);
82impl<'de, T> serde::de::Visitor<'de> for Visitor<T>
83where
84    T: SerializeAsBytes,
85{
86    type Value = T;
87
88    fn expecting(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
89        write!(
90            formatter,
91            "a byte array with a length that is a multiple of {}",
92            size_of::<T::Inner>()
93        )
94    }
95
96    fn visit_bytes<E>(self, bytes: &[u8]) -> Result<Self::Value, E>
97    where
98        E: serde::de::Error,
99    {
100        if !bytes.len().is_multiple_of(size_of::<T::Inner>()) {
101            return Err(E::custom(
102                "input bytes are not a multiple of the size of the desired type",
103            ));
104        }
105        let elements = bytes.len() / size_of::<T::Inner>();
106        let mut vec: Vec<T::Inner> = Vec::with_capacity(elements);
107        let dst = vec.spare_capacity_mut();
108        unsafe {
109            // SAFETY:
110            //   - Both `bytes` and `dst` are nonnull, aligned, and nonoverlapping.
111            //   - `bytes` is valid for reading `bytes.len()` bytes.
112            //   - `dst` is valid for writing `bytes.len()` bytes.
113            //   - `T::Inner` implements `FromBytes` which means that any bit pattern is valid and
114            //     it's safe to initialize the elements this way.
115            std::ptr::copy_nonoverlapping(
116                bytes.as_ptr(),
117                dst.as_mut_ptr().cast::<u8>(),
118                bytes.len(),
119            );
120            // SAFETY: All of the elements were initialized.
121            vec.set_len(elements);
122        }
123        Ok(T::from_vec(vec))
124    }
125
126    fn visit_byte_buf<E>(self, bytes: Vec<u8>) -> Result<Self::Value, E>
127    where
128        E: serde::de::Error,
129    {
130        // This method should only be invoked from `deserialize` when the inner type has `u8`
131        // alignment.
132        debug_assert!(align_of::<T::Inner>() == align_of::<u8>());
133        // We shouldn't be deserializing zero-sized types.
134        debug_assert!(size_of::<T::Inner>() > 0);
135
136        if !bytes.len().is_multiple_of(size_of::<T::Inner>()) {
137            return Err(E::custom(
138                "input bytes are not a multiple of the size of the desired type",
139            ));
140        }
141        let elements = bytes.len() / size_of::<T::Inner>();
142
143        // Both the size and capacity of `bytes` must be a multiple of the size of `T::Inner` to be
144        // able to change the type of the `Vec`.
145        let vec = if !bytes.capacity().is_multiple_of(size_of::<T::Inner>()) {
146            // Calling `Vec::into_boxed_slice` will realloc the allocation to drop the excess
147            // capacity. If we're lucky, the new and old capacity will be in the same allocator
148            // bucket and no reallocation will happen.
149            let ptr = Box::into_raw(bytes.into_boxed_slice());
150            // SAFETY:
151            //   - All of the requirements for `Vec::from_raw_parts` are upheld:
152            //     - Fxfs only uses the global allocator.
153            //     - `u8` and `T::Inner` have the same alignment.
154            //     - The size of the allocation is `size_of::<T::Inner>() * elements` because of the
155            //       `Vec::into_boxed_slice` call.
156            //     - All of the elements are initialized.
157            //   - The pointer cast is safe because `T::Inner` implements `FromBytes`.
158            unsafe { Vec::from_raw_parts(ptr.cast::<T::Inner>(), elements, elements) }
159        } else {
160            let (ptr, _size, capacity) = bytes.into_raw_parts();
161            let capacity = capacity / size_of::<T::Inner>();
162            // SAFETY:
163            //   - All of the requirements for `Vec::from_raw_parts` are upheld:
164            //     - Fxfs only uses the global allocator.
165            //     - `u8` and `T::Inner` have the same alignment.
166            //     - `size_of::<T::Inner>() * capacity` is the same as `bytes.capacity()`.
167            //     - All of the elements are initialized.
168            //   - The pointer cast is safe because `T::Inner` implements `FromBytes`.
169            unsafe { Vec::from_raw_parts(ptr.cast::<T::Inner>(), elements, capacity) }
170        };
171        Ok(T::from_vec(vec))
172    }
173}
174
175pub fn fingerprint<T: TypeFingerprint>() -> String {
176    format!("AsBytes<{}>", T::fingerprint())
177}
178
#[cfg(test)]
mod tests {
    use crate::serialized_types::{LATEST_VERSION, Versioned};
    use serde::{Deserialize, Serialize};

    /// A `Box<[u8]>` serialized through this module must produce exactly the same bytes as the
    /// derived serialization, and must round trip.
    #[fuchsia::test]
    fn test_boxed_slice_of_u8_is_the_same() {
        #[derive(Serialize, Deserialize, Versioned)]
        struct Regular(Box<[u8]>);
        #[derive(Serialize, Deserialize, Versioned, PartialEq, Eq, Debug)]
        struct Optimized(#[serde(with = "crate::zerocopy_serialization")] Box<[u8]>);

        let regular = Regular(vec![0, 1, 2, 3, 254, 255].into_boxed_slice());
        let mut regular_serialized = Vec::new();
        regular.serialize_into(&mut regular_serialized).unwrap();

        let optimized = Optimized(regular.0);
        let mut optimized_serialized = Vec::new();
        optimized.serialize_into(&mut optimized_serialized).unwrap();

        assert_eq!(regular_serialized, optimized_serialized);

        assert_eq!(
            Optimized::deserialize_from(&mut optimized_serialized.as_slice(), LATEST_VERSION)
                .unwrap(),
            optimized
        );
    }

    /// A `Vec<u8>` serialized through this module must produce exactly the same bytes as the
    /// derived serialization, and must round trip.
    #[fuchsia::test]
    fn test_vec_of_u8_is_the_same() {
        #[derive(Serialize, Deserialize, Versioned)]
        struct Regular(Vec<u8>);
        #[derive(Serialize, Deserialize, Versioned, PartialEq, Eq, Debug)]
        struct Optimized(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>);

        let regular = Regular(vec![0, 1, 2, 3, 254, 255]);
        let mut regular_serialized = Vec::new();
        regular.serialize_into(&mut regular_serialized).unwrap();

        let optimized = Optimized(regular.0);
        let mut optimized_serialized = Vec::new();
        optimized.serialize_into(&mut optimized_serialized).unwrap();

        assert_eq!(regular_serialized, optimized_serialized);

        assert_eq!(
            Optimized::deserialize_from(&mut optimized_serialized.as_slice(), LATEST_VERSION)
                .unwrap(),
            optimized
        );
    }

    /// For multi-byte elements the payload matches the derived serialization, but the length
    /// prefix counts bytes rather than elements.
    #[fuchsia::test]
    fn test_boxed_slice_of_array_of_u8() {
        #[derive(Serialize, Deserialize, Versioned)]
        struct Regular(Box<[[u8; 4]]>);
        #[derive(Serialize, Deserialize, Versioned, PartialEq, Eq, Debug)]
        struct Optimized(#[serde(with = "crate::zerocopy_serialization")] Box<[[u8; 4]]>);

        let regular = Regular(vec![[0, 1, 2, 3], [252, 253, 254, 255]].into_boxed_slice());
        let mut regular_serialized = Vec::new();
        regular.serialize_into(&mut regular_serialized).unwrap();

        let optimized = Optimized(regular.0);
        let mut optimized_serialized = Vec::new();
        optimized.serialize_into(&mut optimized_serialized).unwrap();

        // The serialized data is the same, but the number of elements is different.
        assert_eq!(&regular_serialized[1..], &optimized_serialized[1..]);
        assert_eq!(regular_serialized[0], 2);
        assert_eq!(optimized_serialized[0], 8);

        assert_eq!(
            Optimized::deserialize_from(&mut optimized_serialized.as_slice(), LATEST_VERSION)
                .unwrap(),
            optimized
        );
    }

    /// Same as `test_boxed_slice_of_array_of_u8`, but for `Vec<[u8; 4]>`.
    #[fuchsia::test]
    fn test_vec_of_array_of_u8() {
        #[derive(Serialize, Deserialize, Versioned)]
        struct Regular(Vec<[u8; 4]>);
        #[derive(Serialize, Deserialize, Versioned, PartialEq, Eq, Debug)]
        struct Optimized(#[serde(with = "crate::zerocopy_serialization")] Vec<[u8; 4]>);

        let regular = Regular(vec![[0, 1, 2, 3], [252, 253, 254, 255]]);
        let mut regular_serialized = Vec::new();
        regular.serialize_into(&mut regular_serialized).unwrap();

        let optimized = Optimized(regular.0);
        let mut optimized_serialized = Vec::new();
        optimized.serialize_into(&mut optimized_serialized).unwrap();

        // The serialized data is the same, but the number of elements is different.
        assert_eq!(&regular_serialized[1..], &optimized_serialized[1..]);
        assert_eq!(regular_serialized[0], 2);
        assert_eq!(optimized_serialized[0], 8);

        assert_eq!(
            Optimized::deserialize_from(&mut optimized_serialized.as_slice(), LATEST_VERSION)
                .unwrap(),
            optimized
        );
    }

    /// Elements with an alignment greater than one byte must round trip.
    #[fuchsia::test]
    fn test_vec_of_u64_round_trip() {
        #[derive(Serialize, Deserialize, Versioned, PartialEq, Eq, Debug)]
        struct Optimized(#[serde(with = "crate::zerocopy_serialization")] Vec<u64>);

        let optimized =
            Optimized(vec![0, 1, u32::MAX as u64, u32::MAX as u64 + 1, u64::MAX - 1, u64::MAX]);
        let mut optimized_serialized = Vec::new();
        optimized.serialize_into(&mut optimized_serialized).unwrap();

        // 1 byte varint encoded length + 6 * 8 bytes per u64.
        assert_eq!(optimized_serialized.len(), 1 + 6 * 8);

        assert_eq!(
            Optimized::deserialize_from(&mut optimized_serialized.as_slice(), LATEST_VERSION)
                .unwrap(),
            optimized
        );
    }

    /// An empty container must round trip.
    #[fuchsia::test]
    fn test_empty_vec() {
        #[derive(Serialize, Deserialize, Versioned, PartialEq, Eq, Debug)]
        struct OptimizedVec(#[serde(with = "crate::zerocopy_serialization")] Vec<u8>);
        let optimized = OptimizedVec(Vec::new());
        let mut buf = Vec::new();
        optimized.serialize_into(&mut buf).unwrap();
        assert_eq!(
            OptimizedVec::deserialize_from(&mut buf.as_slice(), LATEST_VERSION).unwrap(),
            optimized
        );
    }

    /// Exercises the branch of `visit_byte_buf` that has to shrink the buffer because its
    /// capacity is not a whole number of elements.
    #[fuchsia::test]
    fn test_visit_byte_buf_capacity_mismatch() {
        // Create a Vec<u8> with capacity that is not a multiple of 4.
        let mut bytes = Vec::with_capacity(11);
        bytes.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
        assert_eq!(bytes.len(), 8);
        assert!(bytes.capacity() >= 11);
        // Check the precondition against the element type that is actually deserialized below.
        assert!(!bytes.capacity().is_multiple_of(size_of::<[u8; 4]>()));

        // [u8; 4] has size 4, alignment 1.
        let visitor = super::Visitor(std::marker::PhantomData::<Vec<[u8; 4]>>);
        let result: Vec<[u8; 4]> =
            serde::de::Visitor::visit_byte_buf::<serde::de::value::Error>(visitor, bytes)
                .expect("visit_byte_buf failed");
        assert_eq!(&result, &[[1, 2, 3, 4], [5, 6, 7, 8]]);
    }

    /// Exercises the branch of `visit_byte_buf` that reuses the allocation untouched because its
    /// capacity is already a whole number of elements.
    #[fuchsia::test]
    fn test_visit_byte_buf_capacity_match() {
        // Create a Vec<u8> whose capacity is a multiple of 4 but larger than its length.
        let mut bytes = Vec::with_capacity(12);
        bytes.extend_from_slice(&[1, 2, 3, 4, 5, 6, 7, 8]);
        assert_eq!(bytes.len(), 8);
        let byte_capacity = bytes.capacity();
        assert!(byte_capacity.is_multiple_of(size_of::<[u8; 4]>()));

        // [u8; 4] has size 4, alignment 1.
        let visitor = super::Visitor(std::marker::PhantomData::<Vec<[u8; 4]>>);
        let result: Vec<[u8; 4]> =
            serde::de::Visitor::visit_byte_buf::<serde::de::value::Error>(visitor, bytes)
                .expect("visit_byte_buf failed");
        assert_eq!(&result, &[[1, 2, 3, 4], [5, 6, 7, 8]]);
        // The spare capacity is carried over, converted from bytes to elements.
        assert_eq!(result.capacity(), byte_capacity / size_of::<[u8; 4]>());
    }

    /// An empty owned buffer must produce an empty container.
    #[fuchsia::test]
    fn test_visit_byte_buf_with_empty_vec() {
        let visitor = super::Visitor(std::marker::PhantomData::<Vec<[u8; 4]>>);
        let result: Vec<[u8; 4]> =
            serde::de::Visitor::visit_byte_buf::<serde::de::value::Error>(visitor, Vec::new())
                .expect("visit_byte_buf failed");
        assert!(result.is_empty());
    }

    /// `visit_byte_buf` must reject input whose length is not a whole number of elements.
    #[fuchsia::test]
    fn test_visit_byte_buf_invalid_length() {
        let bytes = vec![1, 2, 3, 4, 5]; // Length 5 is not a multiple of 4.
        let visitor = super::Visitor(std::marker::PhantomData::<Vec<[u8; 4]>>);
        let result: Result<_, serde::de::value::Error> =
            serde::de::Visitor::visit_byte_buf(visitor, bytes);
        assert!(result.is_err());
    }

    /// `visit_bytes` must reject input whose length is not a whole number of elements.
    #[fuchsia::test]
    fn test_visit_bytes_invalid_length() {
        let bytes = vec![1, 2, 3, 4, 5]; // Length 5 is not a multiple of 2.
        let visitor = super::Visitor(std::marker::PhantomData::<Vec<u16>>);
        let result: Result<_, serde::de::value::Error> =
            serde::de::Visitor::visit_bytes(visitor, &bytes);
        assert!(result.is_err());
    }

    /// `visit_bytes` must cope with source bytes that are not aligned for the element type.
    #[fuchsia::test]
    fn test_visit_bytes_with_bad_alignment() {
        const E1: u64 = 0x0123456789ABCDEF;
        const E2: u64 = u64::MAX;
        // Scudo guarantees 16 byte alignment for allocations. Extra bytes are added at the front to
        // force the u64s to be misaligned.
        let mut bytes: Vec<u8> = vec![0, 1, 2];
        bytes.extend_from_slice(&E1.to_le_bytes());
        bytes.extend_from_slice(&E2.to_le_bytes());
        assert!(!(&bytes[3..]).as_ptr().cast::<u64>().is_aligned());
        let visitor = super::Visitor(std::marker::PhantomData::<Vec<u64>>);
        let result: Vec<u64> =
            serde::de::Visitor::visit_bytes::<serde::de::value::Error>(visitor, &bytes[3..])
                .expect("visit_bytes failed");
        assert_eq!(&result, &[E1, E2]);
    }

    /// An empty borrowed slice must produce an empty container.
    #[fuchsia::test]
    fn test_visit_bytes_with_empty_slice() {
        let visitor = super::Visitor(std::marker::PhantomData::<Vec<u64>>);
        let result: Vec<u64> =
            serde::de::Visitor::visit_bytes::<serde::de::value::Error>(visitor, &[])
                .expect("visit_bytes failed");
        assert!(result.is_empty());
    }
}