Skip to main content

ext4_metadata/
lib.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use flyweights::{FlyByteStr, FlyStr};
6use serde::{Deserialize, Serialize};
7use sorted_vec_map_rs::SortedVecMap;
8use std::collections::{BTreeMap, HashSet};
9use std::sync::Arc;
10use thiserror::Error;
11
12// This is deliberately the same as ext4.
13pub const ROOT_INODE_NUM: u64 = 2;
14
15pub const S_IFDIR: u16 = 0x4000;
16pub const S_IFREG: u16 = 0x8000;
17pub const S_IFLNK: u16 = 0xa000;
18pub const S_IFMT: u16 = 0xf000;
19
20#[derive(Error, Debug)]
21pub enum MetadataError {
22    #[error("Node not found")]
23    NotFound,
24    #[error("Node is not a directory")]
25    NotDir,
26    #[error("Failed to deserialize metadata (corrupt?)")]
27    FailedToDeserialize,
28}
29
30#[derive(Serialize, Deserialize)]
31pub struct Metadata {
32    nodes: SortedVecMap<u64, Node>,
33}
34
35impl Metadata {
36    pub fn new() -> Self {
37        Self { nodes: SortedVecMap::new() }
38    }
39
40    /// Finds the node with name `name` in directory with inode number `parent`.
41    pub fn lookup(&self, parent: u64, name: &str) -> Result<u64, MetadataError> {
42        self.nodes
43            .get(&parent)
44            .ok_or(MetadataError::NotFound)?
45            .directory()
46            .ok_or(MetadataError::NotDir)?
47            .children
48            .get(&FlyStr::from(name))
49            .ok_or(MetadataError::NotFound)
50            .cloned()
51    }
52
53    /// Returns the node with inode number `inode_num`.
54    pub fn get(&self, inode_num: u64) -> Option<&Node> {
55        self.nodes.get(&inode_num)
56    }
57
58    /// Deserializes the metadata.
59    pub fn deserialize(bytes: &[u8]) -> Result<Self, MetadataError> {
60        let mut metadata: Self =
61            bincode::deserialize(bytes).map_err(|_| MetadataError::FailedToDeserialize)?;
62        metadata.dedup_xattrs();
63        Ok(metadata)
64    }
65
66    /// Serializes the metadata.
67    pub fn serialize(&self) -> Vec<u8> {
68        bincode::serialize(&self).unwrap()
69    }
70
71    /// Add a child at `path` with inode number `inode_num`.
72    pub fn add_child(&mut self, path: &[&str], inode_num: u64) {
73        for component in path {
74            assert!(
75                *component != "."
76                    && *component != ".."
77                    && !component.contains(|c| c == '/' || c == '\0'),
78                "Invalid path component {component:?} in {path:?}"
79            );
80        }
81
82        let mut node = self.nodes.get_mut(&ROOT_INODE_NUM).unwrap().directory_mut().unwrap();
83        let mut iter = path.iter();
84        let name = *iter.next_back().unwrap();
85        for component in iter {
86            let inode_num = *node.children.get(&FlyStr::from(*component)).unwrap();
87            node = self.nodes.get_mut(&inode_num).unwrap().directory_mut().unwrap();
88        }
89        node.children.insert(name.into(), inode_num);
90    }
91
92    /// Inserts a directory node.  This will not add a child to a directory; see `add_child`.
93    pub fn insert_directory(
94        &mut self,
95        inode_num: u64,
96        mode: u16,
97        uid: u16,
98        gid: u16,
99        extended_attributes: ExtendedAttributes,
100    ) {
101        assert_eq!(mode & S_IFMT, S_IFDIR);
102        self.nodes.insert(
103            inode_num,
104            Node {
105                info: NodeInfo::Directory(Directory { children: SortedVecMap::new() }),
106                mode,
107                uid,
108                gid,
109                extended_attributes,
110            },
111        );
112    }
113
114    /// Inserts a file node.  This will not add a child to a directory; see `add_child`.
115    pub fn insert_file(
116        &mut self,
117        inode_num: u64,
118        mode: u16,
119        uid: u16,
120        gid: u16,
121        extended_attributes: ExtendedAttributes,
122    ) {
123        assert_eq!(mode & S_IFMT, S_IFREG);
124        self.nodes.insert(
125            inode_num,
126            Node { info: NodeInfo::File(File), mode, uid, gid, extended_attributes },
127        );
128    }
129
130    /// Inserts a symlink node.  This will not add a child to a directory; see `add_child`.
131    pub fn insert_symlink(
132        &mut self,
133        inode_num: u64,
134        target: String,
135        mode: u16,
136        uid: u16,
137        gid: u16,
138        extended_attributes: ExtendedAttributes,
139    ) {
140        assert_eq!(mode & S_IFMT, S_IFLNK);
141        self.nodes.insert(
142            inode_num,
143            Node {
144                info: NodeInfo::Symlink(Symlink { target: target.into() }),
145                mode,
146                uid,
147                gid,
148                extended_attributes,
149            },
150        );
151    }
152
153    /// Nodes will often have the exact same set of extended attributes as other nodes. For this
154    /// reason, entire sets of extended attributes are reference counted and shared between nodes.
155    /// The serialized format of the metadata writes out the extended attributes along with each
156    /// node. After deserializing the metadata, each node will contain its own copy of the extended
157    /// attributes. This function scans all of the nodes and deduplicates the extended attributes to
158    /// save memory.
159    fn dedup_xattrs(&mut self) {
160        let mut xattrs: HashSet<Arc<SortedVecMap<FlyByteStr, FlyByteStr>>> = HashSet::new();
161        for node in self.nodes.values_mut() {
162            match xattrs.get(node.extended_attributes.0.as_ref()) {
163                Some(existing) => {
164                    node.extended_attributes.0 = existing.clone();
165                }
166                None => {
167                    xattrs.insert(node.extended_attributes.0.clone());
168                }
169            }
170        }
171    }
172}
173
174#[derive(Default, Clone, PartialEq, Eq, Debug)]
175pub struct ExtendedAttributes(Arc<SortedVecMap<FlyByteStr, FlyByteStr>>);
176
177impl<'de> serde::Deserialize<'de> for ExtendedAttributes {
178    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
179    where
180        D: serde::Deserializer<'de>,
181    {
182        Ok(Self(Arc::new(SortedVecMap::deserialize(deserializer)?)))
183    }
184}
185
186impl serde::Serialize for ExtendedAttributes {
187    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
188    where
189        S: serde::Serializer,
190    {
191        self.0.as_ref().serialize(serializer)
192    }
193}
194
195impl FromIterator<(FlyByteStr, FlyByteStr)> for ExtendedAttributes {
196    fn from_iter<T: IntoIterator<Item = (FlyByteStr, FlyByteStr)>>(iter: T) -> Self {
197        ExtendedAttributes(Arc::new(SortedVecMap::from_iter(iter)))
198    }
199}
200
201impl<const N: usize> From<[(FlyByteStr, FlyByteStr); N]> for ExtendedAttributes {
202    fn from(value: [(FlyByteStr, FlyByteStr); N]) -> Self {
203        value.into_iter().collect()
204    }
205}
206
207impl From<BTreeMap<Box<[u8]>, Box<[u8]>>> for ExtendedAttributes {
208    fn from(value: BTreeMap<Box<[u8]>, Box<[u8]>>) -> Self {
209        value.into_iter().map(|(k, v)| (k.into(), v.into())).collect()
210    }
211}
212
213impl ExtendedAttributes {
214    pub fn get(&self, key: &[u8]) -> Option<&FlyByteStr> {
215        self.0.get(&FlyByteStr::from(key))
216    }
217
218    pub fn keys(&self) -> impl Iterator<Item = &FlyByteStr> {
219        self.0.keys()
220    }
221
222    pub fn is_empty(&self) -> bool {
223        self.0.is_empty()
224    }
225}
226
227#[derive(Serialize, Deserialize)]
228pub struct Node {
229    info: NodeInfo,
230    pub mode: u16,
231    pub uid: u16,
232    pub gid: u16,
233    pub extended_attributes: ExtendedAttributes,
234}
235
236impl Node {
237    pub fn info(&self) -> &NodeInfo {
238        &self.info
239    }
240
241    pub fn directory(&self) -> Option<&Directory> {
242        match &self.info {
243            NodeInfo::Directory(d) => Some(d),
244            _ => None,
245        }
246    }
247
248    pub fn directory_mut(&mut self) -> Option<&mut Directory> {
249        match &mut self.info {
250            NodeInfo::Directory(d) => Some(d),
251            _ => None,
252        }
253    }
254
255    pub fn file(&self) -> Option<&File> {
256        match &self.info {
257            NodeInfo::File(f) => Some(f),
258            _ => None,
259        }
260    }
261
262    pub fn symlink(&self) -> Option<&Symlink> {
263        match &self.info {
264            NodeInfo::Symlink(s) => Some(s),
265            _ => None,
266        }
267    }
268}
269
270#[derive(Debug, Serialize, Deserialize)]
271pub enum NodeInfo {
272    Directory(Directory),
273    File(File),
274    Symlink(Symlink),
275}
276
277#[derive(Debug, Serialize, Deserialize)]
278pub struct Directory {
279    pub children: SortedVecMap<FlyStr, u64>,
280}
281
282#[derive(Debug, Serialize, Deserialize)]
283pub struct File;
284
285#[derive(Debug, Serialize, Deserialize)]
286pub struct Symlink {
287    pub target: FlyStr,
288}
289
290#[cfg(test)]
291mod tests {
292    use super::{
293        ExtendedAttributes, Metadata, NodeInfo, ROOT_INODE_NUM, S_IFDIR, S_IFLNK, S_IFREG,
294    };
295    use assert_matches::assert_matches;
296    use std::collections::BTreeMap;
297    use std::sync::Arc;
298
299    #[test]
300    fn test_serialize_and_deserialize() {
301        let mut m = Metadata::new();
302        let xattr: ExtendedAttributes =
303            [((*b"a").into(), (*b"apple").into()), ((*b"b").into(), (*b"ball").into())].into();
304        m.insert_directory(ROOT_INODE_NUM, S_IFDIR | 0o755, 2, 3, Default::default());
305        m.insert_directory(3, S_IFDIR | 0o775, 2, 3, xattr.clone());
306        m.add_child(&["foo"], 3);
307        m.insert_file(4, S_IFREG | 0o644, 2, 3, xattr.clone());
308        m.add_child(&["foo", "bar"], 4);
309        m.insert_symlink(5, "symlink-target".to_string(), S_IFLNK | 0o777, 2, 3, xattr.clone());
310        m.add_child(&["foo", "baz"], 5);
311
312        let m = Metadata::deserialize(&m.serialize()).expect("deserialize failed");
313        let node = m.get(ROOT_INODE_NUM).expect("root not found");
314        assert_matches!(node.info(), NodeInfo::Directory(_));
315        assert_eq!(node.mode, S_IFDIR | 0o755);
316        assert_eq!(node.uid, 2);
317        assert_eq!(node.gid, 3);
318        assert!(node.extended_attributes.is_empty());
319
320        assert_eq!(m.lookup(ROOT_INODE_NUM, "foo").expect("foo not found"), 3);
321        let node = m.get(3).expect("root not found");
322        assert_matches!(node.info(), NodeInfo::Directory(_));
323        assert_eq!(node.mode, S_IFDIR | 0o775);
324        assert_eq!(node.uid, 2);
325        assert_eq!(node.gid, 3);
326        assert_eq!(&node.extended_attributes, &xattr);
327
328        assert_eq!(m.lookup(3, "bar").expect("foo/bar not found"), 4);
329        let node = m.get(4).expect("root not found");
330        assert_matches!(node.info(), NodeInfo::File(_));
331        assert_eq!(node.mode, S_IFREG | 0o644);
332        assert_eq!(node.uid, 2);
333        assert_eq!(node.gid, 3);
334        assert_eq!(&node.extended_attributes, &xattr);
335
336        assert_eq!(m.lookup(3, "baz").expect("foo/baz not found"), 5);
337        let node = m.get(5).expect("root not found");
338        assert_matches!(node.info(), NodeInfo::Symlink(_));
339        assert_eq!(node.mode, S_IFLNK | 0o777);
340        assert_eq!(node.uid, 2);
341        assert_eq!(node.gid, 3);
342        assert_eq!(&node.extended_attributes, &xattr);
343    }
344
345    #[test]
346    fn test_serialization_is_deterministic() {
347        // Builds a Metadata instance with fixed contents.
348        let build_fs = || {
349            let mut m = Metadata::new();
350            let xattr: ExtendedAttributes =
351                [((*b"a").into(), (*b"apple").into()), ((*b"b").into(), (*b"ball").into())].into();
352            m.insert_directory(ROOT_INODE_NUM, S_IFDIR | 0o755, 2, 3, Default::default());
353            m.insert_directory(3, S_IFDIR | 0o775, 2, 3, xattr.clone());
354            m.add_child(&["foo"], 3);
355            m.insert_file(4, S_IFREG | 0o644, 2, 3, xattr.clone());
356            m.add_child(&["foo", "bar"], 4);
357            m.insert_symlink(5, "symlink-target".to_string(), S_IFLNK | 0o777, 2, 3, xattr.clone());
358            m.add_child(&["foo", "baz"], 5);
359            m
360        };
361
362        // Build it twice and verify that the serialized representations match.
363        let data1 = build_fs().serialize();
364        let data2 = build_fs().serialize();
365        assert_eq!(data1, data2);
366    }
367
368    #[test]
369    fn test_dedup_extended_attributes() {
370        let mut m = Metadata::new();
371        m.insert_directory(ROOT_INODE_NUM, S_IFDIR | 0o755, 2, 3, Default::default());
372        m.insert_file(3, S_IFREG, 2, 3, [("a".into(), "b".into())].into());
373        m.add_child(&["foo"], 3);
374        m.insert_file(4, S_IFREG, 2, 3, [("a".into(), "b".into())].into());
375        m.add_child(&["bar"], 4);
376
377        // The extended attributes are equal but they haven't been deduplicated yet.
378        assert_eq!(
379            m.get(3).unwrap().extended_attributes.0,
380            m.get(4).unwrap().extended_attributes.0
381        );
382        assert!(!Arc::ptr_eq(
383            &m.get(3).unwrap().extended_attributes.0,
384            &m.get(4).unwrap().extended_attributes.0
385        ));
386
387        m.dedup_xattrs();
388        // The extended attributes are now deduplicated.
389        assert!(Arc::ptr_eq(
390            &m.get(3).unwrap().extended_attributes.0,
391            &m.get(4).unwrap().extended_attributes.0
392        ));
393
394        // The extended attributes are duplicated in the serialized data but get deduplicated again
395        // when being deserialized.
396        let m = Metadata::deserialize(&m.serialize()).unwrap();
397        assert!(Arc::ptr_eq(
398            &m.get(3).unwrap().extended_attributes.0,
399            &m.get(4).unwrap().extended_attributes.0
400        ));
401    }
402
403    #[test]
404    fn test_extended_attribute_serialization_backwards_compatibility() {
405        let xattrs = [(b"a", b"b"), (b"c", b"d"), (b"e", b"f")];
406        let btree_map: BTreeMap<Box<[u8]>, Box<[u8]>> =
407            xattrs.iter().map(|&(&k, &v)| (k.into(), v.into())).collect();
408
409        let serialized = bincode::serialize(&btree_map).unwrap();
410
411        let extended_attributes: ExtendedAttributes = bincode::deserialize(&serialized).unwrap();
412        let expected_xattrs: ExtendedAttributes =
413            xattrs.iter().map(|&(&k, &v)| (k.into(), v.into())).collect();
414        assert_eq!(extended_attributes, expected_xattrs);
415    }
416
417    #[test]
418    fn test_extended_attribute_serialization_forwards_compatibility() {
419        let xattrs = [(b"a", b"b"), (b"c", b"d"), (b"e", b"f")];
420        let extended_attributes: ExtendedAttributes =
421            xattrs.iter().map(|&(&k, &v)| (k.into(), v.into())).collect();
422
423        let serialized = bincode::serialize(&extended_attributes).unwrap();
424
425        let btree_map: BTreeMap<Box<[u8]>, Box<[u8]>> = bincode::deserialize(&serialized).unwrap();
426        let expected_xattrs: BTreeMap<Box<[u8]>, Box<[u8]>> =
427            xattrs.iter().map(|&(&k, &v)| (k.into(), v.into())).collect();
428        assert_eq!(btree_map, expected_xattrs);
429    }
430}