fuchsia_merkle/
merkle_root_builder.rs

1// Copyright 2025 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::util::{HASHES_PER_BLOCK, make_hash_hasher, update_with_zeros};
6use crate::{BLOCK_SIZE, Hash, hash_block};
7use bssl_crypto::digest::Sha256;
8use fuchsia_hash::HASH_SIZE;
9
10const EMPTY_MERKLE_TREE_ROOT: Hash = Hash::from_array([
11    0x15, 0xec, 0x7b, 0xf0, 0xb5, 0x07, 0x32, 0xb4, 0x9f, 0x82, 0x28, 0xe0, 0x7d, 0x24, 0x36, 0x53,
12    0x38, 0xf9, 0xe3, 0xab, 0x99, 0x4b, 0x00, 0xaf, 0x08, 0xe5, 0xa3, 0xbf, 0xfe, 0x55, 0xfd, 0x8b,
13]);
14
15/// Hashes blocks of hashes.
16struct LevelHasher {
17    hasher: Sha256,
18    offset: usize,
19}
20
21/// Calculates the merkle root for a given set of data. The leaf hashes of the merkle tree can
22/// optionally be collected for use with [`crate::MerkleVerifier`].
23///
24/// [`MerkleRootBuilder`] only accepts complete blocks of data except for the last block. This
25/// avoids buffering data internally. If complete blocks of data can't be guaranteed then
26/// [`BufferedMerkleRootBuilder`] should be used instead.
27///
28/// Most users of [`crate::MerkleTreeBuilder`] are only interested in the root of the merkle tree
29/// but [`crate::MerkleTreeBuilder`] generates and keeps the entire tree in memory. It also buffers
30/// the hashes of inner nodes. [`MerkleRootBuilder`] is able to generate just the root using less
31/// memory and a lot less buffering.
32pub struct MerkleRootBuilder<T> {
33    levels: Vec<LevelHasher>,
34    offset: usize,
35    leaf_hash_collector: T,
36
37    /// Holds the root of the tree when the offset of every level of the tree is a multiple of
38    /// [`BLOCK_SIZE`]. This only happens when [`offset`] is equal to `8192 * 256 ^ N` for every
39    /// `N > 0`.
40    ///
41    /// If [`Self::push_hash()`] immediately turned the root into a new level then
42    /// [`Self::complete`] wouldn't be able to recover the root if no more data was added.
43    root: Option<Hash>,
44}
45
46impl<T: LeafHashCollector> MerkleRootBuilder<T> {
47    /// Creates a new `MerkleRootBuilder` with a `LeafHashCollector`.
48    ///
49    /// Use `MerkleRootBuilder::default()` if a `LeafHashCollector` isn't needed.
50    pub fn new(leaf_hash_collector: T) -> Self {
51        Self { levels: Vec::new(), offset: 0, root: None, leaf_hash_collector }
52    }
53
54    /// Appends a full block of data to the merkle tree.
55    ///
56    /// `MerkleRootBuilder` doesn't buffer any data so only full blocks can be added until
57    /// [`Self::complete`] is called. Use [`BufferedMerkleRootBuilder`] if buffering is required.
58    pub fn add_block(&mut self, data: &[u8; BLOCK_SIZE]) {
59        self.push_hash(hash_block(&data[..], self.offset));
60    }
61
62    /// Appends a final amount of data to the merkle tree and returns the merkle root.
63    pub fn complete(mut self, data: &[u8]) -> T::Output {
64        // Add all of the complete blocks from `data`.
65        let remaining = self.add_blocks(data);
66
67        // Handle the last partial block when `data` isn't a block multiple.
68        if !remaining.is_empty() {
69            self.push_hash(hash_block(remaining, self.offset));
70        }
71
72        // Special case for the merkle tree of no data.
73        if self.offset == 0 {
74            self.leaf_hash_collector.add_leaf_hash(EMPTY_MERKLE_TREE_ROOT);
75            return self.leaf_hash_collector.complete(EMPTY_MERKLE_TREE_ROOT);
76        }
77
78        // Special case for when every level of the tree is already complete.
79        if let Some(root) = self.root {
80            return self.leaf_hash_collector.complete(root);
81        }
82
83        // Each row of the tree is padded with zeros to fill the last block and the resulting hash
84        // is bubbled up to the next level. Since `root` wasn't set, there's at least 1 level that
85        // needs padding so `bubbling_digest` is guaranteed to be set by the end of the loop.
86        let mut bubbling_digest: Option<[u8; HASH_SIZE]> = None;
87        for mut level_hasher in self.levels {
88            if level_hasher.offset.is_multiple_of(BLOCK_SIZE) && bubbling_digest.is_none() {
89                continue;
90            }
91            if let Some(digest) = bubbling_digest.take() {
92                level_hasher.hasher.update(&digest);
93                level_hasher.offset += HASH_SIZE;
94            }
95            let zeros = BLOCK_SIZE - (level_hasher.offset % BLOCK_SIZE);
96            update_with_zeros(&mut level_hasher.hasher, zeros);
97            bubbling_digest = Some(level_hasher.hasher.digest());
98        }
99        self.leaf_hash_collector.complete(Hash::from(bubbling_digest.unwrap()))
100    }
101
102    /// Adds a hash of a single block of data to the merkle tree.
103    pub(crate) fn push_hash(&mut self, mut digest: Hash) {
104        self.offset += BLOCK_SIZE;
105        self.leaf_hash_collector.add_leaf_hash(digest);
106
107        if let Some(root) = self.root.take() {
108            // The root was set but more data has now been added. Create a new level containing the
109            // root.
110            let mut hasher = make_hash_hasher(HASHES_PER_BLOCK, self.levels.len() + 1, 0);
111            hasher.update(root.as_bytes());
112            self.levels.push(LevelHasher { hasher, offset: HASH_SIZE });
113        }
114
115        for (level, level_hasher) in self.levels.iter_mut().enumerate() {
116            level_hasher.hasher.update(digest.as_bytes());
117            level_hasher.offset += HASH_SIZE;
118
119            // Only bubble the hash up to the next level when this level completes a block.
120            if !level_hasher.offset.is_multiple_of(BLOCK_SIZE) {
121                return;
122            }
123            // Initialize a new hasher for this level and bubble the old hasher up to the next
124            // level.
125            let new_hasher = make_hash_hasher(HASHES_PER_BLOCK, level + 1, level_hasher.offset);
126            let old_hasher = std::mem::replace(&mut level_hasher.hasher, new_hasher);
127            digest = Hash::from_array(old_hasher.digest());
128        }
129
130        self.root = Some(digest);
131    }
132
133    /// Adds all of the whole blocks from `data` and returns the remaining bytes.
134    fn add_blocks<'a>(&mut self, mut data: &'a [u8]) -> &'a [u8] {
135        while let Some((block, remainder)) = data.split_first_chunk::<BLOCK_SIZE>() {
136            data = remainder;
137            self.add_block(block);
138        }
139        data
140    }
141}
142
143impl Default for MerkleRootBuilder<NoopLeafHashCollector> {
144    fn default() -> Self {
145        Self::new(NoopLeafHashCollector)
146    }
147}
148
149/// Calculates the merkle root for a given set of data. The leaf hashes of the merkle tree can
150/// optionally be collected for use with a `MerkleVerifier`.
151///
152/// [`BufferedMerkleRootBuilder`] is able to accept data that isn't a multiple of [`BLOCK_SIZE`] by
153/// internally buffering the data.
154///
155/// If all of the data is already in memory then `MerkleRootBuilder::default().complete(data)`
156/// should be used instead as it avoids buffering the tail end of the data if it's not a complete
157/// block.
158pub struct BufferedMerkleRootBuilder<T> {
159    buffer: Vec<u8>,
160    builder: MerkleRootBuilder<T>,
161}
162
163impl<T: LeafHashCollector> BufferedMerkleRootBuilder<T> {
164    /// Creates a new `BufferedMerkleRootBuilder` with a `LeafHashCollector`.
165    ///
166    /// Use `BufferedMerkleRootBuilder::default()` if a `LeafHashCollector` isn't needed.
167    pub fn new(leaf_hash_collector: T) -> Self {
168        Self { buffer: Vec::new(), builder: MerkleRootBuilder::new(leaf_hash_collector) }
169    }
170
171    /// Appends data to the merkle tree.
172    pub fn write(&mut self, mut data: &[u8]) {
173        if data.is_empty() {
174            return;
175        }
176        if self.buffer.len().saturating_add(data.len()) < BLOCK_SIZE {
177            // The current buffer and the new data won't complete a block. Buffer the data and
178            // return.
179            self.buffer.extend_from_slice(data);
180            return;
181        }
182
183        if !self.buffer.is_empty() {
184            // If the buffered data isn't empty then take bytes from the start of the new data to
185            // complete a block. The above case handles when a block wouldn't be completed so this
186            // is guaranteed to complete a block.
187            let (head, tail) = data.split_at(BLOCK_SIZE - self.buffer.len());
188            self.buffer.extend_from_slice(head);
189            self.builder.add_block(self.buffer.as_slice().try_into().unwrap());
190            self.buffer.clear();
191            data = tail;
192        }
193
194        // Add all remaining whole blocks.
195        data = self.builder.add_blocks(data);
196        if !data.is_empty() {
197            // Buffer any remaining data.
198            self.buffer.extend_from_slice(data);
199        }
200    }
201
202    /// Finishes building the merkle tree and returns the merkle root.
203    pub fn complete(self) -> T::Output {
204        self.builder.complete(&self.buffer)
205    }
206}
207
208impl Default for BufferedMerkleRootBuilder<NoopLeafHashCollector> {
209    fn default() -> Self {
210        Self::new(NoopLeafHashCollector)
211    }
212}
213
214impl<T: LeafHashCollector> std::io::Write for BufferedMerkleRootBuilder<T> {
215    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
216        self.write(buf);
217        Ok(buf.len())
218    }
219
220    fn flush(&mut self) -> std::io::Result<()> {
221        Ok(())
222    }
223}
224
225/// A trait for collecting the leaf hashes of a merkle tree while constructing a merkle root.
226pub trait LeafHashCollector {
227    /// The output type of [`MerkleRootBuilder::complete`]. This allows for `complete` to return
228    /// just the merkle root with `NoopLeafHashCollector` and also return the leaf hashes when a
229    /// real `LeafHashCollector` is used.
230    type Output;
231
232    /// This method is called as each leaf hash in the merkle tree is created.
233    ///
234    /// If the merkle tree consists of just a root node then this method will be called for the root
235    /// node.
236    fn add_leaf_hash(&mut self, hash: Hash);
237
238    /// Transforms the merkle root into Self::Output.
239    fn complete(self, root: Hash) -> Self::Output;
240}
241
242/// A `LeafHashCollector` that doesn't collect leaf hashes.
243pub struct NoopLeafHashCollector;
244impl LeafHashCollector for NoopLeafHashCollector {
245    type Output = Hash;
246
247    fn add_leaf_hash(&mut self, _hash: Hash) {}
248
249    fn complete(self, root: Hash) -> Self::Output {
250        root
251    }
252}
253
254impl LeafHashCollector for Vec<Hash> {
255    type Output = (Hash, Vec<Hash>);
256
257    fn add_leaf_hash(&mut self, hash: Hash) {
258        self.push(hash);
259    }
260
261    fn complete(self, root: Hash) -> Self::Output {
262        (root, self)
263    }
264}
265
266#[cfg(test)]
267mod tests {
268    use super::*;
269    use test_case::test_case;
270
271    const AB_BLOCK: [u8; BLOCK_SIZE] = [0xAB; BLOCK_SIZE];
272
273    fn add_blocks<T: LeafHashCollector>(block_count: usize, builder: &mut MerkleRootBuilder<T>) {
274        for _ in 0..block_count {
275            builder.add_block(&AB_BLOCK);
276        }
277    }
278
279    // Hashes and the length of repeated 0xAB bytes. These were generated from the C++
280    // implementation in //src/lib/digest.
281    #[test_case("15ec7bf0b50732b49f8228e07d24365338f9e3ab994b00af08e5a3bffe55fd8b", 0; "0")]
282    #[test_case("2fabda7adeac120f587ae7b159c39d1716758b2c65e25b856470bbbb8b94e25e", 1; "1")]
283    #[test_case("fd2acae20c551eea1df052f39478b33536ebf3b0c6ceef649a010de06b71822b", 4096; "4096")]
284    #[test_case("85a0918a035140e720f4a5f10b7768d5b04959ed526da401cf4893aac73f436a", 8191; "8191")]
285    #[test_case("1dc023a7a88f56fab52e0b078a92cd566546d11aed479c66a14d6592e0f011eb", 8192; "8192")]
286    #[test_case("b6eae68aa1ad1b4fc74618575135f0d1daedc6bb07dc10f8fa41916d0578c318", 8193; "8193")]
287    #[test_case(
288        "35689edb3c4c46e44bb4ff00a7f8ac628d97ba16e1233f78759a41da597ed8dd",
289        8192 * 2 - 1; "16383"
290    )]
291    #[test_case(
292        "a6a903830d911d7555f9243b748d27a5aec1e385f91d845c854f46e02339070a",
293        8192 * 2; "16384"
294    )]
295    #[test_case(
296        "54c9eb182fc5bfc2984ca45049cf8cf1d071f2165e5190605b7970f824e03d8f",
297        8192 * 2 + 1; "16385"
298    )]
299    #[test_case(
300        "c96631f69f0cfaf2626dc4230aeb7fa50a05cd7e897039d2f56cdec99dc0b3b1",
301        8192 * 256 - 1; "2097151"
302    )]
303    #[test_case(
304        "cb717b240ae7ddf1ad197a3272db3cb4175bb7b7242d5050c6317df2bb8ed4b4",
305        8192 * 256; "2097152"
306    )]
307    #[test_case(
308        "9942b1f926e3295053e97b2b5a1471b6dff7df3d130f589c2881f9b98eee2fd0",
309        8192 * 256 + 1; "2097153"
310    )]
311    #[test_case(
312        "59f9f6275ec8b672446bc8fc2da0a7ce625a4c55fcbb3891144ace7a145f23e2",
313        8192 * 256 * 2 - 1; "4194303"
314    )]
315    #[test_case(
316        "fa21580bb27be533d2846e75e7fb3acca3fa2605f3fd3a4d49db2d023b10324b",
317        8192 * 256 * 2; "4194304"
318    )]
319    #[test_case(
320        "aec444c86e0553ed4a2a5e49faa2db9e6c9f0bdb6b97a8bd2b84aebec30b4558",
321        8192 * 256 * 2 + 1; "4194305"
322    )]
323    #[test_case(
324        "799bb12a5f633f9949f4619e4e0af447c980f895e7261521abf69a2dba67ce9e",
325        8192 * 256 * 256 - 1; "536870911"
326    )]
327    #[test_case(
328        "b3b1a0ceb8e5949a5ba3f6db646e037438fe15053113c1b50b290678596467e3",
329        8192 * 256 * 256; "536870912"
330    )]
331    #[test_case(
332        "53b85ef0285254b7bb32bc6cd5092fdf1a610d96446848f4dd2841e4c93a55d2",
333        8192 * 256 * 256 + 1; "536870913"
334    )]
335    fn test_merkle_root_builder_matches_lib_digest(hash: &str, size: usize) {
336        let expected: Hash = hash.parse().unwrap();
337        let mut builder = MerkleRootBuilder::default();
338        add_blocks(size / BLOCK_SIZE, &mut builder);
339        let actual = builder.complete(&AB_BLOCK[0..(size % BLOCK_SIZE)]);
340        assert_eq!(actual, expected, "size={size}");
341    }
342
343    #[test_case(0, &[], false; "0")]
344    #[test_case(8192, &[], true; "8192")]
345    #[test_case(8192 * 2, &[64], false; "16384")]
346    #[test_case(8192 * 255, &[8192 - 32], false; "2088960")]
347    #[test_case(8192 * 256, &[8192], true; "2097152")]
348    #[test_case(8192 * 257, &[8192 + 32, 32], false; "2105344")]
349    #[test_case(8192 * 256 * 2, &[8192 * 2, 64], false; "4194304")]
350    #[test_case(8192 * 256 * 255, &[8192 * 255, 8192 - 32], false; "534773760")]
351    #[test_case(8192 * 256 * 256, &[8192 * 256, 8192], true; "536870912")]
352    fn test_merkle_tree_structure(size: usize, expected_offsets: &[usize], is_root_set: bool) {
353        let mut builder = MerkleRootBuilder::default();
354        add_blocks(size / BLOCK_SIZE, &mut builder);
355        let actual_offsets = builder.levels.iter().map(|level| level.offset).collect::<Vec<_>>();
356        assert_eq!(&actual_offsets, expected_offsets);
357        assert_eq!(builder.root.is_some(), is_root_set);
358    }
359
360    #[test_case(0; "0")]
361    #[test_case(1; "1")]
362    #[test_case(4096; "4096")]
363    #[test_case(8191; "8191")]
364    #[test_case(8192; "8192")]
365    #[test_case(8193; "8193")]
366    #[test_case(8192 * 2 - 1; "16383")]
367    #[test_case(8192 * 2; "16384")]
368    #[test_case(8192 * 2 + 1; "16385")]
369    #[test_case(8192 * 256 - 1; "2097151")]
370    #[test_case(8192 * 256; "2097152")]
371    #[test_case(8192 * 256 + 1; "2097153")]
372    #[test_case(8192 * 256 * 2 - 1; "4194303")]
373    #[test_case(8192 * 256 * 2; "4194304")]
374    #[test_case(8192 * 256 * 2 + 1; "4194305")]
375    #[test_case(8192 * 256 * 256 - 1; "536870911")]
376    #[test_case(8192 * 256 * 256; "536870912")]
377    #[test_case(8192 * 256 * 256 + 1; "536870913")]
378    fn test_rebuild_from_leaf_hashes(size: usize) {
379        let mut builder = MerkleRootBuilder::new(Vec::new());
380        add_blocks(size / BLOCK_SIZE, &mut builder);
381        let (expected_root, leaf_hashes) = builder.complete(&AB_BLOCK[0..(size % BLOCK_SIZE)]);
382
383        // There should always be leaf hashes, even for the empty input.
384        assert!(!leaf_hashes.is_empty());
385
386        let mut builder = MerkleRootBuilder::default();
387        for hash in leaf_hashes {
388            builder.push_hash(hash);
389        }
390
391        assert_eq!(builder.complete(&[]), expected_root);
392    }
393
394    #[test_case(0; "0")]
395    #[test_case(1; "1")]
396    #[test_case(4096; "4096")]
397    #[test_case(8191; "8191")]
398    #[test_case(8192; "8192")]
399    #[test_case(8193; "8193")]
400    #[test_case(8192 * 2 - 1; "16383")]
401    #[test_case(8192 * 2; "16384")]
402    #[test_case(8192 * 2 + 1; "16385")]
403    #[test_case(8192 * 256 - 1; "2097151")]
404    #[test_case(8192 * 256; "2097152")]
405    #[test_case(8192 * 256 + 1; "2097153")]
406    #[test_case(8192 * 256 * 2 - 1; "4194303")]
407    #[test_case(8192 * 256 * 2; "4194304")]
408    #[test_case(8192 * 256 * 2 + 1; "4194305")]
409    fn test_buffered_merkle_root_builder(size: usize) {
410        let data = vec![0; size];
411        let expected_root = MerkleRootBuilder::default().complete(&data);
412
413        let mut builder = BufferedMerkleRootBuilder::default();
414        for chunk in data.chunks(6079) {
415            builder.write(chunk);
416        }
417        assert_eq!(builder.complete(), expected_root);
418
419        let mut builder = BufferedMerkleRootBuilder::default();
420        builder.write(&data);
421        assert_eq!(builder.complete(), expected_root);
422    }
423
424    #[test]
425    fn test_buffered_merkle_root_builder_as_writer() {
426        let data = vec![0; 8192 * 3 + 60];
427        let expected_root = MerkleRootBuilder::default().complete(&data);
428
429        let mut reader = std::io::Cursor::new(data);
430        let mut builder = BufferedMerkleRootBuilder::default();
431        std::io::copy(&mut reader, &mut builder).unwrap();
432
433        assert_eq!(builder.complete(), expected_root);
434    }
435}