fuchsia_merkle/
merkle_root_builder.rs

1// Copyright 2025 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::util::{HASHES_PER_BLOCK, hash_block, make_hash_hasher, update_with_zeros};
6use crate::{BLOCK_SIZE, Hash};
7use bssl_crypto::digest::Sha256;
8use fuchsia_hash::HASH_SIZE;
9
10const EMPTY_MERKLE_TREE_ROOT: Hash = Hash::from_array([
11    0x15, 0xec, 0x7b, 0xf0, 0xb5, 0x07, 0x32, 0xb4, 0x9f, 0x82, 0x28, 0xe0, 0x7d, 0x24, 0x36, 0x53,
12    0x38, 0xf9, 0xe3, 0xab, 0x99, 0x4b, 0x00, 0xaf, 0x08, 0xe5, 0xa3, 0xbf, 0xfe, 0x55, 0xfd, 0x8b,
13]);
14
15/// Hashes blocks of hashes.
16struct LevelHasher {
17    hasher: Sha256,
18    offset: usize,
19}
20
21/// Calculates the merkle root for a given set of data. The leaf hashes of the merkle tree can
22/// optionally be collected for use with [`crate::MerkleVerifier`].
23///
24/// [`MerkleRootBuilder`] only accepts complete blocks of data except for the last block. This
25/// avoids buffering data internally. If complete blocks of data can't be guaranteed then
26/// [`BufferedMerkleRootBuilder`] should be used instead.
27pub struct MerkleRootBuilder<T> {
28    levels: Vec<LevelHasher>,
29    offset: usize,
30    leaf_hash_collector: T,
31
32    /// Holds the root of the tree when the offset of every level of the tree is a multiple of
33    /// [`BLOCK_SIZE`]. This only happens when [`offset`] is equal to `8192 * 256 ^ N` for every
34    /// `N > 0`.
35    ///
36    /// If [`Self::push_hash()`] immediately turned the root into a new level then
37    /// [`Self::complete`] wouldn't be able to recover the root if no more data was added.
38    root: Option<Hash>,
39}
40
41impl<T: LeafHashCollector> MerkleRootBuilder<T> {
42    /// Creates a new `MerkleRootBuilder` with a `LeafHashCollector`.
43    ///
44    /// Use `MerkleRootBuilder::default()` if a `LeafHashCollector` isn't needed.
45    pub fn new(leaf_hash_collector: T) -> Self {
46        Self { levels: Vec::new(), offset: 0, root: None, leaf_hash_collector }
47    }
48
49    /// Appends a full block of data to the merkle tree.
50    ///
51    /// `MerkleRootBuilder` doesn't buffer any data so only full blocks can be added until
52    /// [`Self::complete`] is called. Use [`BufferedMerkleRootBuilder`] if buffering is required.
53    pub fn add_block(&mut self, data: &[u8; BLOCK_SIZE]) {
54        self.push_hash(hash_block(&data[..], self.offset));
55    }
56
57    /// Appends a final amount of data to the merkle tree and returns the merkle root.
58    pub fn complete(mut self, data: &[u8]) -> T::Output {
59        // Add all of the complete blocks from `data`.
60        let remaining = self.add_blocks(data);
61
62        // Handle the last partial block when `data` isn't a block multiple.
63        if !remaining.is_empty() {
64            self.push_hash(hash_block(remaining, self.offset));
65        }
66
67        // Special case for the merkle tree of no data.
68        if self.offset == 0 {
69            self.leaf_hash_collector.add_leaf_hash(EMPTY_MERKLE_TREE_ROOT);
70            return self.leaf_hash_collector.complete(EMPTY_MERKLE_TREE_ROOT);
71        }
72
73        // Special case for when every level of the tree is already complete.
74        if let Some(root) = self.root {
75            return self.leaf_hash_collector.complete(root);
76        }
77
78        // Each row of the tree is padded with zeros to fill the last block and the resulting hash
79        // is bubbled up to the next level. Since `root` wasn't set, there's at least 1 level that
80        // needs padding so `bubbling_digest` is guaranteed to be set by the end of the loop.
81        let mut bubbling_digest: Option<[u8; HASH_SIZE]> = None;
82        for mut level_hasher in self.levels {
83            if level_hasher.offset.is_multiple_of(BLOCK_SIZE) && bubbling_digest.is_none() {
84                continue;
85            }
86            if let Some(digest) = bubbling_digest.take() {
87                level_hasher.hasher.update(&digest);
88                level_hasher.offset += HASH_SIZE;
89            }
90            let zeros = BLOCK_SIZE - (level_hasher.offset % BLOCK_SIZE);
91            update_with_zeros(&mut level_hasher.hasher, zeros);
92            bubbling_digest = Some(level_hasher.hasher.digest());
93        }
94        self.leaf_hash_collector.complete(Hash::from(bubbling_digest.unwrap()))
95    }
96
97    /// Adds a hash of a single block of data to the merkle tree.
98    pub(crate) fn push_hash(&mut self, mut digest: Hash) {
99        self.offset += BLOCK_SIZE;
100        self.leaf_hash_collector.add_leaf_hash(digest);
101
102        if let Some(root) = self.root.take() {
103            // The root was set but more data has now been added. Create a new level containing the
104            // root.
105            let mut hasher = make_hash_hasher(HASHES_PER_BLOCK, self.levels.len() + 1, 0);
106            hasher.update(root.as_bytes());
107            self.levels.push(LevelHasher { hasher, offset: HASH_SIZE });
108        }
109
110        for (level, level_hasher) in self.levels.iter_mut().enumerate() {
111            level_hasher.hasher.update(digest.as_bytes());
112            level_hasher.offset += HASH_SIZE;
113
114            // Only bubble the hash up to the next level when this level completes a block.
115            if !level_hasher.offset.is_multiple_of(BLOCK_SIZE) {
116                return;
117            }
118            // Initialize a new hasher for this level and bubble the old hasher up to the next
119            // level.
120            let new_hasher = make_hash_hasher(HASHES_PER_BLOCK, level + 1, level_hasher.offset);
121            let old_hasher = std::mem::replace(&mut level_hasher.hasher, new_hasher);
122            digest = Hash::from_array(old_hasher.digest());
123        }
124
125        self.root = Some(digest);
126    }
127
128    /// Adds all of the whole blocks from `data` and returns the remaining bytes.
129    fn add_blocks<'a>(&mut self, mut data: &'a [u8]) -> &'a [u8] {
130        while let Some((block, remainder)) = data.split_first_chunk::<BLOCK_SIZE>() {
131            data = remainder;
132            self.add_block(block);
133        }
134        data
135    }
136}
137
138impl Default for MerkleRootBuilder<NoopLeafHashCollector> {
139    fn default() -> Self {
140        Self::new(NoopLeafHashCollector)
141    }
142}
143
144/// Calculates the merkle root for a given set of data. The leaf hashes of the merkle tree can
145/// optionally be collected for use with a `MerkleVerifier`.
146///
147/// [`BufferedMerkleRootBuilder`] is able to accept data that isn't a multiple of [`BLOCK_SIZE`] by
148/// internally buffering the data.
149///
150/// If all of the data is already in memory then `MerkleRootBuilder::default().complete(data)`
151/// should be used instead as it avoids buffering the tail end of the data if it's not a complete
152/// block.
153pub struct BufferedMerkleRootBuilder<T> {
154    buffer: Vec<u8>,
155    builder: MerkleRootBuilder<T>,
156}
157
158impl<T: LeafHashCollector> BufferedMerkleRootBuilder<T> {
159    /// Creates a new `BufferedMerkleRootBuilder` with a `LeafHashCollector`.
160    ///
161    /// Use `BufferedMerkleRootBuilder::default()` if a `LeafHashCollector` isn't needed.
162    pub fn new(leaf_hash_collector: T) -> Self {
163        Self { buffer: Vec::new(), builder: MerkleRootBuilder::new(leaf_hash_collector) }
164    }
165
166    /// Appends data to the merkle tree.
167    pub fn write(&mut self, mut data: &[u8]) {
168        if data.is_empty() {
169            return;
170        }
171        if self.buffer.len().saturating_add(data.len()) < BLOCK_SIZE {
172            // The current buffer and the new data won't complete a block. Buffer the data and
173            // return.
174            self.buffer.extend_from_slice(data);
175            return;
176        }
177
178        if !self.buffer.is_empty() {
179            // If the buffered data isn't empty then take bytes from the start of the new data to
180            // complete a block. The above case handles when a block wouldn't be completed so this
181            // is guaranteed to complete a block.
182            let (head, tail) = data.split_at(BLOCK_SIZE - self.buffer.len());
183            self.buffer.extend_from_slice(head);
184            self.builder.add_block(self.buffer.as_slice().try_into().unwrap());
185            self.buffer.clear();
186            data = tail;
187        }
188
189        // Add all remaining whole blocks.
190        data = self.builder.add_blocks(data);
191        if !data.is_empty() {
192            // Buffer any remaining data.
193            self.buffer.extend_from_slice(data);
194        }
195    }
196
197    /// Finishes building the merkle tree and returns the merkle root.
198    pub fn complete(self) -> T::Output {
199        self.builder.complete(&self.buffer)
200    }
201}
202
203impl Default for BufferedMerkleRootBuilder<NoopLeafHashCollector> {
204    fn default() -> Self {
205        Self::new(NoopLeafHashCollector)
206    }
207}
208
209impl<T: LeafHashCollector> std::io::Write for BufferedMerkleRootBuilder<T> {
210    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
211        self.write(buf);
212        Ok(buf.len())
213    }
214
215    fn flush(&mut self) -> std::io::Result<()> {
216        Ok(())
217    }
218}
219
220/// A trait for collecting the leaf hashes of a merkle tree while constructing a merkle root.
221pub trait LeafHashCollector {
222    /// The output type of [`MerkleRootBuilder::complete`]. This allows for `complete` to return
223    /// just the merkle root with `NoopLeafHashCollector` and also return the leaf hashes when a
224    /// real `LeafHashCollector` is used.
225    type Output;
226
227    /// This method is called as each leaf hash in the merkle tree is created.
228    ///
229    /// If the merkle tree consists of just a root node then this method will be called for the root
230    /// node.
231    fn add_leaf_hash(&mut self, hash: Hash);
232
233    /// Transforms the merkle root into Self::Output.
234    fn complete(self, root: Hash) -> Self::Output;
235}
236
237/// A `LeafHashCollector` that doesn't collect leaf hashes.
238pub struct NoopLeafHashCollector;
239impl LeafHashCollector for NoopLeafHashCollector {
240    type Output = Hash;
241
242    fn add_leaf_hash(&mut self, _hash: Hash) {}
243
244    fn complete(self, root: Hash) -> Self::Output {
245        root
246    }
247}
248
249impl LeafHashCollector for Vec<Hash> {
250    type Output = (Hash, Vec<Hash>);
251
252    fn add_leaf_hash(&mut self, hash: Hash) {
253        self.push(hash);
254    }
255
256    fn complete(self, root: Hash) -> Self::Output {
257        (root, self)
258    }
259}
260
261#[cfg(test)]
262mod tests {
263    use super::*;
264    use test_case::test_case;
265
266    const AB_BLOCK: [u8; BLOCK_SIZE] = [0xAB; BLOCK_SIZE];
267
268    fn add_blocks<T: LeafHashCollector>(block_count: usize, builder: &mut MerkleRootBuilder<T>) {
269        for _ in 0..block_count {
270            builder.add_block(&AB_BLOCK);
271        }
272    }
273
274    // Hashes and the length of repeated 0xAB bytes. These were generated from the C++
275    // implementation in //src/lib/digest.
276    #[test_case("15ec7bf0b50732b49f8228e07d24365338f9e3ab994b00af08e5a3bffe55fd8b", 0; "0")]
277    #[test_case("2fabda7adeac120f587ae7b159c39d1716758b2c65e25b856470bbbb8b94e25e", 1; "1")]
278    #[test_case("fd2acae20c551eea1df052f39478b33536ebf3b0c6ceef649a010de06b71822b", 4096; "4096")]
279    #[test_case("85a0918a035140e720f4a5f10b7768d5b04959ed526da401cf4893aac73f436a", 8191; "8191")]
280    #[test_case("1dc023a7a88f56fab52e0b078a92cd566546d11aed479c66a14d6592e0f011eb", 8192; "8192")]
281    #[test_case("b6eae68aa1ad1b4fc74618575135f0d1daedc6bb07dc10f8fa41916d0578c318", 8193; "8193")]
282    #[test_case(
283        "35689edb3c4c46e44bb4ff00a7f8ac628d97ba16e1233f78759a41da597ed8dd",
284        8192 * 2 - 1; "16383"
285    )]
286    #[test_case(
287        "a6a903830d911d7555f9243b748d27a5aec1e385f91d845c854f46e02339070a",
288        8192 * 2; "16384"
289    )]
290    #[test_case(
291        "54c9eb182fc5bfc2984ca45049cf8cf1d071f2165e5190605b7970f824e03d8f",
292        8192 * 2 + 1; "16385"
293    )]
294    #[test_case(
295        "c96631f69f0cfaf2626dc4230aeb7fa50a05cd7e897039d2f56cdec99dc0b3b1",
296        8192 * 256 - 1; "2097151"
297    )]
298    #[test_case(
299        "cb717b240ae7ddf1ad197a3272db3cb4175bb7b7242d5050c6317df2bb8ed4b4",
300        8192 * 256; "2097152"
301    )]
302    #[test_case(
303        "9942b1f926e3295053e97b2b5a1471b6dff7df3d130f589c2881f9b98eee2fd0",
304        8192 * 256 + 1; "2097153"
305    )]
306    #[test_case(
307        "59f9f6275ec8b672446bc8fc2da0a7ce625a4c55fcbb3891144ace7a145f23e2",
308        8192 * 256 * 2 - 1; "4194303"
309    )]
310    #[test_case(
311        "fa21580bb27be533d2846e75e7fb3acca3fa2605f3fd3a4d49db2d023b10324b",
312        8192 * 256 * 2; "4194304"
313    )]
314    #[test_case(
315        "aec444c86e0553ed4a2a5e49faa2db9e6c9f0bdb6b97a8bd2b84aebec30b4558",
316        8192 * 256 * 2 + 1; "4194305"
317    )]
318    #[test_case(
319        "799bb12a5f633f9949f4619e4e0af447c980f895e7261521abf69a2dba67ce9e",
320        8192 * 256 * 256 - 1; "536870911"
321    )]
322    #[test_case(
323        "b3b1a0ceb8e5949a5ba3f6db646e037438fe15053113c1b50b290678596467e3",
324        8192 * 256 * 256; "536870912"
325    )]
326    #[test_case(
327        "53b85ef0285254b7bb32bc6cd5092fdf1a610d96446848f4dd2841e4c93a55d2",
328        8192 * 256 * 256 + 1; "536870913"
329    )]
330    fn test_merkle_root_builder_matches_lib_digest(hash: &str, size: usize) {
331        let expected: Hash = hash.parse().unwrap();
332        let mut builder = MerkleRootBuilder::default();
333        add_blocks(size / BLOCK_SIZE, &mut builder);
334        let actual = builder.complete(&AB_BLOCK[0..(size % BLOCK_SIZE)]);
335        assert_eq!(actual, expected, "size={size}");
336    }
337
338    #[test_case(0, &[], false; "0")]
339    #[test_case(8192, &[], true; "8192")]
340    #[test_case(8192 * 2, &[64], false; "16384")]
341    #[test_case(8192 * 255, &[8192 - 32], false; "2088960")]
342    #[test_case(8192 * 256, &[8192], true; "2097152")]
343    #[test_case(8192 * 257, &[8192 + 32, 32], false; "2105344")]
344    #[test_case(8192 * 256 * 2, &[8192 * 2, 64], false; "4194304")]
345    #[test_case(8192 * 256 * 255, &[8192 * 255, 8192 - 32], false; "534773760")]
346    #[test_case(8192 * 256 * 256, &[8192 * 256, 8192], true; "536870912")]
347    fn test_merkle_tree_structure(size: usize, expected_offsets: &[usize], is_root_set: bool) {
348        let mut builder = MerkleRootBuilder::default();
349        add_blocks(size / BLOCK_SIZE, &mut builder);
350        let actual_offsets = builder.levels.iter().map(|level| level.offset).collect::<Vec<_>>();
351        assert_eq!(&actual_offsets, expected_offsets);
352        assert_eq!(builder.root.is_some(), is_root_set);
353    }
354
355    #[test_case(0; "0")]
356    #[test_case(1; "1")]
357    #[test_case(4096; "4096")]
358    #[test_case(8191; "8191")]
359    #[test_case(8192; "8192")]
360    #[test_case(8193; "8193")]
361    #[test_case(8192 * 2 - 1; "16383")]
362    #[test_case(8192 * 2; "16384")]
363    #[test_case(8192 * 2 + 1; "16385")]
364    #[test_case(8192 * 256 - 1; "2097151")]
365    #[test_case(8192 * 256; "2097152")]
366    #[test_case(8192 * 256 + 1; "2097153")]
367    #[test_case(8192 * 256 * 2 - 1; "4194303")]
368    #[test_case(8192 * 256 * 2; "4194304")]
369    #[test_case(8192 * 256 * 2 + 1; "4194305")]
370    #[test_case(8192 * 256 * 256 - 1; "536870911")]
371    #[test_case(8192 * 256 * 256; "536870912")]
372    #[test_case(8192 * 256 * 256 + 1; "536870913")]
373    fn test_rebuild_from_leaf_hashes(size: usize) {
374        let mut builder = MerkleRootBuilder::new(Vec::new());
375        add_blocks(size / BLOCK_SIZE, &mut builder);
376        let (expected_root, leaf_hashes) = builder.complete(&AB_BLOCK[0..(size % BLOCK_SIZE)]);
377
378        // There should always be leaf hashes, even for the empty input.
379        assert!(!leaf_hashes.is_empty());
380
381        let mut builder = MerkleRootBuilder::default();
382        for hash in leaf_hashes {
383            builder.push_hash(hash);
384        }
385
386        assert_eq!(builder.complete(&[]), expected_root);
387    }
388
389    #[test_case(0; "0")]
390    #[test_case(1; "1")]
391    #[test_case(4096; "4096")]
392    #[test_case(8191; "8191")]
393    #[test_case(8192; "8192")]
394    #[test_case(8193; "8193")]
395    #[test_case(8192 * 2 - 1; "16383")]
396    #[test_case(8192 * 2; "16384")]
397    #[test_case(8192 * 2 + 1; "16385")]
398    #[test_case(8192 * 256 - 1; "2097151")]
399    #[test_case(8192 * 256; "2097152")]
400    #[test_case(8192 * 256 + 1; "2097153")]
401    #[test_case(8192 * 256 * 2 - 1; "4194303")]
402    #[test_case(8192 * 256 * 2; "4194304")]
403    #[test_case(8192 * 256 * 2 + 1; "4194305")]
404    fn test_buffered_merkle_root_builder(size: usize) {
405        let data = vec![0; size];
406        let expected_root = MerkleRootBuilder::default().complete(&data);
407
408        let mut builder = BufferedMerkleRootBuilder::default();
409        for chunk in data.chunks(6079) {
410            builder.write(chunk);
411        }
412        assert_eq!(builder.complete(), expected_root);
413
414        let mut builder = BufferedMerkleRootBuilder::default();
415        builder.write(&data);
416        assert_eq!(builder.complete(), expected_root);
417    }
418
419    #[test]
420    fn test_buffered_merkle_root_builder_as_writer() {
421        let data = vec![0; 8192 * 3 + 60];
422        let expected_root = MerkleRootBuilder::default().complete(&data);
423
424        let mut reader = std::io::Cursor::new(data);
425        let mut builder = BufferedMerkleRootBuilder::default();
426        std::io::copy(&mut reader, &mut builder).unwrap();
427
428        assert_eq!(builder.complete(), expected_root);
429    }
430}