fuchsia_merkle/
merkle_verifier.rs

1// Copyright 2025 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::util::{make_hash_hasher, update_with_zeros};
6use crate::{BLOCK_SIZE, HASH_SIZE, Hash, MerkleRootBuilder, hash_block};
7use zx_status::Status;
8
/// Verifies data against the leaf hashes of a merkle tree.
#[derive(Clone)]
pub struct MerkleVerifier {
    // One hash per `BLOCK_SIZE` block of the original data, in block order. Consistency with the
    // merkle root is checked once at construction time (see `new`).
    hashes: Box<[Hash]>,
}
14
15impl MerkleVerifier {
16    /// Constructs a [`MerkleVerifier`] from the root and leaf hashes of a merkle tree.
17    ///
18    /// Returns `IO_DATA_INTEGRITY` if the leaf hashes are inconsistent with the root.
19    pub fn new(root: Hash, hashes: Box<[Hash]>) -> Result<Self, Status> {
20        if rebuild_root(&hashes) != root {
21            Err(Status::IO_DATA_INTEGRITY)
22        } else {
23            Ok(Self { hashes })
24        }
25    }
26
27    /// Verifies a `data` slice against the Merkle tree, assuming it corresponds to original data
28    /// starting at `offset`.
29    ///
30    /// # Requirements:
31    /// - The `offset` must be aligned to `BLOCK_SIZE`.
32    /// - The length of `data` must be a multiple of `BLOCK_SIZE`, *except* if `data` contains the
33    ///   final chunk of the original data source.
34    pub fn verify(&self, offset: usize, data: &[u8]) -> Result<(), Status> {
35        if !offset.is_multiple_of(BLOCK_SIZE) {
36            return Err(Status::INVALID_ARGS);
37        }
38        let ending = data.len().checked_add(offset).ok_or(Status::INVALID_ARGS)?;
39        if ending.div_ceil(BLOCK_SIZE) > self.hashes.len() {
40            return Err(Status::INVALID_ARGS);
41        }
42
43        for (i, chunk) in data.chunks(BLOCK_SIZE).enumerate() {
44            let hash = hash_block(chunk, offset + i * BLOCK_SIZE);
45            if self.hashes[offset / BLOCK_SIZE + i] != hash {
46                return Err(Status::IO_DATA_INTEGRITY);
47            }
48        }
49
50        Ok(())
51    }
52}
53
54impl std::fmt::Debug for MerkleVerifier {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        // The leaf hashes are unlikely to be useful for debugging and could be megabytes of data.
57        // The root of the merkle tree uniquely identifies the blob and the hash count can quickly
58        // give a rough idea of the size of the blob.
59        let root = rebuild_root(&self.hashes);
60        f.debug_struct("MerkleVerifier")
61            .field("root", &root)
62            .field("hash_count", &self.hashes.len())
63            .finish()
64    }
65}
66
67fn rebuild_root(leaf_hashes: &[Hash]) -> Hash {
68    let mut builder = MerkleRootBuilder::default();
69    for hash in leaf_hashes {
70        builder.push_hash(*hash);
71    }
72    builder.complete(&[])
73}
74
75fn hash_hashes(
76    hashes_per_hash: usize,
77    index: usize,
78    hashes: impl ExactSizeIterator<Item = Hash>,
79) -> Hash {
80    let mut hasher = make_hash_hasher(hashes_per_hash, 1, index * hashes_per_hash * HASH_SIZE);
81    let hash_count = hashes.len();
82    for hash in hashes {
83        hasher.update(hash.as_bytes());
84    }
85    if hash_count != hashes_per_hash {
86        update_with_zeros(&mut hasher, (hashes_per_hash - hash_count) * HASH_SIZE);
87    }
88    Hash::from_array(hasher.digest())
89}
90
/// Verifies reads against a merkle tree.
///
/// [`MerkleVerifier`] verifies data at a granularity of [`BLOCK_SIZE`]. Consequently, it stores one
/// hash (of size [`HASH_SIZE`]) for each data block. This hash storage consumes memory equal to
/// 1/256th of the original data size.
///
/// [`ReadSizedMerkleVerifier`] optimizes memory usage when reads are always aligned and are
/// guaranteed to be a multiple of the [`BLOCK_SIZE`]. Instead of storing a hash for every
/// [`BLOCK_SIZE`] blocks like [`MerkleVerifier`], it stores only one hash for each read sized
/// chunk. For example, 128KiB aligned reads would require storing 1/16th the number of hashes.
#[derive(Clone)]
pub struct ReadSizedMerkleVerifier {
    // Granularity of verification; always a non-zero multiple of BLOCK_SIZE (enforced in `new`).
    read_size: usize,
    // One level-1 hash per `read_size` chunk of the original data, in chunk order.
    hashes: Box<[Hash]>,
}
106
107impl ReadSizedMerkleVerifier {
108    /// Constructs a [`ReadSizedMerkleVerifier`] from an existing [`MerkleVerifier`] and a
109    /// `read_size`.
110    ///
111    /// Returns an error if `read_size` is not a multiple of [`BLOCK_SIZE`].
112    pub fn new(verifier: MerkleVerifier, read_size: usize) -> Result<Self, Status> {
113        if read_size == 0 || !read_size.is_multiple_of(BLOCK_SIZE) {
114            return Err(Status::INVALID_ARGS);
115        }
116        let hashes_per_hash = read_size / BLOCK_SIZE;
117        let mut level_1_hashes =
118            Vec::with_capacity(verifier.hashes.len().div_ceil(hashes_per_hash));
119        for (i, hashes) in verifier.hashes.chunks(hashes_per_hash).enumerate() {
120            level_1_hashes.push(hash_hashes(hashes_per_hash, i, hashes.iter().copied()));
121        }
122        Ok(ReadSizedMerkleVerifier { read_size, hashes: level_1_hashes.into_boxed_slice() })
123    }
124
125    /// Verifies a `data` slice against the Merkle tree, assuming it corresponds to original data
126    /// starting at `offset`.
127    ///
128    /// # Requirements:
129    /// - The `offset` must be aligned to the configured read size granularity.
130    /// - The length of `data` must be a multiple of the read size, *except* if `data` represents
131    ///   the final chunk of the original data source (in which case it can be shorter).
132    pub fn verify(&self, offset: usize, data: &[u8]) -> Result<(), Status> {
133        let end = offset.checked_add(data.len()).ok_or(Status::INVALID_ARGS)?;
134        if !offset.is_multiple_of(self.read_size) {
135            // The offset must read aligned.
136            return Err(Status::INVALID_ARGS);
137        }
138
139        let hash_start_index = offset / self.read_size;
140        let hash_end_index = end.div_ceil(self.read_size);
141
142        if !end.is_multiple_of(self.read_size) && hash_end_index != self.hashes.len() {
143            // The end is not aligned and it's not the end of the data.
144            return Err(Status::INVALID_ARGS);
145        }
146        if hash_end_index > self.hashes.len() {
147            return Err(Status::INVALID_ARGS);
148        }
149
150        let hashes_per_hash = self.read_size / BLOCK_SIZE;
151        for (i, chunk) in data.chunks(self.read_size).enumerate() {
152            let hash = hash_hashes(
153                hashes_per_hash,
154                hash_start_index + i,
155                chunk.chunks(BLOCK_SIZE).enumerate().map(|(j, chunk)| {
156                    hash_block(chunk, offset + self.read_size * i + j * BLOCK_SIZE)
157                }),
158            );
159            if hash != self.hashes[hash_start_index + i] {
160                return Err(Status::IO_DATA_INTEGRITY);
161            }
162        }
163
164        Ok(())
165    }
166}
167
168impl std::fmt::Debug for ReadSizedMerkleVerifier {
169    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
170        // The leaf hashes are unlikely to be useful for debugging and could be megabytes of data.
171        // The root of the merkle tree can't be recovered either. The read size and hash count can
172        // give a rough idea of the size of the blob.
173        f.debug_struct("ReadAlignedMerkleVerifier")
174            .field("read_size", &self.read_size)
175            .field("hash_count", &self.hashes.len())
176            .finish()
177    }
178}
179
#[cfg(test)]
mod tests {
    use super::*;
    use anyhow::{Context, Error, anyhow};
    use assert_matches::assert_matches;
    use test_case::test_case;

    /// Builds `size` bytes of deterministic test data in which every `BLOCK_SIZE` block is
    /// unique, so a block verified at the wrong offset always fails the hash check.
    fn create_data(size: usize) -> Vec<u8> {
        const USIZE_SIZE: usize = size_of::<usize>();
        let mut data = vec![0xABu8; size];
        for (i, block) in data.chunks_mut(BLOCK_SIZE).enumerate() {
            // Place the index of the block at the start of the block to make every block unique.
            if block.len() < USIZE_SIZE {
                block.copy_from_slice(&i.to_le_bytes()[0..block.len()]);
            } else {
                block[0..USIZE_SIZE].copy_from_slice(&i.to_le_bytes());
            }
        }
        data
    }

    #[test_case(0; "0")]
    #[test_case(1; "1")]
    #[test_case(4096; "4096")]
    #[test_case(8191; "8191")]
    #[test_case(8192; "8192")]
    #[test_case(8193; "8193")]
    #[test_case(8192 * 2 - 1; "16383")]
    #[test_case(8192 * 2; "16384")]
    #[test_case(8192 * 2 + 1; "16385")]
    #[test_case(8192 * 256 - 1; "2097151")]
    #[test_case(8192 * 256; "2097152")]
    #[test_case(8192 * 256 + 1; "2097153")]
    #[test_case(8192 * 256 * 2 - 1; "4194303")]
    #[test_case(8192 * 256 * 2; "4194304")]
    #[test_case(8192 * 256 * 2 + 1; "4194305")]
    fn test_successfully_validate_root(size: usize) {
        let data = create_data(size);
        let (root, leaf_hashes) = MerkleRootBuilder::new(Vec::new()).complete(&data);

        MerkleVerifier::new(root, leaf_hashes.into_boxed_slice()).unwrap();
    }

    #[test_case(8193; "8193")]
    #[test_case(8192 * 2 - 1; "16383")]
    #[test_case(8192 * 2; "16384")]
    #[test_case(8192 * 2 + 1; "16385")]
    #[test_case(8192 * 256 - 1; "2097151")]
    #[test_case(8192 * 256; "2097152")]
    #[test_case(8192 * 256 + 1; "2097153")]
    #[test_case(8192 * 256 * 2 - 1; "4194303")]
    #[test_case(8192 * 256 * 2; "4194304")]
    #[test_case(8192 * 256 * 2 + 1; "4194305")]
    fn test_fail_to_validate_root(size: usize) {
        let data = create_data(size);
        let (root, leaf_hashes) = MerkleRootBuilder::new(Vec::new()).complete(&data);

        // Corrupting the first leaf hash must make construction fail.
        {
            let mut leaf_hashes = leaf_hashes.clone().into_boxed_slice();
            let mut first_hash: [u8; HASH_SIZE] = leaf_hashes[0].into();
            first_hash[0] ^= 0xFF;
            leaf_hashes[0] = Hash::from_array(first_hash);
            MerkleVerifier::new(root, leaf_hashes).expect_err("The merkle root shouldn't match");
        }

        // Corrupting the last leaf hash must also make construction fail.
        {
            let mut leaf_hashes = leaf_hashes.into_boxed_slice();
            let mut last_hash: [u8; HASH_SIZE] = (*leaf_hashes.last().unwrap()).into();
            last_hash[31] ^= 0xFF;
            *leaf_hashes.last_mut().unwrap() = Hash::from_array(last_hash);
            MerkleVerifier::new(root, leaf_hashes).expect_err("The merkle root shouldn't match");
        }
    }

    #[test]
    fn test_verify_empty_data() {
        let (root, leaf_hashes) = MerkleRootBuilder::new(Vec::new()).complete(&[]);
        let verifier = MerkleVerifier::new(root, leaf_hashes.into_boxed_slice()).unwrap();
        verifier.verify(0, &[]).unwrap();
        assert_matches!(verifier.verify(1, &[]), Err(Status::INVALID_ARGS));
        assert_matches!(verifier.verify(0, &[0x00]), Err(Status::IO_DATA_INTEGRITY));
        assert_matches!(verifier.verify(0, &[0x01]), Err(Status::IO_DATA_INTEGRITY));
    }

    #[test]
    fn test_verify_with_invalid_args() {
        let data = create_data(16 * 1024 + 20);
        let (root, leaf_hashes) = MerkleRootBuilder::new(Vec::new()).complete(&data);
        let verifier = MerkleVerifier::new(root, leaf_hashes.into_boxed_slice()).unwrap();
        // Offset isn't aligned.
        assert_matches!(verifier.verify(1, &data[1..]), Err(Status::INVALID_ARGS));
        // Too much data.
        assert_matches!(verifier.verify(8192, &data), Err(Status::INVALID_ARGS));
        // Still too much data, but since the excess stays within the last block of the original
        // data it's detected via the hash comparison rather than the bounds check.
        assert_matches!(verifier.verify(8192, &data[8191..]), Err(Status::IO_DATA_INTEGRITY));
        assert_matches!(verifier.verify(8192, &data[8192..]), Ok(()));
    }

    #[test]
    fn test_invalid_read_sizes() {
        let data = create_data(16 * 1024 + 20);
        let (root, leaf_hashes) = MerkleRootBuilder::new(Vec::new()).complete(&data);
        let verifier = MerkleVerifier::new(root, leaf_hashes.into_boxed_slice()).unwrap();
        // Zero, sub-block, and non-multiple-of-BLOCK_SIZE read sizes are all rejected.
        assert_matches!(
            ReadSizedMerkleVerifier::new(verifier.clone(), 0),
            Err(Status::INVALID_ARGS)
        );
        assert_matches!(
            ReadSizedMerkleVerifier::new(verifier.clone(), 1),
            Err(Status::INVALID_ARGS)
        );
        assert_matches!(
            ReadSizedMerkleVerifier::new(verifier.clone(), 8191),
            Err(Status::INVALID_ARGS)
        );
        assert_matches!(
            ReadSizedMerkleVerifier::new(verifier, 100 * 1024),
            Err(Status::INVALID_ARGS)
        );
    }

    #[test]
    fn test_verify_reads_with_multiple_reads() {
        const READ_SIZE: usize = 16 * 1024;
        let data = create_data(READ_SIZE * 3 + 20);
        let (root, leaf_hashes) = MerkleRootBuilder::new(Vec::new()).complete(&data);
        let verifier = MerkleVerifier::new(root, leaf_hashes.into_boxed_slice()).unwrap();
        let verifier = ReadSizedMerkleVerifier::new(verifier, READ_SIZE).unwrap();
        verifier.verify(0, &data).unwrap();
        verifier.verify(0, &data[0..READ_SIZE * 2]).unwrap();
        verifier.verify(READ_SIZE, &data[READ_SIZE..READ_SIZE * 3]).unwrap();
        verifier.verify(READ_SIZE, &data[READ_SIZE..READ_SIZE * 3 + 20]).unwrap();
        verifier.verify(READ_SIZE * 2, &data[READ_SIZE * 2..READ_SIZE * 3 + 20]).unwrap();
    }

    #[test]
    fn test_verify_reads_with_invalid_args() {
        const READ_SIZE: usize = 16 * 1024;
        let data = create_data(READ_SIZE * 3 + 20);
        let (root, leaf_hashes) = MerkleRootBuilder::new(Vec::new()).complete(&data);
        let verifier = MerkleVerifier::new(root, leaf_hashes.into_boxed_slice()).unwrap();
        let verifier = ReadSizedMerkleVerifier::new(verifier, READ_SIZE).unwrap();
        // Offset isn't aligned.
        assert_matches!(verifier.verify(1, &data[1..READ_SIZE + 1]), Err(Status::INVALID_ARGS));
        // Read past the end.
        assert_matches!(
            verifier.verify(READ_SIZE * 4, &data[0..READ_SIZE]),
            Err(Status::INVALID_ARGS)
        );
        // Not the end of the data and the data is not a multiple of the read size.
        assert_matches!(verifier.verify(0, &data[0..READ_SIZE - 10]), Err(Status::INVALID_ARGS));
        // At end of the data and it's the wrong amount of data but it's within the last block so
        // it's detected by the hashes.
        assert_matches!(
            verifier.verify(READ_SIZE * 3, &data[READ_SIZE * 3..READ_SIZE * 3 + 19]),
            Err(Status::IO_DATA_INTEGRITY)
        );
        let mut last_block_with_an_extra_byte = data[READ_SIZE * 3..READ_SIZE * 3 + 20].to_vec();
        last_block_with_an_extra_byte.push(0xAB);
        assert_matches!(
            verifier.verify(READ_SIZE * 3, &last_block_with_an_extra_byte),
            Err(Status::IO_DATA_INTEGRITY)
        );
        assert_matches!(
            verifier.verify(READ_SIZE * 3, &data[READ_SIZE * 3..READ_SIZE * 3 + 20]),
            Ok(())
        );
    }

    /// Flips one bit at the start of `data`, expects verification to fail with
    /// `IO_DATA_INTEGRITY`, then restores the bit so `data` is unchanged on success.
    fn verify_with_first_bit_flipped(
        verifier: &MerkleVerifier,
        offset: usize,
        data: &mut [u8],
    ) -> Result<(), Error> {
        data[0] ^= 0x01;
        match verifier.verify(offset, data) {
            Ok(()) => Err(anyhow!("verify_with_first_bit_flipped should have failed")),
            Err(Status::IO_DATA_INTEGRITY) => {
                data[0] ^= 0x01;
                Ok(())
            }
            Err(e) => Err(anyhow!("unexpected error in verify_with_first_bit_flipped: {e:?}")),
        }
    }

    /// Flips one bit at the end of `data`, expects verification to fail with
    /// `IO_DATA_INTEGRITY`, then restores the bit so `data` is unchanged on success.
    fn verify_with_last_bit_flipped(
        verifier: &MerkleVerifier,
        offset: usize,
        data: &mut [u8],
    ) -> Result<(), Error> {
        *data.last_mut().unwrap() ^= 0x80;
        match verifier.verify(offset, data) {
            Ok(()) => Err(anyhow!("verify_with_last_bit_flipped should have failed")),
            Err(Status::IO_DATA_INTEGRITY) => {
                *data.last_mut().unwrap() ^= 0x80;
                Ok(())
            }
            Err(e) => Err(anyhow!("unexpected error in verify_with_last_bit_flipped: {e:?}")),
        }
    }

    /// Exercises `MerkleVerifier::verify` over several read granularities, checking both that
    /// intact data passes and that single-bit corruptions at each end are caught.
    fn run_verify_tests(data: &mut [u8], verifier: MerkleVerifier) -> Result<(), Error> {
        // Verify all of the data at once.
        verifier.verify(0, data).context("verify all data")?;
        verify_with_first_bit_flipped(&verifier, 0, data).context("verify all data")?;
        verify_with_last_bit_flipped(&verifier, 0, data).context("verify all data")?;

        // Verify 1 block at a time.
        for (i, block) in data.chunks_mut(BLOCK_SIZE).enumerate() {
            let offset = i * BLOCK_SIZE;
            let context = || format!("verify 1 block at a time: offset={offset}");
            verifier.verify(offset, block).with_context(context)?;
            verify_with_first_bit_flipped(&verifier, offset, block).with_context(context)?;
            verify_with_last_bit_flipped(&verifier, offset, block).with_context(context)?;
        }

        // Verify 4 blocks at a time.
        const BLOCK_COUNT: usize = 4;
        for (i, block) in data.chunks_mut(BLOCK_SIZE * BLOCK_COUNT).enumerate() {
            let offset = i * BLOCK_SIZE * BLOCK_COUNT;
            let context = || format!("verify {BLOCK_COUNT} blocks at a time: offset={offset}");
            verifier.verify(offset, block).with_context(context)?;
            verify_with_first_bit_flipped(&verifier, offset, block).with_context(context)?;
            verify_with_last_bit_flipped(&verifier, offset, block).with_context(context)?;
        }
        Ok(())
    }

    /// Same as `verify_with_first_bit_flipped` but for a `ReadSizedMerkleVerifier`.
    fn verify_reads_with_first_bit_flipped(
        verifier: &ReadSizedMerkleVerifier,
        offset: usize,
        data: &mut [u8],
    ) -> Result<(), Error> {
        data[0] ^= 0x01;
        match verifier.verify(offset, data) {
            Ok(()) => Err(anyhow!("verify_reads_with_first_bit_flipped should have failed")),
            Err(Status::IO_DATA_INTEGRITY) => {
                data[0] ^= 0x01;
                Ok(())
            }
            Err(e) => {
                Err(anyhow!("unexpected error in verify_reads_with_first_bit_flipped: {e:?}"))
            }
        }
    }

    /// Same as `verify_with_last_bit_flipped` but for a `ReadSizedMerkleVerifier`.
    fn verify_reads_with_last_bit_flipped(
        verifier: &ReadSizedMerkleVerifier,
        offset: usize,
        data: &mut [u8],
    ) -> Result<(), Error> {
        *data.last_mut().unwrap() ^= 0x80;
        match verifier.verify(offset, data) {
            Ok(()) => Err(anyhow!("verify_reads_with_last_bit_flipped should have failed")),
            Err(Status::IO_DATA_INTEGRITY) => {
                *data.last_mut().unwrap() ^= 0x80;
                Ok(())
            }
            Err(e) => Err(anyhow!("unexpected error in verify_reads_with_last_bit_flipped: {e:?}")),
        }
    }

    /// Exercises `ReadSizedMerkleVerifier::verify` one read-sized chunk at a time, checking both
    /// that intact data passes and that single-bit corruptions at each end are caught.
    fn run_read_sized_verify_tests(
        data: &mut [u8],
        verifier: MerkleVerifier,
        read_size: usize,
    ) -> Result<(), Error> {
        let verifier = ReadSizedMerkleVerifier::new(verifier, read_size)
            .context("Failed to create read aligned verifier")?;

        for (i, chunk) in data.chunks_mut(read_size).enumerate() {
            let offset = i * read_size;
            let context = || format!("verify read: offset={offset}");
            verifier.verify(offset, chunk).with_context(context)?;
            verify_reads_with_first_bit_flipped(&verifier, offset, chunk).with_context(context)?;
            verify_reads_with_last_bit_flipped(&verifier, offset, chunk).with_context(context)?;
        }
        Ok(())
    }

    /// Runs the full verification suite (block-sized and several read-sized granularities)
    /// against `data`.
    fn verify_test(data: &mut [u8]) -> Result<(), Error> {
        let hashes = Vec::with_capacity(data.len().div_ceil(BLOCK_SIZE));
        let (root, hashes) = MerkleRootBuilder::new(hashes).complete(data);
        let verifier = MerkleVerifier::new(root, hashes.into_boxed_slice())
            .with_context(|| format!("create verifier: data-size={}", data.len()))?;
        run_verify_tests(data, verifier.clone())
            .with_context(|| format!("verify data-size={}", data.len()))?;
        for read_size in [8 * 1024, 32 * 1024, 96 * 1024, 128 * 1024, 216 * 1024] {
            run_read_sized_verify_tests(data, verifier.clone(), read_size).with_context(|| {
                format!("verify read read-size={} data-size={}", read_size, data.len())
            })?;
        }
        Ok(())
    }

    #[test_case(1; "1")]
    #[test_case(4096; "4096")]
    #[test_case(8192 - 1; "8191")]
    #[test_case(8192; "8192")]
    #[test_case(8192 + 1; "8193")]
    #[test_case(8192 * 2 - 1; "16383")]
    #[test_case(8192 * 2; "16384")]
    #[test_case(8192 * 2 + 1; "16385")]
    #[test_case(8192 * 256 - 1; "2097151")]
    #[test_case(8192 * 256; "2097152")]
    #[test_case(8192 * 256 + 1; "2097153")]
    fn test_verification(size: usize) {
        verify_test(&mut create_data(size)).unwrap();
    }

    // Ignored by default: this sweeps hundreds of MiB of data sizes and takes far too long for
    // the normal test suite. Worker threads stride through the sizes in parallel.
    #[test]
    #[ignore]
    fn test_very_large_verification() {
        const MAX_BUF: usize = 256 * 1024 * 1024 + 8192;
        let parallelism = std::thread::available_parallelism().unwrap().get();
        std::thread::scope(|scope| {
            for thread in 0..parallelism {
                scope.spawn(move || {
                    let mut data = create_data(MAX_BUF + 1);
                    for size in ((8192 * (thread + 1))..MAX_BUF).step_by(8192 * parallelism) {
                        // Probe each stride point and its immediate neighbors to hit block
                        // boundary edge cases.
                        verify_test(&mut data[0..size - 1]).unwrap();
                        verify_test(&mut data[0..size]).unwrap();
                        verify_test(&mut data[0..size + 1]).unwrap();
                    }
                });
            }
        });
    }
}