Skip to main content

sparse/
builder.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::format::{CHUNK_HEADER_SIZE, SPARSE_HEADER_SIZE};
6use crate::{
7    BLK_SIZE, Chunk, NO_SOURCE, SparseDataType, SparseError, SparseHeader, UnalignedSource,
8};
9use std::io::{Cursor, Read, Seek, SeekFrom, Write};
10use std::ops::Range;
11
/// Input data for a [`SparseImageBuilder`].
pub enum DataSource {
    /// Heap allocated buffer.
    Buffer(Box<[u8]>),
    /// Read `size` bytes from `reader`.
    Reader { reader: Box<dyn Read>, size: u64 },
    /// Skips this many bytes.
    Skip(u64),
    /// Repeats the given `u32` value this many times, i.e. describes `4 * count` bytes.
    Fill(u32, u64),
    /// Read `size` bytes from `vmo` at `offset`.
    #[cfg(target_os = "fuchsia")]
    Vmo { vmo: zx::Vmo, size: u64, offset: u64 },
}

impl DataSource {
    /// The size of the source in bytes.
    ///
    /// For `Fill` the byte count saturates at `u64::MAX` instead of overflowing when the repeat
    /// count is too large to be expressed in bytes.
    fn data_size(&self) -> u64 {
        match self {
            DataSource::Buffer(buf) => buf.len() as u64,
            DataSource::Reader { size, .. } => *size,
            DataSource::Skip(size) => *size,
            // `count * 4` wraps in release builds for counts above `u64::MAX / 4`, which would
            // make an enormous fill look tiny, so clamp instead.
            DataSource::Fill(_, count) => count.saturating_mul(std::mem::size_of::<u32>() as u64),
            #[cfg(target_os = "fuchsia")]
            DataSource::Vmo { size, .. } => *size,
        }
    }
}
40
41/// Builds sparse image files from a set of input DataSources.
42pub struct SparseImageBuilder {
43    block_size: u32,
44    sources: Vec<DataSource>,
45
46    // The `total_sz` field in the chunk's header is 32 bits and includes the 12 bytes for the
47    // header itself. The chunk size must be a multiple of the block size so the maximum chunk size
48    // is 4GiB minus the block size provided the block size is greater than or equal to 12. This
49    // field could be derived from `block_size` but is stored separately so it can be set in tests
50    // to avoid creating 4GiB data sources.
51    max_chunk_size: u32,
52}
53
54impl SparseImageBuilder {
55    pub fn new() -> Self {
56        Self { block_size: BLK_SIZE, sources: vec![], max_chunk_size: u32::MAX - BLK_SIZE + 1 }
57    }
58
59    pub fn set_block_size(mut self, block_size: u32) -> Self {
60        assert!(
61            block_size >= CHUNK_HEADER_SIZE,
62            "The block size must be greater than {}",
63            CHUNK_HEADER_SIZE
64        );
65        self.max_chunk_size = u32::MAX - block_size + 1;
66        self.block_size = block_size;
67        self
68    }
69
70    /// Adds the given data `source` to the image. `source` may be encoded as one or more chunks.
71    pub fn add_source(mut self, source: DataSource) -> Self {
72        self.sources.push(source);
73        self
74    }
75
76    /// Calculates total amount of space required to build the sparse image given the current set of
77    /// data sources.
78    pub fn built_size(&self) -> u64 {
79        let mut built_size = SPARSE_HEADER_SIZE as u64;
80        for source in &self.sources {
81            for size in ChunkedRange::new(0..source.data_size(), self.max_chunk_size) {
82                let size = size as u64;
83                // We only need the encoded size for each chunk once it complies with max chunk size
84                // so we can ignore start offset.
85                let start = 0;
86                let chunk = match &source {
87                    DataSource::Buffer(..) => Chunk::Raw { start, size },
88                    DataSource::Reader { .. } => Chunk::Raw { start, size },
89                    DataSource::Skip(..) => Chunk::DontCare { start, size },
90                    DataSource::Fill(..) => Chunk::Fill { start, size, value: 0 },
91                    #[cfg(target_os = "fuchsia")]
92                    DataSource::Vmo { .. } => Chunk::Raw { start, size },
93                };
94                built_size += chunk.chunk_data_len() as u64;
95            }
96        }
97        built_size
98    }
99
100    pub fn build<W: Write + Seek>(self, output: &mut W) -> Result<(), SparseError> {
101        // We'll fill the header in later.
102        output.seek(SeekFrom::Start(SPARSE_HEADER_SIZE as u64))?;
103        let mut chunk_writer = ChunkWriter::new(self.block_size, output);
104        for source in self.sources {
105            match source {
106                DataSource::Buffer(buf) => {
107                    if buf.len() % self.block_size as usize != 0 {
108                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Buffer(
109                            buf.len(),
110                        )));
111                    }
112                    for slice in buf.chunks(self.max_chunk_size as usize) {
113                        chunk_writer
114                            .write_raw_chunk(slice.len().try_into().unwrap(), Cursor::new(slice))?;
115                    }
116                }
117                DataSource::Reader { mut reader, size } => {
118                    if size % self.block_size as u64 != 0 {
119                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Reader(
120                            size,
121                        )));
122                    }
123                    for size in ChunkedRange::new(0..size, self.max_chunk_size) {
124                        chunk_writer.write_raw_chunk(size, (&mut reader).take(size as u64))?;
125                    }
126                }
127                DataSource::Skip(size) => {
128                    if size % self.block_size as u64 != 0 {
129                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Skip(size)));
130                    }
131                    for size in ChunkedRange::new(0..size, self.max_chunk_size) {
132                        chunk_writer.write_dont_care_chunk(size)?;
133                    }
134                }
135                DataSource::Fill(value, count) => {
136                    let size = count * std::mem::size_of::<u32>() as u64;
137                    if size % self.block_size as u64 != 0 {
138                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Fill(size)));
139                    }
140                    for size in ChunkedRange::new(0..size, self.max_chunk_size) {
141                        chunk_writer.write_fill_chunk(size, value)?;
142                    }
143                }
144                #[cfg(target_os = "fuchsia")]
145                DataSource::Vmo { vmo, size, mut offset } => {
146                    if size % self.block_size as u64 != 0 {
147                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Vmo(size)));
148                    }
149                    let mut buffer =
150                        vec![0; std::cmp::min(size as usize, self.max_chunk_size as usize)];
151                    for size in ChunkedRange::new(0..size, self.max_chunk_size) {
152                        let buffer = &mut buffer[0..size as usize];
153                        vmo.read(buffer, offset).unwrap();
154                        chunk_writer.write_raw_chunk(size, Cursor::new(buffer))?;
155                        offset += size as u64;
156                    }
157                }
158            };
159        }
160
161        let ChunkWriter { num_blocks, num_chunks, .. } = chunk_writer;
162        output.seek(SeekFrom::Start(0))?;
163        let header = SparseHeader::new(self.block_size, num_blocks, num_chunks);
164        bincode::serialize_into(&mut *output, &header)
165            .map_err(|e| SparseError::Serialize { ty: SparseDataType::Header, source: e })?;
166
167        output.flush()?;
168        Ok(())
169    }
170
171    #[cfg(target_os = "fuchsia")]
172    pub fn build_vmo(self) -> Result<zx::Vmo, SparseError> {
173        let vmo = zx::Vmo::create(self.built_size())?;
174        let mut stream = zx::Stream::create(zx::StreamOptions::MODE_WRITE, &vmo, 0)?;
175        self.build(&mut stream)?;
176        Ok(vmo)
177    }
178}
179
/// Writes chunks to `writer` one after another while keeping the running totals that the image
/// header needs.
struct ChunkWriter<'a, W> {
    /// Block size handed to every chunk that gets written.
    block_size: u32,
    /// `start` given to the next chunk; advanced by the output size of each chunk written.
    current_offset: u64,
    /// Number of chunks written so far.
    num_chunks: u32,
    /// Number of output blocks covered by the chunks written so far.
    num_blocks: u32,
    /// Destination of the encoded chunks.
    writer: &'a mut W,
}
187
188impl<'a, W: Write> ChunkWriter<'a, W> {
189    fn new(block_size: u32, writer: &'a mut W) -> Self {
190        Self { block_size, current_offset: 0, num_chunks: 0, num_blocks: 0, writer }
191    }
192
193    fn write_chunk_impl<R: Read>(
194        &mut self,
195        chunk: Chunk,
196        source: Option<&mut R>,
197    ) -> Result<(), SparseError> {
198        chunk.write(source, &mut self.writer, self.block_size)?;
199        self.num_blocks = self
200            .num_blocks
201            .checked_add(chunk.output_blocks(self.block_size))
202            .ok_or(SparseError::TooManyBlocks)?;
203        // The number of blocks and chunks are both a u32. Each chunk contains at least 1 block so
204        // the number of blocks will overflow above before the number of chunks.
205        self.num_chunks += 1;
206        self.current_offset += chunk.output_size() as u64;
207        Ok(())
208    }
209
210    fn write_raw_chunk<R: Read>(&mut self, size: u32, mut source: R) -> Result<(), SparseError> {
211        self.write_chunk_impl(
212            Chunk::Raw { start: self.current_offset, size: size.into() },
213            Some(&mut source),
214        )
215    }
216
217    fn write_dont_care_chunk(&mut self, size: u32) -> Result<(), SparseError> {
218        self.write_chunk_impl(
219            Chunk::DontCare { start: self.current_offset, size: size.into() },
220            NO_SOURCE,
221        )
222    }
223
224    fn write_fill_chunk(&mut self, size: u32, value: u32) -> Result<(), SparseError> {
225        self.write_chunk_impl(
226            Chunk::Fill { start: self.current_offset, size: size.into(), value },
227            NO_SOURCE,
228        )
229    }
230}
231
/// An iterator that splits the length of `range` into sizes no larger than `max_chunk_size`.
///
/// It yields `max_chunk_size` `(range.end - range.start) / max_chunk_size` times, followed by
/// `(range.end - range.start) % max_chunk_size` if that remainder is non-zero. A range whose
/// `start` is not below its `end` yields nothing.
///
/// # Examples
/// ```ignore
/// assert_eq!(ChunkedRange::new(0..10, 5).collect::<Vec<_>>(), vec![5, 5]);
/// assert_eq!(ChunkedRange::new(0..13, 5).collect::<Vec<_>>(), vec![5, 5, 3]);
/// ```
struct ChunkedRange {
    range: Range<u64>,
    max_chunk_size: u32,
}

impl ChunkedRange {
    /// Creates an iterator over the chunk sizes of `range`.
    ///
    /// # Panics
    ///
    /// Panics if `max_chunk_size` is zero: the iterator could never advance through a non-empty
    /// range and would yield zero-sized chunks forever.
    fn new(range: Range<u64>, max_chunk_size: u32) -> Self {
        assert!(max_chunk_size > 0, "max_chunk_size must be non-zero");
        Self { range, max_chunk_size }
    }
}

impl Iterator for ChunkedRange {
    type Item = u32;

    fn next(&mut self) -> Option<Self::Item> {
        // Saturate so that a reversed range counts as empty, like `Range` itself, rather than
        // underflowing.
        let remaining = self.range.end.saturating_sub(self.range.start);
        if remaining == 0 {
            return None;
        }
        // Anything that doesn't fit in a u32 is certainly larger than the maximum chunk size.
        let step = u32::try_from(remaining)
            .map_or(self.max_chunk_size, |left| left.min(self.max_chunk_size));
        self.range.start += u64::from(step);
        Some(step)
    }
}
267
268#[cfg(test)]
269mod tests {
270    use super::*;
271    use crate::format::CHUNK_HEADER_SIZE;
272    use crate::reader::SparseReader;
273
274    #[test]
275    fn test_chunked_range() {
276        assert_eq!(&ChunkedRange::new(0..0, 32).collect::<Vec<_>>(), &[]);
277        assert_eq!(&ChunkedRange::new(0..10, 32).collect::<Vec<_>>(), &[10]);
278        assert_eq!(&ChunkedRange::new(100..101, 32).collect::<Vec<_>>(), &[1]);
279        assert_eq!(&ChunkedRange::new(0..100, 32).collect::<Vec<_>>(), &[32, 32, 32, 4]);
280        assert_eq!(&ChunkedRange::new(10..100, 32).collect::<Vec<_>>(), &[32, 32, 26]);
281        assert_eq!(
282            &ChunkedRange::new((u32::MAX as u64)..(u32::MAX as u64 + 80), 32).collect::<Vec<_>>(),
283            &[32, 32, 16]
284        );
285        assert_eq!(
286            &ChunkedRange::new((u64::MAX - 50)..u64::MAX, 32).collect::<Vec<_>>(),
287            &[32, 18]
288        );
289    }
290
291    #[test]
292    fn test_build_with_buffer() {
293        let mut builder = SparseImageBuilder::new();
294        builder.max_chunk_size = BLK_SIZE;
295        let mut buf = Vec::with_capacity((BLK_SIZE * 2) as usize);
296        let part1 = vec![0xABu8; BLK_SIZE as usize];
297        let part2 = vec![0xCDu8; BLK_SIZE as usize];
298        buf.extend_from_slice(&part1);
299        buf.extend_from_slice(&part2);
300        let mut output = vec![];
301        let builder = builder.add_source(DataSource::Buffer(buf.into_boxed_slice()));
302        let expected_size = builder.built_size() as usize;
303        builder.build(&mut Cursor::new(&mut output)).unwrap();
304        assert_eq!(output.len(), expected_size);
305
306        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
307        assert_eq!(
308            reader.chunks(),
309            &[
310                (
311                    Chunk::Raw { start: 0, size: BLK_SIZE.into() },
312                    Some((SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE) as u64)
313                ),
314                (
315                    Chunk::Raw { start: BLK_SIZE as u64, size: BLK_SIZE.into() },
316                    Some((SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 2 + BLK_SIZE) as u64)
317                )
318            ]
319        );
320        assert_eq!(
321            &output[(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE) as usize
322                ..(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE + BLK_SIZE) as usize],
323            &part1
324        );
325        assert_eq!(
326            &output[(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 2 + BLK_SIZE) as usize
327                ..(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 2 + BLK_SIZE * 2) as usize],
328            &part2
329        );
330    }
331
332    #[test]
333    fn test_build_with_reader() {
334        let part1 = vec![0xABu8; BLK_SIZE as usize];
335        let part2 = vec![0xCDu8; BLK_SIZE as usize];
336        let mut buf = Vec::with_capacity(BLK_SIZE as usize * 2);
337        buf.extend_from_slice(&part1);
338        buf.extend_from_slice(&part2);
339
340        let mut builder = SparseImageBuilder::new();
341        builder.max_chunk_size = BLK_SIZE;
342        let mut output = vec![];
343
344        let reader1 = Cursor::new(buf.clone());
345        let mut reader2 = Cursor::new(buf);
346        reader2.seek(SeekFrom::Start(BLK_SIZE as u64)).unwrap();
347
348        let builder = builder
349            .add_source(DataSource::Reader {
350                reader: Box::new(reader1),
351                size: (BLK_SIZE * 2) as u64,
352            })
353            .add_source(DataSource::Reader { reader: Box::new(reader2), size: BLK_SIZE as u64 });
354        let expected_size = builder.built_size() as usize;
355        builder.build(&mut Cursor::new(&mut output)).unwrap();
356        assert_eq!(output.len(), expected_size);
357
358        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
359        assert_eq!(
360            reader.chunks(),
361            &[
362                (
363                    Chunk::Raw { start: 0, size: BLK_SIZE.into() },
364                    Some((SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE) as u64)
365                ),
366                (
367                    Chunk::Raw { start: BLK_SIZE as u64, size: BLK_SIZE.into() },
368                    Some((SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 2 + BLK_SIZE) as u64)
369                ),
370                (
371                    Chunk::Raw { start: (BLK_SIZE * 2) as u64, size: BLK_SIZE.into() },
372                    Some((SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 3 + BLK_SIZE * 2) as u64)
373                ),
374            ]
375        );
376        assert_eq!(
377            &output[(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE) as usize
378                ..(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE + BLK_SIZE) as usize],
379            &part1
380        );
381        assert_eq!(
382            &output[(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 2 + BLK_SIZE) as usize
383                ..(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 2 + BLK_SIZE * 2) as usize],
384            &part2
385        );
386        assert_eq!(
387            &output[(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 3 + BLK_SIZE * 2) as usize
388                ..(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 3 + BLK_SIZE * 3) as usize],
389            &part2
390        );
391    }
392
393    #[test]
394    fn test_build_with_skip() {
395        let mut builder = SparseImageBuilder::new();
396        builder.max_chunk_size = BLK_SIZE;
397        let mut output = vec![];
398        let builder = builder.add_source(DataSource::Skip((BLK_SIZE * 2) as u64));
399        let expected_size = builder.built_size() as usize;
400        builder.build(&mut Cursor::new(&mut output)).unwrap();
401        assert_eq!(output.len(), expected_size);
402
403        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
404        assert_eq!(
405            reader.chunks(),
406            &[
407                (Chunk::DontCare { start: 0, size: BLK_SIZE.into() }, None),
408                (Chunk::DontCare { start: BLK_SIZE as u64, size: BLK_SIZE.into() }, None)
409            ]
410        );
411    }
412
413    #[test]
414    fn test_build_with_fill() {
415        let mut builder = SparseImageBuilder::new();
416        builder.max_chunk_size = BLK_SIZE;
417        let mut output = vec![];
418        let builder = builder.add_source(DataSource::Fill(0xAB, (BLK_SIZE / 2) as u64));
419        let expected_size = builder.built_size() as usize;
420        builder.build(&mut Cursor::new(&mut output)).unwrap();
421        assert_eq!(output.len(), expected_size);
422
423        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
424        assert_eq!(
425            reader.chunks(),
426            &[
427                (Chunk::Fill { start: 0, size: BLK_SIZE.into(), value: 0xAB }, None),
428                (Chunk::Fill { start: BLK_SIZE as u64, size: BLK_SIZE.into(), value: 0xAB }, None)
429            ]
430        );
431    }
432
433    #[test]
434    fn test_overflow_block_count() {
435        struct Sink;
436
437        impl Write for Sink {
438            fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
439                Ok(buf.len())
440            }
441
442            fn flush(&mut self) -> std::io::Result<()> {
443                Ok(())
444            }
445        }
446
447        impl Seek for Sink {
448            fn seek(&mut self, _pos: SeekFrom) -> std::io::Result<u64> {
449                Ok(0)
450            }
451        }
452
453        let result = SparseImageBuilder::new()
454            .set_block_size(16)
455            .add_source(DataSource::Skip(u64::MAX - 15))
456            .build(&mut Sink);
457        assert!(result.is_err());
458    }
459
460    #[cfg(target_os = "fuchsia")]
461    #[test]
462    fn test_build_with_vmo() {
463        let mut builder = SparseImageBuilder::new();
464        builder.max_chunk_size = BLK_SIZE;
465        let size = (BLK_SIZE * 2) as u64;
466        let vmo = zx::Vmo::create(size).unwrap();
467        const PART_1: [u8; BLK_SIZE as usize] = [0xABu8; BLK_SIZE as usize];
468        const PART_2: [u8; BLK_SIZE as usize] = [0xCBu8; BLK_SIZE as usize];
469        vmo.write(&PART_1, 0).unwrap();
470        vmo.write(&PART_2, BLK_SIZE as u64).unwrap();
471        // We add two separate data sources sharing the same VMO but with a different offset.
472        let mut output = vec![];
473        let builder = builder
474            .add_source(DataSource::Vmo {
475                vmo: vmo.duplicate_handle(zx::Rights::SAME_RIGHTS).unwrap(),
476                size: BLK_SIZE as u64,
477                offset: 0,
478            })
479            .add_source(DataSource::Vmo { vmo, size: BLK_SIZE as u64, offset: BLK_SIZE as u64 });
480        let expected_size = builder.built_size() as usize;
481        builder.build(&mut Cursor::new(&mut output)).unwrap();
482        assert_eq!(output.len(), expected_size);
483
484        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
485        assert_eq!(
486            reader.chunks(),
487            &[
488                (
489                    Chunk::Raw { start: 0, size: BLK_SIZE.into() },
490                    Some((SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE) as u64)
491                ),
492                (
493                    Chunk::Raw { start: BLK_SIZE as u64, size: BLK_SIZE.into() },
494                    Some((SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 2 + BLK_SIZE) as u64)
495                )
496            ]
497        );
498        assert_eq!(
499            output[(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE) as usize
500                ..(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE + BLK_SIZE) as usize],
501            PART_1
502        );
503        assert_eq!(
504            output[(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 2 + BLK_SIZE) as usize
505                ..(SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * 2 + BLK_SIZE * 2) as usize],
506            PART_2
507        );
508    }
509}