Skip to main content

sparse/
builder.rs

// Copyright 2023 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5use crate::format::{CHUNK_HEADER_SIZE, SPARSE_HEADER_SIZE};
6use crate::{
7    BLK_SIZE, Chunk, NO_SOURCE, SparseDataType, SparseError, SparseHeader, UnalignedSource,
8};
9use std::io::{Cursor, Read, Seek, SeekFrom, Write};
10use std::ops::Range;
11
/// Input data for a SparseImageBuilder.
pub enum DataSource {
    /// Heap allocated buffer.
    Buffer(Box<[u8]>),
    /// Read `size` bytes from `reader`.
    Reader { reader: Box<dyn Read>, size: u64 },
    /// Skips this many bytes.
    Skip(u64),
    /// Repeats the given u32, this many times. The source therefore covers four bytes per
    /// repetition.
    Fill(u32, u64),
    #[cfg(target_os = "fuchsia")]
    /// Read `size` bytes from `vmo` at `offset`.
    Vmo { vmo: zx::Vmo, size: u64, offset: u64 },
}

impl DataSource {
    /// The size of the source in bytes.
    ///
    /// For `Fill` this is the repetition count multiplied by the size of a `u32`. The
    /// multiplication saturates at `u64::MAX` instead of overflowing, so an absurdly large count
    /// is reported as "at least `u64::MAX` bytes" rather than wrapping around to a small value.
    fn data_size(&self) -> u64 {
        match self {
            DataSource::Buffer(buf) => buf.len() as u64,
            DataSource::Reader { size, .. } => *size,
            DataSource::Skip(size) => *size,
            DataSource::Fill(_, count) => count.saturating_mul(std::mem::size_of::<u32>() as u64),
            #[cfg(target_os = "fuchsia")]
            DataSource::Vmo { size, .. } => *size,
        }
    }
}
40
41/// Builds sparse image files from a set of input DataSources.
42pub struct SparseImageBuilder {
43    block_size: u32,
44    sources: Vec<DataSource>,
45
46    // The `total_sz` field in the chunk's header is 32 bits and includes the 12 bytes for the
47    // header itself. The chunk size must be a multiple of the block size so the maximum chunk size
48    // is 4GiB minus the block size provided the block size is greater than or equal to 12. This
49    // field could be derived from `block_size` but is stored separately so it can be set in tests
50    // to avoid creating 4GiB data sources.
51    max_chunk_size: u32,
52}
53
54impl SparseImageBuilder {
55    pub fn new() -> Self {
56        Self { block_size: BLK_SIZE, sources: vec![], max_chunk_size: u32::MAX - BLK_SIZE + 1 }
57    }
58
59    pub fn set_block_size(mut self, block_size: u32) -> Self {
60        assert!(
61            block_size >= CHUNK_HEADER_SIZE,
62            "The block size must be greater than {}",
63            CHUNK_HEADER_SIZE
64        );
65        self.max_chunk_size = u32::MAX - block_size + 1;
66        self.block_size = block_size;
67        self
68    }
69
70    /// Adds the given data `source` to the image. `source` may be encoded as one or more chunks.
71    pub fn add_source(mut self, source: DataSource) -> Self {
72        self.sources.push(source);
73        self
74    }
75
76    /// Calculates total amount of space required to build the sparse image given the current set of
77    /// data sources.
78    pub fn built_size(&self) -> u64 {
79        let mut built_size = SPARSE_HEADER_SIZE as u64;
80        for source in &self.sources {
81            for size in ChunkedRange::new(0..source.data_size(), self.max_chunk_size) {
82                let size = size as u64;
83                // We only need the encoded size for each chunk once it complies with max chunk size
84                // so we can ignore start offset.
85                let start = 0;
86                let chunk = match &source {
87                    DataSource::Buffer(..) => Chunk::Raw { start, size },
88                    DataSource::Reader { .. } => Chunk::Raw { start, size },
89                    DataSource::Skip(..) => Chunk::DontCare { start, size },
90                    DataSource::Fill(..) => Chunk::Fill { start, size, value: 0 },
91                    #[cfg(target_os = "fuchsia")]
92                    DataSource::Vmo { .. } => Chunk::Raw { start, size },
93                };
94                built_size += chunk.chunk_data_len() as u64;
95            }
96        }
97        built_size
98    }
99
100    pub fn build<W: Write + Seek>(self, output: &mut W) -> Result<(), SparseError> {
101        // We'll fill the header in later.
102        output.seek(SeekFrom::Start(SPARSE_HEADER_SIZE as u64))?;
103        let mut chunk_writer = ChunkWriter::new(self.block_size, output);
104        for source in self.sources {
105            match source {
106                DataSource::Buffer(buf) => {
107                    if buf.len() % self.block_size as usize != 0 {
108                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Buffer(
109                            buf.len(),
110                        )));
111                    }
112                    for slice in buf.chunks(self.max_chunk_size as usize) {
113                        chunk_writer
114                            .write_raw_chunk(slice.len().try_into().unwrap(), Cursor::new(slice))?;
115                    }
116                }
117                DataSource::Reader { mut reader, size } => {
118                    if size % self.block_size as u64 != 0 {
119                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Reader(
120                            size,
121                        )));
122                    }
123                    for size in ChunkedRange::new(0..size, self.max_chunk_size) {
124                        chunk_writer.write_raw_chunk(size, (&mut reader).take(size as u64))?;
125                    }
126                }
127                DataSource::Skip(size) => {
128                    if size % self.block_size as u64 != 0 {
129                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Skip(size)));
130                    }
131                    for size in ChunkedRange::new(0..size, self.max_chunk_size) {
132                        chunk_writer.write_dont_care_chunk(size)?;
133                    }
134                }
135                DataSource::Fill(value, count) => {
136                    let size = count * std::mem::size_of::<u32>() as u64;
137                    if size % self.block_size as u64 != 0 {
138                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Fill(size)));
139                    }
140                    for size in ChunkedRange::new(0..size, self.max_chunk_size) {
141                        chunk_writer.write_fill_chunk(size, value)?;
142                    }
143                }
144                #[cfg(target_os = "fuchsia")]
145                DataSource::Vmo { vmo, size, mut offset } => {
146                    if size % self.block_size as u64 != 0 {
147                        return Err(SparseError::UnalignedDataSource(UnalignedSource::Vmo(size)));
148                    }
149                    let mut buffer =
150                        vec![0; std::cmp::min(size as usize, self.max_chunk_size as usize)];
151                    for size in ChunkedRange::new(0..size, self.max_chunk_size) {
152                        let buffer = &mut buffer[0..size as usize];
153                        vmo.read(buffer, offset).unwrap();
154                        chunk_writer.write_raw_chunk(size, Cursor::new(buffer))?;
155                        offset += size as u64;
156                    }
157                }
158            };
159        }
160
161        let ChunkWriter { num_blocks, num_chunks, .. } = chunk_writer;
162        output.seek(SeekFrom::Start(0))?;
163        let header = SparseHeader::new(self.block_size, num_blocks, num_chunks);
164        bincode::serialize_into(&mut *output, &header)
165            .map_err(|e| SparseError::Serialize { ty: SparseDataType::Header, source: e })?;
166
167        output.flush()?;
168        Ok(())
169    }
170
171    #[cfg(target_os = "fuchsia")]
172    pub fn build_vmo(self) -> Result<zx::Vmo, SparseError> {
173        let vmo = zx::Vmo::create(self.built_size())?;
174        let mut stream = zx::Stream::create(zx::StreamOptions::MODE_WRITE, &vmo, 0)?;
175        self.build(&mut stream)?;
176        Ok(vmo)
177    }
178}
179
/// Writes chunks to `writer` one after another, keeping count of the chunks and blocks written
/// and of the image offset at which the next chunk starts.
struct ChunkWriter<'a, W> {
    /// Block size handed to each chunk when it is written and when its block count is computed.
    block_size: u32,
    /// Sum of the output sizes of the chunks written so far; used as the start offset of the
    /// next chunk.
    current_offset: u64,
    /// Number of chunks written so far.
    num_chunks: u32,
    /// Total number of output blocks of the chunks written so far.
    num_blocks: u32,
    /// Destination that the encoded chunks are written to.
    writer: &'a mut W,
}
187
188impl<'a, W: Write> ChunkWriter<'a, W> {
189    fn new(block_size: u32, writer: &'a mut W) -> Self {
190        Self { block_size, current_offset: 0, num_chunks: 0, num_blocks: 0, writer }
191    }
192
193    fn write_chunk_impl<R: Read>(
194        &mut self,
195        chunk: Chunk,
196        source: Option<&mut R>,
197    ) -> Result<(), SparseError> {
198        chunk.write(source, &mut self.writer, self.block_size)?;
199        self.num_blocks = self
200            .num_blocks
201            .checked_add(chunk.output_blocks(self.block_size))
202            .ok_or(SparseError::TooManyBlocks)?;
203        // The number of blocks and chunks are both a u32. Each chunk contains at least 1 block so
204        // the number of blocks will overflow above before the number of chunks.
205        self.num_chunks += 1;
206        self.current_offset += chunk.output_size() as u64;
207        Ok(())
208    }
209
210    fn write_raw_chunk<R: Read>(&mut self, size: u32, mut source: R) -> Result<(), SparseError> {
211        self.write_chunk_impl(
212            Chunk::Raw { start: self.current_offset, size: size.into() },
213            Some(&mut source),
214        )
215    }
216
217    fn write_dont_care_chunk(&mut self, size: u32) -> Result<(), SparseError> {
218        self.write_chunk_impl(
219            Chunk::DontCare { start: self.current_offset, size: size.into() },
220            NO_SOURCE,
221        )
222    }
223
224    fn write_fill_chunk(&mut self, size: u32, value: u32) -> Result<(), SparseError> {
225        self.write_chunk_impl(
226            Chunk::Fill { start: self.current_offset, size: size.into(), value },
227            NO_SOURCE,
228        )
229    }
230}
231
/// An iterator that yields `max_chunk_size` `(range.end - range.start) / max_chunk_size` times
/// followed by `(range.end - range.start) % max_chunk_size` if it's non-zero.
///
/// A range whose start is not below its end is empty and yields nothing. `max_chunk_size` must
/// be non-zero: with a zero chunk size the iterator would yield `0` forever without advancing.
///
/// # Examples
/// ```ignore
/// assert_eq!(ChunkedRange::new(0..10, 5).collect::<Vec<_>>(), vec![5, 5]);
/// assert_eq!(ChunkedRange::new(0..13, 5).collect::<Vec<_>>(), vec![5, 5, 3]);
/// ```
struct ChunkedRange {
    /// The part of the range that has not been yielded yet.
    range: Range<u64>,
    /// The largest size that is ever yielded.
    max_chunk_size: u32,
}

impl ChunkedRange {
    /// Creates an iterator that splits `range` into sizes of at most `max_chunk_size`.
    fn new(range: Range<u64>, max_chunk_size: u32) -> Self {
        debug_assert!(max_chunk_size > 0, "max_chunk_size must be non-zero");
        Self { range, max_chunk_size }
    }
}

impl Iterator for ChunkedRange {
    type Item = u32;

    fn next(&mut self) -> Option<Self::Item> {
        // Saturating so that an inverted range (start > end) counts as empty instead of
        // underflowing.
        let remaining = self.range.end.saturating_sub(self.range.start);
        if remaining == 0 {
            return None;
        }
        let size = remaining.min(u64::from(self.max_chunk_size));
        self.range.start += size;
        // `size` never exceeds `max_chunk_size`, so it always fits in a u32.
        Some(size as u32)
    }
}
267
#[cfg(test)]
mod tests {
    use super::*;
    use crate::format::CHUNK_HEADER_SIZE;
    use crate::reader::SparseReader;
    #[cfg(target_os = "fuchsia")]
    use zx::HandleBased as _;

    /// Returns a builder whose maximum chunk size is a single block, so that multi-block sources
    /// are split into several chunks without needing huge inputs.
    fn one_block_chunk_builder() -> SparseImageBuilder {
        let mut builder = SparseImageBuilder::new();
        builder.max_chunk_size = BLK_SIZE;
        builder
    }

    /// Builds the image into a vector and checks that its length matches `built_size()`.
    fn build_to_vec(builder: SparseImageBuilder) -> Vec<u8> {
        let expected_size = builder.built_size() as usize;
        let mut output = vec![];
        builder.build(&mut Cursor::new(&mut output)).unwrap();
        assert_eq!(output.len(), expected_size);
        output
    }

    /// Offset within the image of the data of chunk number `index`, given that it and all the
    /// chunks before it are raw chunks holding exactly one block each.
    fn raw_data_offset(index: u32) -> u32 {
        SPARSE_HEADER_SIZE + CHUNK_HEADER_SIZE * (index + 1) + BLK_SIZE * index
    }

    /// Asserts that the data of raw chunk number `index` in `output` equals `expected`.
    fn assert_raw_block(output: &[u8], index: u32, expected: &[u8]) {
        let begin = raw_data_offset(index) as usize;
        let end = (raw_data_offset(index) + BLK_SIZE) as usize;
        assert_eq!(&output[begin..end], expected);
    }

    /// Collects the sizes produced by a `ChunkedRange`.
    fn chunk_sizes(range: std::ops::Range<u64>, max_chunk_size: u32) -> Vec<u32> {
        ChunkedRange::new(range, max_chunk_size).collect()
    }

    #[test]
    fn test_chunked_range() {
        assert!(chunk_sizes(0..0, 32).is_empty());
        assert_eq!(chunk_sizes(0..10, 32), [10]);
        assert_eq!(chunk_sizes(100..101, 32), [1]);
        assert_eq!(chunk_sizes(0..100, 32), [32, 32, 32, 4]);
        assert_eq!(chunk_sizes(10..100, 32), [32, 32, 26]);

        let beyond_u32 = u32::MAX as u64;
        assert_eq!(chunk_sizes(beyond_u32..(beyond_u32 + 80), 32), [32, 32, 16]);
        assert_eq!(chunk_sizes((u64::MAX - 50)..u64::MAX, 32), [32, 18]);
    }

    #[test]
    fn test_build_with_buffer() {
        let part1 = vec![0xABu8; BLK_SIZE as usize];
        let part2 = vec![0xCDu8; BLK_SIZE as usize];
        let buf = [part1.as_slice(), part2.as_slice()].concat();

        let builder =
            one_block_chunk_builder().add_source(DataSource::Buffer(buf.into_boxed_slice()));
        let output = build_to_vec(builder);

        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
        let expected_chunks = [
            (Chunk::Raw { start: 0, size: BLK_SIZE.into() }, Some(raw_data_offset(0) as u64)),
            (
                Chunk::Raw { start: BLK_SIZE as u64, size: BLK_SIZE.into() },
                Some(raw_data_offset(1) as u64),
            ),
        ];
        assert_eq!(reader.chunks(), &expected_chunks);

        assert_raw_block(&output, 0, &part1);
        assert_raw_block(&output, 1, &part2);
    }

    #[test]
    fn test_build_with_reader() {
        let part1 = vec![0xABu8; BLK_SIZE as usize];
        let part2 = vec![0xCDu8; BLK_SIZE as usize];
        let buf = [part1.as_slice(), part2.as_slice()].concat();

        // The first reader supplies both blocks; the second one is positioned at the start of
        // the second block and supplies only that block.
        let reader1 = Cursor::new(buf.clone());
        let mut reader2 = Cursor::new(buf);
        reader2.set_position(BLK_SIZE as u64);

        let builder = one_block_chunk_builder()
            .add_source(DataSource::Reader {
                reader: Box::new(reader1),
                size: (BLK_SIZE * 2) as u64,
            })
            .add_source(DataSource::Reader { reader: Box::new(reader2), size: BLK_SIZE as u64 });
        let output = build_to_vec(builder);

        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
        let expected_chunks = [
            (Chunk::Raw { start: 0, size: BLK_SIZE.into() }, Some(raw_data_offset(0) as u64)),
            (
                Chunk::Raw { start: BLK_SIZE as u64, size: BLK_SIZE.into() },
                Some(raw_data_offset(1) as u64),
            ),
            (
                Chunk::Raw { start: (BLK_SIZE * 2) as u64, size: BLK_SIZE.into() },
                Some(raw_data_offset(2) as u64),
            ),
        ];
        assert_eq!(reader.chunks(), &expected_chunks);

        assert_raw_block(&output, 0, &part1);
        assert_raw_block(&output, 1, &part2);
        assert_raw_block(&output, 2, &part2);
    }

    #[test]
    fn test_build_with_skip() {
        let builder = one_block_chunk_builder().add_source(DataSource::Skip((BLK_SIZE * 2) as u64));
        let output = build_to_vec(builder);

        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
        let expected_chunks = [
            (Chunk::DontCare { start: 0, size: BLK_SIZE.into() }, None),
            (Chunk::DontCare { start: BLK_SIZE as u64, size: BLK_SIZE.into() }, None),
        ];
        assert_eq!(reader.chunks(), &expected_chunks);
    }

    #[test]
    fn test_build_with_fill() {
        // `BLK_SIZE / 2` repetitions of a four-byte value cover two blocks.
        let builder =
            one_block_chunk_builder().add_source(DataSource::Fill(0xAB, (BLK_SIZE / 2) as u64));
        let output = build_to_vec(builder);

        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
        let expected_chunks = [
            (Chunk::Fill { start: 0, size: BLK_SIZE.into(), value: 0xAB }, None),
            (Chunk::Fill { start: BLK_SIZE as u64, size: BLK_SIZE.into(), value: 0xAB }, None),
        ];
        assert_eq!(reader.chunks(), &expected_chunks);
    }

    #[test]
    fn test_overflow_block_count() {
        /// Output that accepts and discards everything written to it.
        struct Sink;

        impl Write for Sink {
            fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
                Ok(buf.len())
            }

            fn flush(&mut self) -> std::io::Result<()> {
                Ok(())
            }
        }

        impl Seek for Sink {
            fn seek(&mut self, _pos: SeekFrom) -> std::io::Result<u64> {
                Ok(0)
            }
        }

        let builder = SparseImageBuilder::new()
            .set_block_size(16)
            .add_source(DataSource::Skip(u64::MAX - 15));
        let result = builder.build(&mut Sink);
        assert!(result.is_err());
    }

    #[cfg(target_os = "fuchsia")]
    #[test]
    fn test_build_with_vmo() {
        const PART_1: [u8; BLK_SIZE as usize] = [0xABu8; BLK_SIZE as usize];
        const PART_2: [u8; BLK_SIZE as usize] = [0xCBu8; BLK_SIZE as usize];

        let size = (BLK_SIZE * 2) as u64;
        let vmo = zx::Vmo::create(size).unwrap();
        vmo.write(&PART_1, 0).unwrap();
        vmo.write(&PART_2, BLK_SIZE as u64).unwrap();

        // We add two separate data sources sharing the same VMO but with a different offset.
        let builder = one_block_chunk_builder()
            .add_source(DataSource::Vmo {
                vmo: vmo.duplicate_handle(zx::Rights::SAME_RIGHTS).unwrap(),
                size: BLK_SIZE as u64,
                offset: 0,
            })
            .add_source(DataSource::Vmo { vmo, size: BLK_SIZE as u64, offset: BLK_SIZE as u64 });
        let output = build_to_vec(builder);

        let reader = SparseReader::new(Cursor::new(&output)).unwrap();
        let expected_chunks = [
            (Chunk::Raw { start: 0, size: BLK_SIZE.into() }, Some(raw_data_offset(0) as u64)),
            (
                Chunk::Raw { start: BLK_SIZE as u64, size: BLK_SIZE.into() },
                Some(raw_data_offset(1) as u64),
            ),
        ];
        assert_eq!(reader.chunks(), &expected_chunks);

        assert_raw_block(&output, 0, &PART_1);
        assert_raw_block(&output, 1, &PART_2);
    }
}