sparse/
reader.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::{deserialize_from, Chunk, SparseHeader};
6use anyhow::{ensure, Context, Result};
7use byteorder::{ByteOrder as _, LE};
8use std::io::{Read, Seek, SeekFrom};
9
10/// SparseReader is an implementation of std::io::Read which transparently unpacks the underlying
11/// sparse image as it is read.
12/// If random access reads are not required, it is more performant to use `unsparse` to completely
13/// unpack a sparse image.
14pub struct SparseReader<R> {
15    reader: R,
16    // Offset into the logical (unsparsed) image.
17    offset: u64,
18    // Size of the logical (unsparsed) image.
19    size: u64,
20    // The second field is the offset into `reader` at which the payload of the chunk appears, for
21    // Raw chunks.
22    chunks: Vec<(Chunk, Option<u64>)>,
23}
24
25impl<R: Read + Seek> SparseReader<R> {
26    /// Attempts to create a SparseReader from the given image.  Returns failure if the image is
27    /// malformed.
28    pub fn new(mut reader: R) -> Result<Self> {
29        let header: SparseHeader =
30            deserialize_from(&mut reader).context("Failed to read header")?;
31        ensure!(header.valid(), "Invalid header");
32        let num_chunks = header.total_chunks as usize;
33
34        let mut chunks = vec![];
35        let mut offset = 0;
36        for _ in 0..num_chunks {
37            let chunk = Chunk::read_metadata(&mut reader, offset, header.blk_sz)?;
38            let data_offset = if chunk.chunk_type() == crate::format::CHUNK_TYPE_RAW {
39                let data_offset = reader.stream_position()?;
40                // Skip past the data payload
41                reader.seek(SeekFrom::Current(chunk.output_size() as i64))?;
42                Some(data_offset)
43            } else {
44                None
45            };
46            offset += chunk.output_size() as u64;
47            chunks.push((chunk, data_offset));
48        }
49
50        reader.seek(SeekFrom::Start(0)).context("Failed to rewind reader")?;
51        Ok(Self { reader, offset: 0, size: offset, chunks })
52    }
53
54    /// Returns the index of the current chunk in `self.chunks`.
55    fn current_chunk(&self) -> Option<usize> {
56        let mut off = 0;
57        let mut i = 0;
58        for (chunk, _) in &self.chunks {
59            let size = chunk.output_size() as u64;
60            if self.offset >= off && self.offset < off + size {
61                return Some(i);
62            }
63            off += size;
64            i += 1;
65        }
66        None
67    }
68
69    pub fn is_sparse_file(reader: &mut R) -> Result<bool> {
70        let header: SparseHeader = deserialize_from(reader)?;
71        let res = header.valid();
72        reader.seek(SeekFrom::Start(0)).context("Failed to rewind reader")?;
73        Ok(res)
74    }
75
76    #[cfg(test)]
77    pub(crate) fn chunks(self) -> Vec<(Chunk, Option<u64>)> {
78        self.chunks
79    }
80}
81
82// It's assumed that `reader` already points at the right offset to read from the chunk, and `buf`
83// won't read past the end of the chunk.
84// `output_offset` is the logical position in the output stream.
85fn read_from_chunk<R: Read + Seek>(
86    reader: &mut R,
87    chunk: &Chunk,
88    output_offset: u64,
89    buf: &mut [u8],
90) -> std::io::Result<usize> {
91    match chunk {
92        Chunk::Raw { .. } => reader.read(buf),
93        Chunk::Fill { value, .. } => {
94            let mut value_bytes = value.to_le_bytes();
95            value_bytes.rotate_left(output_offset as usize % std::mem::size_of::<u32>());
96            let value_rotated = LE::read_u32(&value_bytes);
97            // Safety: `std::slice::align_to_mut` requires that everything in the dst slice is a
98            // valid type, which is true when going from [u8; 4] to [u32; 1].
99            let (prefix, wholes, suffix) = unsafe { buf.align_to_mut::<u32>() };
100            prefix.copy_from_slice(&value_bytes[value_bytes.len() - prefix.len()..]);
101            wholes.fill(value_rotated);
102            suffix.copy_from_slice(&value_bytes[..suffix.len()]);
103            Ok(buf.len())
104        }
105        Chunk::DontCare { .. } => {
106            buf.fill(0);
107            Ok(buf.len())
108        }
109        _ => unreachable!(),
110    }
111}
112
113impl<R: Read + Seek> Read for SparseReader<R> {
114    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
115        let mut bytes_read = 0;
116        while bytes_read < buf.len() {
117            let current_chunk_idx = match self.current_chunk() {
118                Some(i) => i,
119                None => return Ok(bytes_read),
120            };
121            let (current_chunk, chunk_start_offset) = &self.chunks[current_chunk_idx];
122            let offset_in_chunk = self.offset - current_chunk.output_offset().unwrap();
123            debug_assert!(offset_in_chunk < current_chunk.output_size() as u64);
124            let to_read = std::cmp::min(
125                buf.len() - bytes_read,
126                current_chunk.output_size() as usize - offset_in_chunk as usize,
127            );
128            if let Some(offset) = chunk_start_offset {
129                self.reader.seek(SeekFrom::Start(*offset + offset_in_chunk))?;
130            }
131            let bytes_read_from_chunk = read_from_chunk(
132                &mut self.reader,
133                current_chunk,
134                self.offset,
135                &mut buf[bytes_read..bytes_read + to_read],
136            )?;
137            bytes_read += bytes_read_from_chunk;
138            self.offset += bytes_read_from_chunk as u64;
139        }
140        Ok(bytes_read)
141    }
142}
143
144impl<R: Read + Seek> Seek for SparseReader<R> {
145    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
146        self.offset = match pos {
147            SeekFrom::Start(pos) => pos,
148            SeekFrom::Current(delta) => self
149                .offset
150                .checked_add_signed(delta)
151                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
152            SeekFrom::End(delta) => self
153                .size
154                .checked_add_signed(delta)
155                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
156        };
157        Ok(self.offset)
158    }
159}
160
#[cfg(test)]
mod test {
    use crate::builder::{DataSource, SparseImageBuilder};
    use crate::reader::SparseReader;
    use rand::rngs::SmallRng;
    use rand::{RngCore, SeedableRng};
    use std::io::{Read as _, Seek as _, SeekFrom, Write as _};
    use tempfile::{NamedTempFile, TempDir};

    /// Returns an 8192-byte buffer in which every byte holds the low eight bits of its own index.
    fn counting_pattern() -> Box<[u8; 8192]> {
        let mut bytes = Box::new([0u8; 8192]);
        for (index, byte) in bytes.iter_mut().enumerate() {
            *byte = index as u8;
        }
        bytes
    }

    /// Creates a temporary file in `dir` holding `100 * 4096` random bytes and rewinds it.
    ///
    /// Returns the file, the `TempPath` that `into_parts` split off from it (callers hold on to
    /// it for the rest of the test), and a copy of the bytes that were written.
    fn random_backing_file(dir: &TempDir) -> (std::fs::File, tempfile::TempPath, Vec<u8>) {
        let (mut file, temp_path) = NamedTempFile::new_in(dir).unwrap().into_parts();
        let mut data = vec![0u8; 100 * 4096];
        SmallRng::from_entropy().fill_bytes(&mut data);
        file.write_all(&data).unwrap();
        file.flush().unwrap();
        file.seek(SeekFrom::Start(0)).unwrap();
        (file, temp_path, data)
    }

    /// An image built without any chunks unpacks to nothing.
    #[test]
    fn empty_reader() {
        let scratch = TempDir::new().unwrap();

        let mut image = NamedTempFile::new_in(&scratch).unwrap().into_file();
        SparseImageBuilder::new().build(&mut image).expect("Build sparse image failed");
        image.seek(SeekFrom::Start(0)).unwrap();

        let mut reader =
            SparseReader::new(Box::new(image)).expect("Failed to create SparseReader");

        let mut contents = Vec::new();
        reader.read_to_end(&mut contents).expect("Failed to read unsparsed image");
        assert!(contents.is_empty());
    }

    /// A built image is detected as sparse; a file of arbitrary bytes is not.
    #[test]
    fn is_sparse_file() {
        let scratch = TempDir::new().unwrap();

        let mut image = NamedTempFile::new_in(&scratch).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(counting_pattern()))
            .build(&mut image)
            .expect("Build sparse image failed");
        image.seek(SeekFrom::Start(0)).unwrap();

        let detected = SparseReader::is_sparse_file(&mut image).expect("Should be a sparse file");
        assert!(detected);

        let mut garbage = NamedTempFile::new_in(&scratch).unwrap().into_file();
        garbage.write_all(&[1u8; 4096]).expect("Writing garbage file");
        garbage.seek(SeekFrom::Start(0)).unwrap();

        let detected = SparseReader::is_sparse_file(&mut garbage).unwrap();
        assert!(!detected);
    }

    /// Seeking relative to the start, the current position and the end lands on the expected
    /// logical offsets, and reads past the end yield no data.
    #[test]
    fn seek() {
        let scratch = TempDir::new().unwrap();

        let mut image = NamedTempFile::new_in(&scratch).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(counting_pattern()))
            .build(&mut image)
            .expect("Build sparse image failed");
        image.seek(SeekFrom::Start(0)).unwrap();
        let mut reader =
            SparseReader::new(Box::new(image)).expect("Failed to create SparseReader");

        let mut byte = [0u8; 1];

        // (seek request, offset it must report, byte that must be read there).  Each read moves
        // the position forward by one, which the `Current` requests account for.
        let inside_image = [
            (SeekFrom::Start(0), 0, 0u8),
            (SeekFrom::Start(100), 100, 100u8),
            (SeekFrom::Current(-2), 99, 99u8),
            (SeekFrom::Current(0), 100, 100u8),
            (SeekFrom::Current(1), 102, 102u8),
            (SeekFrom::End(-1), 8191, 255u8),
        ];
        for (request, expected_offset, expected_byte) in inside_image {
            assert_eq!(expected_offset, reader.seek(request).unwrap());
            assert_eq!(1, reader.read(&mut byte).unwrap());
            assert_eq!(byte[0], expected_byte);
        }

        // At and beyond the end the seek succeeds but there is nothing to read.
        let outside_image = [(SeekFrom::End(0), 8192), (SeekFrom::End(1), 8193)];
        for (request, expected_offset) in outside_image {
            assert_eq!(expected_offset, reader.seek(request).unwrap());
            assert_eq!(0, reader.read(&mut byte).unwrap());
        }
    }

    /// A read that straddles the end of the image is cut short; a read at the end returns zero.
    #[test]
    fn read_past_eof() {
        let scratch = TempDir::new().unwrap();

        let mut image = NamedTempFile::new_in(&scratch).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .build(&mut image)
            .expect("Build sparse image failed");
        image.seek(SeekFrom::Start(0)).unwrap();

        let mut reader =
            SparseReader::new(Box::new(image)).expect("Failed to create SparseReader");

        let mut two_bytes = [0u8; 2];

        reader.seek(SeekFrom::Start(8191)).expect("Seek failed");
        let count = reader.read(&mut two_bytes).expect("Failed to read");
        assert_eq!(count, 1);

        reader.seek(SeekFrom::Start(8192)).expect("Seek failed");
        let count = reader.read(&mut two_bytes).expect("Failed to read");
        assert_eq!(count, 0);
    }

    /// Reading an image made of every kind of data source from start to end reproduces the
    /// expected bytes for each region.
    #[test]
    fn full_read() {
        let scratch = TempDir::new().unwrap();

        // Generate a large temporary file
        let (file, _temp_path, data) = random_backing_file(&scratch);
        let content_size = data.len();

        let mut image = NamedTempFile::new_in(&scratch).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .add_chunk(DataSource::Reader { reader: Box::new(file), size: content_size as u64 })
            .add_chunk(DataSource::Skip(16384))
            .add_chunk(DataSource::Fill(0xaaaa_aaaau32, 1024))
            .add_chunk(DataSource::Skip(4096))
            .build(&mut image)
            .expect("Build sparse image failed");
        image.seek(SeekFrom::Start(0)).unwrap();

        let mut reader =
            SparseReader::new(Box::new(image)).expect("Failed to create SparseReader");

        let mut unsparsed = Vec::new();
        reader.read_to_end(&mut unsparsed).expect("Failed to read unsparsed image");

        // Logical offsets at which each region after the leading buffer begins.
        let file_start = 8192;
        let skip_start = file_start + content_size;
        let fill_start = skip_start + 16384;
        let tail_start = fill_start + 4096;

        assert_eq!(unsparsed.len(), tail_start + 4096);
        assert_eq!(&unsparsed[..file_start], &[0xffu8; 8192]);
        assert_eq!(&unsparsed[file_start..skip_start], &data[..]);
        assert_eq!(&unsparsed[skip_start..fill_start], &[0u8; 16384]);
        assert_eq!(&unsparsed[fill_start..tail_start], &[0xaau8; 4096]);
        assert_eq!(&unsparsed[tail_start..], &[0u8; 4096]);
    }

    /// Reads that start at odd offsets inside each region, and one that crosses from one region
    /// into the next, return the expected bytes.
    #[test]
    fn unaligned_reads() {
        let scratch = TempDir::new().unwrap();

        // Generate a large temporary file
        let (file, _temp_path, data) = random_backing_file(&scratch);
        let content_size = data.len();

        let mut image = NamedTempFile::new_in(&scratch).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .add_chunk(DataSource::Reader { reader: Box::new(file), size: content_size as u64 })
            .add_chunk(DataSource::Skip(16384))
            .add_chunk(DataSource::Fill(0x0102_0304u32, 1024))
            .add_chunk(DataSource::Skip(4096))
            .build(&mut image)
            .expect("Build sparse image failed");
        image.seek(SeekFrom::Start(0)).unwrap();

        let mut reader =
            SparseReader::new(Box::new(image)).expect("Failed to create SparseReader");

        // Logical offsets at which each region after the leading buffer begins.
        let file_start = 8192u64;
        let skip_start = file_start + content_size as u64;
        let fill_start = skip_start + 16384;
        let tail_start = fill_start + 4096;

        let mut buffer = [0u8; 4096];
        // Seeks to `position` and issues a single read into `dst`.
        let mut read_at = |position: u64, dst: &mut [u8]| {
            reader.seek(SeekFrom::Start(position)).expect("Failed to seek");
            let _ = reader.read(dst).expect("Failed to read");
        };

        // Do an unaligned read from each section

        // DataSource::Buffer
        read_at(10, &mut buffer[..20]);
        assert_eq!(&buffer[..20], &[0xffu8; 20]);

        // DataSource::Reader
        read_at(file_start + 4095, &mut buffer[..2]);
        assert_eq!(&buffer[..2], &data[4095..4097]);

        // DataSource::Skip
        read_at(skip_start + 4090, &mut buffer[..6]);
        assert_eq!(&buffer[..6], &[0u8; 6]);

        // DataSource::Fill
        read_at(fill_start + 3, &mut buffer[..9]);
        // Bear in mind the byte ordering is LE, so 0x01020304 == [0x04, 0x03, 0x02, 0x01]
        assert_eq!(&buffer[..9], &[0x01, 0x04, 0x03, 0x02, 0x01, 0x04, 0x03, 0x02, 0x01]);

        // DataSource::Skip
        read_at(tail_start + 1, &mut buffer[..4095]);
        assert_eq!(&buffer[..4095], &[0u8; 4095]);

        // Do an unaligned read spanning two sections (the last Fill and Skip)
        read_at(fill_start + 4090, &mut buffer[..9]);
        assert_eq!(&buffer[..9], &[0x02, 0x01, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00]);
    }
}