Skip to main content

sparse/
reader.rs

// Copyright 2023 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5use crate::{Chunk, SparseDataType, SparseError, SparseHeader, deserialize_from};
6
7use byteorder::{ByteOrder as _, LE};
8use std::io::{Read, Seek, SeekFrom};
9
10/// SparseReader is an implementation of std::io::Read which transparently unpacks the underlying
11/// sparse image as it is read.
12/// If random access reads are not required, it is more performant to use `unsparse` to completely
13/// unpack a sparse image.
14pub struct SparseReader<R> {
15    reader: R,
16    // Offset into the logical (unsparsed) image.
17    offset: u64,
18    // Size of the logical (unsparsed) image.
19    size: u64,
20    // The second field is the offset into `reader` at which the payload of the chunk appears, for
21    // Raw chunks.
22    chunks: Vec<(Chunk, Option<u64>)>,
23    // The block size of each chunk.
24    block_size: u32,
25}
26
27impl<R: Read + Seek> SparseReader<R> {
28    /// Attempts to create a SparseReader from the given image.  Returns failure if the image is
29    /// malformed.
30    pub fn new(mut reader: R) -> std::result::Result<Self, SparseError> {
31        let header: SparseHeader = deserialize_from(&mut reader)
32            .map_err(|e| SparseError::Deserialize { ty: SparseDataType::Header, source: e })?;
33        if !header.valid() {
34            return Err(SparseError::InvalidHeader);
35        }
36        let num_chunks = header.total_chunks as usize;
37
38        let mut chunks = vec![];
39        let mut offset = 0;
40        for _ in 0..num_chunks {
41            let chunk = Chunk::read_metadata(&mut reader, offset, header.blk_sz)?;
42            let data_offset = if chunk.chunk_type() == crate::format::CHUNK_TYPE_RAW {
43                let data_offset = reader.stream_position()?;
44                // Skip past the data payload
45                reader.seek(SeekFrom::Current(chunk.output_size() as i64))?;
46                Some(data_offset)
47            } else {
48                None
49            };
50            offset += chunk.output_size() as u64;
51            chunks.push((chunk, data_offset));
52        }
53
54        reader.seek(SeekFrom::Start(0)).map_err(|e| SparseError::Io(e))?;
55        Ok(Self { reader, offset: 0, size: offset, chunks, block_size: header.blk_sz })
56    }
57
58    /// Returns the index of the current chunk in `self.chunks`.
59    fn current_chunk(&self) -> Option<usize> {
60        let mut off = 0;
61        let mut i = 0;
62        for (chunk, _) in &self.chunks {
63            let size = chunk.output_size() as u64;
64            if self.offset >= off && self.offset < off + size {
65                return Some(i);
66            }
67            off += size;
68            i += 1;
69        }
70        None
71    }
72
73    pub fn is_sparse_file(reader: &mut R) -> std::result::Result<bool, SparseError> {
74        let header: SparseHeader = deserialize_from(reader)
75            .map_err(|e| SparseError::Deserialize { ty: SparseDataType::Header, source: e })?;
76        let res = header.valid();
77        reader.seek(SeekFrom::Start(0)).map_err(|e| SparseError::Io(e))?;
78        Ok(res)
79    }
80
81    pub fn chunks(&self) -> &Vec<(Chunk, Option<u64>)> {
82        &self.chunks
83    }
84
85    pub fn unsparsed_size(&self) -> u64 {
86        self.size
87    }
88
89    pub fn block_size(&self) -> u32 {
90        self.block_size
91    }
92}
93
94// It's assumed that `reader` already points at the right offset to read from the chunk, and `buf`
95// won't read past the end of the chunk.
96// `output_offset` is the logical position in the output stream.
97fn read_from_chunk<R: Read + Seek>(
98    reader: &mut R,
99    chunk: &Chunk,
100    output_offset: u64,
101    buf: &mut [u8],
102) -> std::io::Result<usize> {
103    match chunk {
104        Chunk::Raw { .. } => reader.read(buf),
105        Chunk::Fill { value, .. } => {
106            let mut value_bytes = value.to_le_bytes();
107            value_bytes.rotate_left(output_offset as usize % std::mem::size_of::<u32>());
108            let value_rotated = LE::read_u32(&value_bytes);
109            // Safety: `std::slice::align_to_mut` requires that everything in the dst slice is a
110            // valid type, which is true when going from [u8; 4] to [u32; 1].
111            let (prefix, wholes, suffix) = unsafe { buf.align_to_mut::<u32>() };
112            prefix.copy_from_slice(&value_bytes[value_bytes.len() - prefix.len()..]);
113            wholes.fill(value_rotated);
114            suffix.copy_from_slice(&value_bytes[..suffix.len()]);
115            Ok(buf.len())
116        }
117        Chunk::DontCare { .. } => {
118            buf.fill(0);
119            Ok(buf.len())
120        }
121        _ => unreachable!(),
122    }
123}
124
125impl<R: Read + Seek> Read for SparseReader<R> {
126    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
127        let mut bytes_read = 0;
128        while bytes_read < buf.len() {
129            let current_chunk_idx = match self.current_chunk() {
130                Some(i) => i,
131                None => return Ok(bytes_read),
132            };
133            let (current_chunk, chunk_start_offset) = &self.chunks[current_chunk_idx];
134            let offset_in_chunk = self.offset - current_chunk.output_offset().unwrap();
135            debug_assert!(offset_in_chunk < current_chunk.output_size() as u64);
136            let to_read = std::cmp::min(
137                buf.len() - bytes_read,
138                current_chunk.output_size() as usize - offset_in_chunk as usize,
139            );
140            if let Some(offset) = chunk_start_offset {
141                self.reader.seek(SeekFrom::Start(*offset + offset_in_chunk))?;
142            }
143            let bytes_read_from_chunk = read_from_chunk(
144                &mut self.reader,
145                current_chunk,
146                self.offset,
147                &mut buf[bytes_read..bytes_read + to_read],
148            )?;
149            bytes_read += bytes_read_from_chunk;
150            self.offset += bytes_read_from_chunk as u64;
151        }
152        Ok(bytes_read)
153    }
154}
155
156impl<R: Read + Seek> Seek for SparseReader<R> {
157    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
158        self.offset = match pos {
159            SeekFrom::Start(pos) => pos,
160            SeekFrom::Current(delta) => self
161                .offset
162                .checked_add_signed(delta)
163                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
164            SeekFrom::End(delta) => self
165                .size
166                .checked_add_signed(delta)
167                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
168        };
169        Ok(self.offset)
170    }
171}
172
#[cfg(test)]
mod test {
    use crate::builder::{DataSource, SparseImageBuilder};
    use crate::reader::SparseReader;
    use rand::rngs::SmallRng;
    use rand::{RngCore, SeedableRng};
    use std::io::{Read as _, Seek as _, SeekFrom, Write as _};
    use tempfile::{NamedTempFile, TempDir};

    /// Creates an anonymous scratch file inside `dir`.
    fn scratch_file(dir: &TempDir) -> std::fs::File {
        NamedTempFile::new_in(dir).unwrap().into_file()
    }

    /// Returns 8192 bytes in which every byte equals its own index modulo 256.
    fn counting_bytes() -> Box<[u8; 8192]> {
        let mut bytes = Box::new([0u8; 8192]);
        for (index, byte) in bytes.iter_mut().enumerate() {
            *byte = index as u8;
        }
        bytes
    }

    /// Returns `len` random bytes.
    fn random_bytes(len: usize) -> Vec<u8> {
        let mut bytes = vec![0u8; len];
        SmallRng::from_os_rng().fill_bytes(&mut bytes);
        bytes
    }

    /// Rewinds a freshly built sparse image and wraps it in a `SparseReader`.
    fn open_reader(mut image: std::fs::File) -> SparseReader<Box<std::fs::File>> {
        image.seek(SeekFrom::Start(0)).unwrap();
        SparseReader::new(Box::new(image)).expect("Failed to create SparseReader")
    }

    // An image without any data sources unsparses to nothing.
    #[test]
    fn empty_reader() {
        let tmpdir = TempDir::new().unwrap();

        let mut image = scratch_file(&tmpdir);
        SparseImageBuilder::new().build(&mut image).expect("Build sparse image failed");
        let mut reader = open_reader(image);

        let mut unsparsed = vec![];
        reader.read_to_end(&mut unsparsed).expect("Failed to read unsparsed image");
        assert_eq!(unsparsed.len(), 0);
    }

    // A built image is recognised as sparse; a file of arbitrary bytes is not.
    #[test]
    fn is_sparse_file() {
        let tmpdir = TempDir::new().unwrap();

        let mut image = scratch_file(&tmpdir);
        SparseImageBuilder::new()
            .add_source(DataSource::Buffer(counting_bytes()))
            .build(&mut image)
            .expect("Build sparse image failed");
        image.seek(SeekFrom::Start(0)).unwrap();

        assert!(SparseReader::is_sparse_file(&mut image).expect("Should be a sparse file"));

        let mut garbage = scratch_file(&tmpdir);
        garbage.write_all(&vec![1; 4096]).expect("Writing garbage file");
        garbage.seek(SeekFrom::Start(0)).unwrap();

        assert!(!SparseReader::is_sparse_file(&mut garbage).unwrap());
    }

    // Every `SeekFrom` flavour lands on the expected logical offset; the byte read afterwards
    // identifies the position because the payload counts upwards.
    #[test]
    fn seek() {
        let tmpdir = TempDir::new().unwrap();

        let mut image = scratch_file(&tmpdir);
        SparseImageBuilder::new()
            .add_source(DataSource::Buffer(counting_bytes()))
            .build(&mut image)
            .expect("Build sparse image failed");
        let mut reader = open_reader(image);

        let mut byte = [0u8; 1];
        assert_eq!(0, reader.seek(SeekFrom::Start(0)).unwrap());
        assert_eq!(1, reader.read(&mut byte).unwrap());
        assert_eq!(byte[0], 0u8);

        assert_eq!(100, reader.seek(SeekFrom::Start(100)).unwrap());
        assert_eq!(1, reader.read(&mut byte).unwrap());
        assert_eq!(byte[0], 100u8);

        // Each one-byte read above moved the position forward by one.
        assert_eq!(99, reader.seek(SeekFrom::Current(-2)).unwrap());
        assert_eq!(1, reader.read(&mut byte).unwrap());
        assert_eq!(byte[0], 99u8);

        assert_eq!(100, reader.seek(SeekFrom::Current(0)).unwrap());
        assert_eq!(1, reader.read(&mut byte).unwrap());
        assert_eq!(byte[0], 100u8);

        assert_eq!(102, reader.seek(SeekFrom::Current(1)).unwrap());
        assert_eq!(1, reader.read(&mut byte).unwrap());
        assert_eq!(byte[0], 102u8);

        assert_eq!(8191, reader.seek(SeekFrom::End(-1)).unwrap());
        assert_eq!(1, reader.read(&mut byte).unwrap());
        assert_eq!(byte[0], 255u8);

        // Seeking to or past the end succeeds, but there is nothing left to read.
        assert_eq!(8192, reader.seek(SeekFrom::End(0)).unwrap());
        assert_eq!(0, reader.read(&mut byte).unwrap());

        assert_eq!(8193, reader.seek(SeekFrom::End(1)).unwrap());
        assert_eq!(0, reader.read(&mut byte).unwrap());
    }

    // A read that straddles the end is cut short; a read at the end yields nothing.
    #[test]
    fn read_past_eof() {
        let tmpdir = TempDir::new().unwrap();

        let mut image = scratch_file(&tmpdir);
        SparseImageBuilder::new()
            .add_source(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .build(&mut image)
            .expect("Build sparse image failed");
        let mut reader = open_reader(image);

        let mut pair = [0u8; 2];

        reader.seek(SeekFrom::Start(8191)).expect("Seek failed");
        assert_eq!(reader.read(&mut pair).expect("Failed to read"), 1);

        reader.seek(SeekFrom::Start(8192)).expect("Seek failed");
        assert_eq!(reader.read(&mut pair).expect("Failed to read"), 0);
    }

    // Reading a five-section image end to end reproduces every section.
    #[test]
    fn full_read() {
        let tmpdir = TempDir::new().unwrap();

        // Generate a large temporary file
        let (mut file, _temp_path) = NamedTempFile::new_in(&tmpdir).unwrap().into_parts();
        let data = random_bytes(100 * 4096);
        file.write_all(&data).unwrap();
        file.flush().unwrap();
        file.seek(SeekFrom::Start(0)).unwrap();
        let content_size = data.len();

        let mut image = scratch_file(&tmpdir);
        SparseImageBuilder::new()
            .add_source(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .add_source(DataSource::Reader { reader: Box::new(file), size: content_size as u64 })
            .add_source(DataSource::Skip(16384))
            .add_source(DataSource::Fill(0xaaaa_aaaau32, 1024))
            .add_source(DataSource::Skip(4096))
            .build(&mut image)
            .expect("Build sparse image failed");
        let mut reader = open_reader(image);

        let mut unsparsed = vec![];
        reader.read_to_end(&mut unsparsed).expect("Failed to read unsparsed image");

        // End offsets of the file, first skip and fill sections; the buffer ends at 8192.
        let file_end = 8192 + content_size;
        let skip_end = file_end + 16384;
        let fill_end = skip_end + 4096;

        assert_eq!(unsparsed.len(), fill_end + 4096);
        assert_eq!(&unsparsed[..8192], &[0xffu8; 8192]);
        assert_eq!(&unsparsed[8192..file_end], &data[..]);
        assert_eq!(&unsparsed[file_end..skip_end], &[0u8; 16384]);
        assert_eq!(&unsparsed[skip_end..fill_end], &[0xaau8; 4096]);
        assert_eq!(&unsparsed[fill_end..], &[0u8; 4096]);
    }

    // Reads that start at odd offsets, inside each kind of section and across a section
    // boundary, return the right bytes.
    #[test]
    fn unaligned_reads() {
        let tmpdir = TempDir::new().unwrap();

        // Generate a large temporary file
        let (mut file, _temp_path) = NamedTempFile::new_in(&tmpdir).unwrap().into_parts();
        let data = random_bytes(100 * 4096);
        file.write_all(&data).unwrap();
        file.flush().unwrap();
        file.seek(SeekFrom::Start(0)).unwrap();
        let content_size = data.len();

        let mut image = scratch_file(&tmpdir);
        SparseImageBuilder::new()
            .add_source(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .add_source(DataSource::Reader { reader: Box::new(file), size: content_size as u64 })
            .add_source(DataSource::Skip(16384))
            .add_source(DataSource::Fill(0x0102_0304u32, 1024))
            .add_source(DataSource::Skip(4096))
            .build(&mut image)
            .expect("Build sparse image failed");
        let mut reader = open_reader(image);

        // Logical start offsets of the sections that follow the 8192-byte buffer.
        let file_start = 8192u64;
        let skip_start = file_start + content_size as u64;
        let fill_start = skip_start + 16384;
        let last_skip_start = fill_start + 4096;

        let mut buffer = [0u8; 4096];

        // Do an unaligned read from each section

        // DataSource::Buffer
        reader.seek(SeekFrom::Start(10)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..20]).expect("Failed to read");
        assert_eq!(&buffer[..20], &[0xffu8; 20]);

        // DataSource::File
        reader.seek(SeekFrom::Start(file_start + 4095)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..2]).expect("Failed to read");
        assert_eq!(&buffer[..2], &data[4095..4097]);

        // DataSource::Skip
        reader.seek(SeekFrom::Start(skip_start + 4090)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..6]).expect("Failed to read");
        assert_eq!(&buffer[..6], &[0u8; 6]);

        // DataSource::Fill
        reader.seek(SeekFrom::Start(fill_start + 3)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..9]).expect("Failed to read");
        // Bear in mind the byte ordering is LE, so 0x01020304 == [0x04, 0x03, 0x02, 0x01]
        assert_eq!(&buffer[..9], &[0x01, 0x04, 0x03, 0x02, 0x01, 0x04, 0x03, 0x02, 0x01]);

        // DataSource::Skip
        reader.seek(SeekFrom::Start(last_skip_start + 1)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..4095]).expect("Failed to read");
        assert_eq!(&buffer[..4095], &[0u8; 4095]);

        // Do an unaligned read spanning two sections (the last Fill and Skip)
        reader.seek(SeekFrom::Start(fill_start + 4090)).expect("Failed to seek");
        let _ = reader.read(&mut buffer[..9]).expect("Failed to read");
        assert_eq!(&buffer[..9], &[0x02, 0x01, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00]);
    }
}