sparse/
reader.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::{deserialize_from, Chunk, SparseHeader};
6use anyhow::{ensure, Context, Result};
7use byteorder::{ByteOrder as _, LE};
8use std::io::{Read, Seek, SeekFrom};
9
/// SparseReader is an implementation of std::io::Read which transparently unpacks the underlying
/// sparse image as it is read.
/// If random access reads are not required, it is more performant to use `unsparse` to completely
/// unpack a sparse image.
///
/// It also implements std::io::Seek, where positions refer to the logical (unsparsed) image
/// rather than to the underlying sparse stream.
pub struct SparseReader<R> {
    // The underlying stream containing the sparse image.  It is repositioned whenever the
    // payload of a Raw chunk has to be read.
    reader: R,
    // Offset into the logical (unsparsed) image.
    offset: u64,
    // Size of the logical (unsparsed) image, i.e. the sum of the output sizes of all chunks.
    size: u64,
    // All chunks of the image, in the order they appear in the image.
    // The second field is the offset into `reader` at which the payload of the chunk appears, for
    // Raw chunks.  It is `None` for every other chunk type.
    chunks: Vec<(Chunk, Option<u64>)>,
}
24
25impl<R: Read + Seek> SparseReader<R> {
26    /// Attempts to create a SparseReader from the given image.  Returns failure if the image is
27    /// malformed.
28    pub fn new(mut reader: R) -> Result<Self> {
29        let header: SparseHeader =
30            deserialize_from(&mut reader).context("Failed to read header")?;
31        ensure!(header.valid(), "Invalid header");
32        let num_chunks = header.total_chunks as usize;
33
34        let mut chunks = vec![];
35        let mut offset = 0;
36        for _ in 0..num_chunks {
37            let chunk = Chunk::read_metadata(&mut reader, offset, header.blk_sz)?;
38            let data_offset = if chunk.chunk_type() == crate::format::CHUNK_TYPE_RAW {
39                let data_offset = reader.stream_position()?;
40                // Skip past the data payload
41                reader.seek(SeekFrom::Current(chunk.output_size() as i64))?;
42                Some(data_offset)
43            } else {
44                None
45            };
46            offset += chunk.output_size() as u64;
47            chunks.push((chunk, data_offset));
48        }
49
50        reader.seek(SeekFrom::Start(0)).context("Failed to rewind reader")?;
51        Ok(Self { reader, offset: 0, size: offset, chunks })
52    }
53
54    /// Returns the index of the current chunk in `self.chunks`.
55    fn current_chunk(&self) -> Option<usize> {
56        let mut off = 0;
57        let mut i = 0;
58        for (chunk, _) in &self.chunks {
59            let size = chunk.output_size() as u64;
60            if self.offset >= off && self.offset < off + size {
61                return Some(i);
62            }
63            off += size;
64            i += 1;
65        }
66        None
67    }
68
69    #[cfg(test)]
70    pub(crate) fn chunks(self) -> Vec<(Chunk, Option<u64>)> {
71        self.chunks
72    }
73}
74
75// It's assumed that `reader` already points at the right offset to read from the chunk, and `buf`
76// won't read past the end of the chunk.
77// `output_offset` is the logical position in the output stream.
78fn read_from_chunk<R: Read + Seek>(
79    reader: &mut R,
80    chunk: &Chunk,
81    output_offset: u64,
82    buf: &mut [u8],
83) -> std::io::Result<usize> {
84    match chunk {
85        Chunk::Raw { .. } => reader.read(buf),
86        Chunk::Fill { value, .. } => {
87            let mut value_bytes = value.to_le_bytes();
88            value_bytes.rotate_left(output_offset as usize % std::mem::size_of::<u32>());
89            let value_rotated = LE::read_u32(&value_bytes);
90            // Safety: `std::slice::align_to_mut` requires that everything in the dst slice is a
91            // valid type, which is true when going from [u8; 4] to [u32; 1].
92            let (prefix, wholes, suffix) = unsafe { buf.align_to_mut::<u32>() };
93            prefix.copy_from_slice(&value_bytes[value_bytes.len() - prefix.len()..]);
94            wholes.fill(value_rotated);
95            suffix.copy_from_slice(&value_bytes[..suffix.len()]);
96            Ok(buf.len())
97        }
98        Chunk::DontCare { .. } => {
99            buf.fill(0);
100            Ok(buf.len())
101        }
102        _ => unreachable!(),
103    }
104}
105
106impl<R: Read + Seek> Read for SparseReader<R> {
107    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
108        let mut bytes_read = 0;
109        while bytes_read < buf.len() {
110            let current_chunk_idx = match self.current_chunk() {
111                Some(i) => i,
112                None => return Ok(bytes_read),
113            };
114            let (current_chunk, chunk_start_offset) = &self.chunks[current_chunk_idx];
115            let offset_in_chunk = self.offset - current_chunk.output_offset().unwrap();
116            debug_assert!(offset_in_chunk < current_chunk.output_size() as u64);
117            let to_read = std::cmp::min(
118                buf.len() - bytes_read,
119                current_chunk.output_size() as usize - offset_in_chunk as usize,
120            );
121            if let Some(offset) = chunk_start_offset {
122                self.reader.seek(SeekFrom::Start(*offset + offset_in_chunk))?;
123            }
124            let bytes_read_from_chunk = read_from_chunk(
125                &mut self.reader,
126                current_chunk,
127                self.offset,
128                &mut buf[bytes_read..bytes_read + to_read],
129            )?;
130            bytes_read += bytes_read_from_chunk;
131            self.offset += bytes_read_from_chunk as u64;
132        }
133        Ok(bytes_read)
134    }
135}
136
137impl<R: Read + Seek> Seek for SparseReader<R> {
138    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
139        self.offset = match pos {
140            SeekFrom::Start(pos) => pos,
141            SeekFrom::Current(delta) => self
142                .offset
143                .checked_add_signed(delta)
144                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
145            SeekFrom::End(delta) => self
146                .size
147                .checked_add_signed(delta)
148                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
149        };
150        Ok(self.offset)
151    }
152}
153
#[cfg(test)]
mod test {
    use crate::builder::{DataSource, SparseImageBuilder};
    use crate::reader::SparseReader;
    use rand::rngs::SmallRng;
    use rand::{RngCore, SeedableRng};
    use std::io::{Read as _, Seek as _, SeekFrom, Write as _};
    use tempfile::{NamedTempFile, TempDir};

    /// An image built without any chunks unsparses to zero bytes.
    #[test]
    fn empty_reader() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new().build(&mut sparse_file).expect("Build sparse image failed");
        sparse_file.seek(SeekFrom::Start(0)).unwrap();

        let mut reader =
            SparseReader::new(Box::new(sparse_file)).expect("Failed to create SparseReader");

        let mut unsparsed_bytes = vec![];
        reader.read_to_end(&mut unsparsed_bytes).expect("Failed to read unsparsed image");
        assert_eq!(unsparsed_bytes.len(), 0);
    }

    /// Exercises every `SeekFrom` variant, including seeking to and past the end of the image.
    #[test]
    fn seek() {
        let tmpdir = TempDir::new().unwrap();

        // Each byte holds its own offset modulo 256, so a read reveals where it came from.
        let data = {
            let mut data = Box::new([0u8; 8192]);
            let mut i: u8 = 0;
            for d in data.as_mut() {
                *d = i;
                i = i.wrapping_add(1);
            }
            data
        };

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(data))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        sparse_file.seek(SeekFrom::Start(0)).unwrap();
        let mut reader =
            SparseReader::new(Box::new(sparse_file)).expect("Failed to create SparseReader");

        let mut buf = [0u8; 1];
        assert_eq!(0, reader.seek(SeekFrom::Start(0)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 0u8);

        assert_eq!(100, reader.seek(SeekFrom::Start(100)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 100u8);

        // Each one-byte read advances the offset by one, hence 101 - 2 == 99.
        assert_eq!(99, reader.seek(SeekFrom::Current(-2)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 99u8);

        assert_eq!(100, reader.seek(SeekFrom::Current(0)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 100u8);

        assert_eq!(102, reader.seek(SeekFrom::Current(1)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 102u8);

        assert_eq!(8191, reader.seek(SeekFrom::End(-1)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 255u8);

        assert_eq!(8192, reader.seek(SeekFrom::End(0)).unwrap());
        assert_eq!(0, reader.read(&mut buf).unwrap());

        // Seeking past the end succeeds; reading from there yields nothing.
        assert_eq!(8193, reader.seek(SeekFrom::End(1)).unwrap());
        assert_eq!(0, reader.read(&mut buf).unwrap());
    }

    /// A read that straddles the end of the image is truncated; a read at the end returns 0.
    #[test]
    fn read_past_eof() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        sparse_file.seek(SeekFrom::Start(0)).unwrap();

        let mut reader =
            SparseReader::new(Box::new(sparse_file)).expect("Failed to create SparseReader");

        let mut buf = [0u8; 2];

        reader.seek(SeekFrom::Start(8191)).expect("Seek failed");
        assert_eq!(reader.read(&mut buf).expect("Failed to read"), 1);

        reader.seek(SeekFrom::Start(8192)).expect("Seek failed");
        assert_eq!(reader.read(&mut buf).expect("Failed to read"), 0);
    }

    /// Reads an image made of every kind of data source from start to finish and checks each
    /// section of the unsparsed output.
    #[test]
    fn full_read() {
        let tmpdir = TempDir::new().unwrap();

        // Generate a large temporary file
        let (mut file, _temp_path) = NamedTempFile::new_in(&tmpdir).unwrap().into_parts();
        let mut rng = SmallRng::from_entropy();
        let mut data = vec![0u8; 100 * 4096];
        rng.fill_bytes(&mut data);
        file.write_all(&data).unwrap();
        file.flush().unwrap();
        file.seek(SeekFrom::Start(0)).unwrap();
        let content_size = data.len();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .add_chunk(DataSource::Reader { reader: Box::new(file), size: content_size as u64 })
            .add_chunk(DataSource::Skip(16384))
            .add_chunk(DataSource::Fill(0xaaaa_aaaau32, 1024))
            .add_chunk(DataSource::Skip(4096))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        sparse_file.seek(SeekFrom::Start(0)).unwrap();

        let mut reader =
            SparseReader::new(Box::new(sparse_file)).expect("Failed to create SparseReader");

        let mut unsparsed_bytes = vec![];
        reader.read_to_end(&mut unsparsed_bytes).expect("Failed to read unsparsed image");
        assert_eq!(unsparsed_bytes.len(), 8192 + content_size + 16384 + 4096 + 4096);
        assert_eq!(&unsparsed_bytes[..8192], &[0xffu8; 8192]);
        assert_eq!(&unsparsed_bytes[8192..8192 + content_size], &data[..]);
        assert_eq!(
            &unsparsed_bytes[8192 + content_size..8192 + content_size + 16384],
            &[0u8; 16384]
        );
        assert_eq!(
            &unsparsed_bytes[8192 + content_size + 16384..8192 + content_size + 16384 + 4096],
            &[0xaau8; 4096]
        );
        assert_eq!(&unsparsed_bytes[8192 + content_size + 16384 + 4096..], &[0u8; 4096]);
    }

    /// Reads at offsets that are not block aligned from every section of the image, and across
    /// a section boundary.
    ///
    /// `read_exact` is used throughout so that a short read fails the test instead of leaving
    /// stale bytes from an earlier read in the shared buffer.
    #[test]
    fn unaligned_reads() {
        let tmpdir = TempDir::new().unwrap();

        // Generate a large temporary file
        let (mut file, _temp_path) = NamedTempFile::new_in(&tmpdir).unwrap().into_parts();
        let mut rng = SmallRng::from_entropy();
        let mut data = vec![0u8; 100 * 4096];
        rng.fill_bytes(&mut data);
        file.write_all(&data).unwrap();
        file.flush().unwrap();
        file.seek(SeekFrom::Start(0)).unwrap();
        let content_size = data.len();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .add_chunk(DataSource::Reader { reader: Box::new(file), size: content_size as u64 })
            .add_chunk(DataSource::Skip(16384))
            .add_chunk(DataSource::Fill(0x0102_0304u32, 1024))
            .add_chunk(DataSource::Skip(4096))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        sparse_file.seek(SeekFrom::Start(0)).unwrap();

        let mut reader =
            SparseReader::new(Box::new(sparse_file)).expect("Failed to create SparseReader");

        let mut buffer = [0u8; 4096];

        // Do an unaligned read from each section

        // DataSource::Buffer
        reader.seek(SeekFrom::Start(10)).expect("Failed to seek");
        reader.read_exact(&mut buffer[..20]).expect("Failed to read");
        assert_eq!(&buffer[..20], &[0xffu8; 20]);

        // DataSource::Reader
        reader.seek(SeekFrom::Start(8192 + 4095)).expect("Failed to seek");
        reader.read_exact(&mut buffer[..2]).expect("Failed to read");
        assert_eq!(&buffer[..2], &data[4095..4097]);

        // DataSource::Skip
        reader.seek(SeekFrom::Start(8192 + content_size as u64 + 4090)).expect("Failed to seek");
        reader.read_exact(&mut buffer[..6]).expect("Failed to read");
        assert_eq!(&buffer[..6], &[0u8; 6]);

        // DataSource::Fill
        reader
            .seek(SeekFrom::Start(8192 + content_size as u64 + 16384 + 3))
            .expect("Failed to seek");
        reader.read_exact(&mut buffer[..9]).expect("Failed to read");
        // Bear in mind the byte ordering is LE, so 0x01020304 == [0x04, 0x03, 0x02, 0x01]
        assert_eq!(&buffer[..9], &[0x01, 0x04, 0x03, 0x02, 0x01, 0x04, 0x03, 0x02, 0x01]);

        // DataSource::Skip
        reader
            .seek(SeekFrom::Start(8192 + content_size as u64 + 16384 + 4096 + 1))
            .expect("Failed to seek");
        reader.read_exact(&mut buffer[..4095]).expect("Failed to read");
        assert_eq!(&buffer[..4095], &[0u8; 4095]);

        // Do an unaligned read spanning two sections (the last Fill and Skip)
        reader
            .seek(SeekFrom::Start(8192 + content_size as u64 + 16384 + 4090))
            .expect("Failed to seek");
        reader.read_exact(&mut buffer[..9]).expect("Failed to read");
        assert_eq!(&buffer[..9], &[0x02, 0x01, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00]);
    }
}