sparse/
reader.rs

1// Copyright 2023 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5use crate::{deserialize_from, Chunk, Reader, SparseHeader};
6use anyhow::{ensure, Context, Result};
7use byteorder::{ByteOrder as _, LE};
8use std::io::{Read, Seek, SeekFrom};
9
10/// SparseReader is an implementation of std::io::Read which transparently unpacks the underlying
11/// sparse image as it is read.
12/// If random access reads are not required, it is more performant to use `unsparse` to completely
13/// unpack a sparse image.
14pub struct SparseReader {
15    reader: Box<dyn Reader + Send + Sync>,
16    // Offset into the logical (unsparsed) image.
17    offset: u64,
18    // Size of the logical (unsparsed) image.
19    size: u64,
20    // The second field is the offset into `reader` at which the payload of the chunk appears, for
21    // Raw chunks.
22    chunks: Vec<(Chunk, Option<u64>)>,
23}
24
25impl SparseReader {
26    /// Attempts to create a SparseReader from the given image.  Returns failure if the image is
27    /// malformed.
28    pub fn new(mut reader: Box<dyn Reader + Send + Sync>) -> Result<Self> {
29        let header: SparseHeader =
30            deserialize_from(&mut reader).context("Failed to read header")?;
31        ensure!(header.valid(), "Invalid header");
32        let num_chunks = header.total_chunks as usize;
33
34        let mut chunks = vec![];
35        let mut offset = 0;
36        for _ in 0..num_chunks {
37            let chunk = Chunk::read_metadata(&mut reader, offset, header.blk_sz)?;
38            let data_offset = if chunk.chunk_type() == crate::format::CHUNK_TYPE_RAW {
39                let data_offset = reader.stream_position()?;
40                // Skip past the data payload
41                reader.seek(SeekFrom::Current(chunk.output_size() as i64))?;
42                Some(data_offset)
43            } else {
44                None
45            };
46            offset += chunk.output_size() as u64;
47            chunks.push((chunk, data_offset));
48        }
49
50        reader.seek(SeekFrom::Start(0)).context("Failed to rewind reader")?;
51        Ok(Self { reader, offset: 0, size: offset, chunks })
52    }
53
54    /// Returns the index of the current chunk in `self.chunks`.
55    fn current_chunk(&self) -> Option<usize> {
56        let mut off = 0;
57        let mut i = 0;
58        for (chunk, _) in &self.chunks {
59            let size = chunk.output_size() as u64;
60            if self.offset >= off && self.offset < off + size {
61                return Some(i);
62            }
63            off += size;
64            i += 1;
65        }
66        None
67    }
68}
69
70// It's assumed that `reader` already points at the right offset to read from the chunk, and `buf`
71// won't read past the end of the chunk.
72// `output_offset` is the logical position in the output stream.
73fn read_from_chunk<R: Reader>(
74    reader: &mut R,
75    chunk: &Chunk,
76    output_offset: u64,
77    buf: &mut [u8],
78) -> std::io::Result<usize> {
79    match chunk {
80        Chunk::Raw { .. } => reader.read(buf),
81        Chunk::Fill { value, .. } => {
82            let mut value_bytes = value.to_le_bytes();
83            value_bytes.rotate_left(output_offset as usize % std::mem::size_of::<u32>());
84            let value_rotated = LE::read_u32(&value_bytes);
85            // Safety: `std::slice::align_to_mut` requires that everything in the dst slice is a
86            // valid type, which is true when going from [u8; 4] to [u32; 1].
87            let (prefix, wholes, suffix) = unsafe { buf.align_to_mut::<u32>() };
88            prefix.copy_from_slice(&value_bytes[value_bytes.len() - prefix.len()..]);
89            wholes.fill(value_rotated);
90            suffix.copy_from_slice(&value_bytes[..suffix.len()]);
91            Ok(buf.len())
92        }
93        Chunk::DontCare { .. } => {
94            buf.fill(0);
95            Ok(buf.len())
96        }
97        _ => unreachable!(),
98    }
99}
100
101impl Read for SparseReader {
102    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
103        let mut bytes_read = 0;
104        while bytes_read < buf.len() {
105            let current_chunk_idx = match self.current_chunk() {
106                Some(i) => i,
107                None => return Ok(bytes_read),
108            };
109            let (current_chunk, chunk_start_offset) = &self.chunks[current_chunk_idx];
110            let offset_in_chunk = self.offset - current_chunk.output_offset().unwrap();
111            debug_assert!(offset_in_chunk < current_chunk.output_size() as u64);
112            let to_read = std::cmp::min(
113                buf.len() - bytes_read,
114                current_chunk.output_size() - offset_in_chunk as usize,
115            );
116            if let Some(offset) = chunk_start_offset {
117                self.reader.seek(SeekFrom::Start(*offset + offset_in_chunk))?;
118            }
119            let bytes_read_from_chunk = read_from_chunk(
120                &mut self.reader,
121                current_chunk,
122                self.offset,
123                &mut buf[bytes_read..bytes_read + to_read],
124            )?;
125            bytes_read += bytes_read_from_chunk;
126            self.offset += bytes_read_from_chunk as u64;
127        }
128        Ok(bytes_read)
129    }
130}
131
132impl Seek for SparseReader {
133    fn seek(&mut self, pos: SeekFrom) -> std::io::Result<u64> {
134        self.offset = match pos {
135            SeekFrom::Start(pos) => pos,
136            SeekFrom::Current(delta) => self
137                .offset
138                .checked_add_signed(delta)
139                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
140            SeekFrom::End(delta) => self
141                .size
142                .checked_add_signed(delta)
143                .ok_or_else(|| std::io::Error::from(std::io::ErrorKind::InvalidInput))?,
144        };
145        Ok(self.offset)
146    }
147}
148
#[cfg(test)]
mod test {
    use crate::builder::{DataSource, SparseImageBuilder};
    use crate::reader::SparseReader;
    use rand::rngs::SmallRng;
    use rand::{RngCore, SeedableRng};
    use std::fs::File;
    use std::io::{Read as _, Seek as _, SeekFrom, Write as _};
    use tempfile::{NamedTempFile, TempDir};

    /// Fixed seed so that a failing run can be reproduced exactly.
    const RNG_SEED: u64 = 0x5eed;

    /// Size of the randomly generated payload used by the multi-chunk image.
    const RANDOM_DATA_LEN: usize = 100 * 4096;

    /// Rewinds a freshly built sparse image and wraps it in a `SparseReader`.
    fn open_reader(mut sparse_file: File) -> SparseReader {
        sparse_file.seek(SeekFrom::Start(0)).unwrap();
        SparseReader::new(Box::new(sparse_file)).expect("Failed to create SparseReader")
    }

    /// Builds the five-chunk image shared by the read tests and opens it:
    ///   1. 8192 bytes of 0xff from a buffer,
    ///   2. `RANDOM_DATA_LEN` random bytes from a file,
    ///   3. a 16384 byte skip,
    ///   4. 1024 repetitions of `fill_value`,
    ///   5. a 4096 byte skip.
    ///
    /// Returns the reader along with the random bytes of the second chunk.
    fn mixed_image_reader(tmpdir: &TempDir, fill_value: u32) -> (SparseReader, Vec<u8>) {
        // Generate a large temporary file
        let (mut file, _temp_path) = NamedTempFile::new_in(tmpdir).unwrap().into_parts();
        let mut rng = SmallRng::seed_from_u64(RNG_SEED);
        let mut data = vec![0u8; RANDOM_DATA_LEN];
        rng.fill_bytes(&mut data);
        file.write_all(&data).unwrap();
        file.flush().unwrap();
        file.seek(SeekFrom::Start(0)).unwrap();

        let mut sparse_file = NamedTempFile::new_in(tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .add_chunk(DataSource::Reader(Box::new(file)))
            .add_chunk(DataSource::Skip(16384))
            .add_chunk(DataSource::Fill(fill_value, 1024))
            .add_chunk(DataSource::Skip(4096))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");

        (open_reader(sparse_file), data)
    }

    #[test]
    fn empty_reader() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new().build(&mut sparse_file).expect("Build sparse image failed");
        let mut reader = open_reader(sparse_file);

        let mut unsparsed_bytes = vec![];
        reader.read_to_end(&mut unsparsed_bytes).expect("Failed to read unsparsed image");
        assert_eq!(unsparsed_bytes.len(), 0);
    }

    #[test]
    fn seek() {
        let tmpdir = TempDir::new().unwrap();

        // Byte `n` of the image holds `n % 256`, so a read identifies the position it came from.
        let data = {
            let mut data = Box::new([0u8; 8192]);
            let mut i: u8 = 0;
            for d in data.as_mut() {
                *d = i;
                i = i.wrapping_add(1);
            }
            data
        };

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(data))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        let mut reader = open_reader(sparse_file);

        let mut buf = [0u8; 1];
        assert_eq!(0, reader.seek(SeekFrom::Start(0)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 0u8);

        assert_eq!(100, reader.seek(SeekFrom::Start(100)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 100u8);

        // Each one-byte read advances the position by one, hence 101 - 2.
        assert_eq!(99, reader.seek(SeekFrom::Current(-2)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 99u8);

        assert_eq!(100, reader.seek(SeekFrom::Current(0)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 100u8);

        assert_eq!(102, reader.seek(SeekFrom::Current(1)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 102u8);

        assert_eq!(8191, reader.seek(SeekFrom::End(-1)).unwrap());
        assert_eq!(1, reader.read(&mut buf).unwrap());
        assert_eq!(buf[0], 255u8);

        assert_eq!(8192, reader.seek(SeekFrom::End(0)).unwrap());
        assert_eq!(0, reader.read(&mut buf).unwrap());

        // Seeking past the end is allowed; reads there simply return nothing.
        assert_eq!(8193, reader.seek(SeekFrom::End(1)).unwrap());
        assert_eq!(0, reader.read(&mut buf).unwrap());
    }

    #[test]
    fn read_past_eof() {
        let tmpdir = TempDir::new().unwrap();

        let mut sparse_file = NamedTempFile::new_in(&tmpdir).unwrap().into_file();
        SparseImageBuilder::new()
            .add_chunk(DataSource::Buffer(Box::new([0xffu8; 8192])))
            .build(&mut sparse_file)
            .expect("Build sparse image failed");
        let mut reader = open_reader(sparse_file);

        let mut buf = [0u8; 2];

        // Only one byte remains, so a two byte read comes back short.
        reader.seek(SeekFrom::Start(8191)).expect("Seek failed");
        assert_eq!(reader.read(&mut buf).expect("Failed to read"), 1);

        reader.seek(SeekFrom::Start(8192)).expect("Seek failed");
        assert_eq!(reader.read(&mut buf).expect("Failed to read"), 0);
    }

    #[test]
    fn full_read() {
        let tmpdir = TempDir::new().unwrap();
        let (mut reader, data) = mixed_image_reader(&tmpdir, 0xaaaa_aaaau32);
        let content_size = data.len();

        let mut unsparsed_bytes = vec![];
        reader.read_to_end(&mut unsparsed_bytes).expect("Failed to read unsparsed image");
        assert_eq!(unsparsed_bytes.len(), 8192 + content_size + 16384 + 4096 + 4096);
        assert_eq!(&unsparsed_bytes[..8192], &[0xffu8; 8192]);
        assert_eq!(&unsparsed_bytes[8192..8192 + content_size], &data[..]);
        assert_eq!(
            &unsparsed_bytes[8192 + content_size..8192 + content_size + 16384],
            &[0u8; 16384]
        );
        assert_eq!(
            &unsparsed_bytes[8192 + content_size + 16384..8192 + content_size + 16384 + 4096],
            &[0xaau8; 4096]
        );
        assert_eq!(&unsparsed_bytes[8192 + content_size + 16384 + 4096..], &[0u8; 4096]);
    }

    #[test]
    fn unaligned_reads() {
        let tmpdir = TempDir::new().unwrap();
        let (mut reader, data) = mixed_image_reader(&tmpdir, 0x0102_0304u32);
        let content_size = data.len();

        let mut buffer = [0u8; 4096];

        // Do an unaligned read from each section.  `read_exact` is used so that a short read
        // fails the test instead of leaving stale bytes in `buffer` to be compared.

        // DataSource::Buffer
        reader.seek(SeekFrom::Start(10)).expect("Failed to seek");
        reader.read_exact(&mut buffer[..20]).expect("Failed to read");
        assert_eq!(&buffer[..20], &[0xffu8; 20]);

        // DataSource::File
        reader.seek(SeekFrom::Start(8192 + 4095)).expect("Failed to seek");
        reader.read_exact(&mut buffer[..2]).expect("Failed to read");
        assert_eq!(&buffer[..2], &data[4095..4097]);

        // DataSource::Skip
        reader.seek(SeekFrom::Start(8192 + content_size as u64 + 4090)).expect("Failed to seek");
        reader.read_exact(&mut buffer[..6]).expect("Failed to read");
        assert_eq!(&buffer[..6], &[0u8; 6]);

        // DataSource::Fill
        reader
            .seek(SeekFrom::Start(8192 + content_size as u64 + 16384 + 3))
            .expect("Failed to seek");
        reader.read_exact(&mut buffer[..9]).expect("Failed to read");
        // Bear in mind the byte ordering is LE, so 0x01020304 == [0x04, 0x03, 0x02, 0x01]
        assert_eq!(&buffer[..9], &[0x01, 0x04, 0x03, 0x02, 0x01, 0x04, 0x03, 0x02, 0x01]);

        // DataSource::Skip
        reader
            .seek(SeekFrom::Start(8192 + content_size as u64 + 16384 + 4096 + 1))
            .expect("Failed to seek");
        reader.read_exact(&mut buffer[..4095]).expect("Failed to read");
        assert_eq!(&buffer[..4095], &[0u8; 4095]);

        // Do an unaligned read spanning two sections (the last Fill and Skip)
        reader
            .seek(SeekFrom::Start(8192 + content_size as u64 + 16384 + 4090))
            .expect("Failed to seek");
        reader.read_exact(&mut buffer[..9]).expect("Failed to read");
        assert_eq!(&buffer[..9], &[0x02, 0x01, 0x04, 0x03, 0x02, 0x01, 0x00, 0x00, 0x00]);
    }
}