qcow/
lib.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod wire;
6
7use anyhow::{anyhow, Error};
8use std::io::{Read, Seek};
9use std::mem;
10use zerocopy::{FromBytes, Immutable, KnownLayout};
11
// Every QCOW image begins with the four magic bytes "QFI\xfb", stored big-endian.
const QCOW_MAGIC: u32 = u32::from_be_bytes(*b"QFI\xfb");
14
/// Returns the size, in bytes, of one cluster: `2^cluster_bits`.
#[inline]
const fn cluster_size(cluster_bits: u32) -> u64 {
    1u64 << cluster_bits
}
19
20#[inline]
21const fn cluster_mask(cluster_bits: u32) -> u64 {
22    cluster_size(cluster_bits) - 1
23}
24
/// Returns the number of address bits used to index an L2 table.
///
/// This is `cluster_bits - 3`; panics if `cluster_bits` is not greater than 3.
#[inline]
const fn l2_bits(cluster_bits: u32) -> u32 {
    // Number of bits subtracted from `cluster_bits` to obtain the L2 index width.
    const ENTRY_SIZE_BITS: u32 = 3;
    assert!(cluster_bits > 3);
    cluster_bits - ENTRY_SIZE_BITS
}
30
31#[inline]
32const fn l2_size(cluster_bits: u32) -> u64 {
33    1 << l2_bits(cluster_bits)
34}
35
36#[inline]
37const fn l2_mask(cluster_bits: u32) -> u64 {
38    l2_size(cluster_bits) - 1
39}
40
41#[inline]
42fn required_l1_size(disk_size: u64, cluster_bits: u32) -> u32 {
43    // l1_entry_size is the addressable disk space that is enabled by a single L1 entry.
44    let l1_entry_size = cluster_size(cluster_bits) * l2_size(cluster_bits);
45    // Round up disk size to the nearest l1_entry_size.
46    let disk_size = disk_size + l1_entry_size - 1;
47    // Return the required number of L1 entries to address the entire disk.
48    (disk_size / l1_entry_size).try_into().unwrap()
49}
50
51fn read_header(file: &mut std::fs::File) -> Result<wire::Header, Error> {
52    const HEADER_SIZE: usize = mem::size_of::<wire::Header>();
53    let mut buf = vec![0u8; HEADER_SIZE];
54    file.seek(std::io::SeekFrom::Start(0))?;
55    file.read_exact(&mut buf)?;
56    // Header::read_from_bytes should not fail if `buf` is of the correct size.
57    Ok(wire::Header::read_from_bytes(buf.as_slice()).expect("read_from failed unexpectedly"))
58}
59
60/// Loads a translation table from a backing file.
61///
62/// This is used to load both the L1 and L2 tables
63fn load_tranlsation_table<Entry: FromBytes + KnownLayout + Immutable + Sized>(
64    file: &mut std::fs::File,
65    num_entries: u64,
66    table_offset: u64,
67) -> Result<Vec<Entry>, Error> {
68    let entry_size = std::mem::size_of::<Entry>() as u64;
69    // Not explicitly needed, but in practice L1 and L2 tables are 8 bytes so we don't expect
70    // this to be anything else.
71    assert!(entry_size == 8);
72
73    let bytes_to_read = num_entries * entry_size;
74    let mut table = vec![0u8; bytes_to_read as usize];
75
76    file.seek(std::io::SeekFrom::Start(table_offset))?;
77    file.read_exact(&mut table)?;
78    Ok(table
79        // Break the bytes up into entry size slices
80        .chunks_exact(entry_size as usize)
81        // Deserialize and unwrap. This should never fail so long as we pass the correct
82        // slice size to `chunks_exact`.
83        .map(Entry::read_from_bytes)
84        .map(Result::unwrap)
85        .collect::<Vec<Entry>>())
86}
87
/// Describes how bytes for a region of disk are stored in the qcow translation table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Mapping {
    /// The requested guest cluster has a corresponding physical cluster specified in the
    /// translation table.
    Mapped {
        /// The physical offset (in the QCOW file) that maps to the requested guest offset.
        physical_offset: u64,
        /// The mapping is valid for at least this many bytes. If length does not cover the
        /// range requested then a new translation should be requested for the range
        /// immediately following this one.
        length: u64,
    },
    /// The requested linear range is unmapped in the translation table for the next `length`
    /// bytes.
    ///
    /// Unmapped sectors read as zero.
    Unmapped {
        /// The number of bytes, starting at the requested offset, that are unmapped.
        length: u64,
    },
}
107
108impl Mapping {
109    pub fn len(&self) -> u64 {
110        match self {
111            Mapping::Mapped { length, .. } => *length,
112            Mapping::Unmapped { length } => *length,
113        }
114    }
115}
116
117/// Implements an iterable type over the set of mappings for a linear disk range.
118///
119/// See `TranslationTable::translation` for more details.
120pub struct Translation<'a> {
121    translation: &'a TranslationTable,
122    linear_range: std::ops::Range<u64>,
123}
124
125impl<'a> Iterator for Translation<'a> {
126    type Item = Mapping;
127
128    fn next(&mut self) -> Option<Self::Item> {
129        if self.linear_range.is_empty() {
130            return None;
131        }
132        let translation = self.translation.translate_range(&self.linear_range);
133        if let Some(translation) = translation.as_ref() {
134            self.linear_range.start += translation.len();
135        }
136        translation
137    }
138}
139
140/// QCOW uses a 2-level translation table to map guest-clusters to host clusters.
141///
142/// The translation table is a way of mapping a linear disk address to a physical offset in the
143/// QCOW file. Not every linear address may be mapped in the QCOW file, in which case reads to
144/// those regions would read-as-zero. These mappings are done with 'cluster' granularity such that
145/// a single, contiguous linear cluster maps to a contiguous region in the host file. The exact
146/// size of clusters used is determined by a field in the QCOW header.
147///
148///  Ex: a linear address can be decomposed into 3 parts:
149///
150///    * l1_index - The index into the top-level L1 translation table. The entry in the L1 table
151///            can either be a pointer to an L2 translation table, or the entry can indicate that
152///            the entire region is un-mapped, regardless of l2_index or cluster_offset.
153///    * l2_index - If the l1_index indicates that there is a valid L2 table for a translation, the
154///            l2_index is offset into that L2 table that defines the per-cluster mapping for a
155///            translation. This mapping can either indicate there is a physical cluster allocated
156///            for a linear cluster or it can indicate that the cluster is unmapped and no
157///            translation exists.
158///    * cluster_offset - If there is a valid l1_table entry and a valid l2_table entry for a
159///            linear disk address, that means there is physical cluster that has been allocated to
160///            the linear cluster. The cluster_offset is then the remaining byte-offset into this
161///            cluster.
162///
163pub struct TranslationTable {
164    /// The number of bits in a linear address that represent the cluster offset.
165    ///
166    ///    cluster_size == 1 << cluster_bits
167    cluster_bits: u32,
168    /// The linear size of the qcow file.
169    linear_size: u64,
170    /// The L1 table is stored as a fully loaded vector of L2 tables. This is simple but does
171    /// require that we retain all L2 tables in memory at all times.
172    l1: Vec<Option<Vec<wire::L2Entry>>>,
173}
174
175impl TranslationTable {
176    pub fn load(file: &mut std::fs::File) -> Result<Self, Error> {
177        let mut header = read_header(file)?;
178        // Every file must start with this magic value.
179        if header.magic.get() != QCOW_MAGIC {
180            return Err(anyhow!("File has bad magic"));
181        }
182
183        // Version check. We don't make any assumptions that we can properly load files with a
184        // version greater than 3.
185        let version = header.version.get();
186        if version != 2 && version != 3 {
187            return Err(anyhow!("QCOW file has unsupported version {}", version));
188        }
189        if version == 2 {
190            // These were added in version 3 with the following defaults with version 2.
191            header.incompatible_features.set(0);
192            header.compatible_features.set(0);
193            header.autoclear_features.set(0);
194            header.refcount_order.set(4);
195            header.header_length.set(72);
196        }
197
198        // Backing files allow for a copy-on-write shadow of a read-only backing file. We don't
199        // support this feature so if we're provided an image the relies on a backing file we will
200        // not be able to properly support it.
201        let backing_file_size = header.backing_file_size.get();
202        if backing_file_size != 0 {
203            return Err(anyhow!("QCOW file has backing file, which is not supported"));
204        }
205
206        // Some guard-rails for the cluster bits.
207        //
208        // The QCOW specification indicates this must be at least 9 bits (512-byte clusters). The
209        // spec also indicates that QEMU may not support cluster sizes above 2MiB so we also go
210        // ahead an adopt that upper bound.
211        let cluster_bits = header.cluster_bits.get();
212        if cluster_bits < 9 || cluster_bits > 22 {
213            return Err(anyhow!("cluster_bits is out of the supported range."));
214        }
215
216        // Size is the linear size of the file in bytes.
217        let size = header.size.get();
218        if size == 0 {
219            return Err(anyhow!("QCOW file has 0 size"));
220        }
221
222        // QCOW files can be encrypted, but we don't support that.
223        if header.crypt_method.get() != wire::QCOW_CRYPT_NONE {
224            return Err(anyhow!("QCOW encryption is not supported"));
225        }
226
227        // The l1 should be large enough to cover the reported disk size.
228        let l1_size = header.l1_size.get();
229        if l1_size < required_l1_size(size, cluster_bits) {
230            return Err(anyhow!("QCOW L1 table is not large enough to address the entire disk"));
231        }
232
233        // Load L1 Table
234        //
235        // First we load a vector of the 8-byte table entries.
236        let l1_entries = load_tranlsation_table::<wire::L1Entry>(
237            file,
238            header.l1_size.get().into(),
239            header.l1_table_offset.get(),
240        )?;
241
242        // Now iterate over each L1 entry and load the corresponding L2 table if necessary.
243        let l1 = l1_entries
244            .into_iter()
245            .map(move |entry| {
246                let entry: Option<Vec<wire::L2Entry>> = if let Some(offset) = entry.offset() {
247                    let l2 = load_tranlsation_table::<wire::L2Entry>(
248                        file,
249                        l2_size(cluster_bits),
250                        offset,
251                    )?;
252                    if l2.iter().find(|e| e.compressed()).is_some() {
253                        return Err(anyhow!("QCOW contains compressed sectors"));
254                    }
255                    Some(l2)
256                } else {
257                    None
258                };
259                Ok::<Option<Vec<wire::L2Entry>>, Error>(entry)
260            })
261            .collect::<Result<Vec<Option<Vec<wire::L2Entry>>>, Error>>()?;
262
263        Ok(Self { cluster_bits, linear_size: size, l1 })
264    }
265
266    /// The logical size of the QCOW disk as specified in the header.
267    pub fn linear_size(&self) -> u64 {
268        self.linear_size
269    }
270
271    /// Looks up translations for a linear disk range.
272    ///
273    /// This takes a `linear_range` describing a region of the qcow file to read from and returns
274    /// an iterator over `Mapping`s of that region.
275    ///
276    /// The returned iterator will yield mappings that indicate how the linear rante is represented
277    /// in the qcow file. This can be a combination of physical cluster mappings and also unmapped
278    /// regions if the translation table contains no data for the linear range.
279    ///
280    /// If any part of `linear_range` extends beyond the disk (bounded by `linear_size()`) then
281    /// the iterator will not yield any mappings for those regions. In other words, no Mapping is a
282    /// distinct situation for a `Mapping::Unmapped`. The former means there is no logical disk
283    /// backing the range and the latter means that the linear range is valid but no physical disk
284    /// clusters have been allocated to it.
285    pub fn translate<'a>(&'a self, linear_range: std::ops::Range<u64>) -> Translation<'a> {
286        Translation { linear_range: linear_range, translation: self }
287    }
288
289    fn translate_range(&self, linear_range: &std::ops::Range<u64>) -> Option<Mapping> {
290        if linear_range.start >= self.linear_size() {
291            return None;
292        }
293
294        // cluster offset is the offset into the translated cluster.
295        let offset = linear_range.start;
296        let cluster_offset = offset & cluster_mask(self.cluster_bits);
297
298        // Now shift off the cluster bits and compute the L2 index
299        let offset = offset >> self.cluster_bits;
300        let l2_index = offset & l2_mask(self.cluster_bits);
301
302        // Now compute the l1 index
303        //
304        // The l1 table index contains the remaining most-significant bits of the linear address.
305        let l1_index = (offset >> l2_bits(self.cluster_bits)) as u32;
306
307        // Now walk the tables
308        //
309        // First find the L2 table by looking at the corresponding index in the L1 table. If this
310        // is None, the entire linear range covered by that L1 entry is unallocated.
311        let maybe_physical_cluster = self.l1[l1_index as usize]
312            .as_ref()
313            // If the L1 entry is valid, then we have an L2 table that defines per-cluster
314            // translations. This will just lookup the L2 translation entry for the requested
315            // sector.
316            .map(|l2_table| &l2_table[l2_index as usize])
317            // The specific L2 entry can indicate this cluster is either mapped to some physical
318            // cluster or it is an unallocated. `L2Entry::offset` will handle decoding the table
319            // entry and will the physical ofset for the cluster if it exists.
320            .and_then(|entry| entry.offset());
321
322        // The mapping length is valid to the end of the cluster, limited to the end of the range
323        // requested by the caller.
324        //
325        // TODO: As a refinement, we could detect contiguous physical clusters and coalesce
326        // contiguous sectors into a single range.
327        let length = std::cmp::min(
328            linear_range.end - linear_range.start,
329            cluster_size(self.cluster_bits) - cluster_offset,
330        );
331
332        // This will contain a physical cluster that maps to the start of the requested
333        // `linear_range` if a cluster is allocated to that region.
334        let transation = match maybe_physical_cluster {
335            Some(physical_cluster) => {
336                Mapping::Mapped { physical_offset: physical_cluster + cluster_offset, length }
337            }
338            None => Mapping::Unmapped { length },
339        };
340        Some(transation)
341    }
342}
343
/// A minimal read-only view of a qcow file; only compiled for tests.
#[cfg(test)]
struct QcowFileReadOnly {
    /// The translation tables loaded from `file`.
    translation: TranslationTable,
    /// The qcow image itself. Held in a `RefCell` so that reads, which need to seek, can be
    /// issued through a shared reference.
    file: std::cell::RefCell<std::fs::File>,
}
350
#[cfg(test)]
impl QcowFileReadOnly {
    /// Loads the translation tables from `file` and takes ownership of it for later reads.
    pub fn new(mut file: std::fs::File) -> Result<Self, Error> {
        let translation = TranslationTable::load(&mut file)?;
        Ok(Self { translation, file: std::cell::RefCell::new(file) })
    }

    /// The linear size of the disk, in bytes.
    pub fn size(&self) -> u64 {
        self.translation.linear_size()
    }

    /// Reads up to `length` bytes starting at linear `offset`.
    ///
    /// The result is shorter than `length` (possibly empty) when the requested range extends
    /// beyond the translations the table yields. Unmapped regions read as zero.
    pub fn read_at(&self, length: u64, offset: u64) -> Result<Vec<u8>, Error> {
        // Saturate rather than overflow so that a huge `length` simply reads to the end.
        let end = offset.saturating_add(length);
        let mut file = self.file.borrow_mut();
        let mut result = Vec::new();

        // Walk the translations for this linear range and accumulate the bytes into `result`.
        for translation in self.translation.translate(offset..end) {
            // 0-extend our result vector to add capacity for this translation.
            let filled = result.len();
            let translation_len = usize::try_from(translation.len())?;
            result.resize(filled + translation_len, 0);

            match translation {
                // For translations that have a physical cluster mapping we can read the bytes
                // from the file using the physical offset.
                Mapping::Mapped { physical_offset, .. } => {
                    file.seek(std::io::SeekFrom::Start(physical_offset))?;
                    file.read_exact(&mut result[filled..])?;
                }
                // If there exists no translation then the bytes should read-as-zero. This is
                // a no-op here because we have already 0-extended the result vector.
                Mapping::Unmapped { .. } => {}
            }
        }
        Ok(result)
    }
}
389
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;

    /// The linear size every test asserts for its image: 1 GiB.
    const SIZE: u64 = 1024 * 1024 * 1024;

    /// Opens the qcow image at `path`, panicking if it cannot be opened or parsed.
    fn open_qcow_file(path: &str) -> QcowFileReadOnly {
        let test_image = File::open(path).expect("Failed to open file");
        QcowFileReadOnly::new(test_image).expect("Failed to create QcowFileReadOnly")
    }

    /// Asserts that the `length` bytes starting at linear offset `start` all equal `value`.
    fn check_range(file: &QcowFileReadOnly, start: u64, length: u64, value: u8) {
        let bytes = file.read_at(length, start).expect("Failed to read from file");
        assert_eq!(bytes.len() as u64, length);
        for (index, byte) in bytes.iter().enumerate() {
            assert_eq!(*byte, value, "unexpected byte at linear offset {}", start + index as u64);
        }
    }

    #[test]
    fn test_empty_1gb() {
        let qcow = open_qcow_file("/pkg/data/empty_1gb.qcow2");

        assert_eq!(SIZE, qcow.size());
        check_range(&qcow, 0, 1024, 0);
        check_range(&qcow, SIZE - 1024, 1024, 0);
    }

    #[test]
    fn test_read_basic() {
        let qcow = open_qcow_file("/pkg/data/sparse.qcow2");

        assert_eq!(SIZE, qcow.size());

        // Verify we can read the expected data clusters.
        {
            const REGION_START: u64 = 0;
            check_range(&qcow, REGION_START, 1024, 0xaa);
            check_range(&qcow, REGION_START + 1024, 1024, 0);
        }
        {
            const REGION_START: u64 = 512 * 1024 * 1024;
            check_range(&qcow, REGION_START - 1024, 1024, 0);
            check_range(&qcow, REGION_START, 1024, 0xcc);
            check_range(&qcow, REGION_START + 1024, 1024, 0);
        }
        {
            const REGION_START: u64 = SIZE - 1024;
            check_range(&qcow, REGION_START - 1024, 1024, 0);
            check_range(&qcow, REGION_START, 1024, 0xbb);
        }
    }

    #[test]
    fn test_read_across_translations() {
        let qcow = open_qcow_file("/pkg/data/sparse.qcow2");

        assert_eq!(SIZE, qcow.size());

        // Test reading a buffer that is partially translated and partially not translated.
        let bytes = qcow.read_at(4096, 0).expect("Failed to read across translations");

        assert_eq!(bytes[0..1024], vec![0xaa; 1024]);
        assert_eq!(bytes[1024..2048], vec![0; 1024]);
        assert_eq!(bytes[2048..3072], vec![0xab; 1024]);
        assert_eq!(bytes[3072..4096], vec![0; 1024]);
    }

    #[test]
    fn test_read_short() {
        let qcow = open_qcow_file("/pkg/data/sparse.qcow2");

        assert_eq!(SIZE, qcow.size());

        // Test reading past the end of the file.
        //
        // Behavior here is similar to std::io::Read in that the read_at call will not fail but
        // may be short. For read_at calls that are beyond the end of the file this will result
        // in a 0-byte Ok result.
        let bytes = qcow.read_at(1, SIZE - 1).expect("Failed to read the last byte from file");
        assert_eq!(1, bytes.len());
        assert_eq!(0xbb, bytes[0]);

        // Reading past the end of the file should be short.
        let bytes =
            qcow.read_at(10, SIZE - 1).expect("Failed to read 1 byte past the end of the file");
        assert_eq!(1, bytes.len());

        let bytes =
            qcow.read_at(100, SIZE).expect("Failed to read entire buffer past the end of the file");
        assert_eq!(0, bytes.len());

        let bytes =
            qcow.read_at(100, 2 * SIZE).expect("Failed to read far past the end of the file");
        assert_eq!(0, bytes.len());
    }
}