qcow/lib.rs
1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5mod wire;
6
7use anyhow::{anyhow, Error};
8use std::io::{Read, Seek};
9use std::mem;
10use zerocopy::{FromBytes, Immutable, KnownLayout};
11
// Magic value that every QCOW file starts with: the ASCII characters "QFI" followed by the
// byte 0xfb, interpreted as a big-endian u32 (0x514649fb).
const QCOW_MAGIC: u32 = u32::from_be_bytes(*b"QFI\xfb");
14
/// Returns the size of a single cluster, i.e. `2^cluster_bits`.
#[inline]
const fn cluster_size(cluster_bits: u32) -> u64 {
    1u64 << cluster_bits
}
19
20#[inline]
21const fn cluster_mask(cluster_bits: u32) -> u64 {
22 cluster_size(cluster_bits) - 1
23}
24
/// Returns the number of linear-address bits used to index into an L2 table.
///
/// This is `cluster_bits - 3`: with 8-byte (2^3) table entries, a table with this many index
/// bits occupies exactly one cluster.
///
/// # Panics
///
/// Panics if `cluster_bits` is 3 or less.
#[inline]
const fn l2_bits(cluster_bits: u32) -> u32 {
    // Reject values for which the subtraction below would underflow or leave no index bits.
    assert!(cluster_bits > 3);
    cluster_bits - 3
}
30
31#[inline]
32const fn l2_size(cluster_bits: u32) -> u64 {
33 1 << l2_bits(cluster_bits)
34}
35
36#[inline]
37const fn l2_mask(cluster_bits: u32) -> u64 {
38 l2_size(cluster_bits) - 1
39}
40
41#[inline]
42fn required_l1_size(disk_size: u64, cluster_bits: u32) -> u32 {
43 // l1_entry_size is the addressable disk space that is enabled by a single L1 entry.
44 let l1_entry_size = cluster_size(cluster_bits) * l2_size(cluster_bits);
45 // Round up disk size to the nearest l1_entry_size.
46 let disk_size = disk_size + l1_entry_size - 1;
47 // Return the required number of L1 entries to address the entire disk.
48 (disk_size / l1_entry_size).try_into().unwrap()
49}
50
51fn read_header(file: &mut std::fs::File) -> Result<wire::Header, Error> {
52 const HEADER_SIZE: usize = mem::size_of::<wire::Header>();
53 let mut buf = vec![0u8; HEADER_SIZE];
54 file.seek(std::io::SeekFrom::Start(0))?;
55 file.read_exact(&mut buf)?;
56 // Header::read_from_bytes should not fail if `buf` is of the correct size.
57 Ok(wire::Header::read_from_bytes(buf.as_slice()).expect("read_from failed unexpectedly"))
58}
59
60/// Loads a translation table from a backing file.
61///
62/// This is used to load both the L1 and L2 tables
63fn load_tranlsation_table<Entry: FromBytes + KnownLayout + Immutable + Sized>(
64 file: &mut std::fs::File,
65 num_entries: u64,
66 table_offset: u64,
67) -> Result<Vec<Entry>, Error> {
68 let entry_size = std::mem::size_of::<Entry>() as u64;
69 // Not explicitly needed, but in practice L1 and L2 tables are 8 bytes so we don't expect
70 // this to be anything else.
71 assert!(entry_size == 8);
72
73 let bytes_to_read = num_entries * entry_size;
74 let mut table = vec![0u8; bytes_to_read as usize];
75
76 file.seek(std::io::SeekFrom::Start(table_offset))?;
77 file.read_exact(&mut table)?;
78 Ok(table
79 // Break the bytes up into entry size slices
80 .chunks_exact(entry_size as usize)
81 // Deserialize and unwrap. This should never fail so long as we pass the correct
82 // slice size to `chunks_exact`.
83 .map(Entry::read_from_bytes)
84 .map(Result::unwrap)
85 .collect::<Vec<Entry>>())
86}
87
/// Describes how bytes for a region of disk are stored in the qcow translation table.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Mapping {
    /// The requested guest cluster has a corresponding physical cluster specified in the
    /// translation table.
    Mapped {
        /// The physical offset (in the QCOW file) that maps to the requested guest offset.
        physical_offset: u64,
        /// The mapping is valid for at least this many bytes. If length does not cover the
        /// range requested then a new translation should be requested for the range
        /// immediately following this one.
        length: u64,
    },
    /// The requested linear range is unmapped in the translation table for the next `length`
    /// bytes.
    ///
    /// Unmapped sectors read as zero.
    Unmapped {
        /// The number of bytes, starting at the requested guest offset, that are unmapped.
        length: u64,
    },
}
107
108impl Mapping {
109 pub fn len(&self) -> u64 {
110 match self {
111 Mapping::Mapped { length, .. } => *length,
112 Mapping::Unmapped { length } => *length,
113 }
114 }
115}
116
117/// Implements an iterable type over the set of mappings for a linear disk range.
118///
119/// See `TranslationTable::translation` for more details.
120pub struct Translation<'a> {
121 translation: &'a TranslationTable,
122 linear_range: std::ops::Range<u64>,
123}
124
125impl<'a> Iterator for Translation<'a> {
126 type Item = Mapping;
127
128 fn next(&mut self) -> Option<Self::Item> {
129 if self.linear_range.is_empty() {
130 return None;
131 }
132 let translation = self.translation.translate_range(&self.linear_range);
133 if let Some(translation) = translation.as_ref() {
134 self.linear_range.start += translation.len();
135 }
136 translation
137 }
138}
139
140/// QCOW uses a 2-level translation table to map guest-clusters to host clusters.
141///
142/// The translation table is a way of mapping a linear disk address to a physical offset in the
143/// QCOW file. Not every linear address may be mapped in the QCOW file, in which case reads to
144/// those regions would read-as-zero. These mappings are done with 'cluster' granularity such that
145/// a single, contiguous linear cluster maps to a contiguous region in the host file. The exact
146/// size of clusters used is determined by a field in the QCOW header.
147///
148/// Ex: a linear address can be decomposed into 3 parts:
149///
150/// * l1_index - The index into the top-level L1 translation table. The entry in the L1 table
151/// can either be a pointer to an L2 translation table, or the entry can indicate that
152/// the entire region is un-mapped, regardless of l2_index or cluster_offset.
153/// * l2_index - If the l1_index indicates that there is a valid L2 table for a translation, the
154/// l2_index is offset into that L2 table that defines the per-cluster mapping for a
155/// translation. This mapping can either indicate there is a physical cluster allocated
156/// for a linear cluster or it can indicate that the cluster is unmapped and no
157/// translation exists.
158/// * cluster_offset - If there is a valid l1_table entry and a valid l2_table entry for a
159/// linear disk address, that means there is physical cluster that has been allocated to
160/// the linear cluster. The cluster_offset is then the remaining byte-offset into this
161/// cluster.
162///
163pub struct TranslationTable {
164 /// The number of bits in a linear address that represent the cluster offset.
165 ///
166 /// cluster_size == 1 << cluster_bits
167 cluster_bits: u32,
168 /// The linear size of the qcow file.
169 linear_size: u64,
170 /// The L1 table is stored as a fully loaded vector of L2 tables. This is simple but does
171 /// require that we retain all L2 tables in memory at all times.
172 l1: Vec<Option<Vec<wire::L2Entry>>>,
173}
174
175impl TranslationTable {
176 pub fn load(file: &mut std::fs::File) -> Result<Self, Error> {
177 let mut header = read_header(file)?;
178 // Every file must start with this magic value.
179 if header.magic.get() != QCOW_MAGIC {
180 return Err(anyhow!("File has bad magic"));
181 }
182
183 // Version check. We don't make any assumptions that we can properly load files with a
184 // version greater than 3.
185 let version = header.version.get();
186 if version != 2 && version != 3 {
187 return Err(anyhow!("QCOW file has unsupported version {}", version));
188 }
189 if version == 2 {
190 // These were added in version 3 with the following defaults with version 2.
191 header.incompatible_features.set(0);
192 header.compatible_features.set(0);
193 header.autoclear_features.set(0);
194 header.refcount_order.set(4);
195 header.header_length.set(72);
196 }
197
198 // Backing files allow for a copy-on-write shadow of a read-only backing file. We don't
199 // support this feature so if we're provided an image the relies on a backing file we will
200 // not be able to properly support it.
201 let backing_file_size = header.backing_file_size.get();
202 if backing_file_size != 0 {
203 return Err(anyhow!("QCOW file has backing file, which is not supported"));
204 }
205
206 // Some guard-rails for the cluster bits.
207 //
208 // The QCOW specification indicates this must be at least 9 bits (512-byte clusters). The
209 // spec also indicates that QEMU may not support cluster sizes above 2MiB so we also go
210 // ahead an adopt that upper bound.
211 let cluster_bits = header.cluster_bits.get();
212 if cluster_bits < 9 || cluster_bits > 22 {
213 return Err(anyhow!("cluster_bits is out of the supported range."));
214 }
215
216 // Size is the linear size of the file in bytes.
217 let size = header.size.get();
218 if size == 0 {
219 return Err(anyhow!("QCOW file has 0 size"));
220 }
221
222 // QCOW files can be encrypted, but we don't support that.
223 if header.crypt_method.get() != wire::QCOW_CRYPT_NONE {
224 return Err(anyhow!("QCOW encryption is not supported"));
225 }
226
227 // The l1 should be large enough to cover the reported disk size.
228 let l1_size = header.l1_size.get();
229 if l1_size < required_l1_size(size, cluster_bits) {
230 return Err(anyhow!("QCOW L1 table is not large enough to address the entire disk"));
231 }
232
233 // Load L1 Table
234 //
235 // First we load a vector of the 8-byte table entries.
236 let l1_entries = load_tranlsation_table::<wire::L1Entry>(
237 file,
238 header.l1_size.get().into(),
239 header.l1_table_offset.get(),
240 )?;
241
242 // Now iterate over each L1 entry and load the corresponding L2 table if necessary.
243 let l1 = l1_entries
244 .into_iter()
245 .map(move |entry| {
246 let entry: Option<Vec<wire::L2Entry>> = if let Some(offset) = entry.offset() {
247 let l2 = load_tranlsation_table::<wire::L2Entry>(
248 file,
249 l2_size(cluster_bits),
250 offset,
251 )?;
252 if l2.iter().find(|e| e.compressed()).is_some() {
253 return Err(anyhow!("QCOW contains compressed sectors"));
254 }
255 Some(l2)
256 } else {
257 None
258 };
259 Ok::<Option<Vec<wire::L2Entry>>, Error>(entry)
260 })
261 .collect::<Result<Vec<Option<Vec<wire::L2Entry>>>, Error>>()?;
262
263 Ok(Self { cluster_bits, linear_size: size, l1 })
264 }
265
266 /// The logical size of the QCOW disk as specified in the header.
267 pub fn linear_size(&self) -> u64 {
268 self.linear_size
269 }
270
271 /// Looks up translations for a linear disk range.
272 ///
273 /// This takes a `linear_range` describing a region of the qcow file to read from and returns
274 /// an iterator over `Mapping`s of that region.
275 ///
276 /// The returned iterator will yield mappings that indicate how the linear rante is represented
277 /// in the qcow file. This can be a combination of physical cluster mappings and also unmapped
278 /// regions if the translation table contains no data for the linear range.
279 ///
280 /// If any part of `linear_range` extends beyond the disk (bounded by `linear_size()`) then
281 /// the iterator will not yield any mappings for those regions. In other words, no Mapping is a
282 /// distinct situation for a `Mapping::Unmapped`. The former means there is no logical disk
283 /// backing the range and the latter means that the linear range is valid but no physical disk
284 /// clusters have been allocated to it.
285 pub fn translate<'a>(&'a self, linear_range: std::ops::Range<u64>) -> Translation<'a> {
286 Translation { linear_range: linear_range, translation: self }
287 }
288
289 fn translate_range(&self, linear_range: &std::ops::Range<u64>) -> Option<Mapping> {
290 if linear_range.start >= self.linear_size() {
291 return None;
292 }
293
294 // cluster offset is the offset into the translated cluster.
295 let offset = linear_range.start;
296 let cluster_offset = offset & cluster_mask(self.cluster_bits);
297
298 // Now shift off the cluster bits and compute the L2 index
299 let offset = offset >> self.cluster_bits;
300 let l2_index = offset & l2_mask(self.cluster_bits);
301
302 // Now compute the l1 index
303 //
304 // The l1 table index contains the remaining most-significant bits of the linear address.
305 let l1_index = (offset >> l2_bits(self.cluster_bits)) as u32;
306
307 // Now walk the tables
308 //
309 // First find the L2 table by looking at the corresponding index in the L1 table. If this
310 // is None, the entire linear range covered by that L1 entry is unallocated.
311 let maybe_physical_cluster = self.l1[l1_index as usize]
312 .as_ref()
313 // If the L1 entry is valid, then we have an L2 table that defines per-cluster
314 // translations. This will just lookup the L2 translation entry for the requested
315 // sector.
316 .map(|l2_table| &l2_table[l2_index as usize])
317 // The specific L2 entry can indicate this cluster is either mapped to some physical
318 // cluster or it is an unallocated. `L2Entry::offset` will handle decoding the table
319 // entry and will the physical ofset for the cluster if it exists.
320 .and_then(|entry| entry.offset());
321
322 // The mapping length is valid to the end of the cluster, limited to the end of the range
323 // requested by the caller.
324 //
325 // TODO: As a refinement, we could detect contiguous physical clusters and coalesce
326 // contiguous sectors into a single range.
327 let length = std::cmp::min(
328 linear_range.end - linear_range.start,
329 cluster_size(self.cluster_bits) - cluster_offset,
330 );
331
332 // This will contain a physical cluster that maps to the start of the requested
333 // `linear_range` if a cluster is allocated to that region.
334 let transation = match maybe_physical_cluster {
335 Some(physical_cluster) => {
336 Mapping::Mapped { physical_offset: physical_cluster + cluster_offset, length }
337 }
338 None => Mapping::Unmapped { length },
339 };
340 Some(transation)
341 }
342}
343
/// A minimal, read-only view of a qcow file, used by the tests in this file.
#[cfg(test)]
struct QcowFileReadOnly {
    /// The backing qcow file. Wrapped in a `RefCell` so reads can seek through `&self`.
    file: std::cell::RefCell<std::fs::File>,
    /// The translation table loaded from `file`.
    translation: TranslationTable,
}
350
#[cfg(test)]
impl QcowFileReadOnly {
    /// Loads the translation table from `file` and takes ownership of the file for reads.
    pub fn new(mut file: std::fs::File) -> Result<Self, Error> {
        Ok(Self {
            translation: TranslationTable::load(&mut file)?,
            file: std::cell::RefCell::new(file),
        })
    }

    /// The linear size of the disk, in bytes.
    pub fn size(&self) -> u64 {
        self.translation.linear_size()
    }

    /// Reads up to `length` bytes starting at linear `offset`.
    ///
    /// Unmapped regions read as zero. The result is shorter than `length` (possibly empty)
    /// when the requested range extends past the translations available for the disk.
    pub fn read_at(&self, length: u64, offset: u64) -> Result<Vec<u8>, Error> {
        // Saturate so that a range reaching past `u64::MAX` is a short read, not an overflow.
        let end = offset.saturating_add(length);
        let mut file = self.file.borrow_mut();
        let mut result = Vec::new();

        // Iterate over the set of translations for this linear range and accumulate the
        // bytes into `result`.
        for mapping in self.translation.translate(offset..end) {
            // 0-extend our result vector to add capacity for this translation.
            let filled = result.len();
            let mapping_len = usize::try_from(mapping.len())?;
            result.resize(filled + mapping_len, 0);

            // For translations that have a physical cluster mapping we can read the bytes
            // from the file using the physical offset. If there exists no translation then
            // the bytes should read-as-zero, which is a no-op here because we have already
            // 0-extended the result vector.
            if let Mapping::Mapped { physical_offset, .. } = mapping {
                file.seek(std::io::SeekFrom::Start(physical_offset))?;
                file.read_exact(&mut result[filled..])?;
            }
        }
        Ok(result)
    }
}
389
#[cfg(test)]
mod test {
    use super::*;
    use std::fs::File;

    /// Linear size that every test below expects its image to report: 1 GiB.
    const SIZE: u64 = 1024 * 1024 * 1024;

    /// Opens the qcow image at `path`, panicking if it cannot be opened or loaded.
    fn open_qcow_file(path: &str) -> QcowFileReadOnly {
        let test_image = File::open(path).expect("Failed to open file");
        QcowFileReadOnly::new(test_image).expect("Failed to create QcowFileReadOnly")
    }

    /// Asserts that reading `length` bytes at `start` yields exactly `length` bytes that are
    /// all equal to `value`.
    fn check_range(file: &QcowFileReadOnly, start: u64, length: u64, value: u8) {
        let bytes = file.read_at(length, start).expect("Failed to read from file");
        assert_eq!(bytes.len() as u64, length);
        for (index, byte) in bytes.iter().enumerate() {
            assert_eq!(*byte, value, "unexpected byte at linear offset {}", start + index as u64);
        }
    }

    #[test]
    fn test_empty_1gb() {
        let qcow = open_qcow_file("/pkg/data/empty_1gb.qcow2");

        assert_eq!(SIZE, qcow.size());
        check_range(&qcow, 0, 1024, 0);
        check_range(&qcow, SIZE - 1024, 1024, 0);
    }

    #[test]
    fn test_read_basic() {
        let qcow = open_qcow_file("/pkg/data/sparse.qcow2");

        assert_eq!(SIZE, qcow.size());

        // Verify we can read the expected data clusters.
        {
            const REGION_START: u64 = 0;
            check_range(&qcow, REGION_START, 1024, 0xaa);
            check_range(&qcow, REGION_START + 1024, 1024, 0);
        }
        {
            const REGION_START: u64 = 512 * 1024 * 1024;
            check_range(&qcow, REGION_START - 1024, 1024, 0);
            check_range(&qcow, REGION_START, 1024, 0xcc);
            check_range(&qcow, REGION_START + 1024, 1024, 0);
        }
        {
            const REGION_START: u64 = SIZE - 1024;
            check_range(&qcow, REGION_START - 1024, 1024, 0);
            check_range(&qcow, REGION_START, 1024, 0xbb);
        }
    }

    #[test]
    fn test_read_across_translations() {
        let qcow = open_qcow_file("/pkg/data/sparse.qcow2");

        assert_eq!(SIZE, qcow.size());

        // Test reading a buffer that is partially translated and partially not translated.
        let bytes = qcow.read_at(4096, 0).expect("Failed to read across translations");

        assert_eq!(bytes[0..1024], vec![0xaa; 1024]);
        assert_eq!(bytes[1024..2048], vec![0; 1024]);
        assert_eq!(bytes[2048..3072], vec![0xab; 1024]);
        assert_eq!(bytes[3072..4096], vec![0; 1024]);
    }

    #[test]
    fn test_read_short() {
        let qcow = open_qcow_file("/pkg/data/sparse.qcow2");

        assert_eq!(SIZE, qcow.size());

        // Test reading past the end of the file.
        //
        // Behavior here is similar to std::io::Read in that the read_at call will not fail
        // but may be short. For read_at calls that are entirely beyond the end of the file
        // this will result in a 0-byte Ok result.
        let bytes = qcow.read_at(1, SIZE - 1).expect("Failed to read the last byte from file");
        assert_eq!(1, bytes.len());
        assert_eq!(0xbb, bytes[0]);

        // Reading past the end of the file should be short.
        let bytes =
            qcow.read_at(10, SIZE - 1).expect("Failed to read 1 byte past the end of the file");
        assert_eq!(1, bytes.len());

        let bytes =
            qcow.read_at(100, SIZE).expect("Failed to read entire buffer past the end of the file");
        assert_eq!(0, bytes.len());

        let bytes =
            qcow.read_at(100, 2 * SIZE).expect("Failed to read far past the end of the file");
        assert_eq!(0, bytes.len());
    }
}
489}