1use crc::Hasher32;
10use itertools::Itertools;
11use rayon::prelude::*;
12use std::ops::Range;
13use thiserror::Error;
14use zerocopy::byteorder::{LE, U16, U32, U64};
15use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Ref, Unaligned};
16
/// Errors produced while encoding, decoding, compressing, or decompressing a chunked archive.
#[derive(Debug, Error)]
pub enum ChunkedArchiveError {
    #[error("Invalid or unsupported archive version.")]
    InvalidVersion,

    #[error("Archive header has incorrect magic.")]
    BadMagic,

    #[error("Integrity checks failed (e.g. incorrect CRC, inconsistent header fields).")]
    IntegrityError,

    #[error("Value is out of range or cannot be represented in specified type.")]
    OutOfRange,

    /// Failure creating or configuring a zstd (de)compression context.
    #[error("Error invoking Zstd function: `{0:?}`.")]
    ZstdError(std::io::Error),

    /// Zstd failed while decompressing the chunk at `index`.
    #[error("Error decompressing chunk {index}: `{error}`.")]
    DecompressionError { index: usize, error: std::io::Error },

    /// Zstd failed while compressing the chunk at `index`.
    #[error("Error compressing chunk {index}: `{error}`.")]
    CompressionError { index: usize, error: std::io::Error },
}
45
/// Location of one chunk: where its bytes fall in the decompressed output, and where its
/// compressed bytes fall in the chunk-data section (relative to the end of the header and
/// seek table — see `ChunkInfo::from_entry`, which rebases the on-disk absolute offsets).
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ChunkInfo {
    pub decompressed_range: Range<usize>,
    pub compressed_range: Range<usize>,
}
53
54pub fn decode_archive(
58 data: &[u8],
59 archive_length: usize,
60) -> Result<Option<(Vec<ChunkInfo>, &[u8])>, ChunkedArchiveError> {
61 match Ref::<_, ChunkedArchiveHeader>::from_prefix(data).map_err(Into::into) {
62 Ok((header, data)) => header.decode_seek_table(data, archive_length as u64),
63 Err(zerocopy::SizeError { .. }) => Ok(None), }
65}
66
67impl ChunkInfo {
68 fn from_entry(
69 entry: &SeekTableEntry,
70 header_length: usize,
71 ) -> Result<Self, ChunkedArchiveError> {
72 let decompressed_start = entry.decompressed_offset.get() as usize;
73 let decompressed_size = entry.decompressed_size.get() as usize;
74 let decompressed_range = decompressed_start
75 ..decompressed_start
76 .checked_add(decompressed_size)
77 .ok_or(ChunkedArchiveError::OutOfRange)?;
78
79 let compressed_offset = entry.compressed_offset.get() as usize;
80 let compressed_start = compressed_offset
81 .checked_sub(header_length)
82 .ok_or(ChunkedArchiveError::IntegrityError)?;
83 let compressed_size = entry.compressed_size.get() as usize;
84 let compressed_range = compressed_start
85 ..compressed_start
86 .checked_add(compressed_size)
87 .ok_or(ChunkedArchiveError::OutOfRange)?;
88
89 Ok(Self { decompressed_range, compressed_range })
90 }
91}
92
/// On-disk archive header. All multi-byte fields are little-endian; `repr(C)` + `Unaligned`
/// lets zerocopy read it directly from the serialized bytes.
#[derive(IntoBytes, KnownLayout, FromBytes, Immutable, Unaligned, Clone, Copy, Debug)]
#[repr(C)]
struct ChunkedArchiveHeader {
    // Must equal CHUNKED_ARCHIVE_MAGIC.
    magic: [u8; 8],
    // Format version; must equal CHUNKED_ARCHIVE_VERSION.
    version: U16<LE>,
    reserved_0: U16<LE>,
    // Number of SeekTableEntry records immediately following the header.
    num_entries: U32<LE>,
    // CRC32 over the header (with this field's bytes skipped) and the seek table.
    checksum: U32<LE>,
    reserved_1: U32<LE>,
    reserved_2: U64<LE>,
}
105
/// One seek-table record describing a single chunk. `compressed_offset` is absolute within
/// the archive file (it includes the header and seek table); decompressed fields describe the
/// chunk's position and size in the decompressed output.
#[derive(IntoBytes, KnownLayout, FromBytes, Immutable, Unaligned, Clone, Copy, Debug)]
#[repr(C)]
struct SeekTableEntry {
    // Offset of this chunk's contents within the decompressed data.
    decompressed_offset: U64<LE>,
    // Size of this chunk once decompressed.
    decompressed_size: U64<LE>,
    // Absolute offset of this chunk's compressed bytes within the archive.
    compressed_offset: U64<LE>,
    // Size of this chunk's compressed bytes.
    compressed_size: U64<LE>,
}
115
116impl ChunkedArchiveHeader {
117 const CHUNKED_ARCHIVE_MAGIC: [u8; 8] = [0x46, 0x9b, 0x78, 0xef, 0x0f, 0xd0, 0xb2, 0x03];
118 const CHUNKED_ARCHIVE_VERSION: u16 = 2;
119 const CHUNKED_ARCHIVE_MAX_FRAMES: usize = 1023;
120 const CHUNKED_ARCHIVE_CHECKSUM_OFFSET: usize = 16;
121
122 fn new(seek_table: &[SeekTableEntry]) -> Result<Self, ChunkedArchiveError> {
123 let header: ChunkedArchiveHeader = Self {
124 magic: Self::CHUNKED_ARCHIVE_MAGIC,
125 version: Self::CHUNKED_ARCHIVE_VERSION.into(),
126 reserved_0: 0.into(),
127 num_entries: TryInto::<u32>::try_into(seek_table.len())
128 .or(Err(ChunkedArchiveError::OutOfRange))?
129 .into(),
130 checksum: 0.into(), reserved_1: 0.into(),
132 reserved_2: 0.into(),
133 };
134 Ok(Self { checksum: header.checksum(seek_table).into(), ..header })
135 }
136
137 fn checksum(&self, entries: &[SeekTableEntry]) -> u32 {
139 let mut first_crc = crc::crc32::Digest::new(crc::crc32::IEEE);
140 first_crc.write(&self.as_bytes()[..Self::CHUNKED_ARCHIVE_CHECKSUM_OFFSET]);
141 let mut crc = crc::crc32::Digest::new_with_initial(crc::crc32::IEEE, first_crc.sum32());
142 crc.write(
143 &self.as_bytes()
144 [Self::CHUNKED_ARCHIVE_CHECKSUM_OFFSET + self.checksum.as_bytes().len()..],
145 );
146 crc.write(entries.as_bytes());
147 crc.sum32()
148 }
149
150 fn header_length(num_entries: usize) -> usize {
152 std::mem::size_of::<ChunkedArchiveHeader>()
153 + (std::mem::size_of::<SeekTableEntry>() * num_entries)
154 }
155
156 fn decode_seek_table(
160 self,
161 data: &[u8],
162 archive_length: u64,
163 ) -> Result<Option<(Vec<ChunkInfo>, &[u8])>, ChunkedArchiveError> {
164 let num_entries = self.num_entries.get() as usize;
166 let Ok((entries, chunk_data)) =
167 Ref::<_, [SeekTableEntry]>::from_prefix_with_elems(data, num_entries)
168 else {
169 return Ok(None);
170 };
171 let entries: &[SeekTableEntry] = Ref::into_ref(entries);
172
173 if self.magic != Self::CHUNKED_ARCHIVE_MAGIC {
175 return Err(ChunkedArchiveError::BadMagic);
176 }
177 if self.version.get() != Self::CHUNKED_ARCHIVE_VERSION {
178 return Err(ChunkedArchiveError::InvalidVersion);
179 }
180 if self.checksum.get() != self.checksum(entries) {
181 return Err(ChunkedArchiveError::IntegrityError);
182 }
183 if entries.len() > Self::CHUNKED_ARCHIVE_MAX_FRAMES {
184 return Err(ChunkedArchiveError::IntegrityError);
185 }
186
187 if !entries.is_empty() && entries[0].decompressed_offset.get() != 0 {
191 return Err(ChunkedArchiveError::IntegrityError);
192 }
193
194 let header_length = Self::header_length(entries.len());
196 if entries.iter().any(|entry| entry.compressed_offset.get() < header_length as u64) {
197 return Err(ChunkedArchiveError::IntegrityError);
198 }
199
200 for (prev, curr) in entries.iter().tuple_windows() {
203 if (prev.decompressed_offset.get() + prev.decompressed_size.get())
204 != curr.decompressed_offset.get()
205 {
206 return Err(ChunkedArchiveError::IntegrityError);
207 }
208 }
209
210 for (prev, curr) in entries.iter().tuple_windows() {
213 if (prev.compressed_offset.get() + prev.compressed_size.get())
214 > curr.compressed_offset.get()
215 {
216 return Err(ChunkedArchiveError::IntegrityError);
217 }
218 }
219
220 for entry in entries.iter() {
222 if entry.decompressed_size.get() == 0 || entry.compressed_size.get() == 0 {
223 return Err(ChunkedArchiveError::IntegrityError);
224 }
225 }
226
227 for entry in entries.iter() {
229 let compressed_end = entry.compressed_offset.get() + entry.compressed_size.get();
230 if compressed_end > archive_length {
231 return Err(ChunkedArchiveError::IntegrityError);
232 }
233 }
234
235 let seek_table = entries
236 .into_iter()
237 .map(|entry| ChunkInfo::from_entry(entry, header_length))
238 .try_collect()?;
239 Ok(Some((seek_table, chunk_data)))
240 }
241}
242
/// A single compressed chunk plus the size its contents decompress to.
pub struct CompressedChunk {
    // The chunk's compressed bytes (one zstd frame — see ChunkedArchive::new).
    pub compressed_data: Vec<u8>,
    // Number of bytes `compressed_data` decompresses to.
    pub decompressed_size: usize,
}
250
/// An in-memory chunked archive: input data compressed as a sequence of independently
/// decompressible chunks, ready to be serialized with `ChunkedArchive::write`.
pub struct ChunkedArchive {
    // Compressed chunks, in input order.
    chunks: Vec<CompressedChunk>,
    // Decompressed size used when splitting the input (the final chunk may be smaller).
    chunk_size: usize,
}
259
impl ChunkedArchive {
    // Mirrors the header's limit on seek-table entries.
    const MAX_CHUNKS: usize = ChunkedArchiveHeader::CHUNKED_ARCHIVE_MAX_FRAMES;
    // Preferred decompressed chunk size when the input fits within MAX_CHUNKS chunks.
    const TARGET_CHUNK_SIZE: usize = 32 * 1024;
    // Zstd compression level applied to every chunk.
    const COMPRESSION_LEVEL: i32 = 14;

    /// Compresses `data` into an archive, splitting it into chunks whose size is chosen by
    /// [`ChunkedArchive::chunk_size_for`] and compressing the chunks in parallel via rayon.
    pub fn new(data: &[u8], chunk_alignment: usize) -> Result<Self, ChunkedArchiveError> {
        let chunk_size = ChunkedArchive::chunk_size_for(data.len(), chunk_alignment);
        let mut chunks: Vec<Result<CompressedChunk, ChunkedArchiveError>> = vec![];
        data.par_chunks(chunk_size)
            .enumerate()
            .map(|(index, chunk)| {
                // One zstd compressor per rayon worker thread, created lazily and reused
                // across chunks to amortize context-setup cost. ChecksumFlag embeds a
                // per-frame checksum in the compressed output.
                thread_local! {
                    static COMPRESSOR: std::cell::RefCell<zstd::bulk::Compressor<'static>> =
                        std::cell::RefCell::new({
                            let mut compressor =
                                zstd::bulk::Compressor::new(ChunkedArchive::COMPRESSION_LEVEL)
                                    .unwrap();
                            compressor
                                .set_parameter(zstd::zstd_safe::CParameter::ChecksumFlag(true))
                                .unwrap();
                            compressor
                        });
                }
                let compressed_data = COMPRESSOR.with(|compressor| {
                    let mut compressor = compressor.borrow_mut();
                    compressor
                        .compress(chunk)
                        .map_err(|error| ChunkedArchiveError::CompressionError { index, error })
                })?;
                Ok(CompressedChunk { compressed_data, decompressed_size: chunk.len() })
            })
            .collect_into_vec(&mut chunks);
        // Surface the first per-chunk compression error, if any.
        let chunks: Vec<_> = chunks.into_iter().try_collect()?;
        Ok(ChunkedArchive { chunks, chunk_size })
    }

    /// The compressed chunks, in input order.
    pub fn chunks(&self) -> &Vec<CompressedChunk> {
        &self.chunks
    }

    /// Decompressed size used when splitting the input (the final chunk may be smaller).
    pub fn chunk_size(&self) -> usize {
        self.chunk_size
    }

    /// Total size of the compressed chunk data, excluding the header and seek table.
    pub fn compressed_data_size(&self) -> usize {
        self.chunks.iter().map(|chunk| chunk.compressed_data.len()).sum()
    }

    /// Size of the archive once serialized: header + seek table + compressed chunk data.
    pub fn serialized_size(&self) -> usize {
        ChunkedArchiveHeader::header_length(self.chunks.len()) + self.compressed_data_size()
    }

    /// Serializes the archive to `writer`: header, then seek table, then chunk data.
    pub fn write(self, mut writer: impl std::io::Write) -> Result<(), std::io::Error> {
        let seek_table = self.make_seek_table();
        // `ChunkedArchiveHeader::new` only fails when the chunk count exceeds u32::MAX,
        // which chunk_size_for's MAX_CHUNKS-based sizing prevents in practice.
        let header = ChunkedArchiveHeader::new(&seek_table).unwrap();
        writer.write_all(header.as_bytes())?;
        writer.write_all(seek_table.as_slice().as_bytes())?;
        for chunk in self.chunks {
            writer.write_all(&chunk.compressed_data)?;
        }
        Ok(())
    }

    /// Picks a chunk size: TARGET_CHUNK_SIZE when the data fits within MAX_CHUNKS chunks of
    /// that size, otherwise the smallest `chunk_alignment`-aligned size that keeps the chunk
    /// count within MAX_CHUNKS.
    fn chunk_size_for(uncompressed_length: usize, chunk_alignment: usize) -> usize {
        if uncompressed_length <= (Self::MAX_CHUNKS * Self::TARGET_CHUNK_SIZE) {
            return Self::TARGET_CHUNK_SIZE;
        }
        let chunk_size =
            round_up(uncompressed_length, ChunkedArchive::MAX_CHUNKS) / ChunkedArchive::MAX_CHUNKS;
        return round_up(chunk_size, chunk_alignment);
    }

    /// Builds the on-disk seek table: one entry per chunk, with absolute compressed offsets
    /// (i.e. including the header + seek-table length) and cumulative decompressed offsets.
    fn make_seek_table(&self) -> Vec<SeekTableEntry> {
        let header_length = ChunkedArchiveHeader::header_length(self.chunks.len());
        let mut seek_table = vec![];
        seek_table.reserve(self.chunks.len());
        let mut compressed_size: usize = 0;
        let mut decompressed_offset: usize = 0;
        for chunk in &self.chunks {
            seek_table.push(SeekTableEntry {
                decompressed_offset: (decompressed_offset as u64).into(),
                decompressed_size: (chunk.decompressed_size as u64).into(),
                compressed_offset: ((header_length + compressed_size) as u64).into(),
                compressed_size: (chunk.compressed_data.len() as u64).into(),
            });
            compressed_size += chunk.compressed_data.len();
            decompressed_offset += chunk.decompressed_size;
        }
        seek_table
    }
}
367
/// Streaming decompressor for a chunked archive: feed compressed bytes in arbitrarily-sized
/// pieces via `update`, and each chunk is decompressed and handed to a callback as soon as it
/// is complete.
pub struct ChunkedDecompressor {
    // Chunk layout, as produced by `decode_archive`.
    seek_table: Vec<ChunkInfo>,
    // Holds the partial compressed bytes of the current chunk between `update` calls.
    buffer: Vec<u8>,
    // Total compressed bytes consumed so far across all `update` calls.
    data_written: usize,
    // Index into `seek_table` of the next chunk to complete.
    curr_chunk: usize,
    // End of the last chunk's compressed range: total compressed payload expected.
    total_compressed_size: usize,
    decompressor: zstd::bulk::Decompressor<'static>,
    // Scratch buffer chunks are decompressed into before invoking the callback.
    decompressed_buffer: Vec<u8>,
    // Optional callback invoked with the offending chunk when decompression fails.
    error_handler: Option<ErrorHandler>,
}
392
393type ErrorHandler = Box<dyn Fn(usize, ChunkInfo, &[u8]) -> () + Send + 'static>;
394
395impl ChunkedDecompressor {
396 pub fn new(seek_table: Vec<ChunkInfo>) -> Result<Self, ChunkedArchiveError> {
398 let total_compressed_size =
399 seek_table.last().map(|last_chunk| last_chunk.compressed_range.end).unwrap_or(0);
400 let decompressed_buffer =
401 vec![0u8; seek_table.first().map(|c| c.decompressed_range.len()).unwrap_or(0)];
402 let decompressor =
403 zstd::bulk::Decompressor::new().map_err(ChunkedArchiveError::ZstdError)?;
404 Ok(Self {
405 seek_table,
406 buffer: vec![],
407 data_written: 0,
408 curr_chunk: 0,
409 total_compressed_size,
410 decompressor,
411 decompressed_buffer,
412 error_handler: None,
413 })
414 }
415
416 pub fn new_with_error_handler(
419 seek_table: Vec<ChunkInfo>,
420 error_handler: ErrorHandler,
421 ) -> Result<Self, ChunkedArchiveError> {
422 Ok(Self { error_handler: Some(error_handler), ..Self::new(seek_table)? })
423 }
424
425 pub fn seek_table(&self) -> &Vec<ChunkInfo> {
426 &self.seek_table
427 }
428
429 fn finish_chunk(
430 &mut self,
431 data: &[u8],
432 chunk_callback: &mut impl FnMut(&[u8]) -> (),
433 ) -> Result<(), ChunkedArchiveError> {
434 debug_assert_eq!(data.len(), self.seek_table[self.curr_chunk].compressed_range.len());
435 let chunk = &self.seek_table[self.curr_chunk];
436 let decompressed_size = self
437 .decompressor
438 .decompress_to_buffer(data, self.decompressed_buffer.as_mut_slice())
439 .map_err(|error| {
440 if let Some(ref error_handler) = self.error_handler {
441 error_handler(self.curr_chunk, chunk.clone(), data.as_bytes());
442 }
443 ChunkedArchiveError::DecompressionError { index: self.curr_chunk, error }
444 })?;
445 if decompressed_size != chunk.decompressed_range.len() {
446 return Err(ChunkedArchiveError::IntegrityError);
447 }
448 chunk_callback(&self.decompressed_buffer[..decompressed_size]);
449 self.curr_chunk += 1;
450 Ok(())
451 }
452
453 pub fn update(
455 &mut self,
456 mut data: &[u8],
457 chunk_callback: &mut impl FnMut(&[u8]) -> (),
458 ) -> Result<(), ChunkedArchiveError> {
459 if self.data_written + data.len() > self.total_compressed_size {
461 return Err(ChunkedArchiveError::OutOfRange);
462 }
463 self.data_written += data.len();
464
465 if !self.buffer.is_empty() {
467 let to_read = std::cmp::min(
468 data.len(),
469 self.seek_table[self.curr_chunk]
470 .compressed_range
471 .len()
472 .checked_sub(self.buffer.len())
473 .unwrap(),
474 );
475 self.buffer.extend_from_slice(&data[..to_read]);
476 if self.buffer.len() == self.seek_table[self.curr_chunk].compressed_range.len() {
477 let full_chunk = std::mem::take(&mut self.buffer);
481 self.finish_chunk(&full_chunk[..], chunk_callback)?;
482 self.buffer = full_chunk;
483 self.buffer.drain(..);
485 }
486 data = &data[to_read..];
487 }
488
489 while !data.is_empty()
491 && self.curr_chunk < self.seek_table.len()
492 && self.seek_table[self.curr_chunk].compressed_range.len() <= data.len()
493 {
494 let len = self.seek_table[self.curr_chunk].compressed_range.len();
495 self.finish_chunk(&data[..len], chunk_callback)?;
496 data = &data[len..];
497 }
498
499 if !data.is_empty() {
501 debug_assert!(self.curr_chunk < self.seek_table.len());
502 debug_assert!(self.data_written < self.total_compressed_size);
503 self.buffer.extend_from_slice(data);
504 }
505
506 debug_assert!(
507 self.data_written < self.total_compressed_size
508 || self.curr_chunk == self.seek_table.len()
509 );
510
511 Ok(())
512 }
513}
514
/// Rounds `value` up to the next multiple of `multiple`.
///
/// A `multiple` of zero is treated as "no alignment" and returns `value` unchanged; the
/// previous behavior was a divide-by-zero panic, reachable via `ChunkedArchive::new` with a
/// `chunk_alignment` of 0 on large inputs. Panics only if the rounded result overflows
/// `usize`.
fn round_up(value: usize, multiple: usize) -> usize {
    if multiple == 0 {
        return value;
    }
    let remainder = value % multiple;
    if remainder > 0 { value.checked_add(multiple - remainder).unwrap() } else { value }
}
521
#[cfg(test)]
mod tests {

    use super::*;
    use rand::Rng;
    use std::matches;

    // Chunk alignment used throughout the tests.
    const BLOCK_SIZE: usize = 8192;

    // Round-trip smoke test: highly compressible data should shrink and decode successfully.
    #[test]
    fn compress_simple() {
        let data: Vec<u8> = vec![0; 32 * 1024 * 16];
        let archive = ChunkedArchive::new(&data, BLOCK_SIZE).unwrap();
        let mut compressed: Vec<u8> = vec![];
        archive.write(&mut compressed).unwrap();
        assert!(compressed.len() <= data.len());
        assert!(decode_archive(&compressed, compressed.len()).unwrap().is_some());
    }

    // Builds a synthetic header + seek table (no actual chunk data) with uniformly-sized
    // chunks satisfying all decoder invariants; returns (header, table, archive length).
    fn generate_archive(
        num_entries: usize,
    ) -> (ChunkedArchiveHeader, Vec<SeekTableEntry>, u64) {
        let mut seek_table = vec![];
        seek_table.reserve(num_entries);
        let header_length = ChunkedArchiveHeader::header_length(num_entries) as u64;
        const COMPRESSED_CHUNK_SIZE: u64 = 1024;
        const DECOMPRESSED_CHUNK_SIZE: u64 = 2048;
        for n in 0..(num_entries as u64) {
            seek_table.push(SeekTableEntry {
                compressed_offset: (header_length + (n * COMPRESSED_CHUNK_SIZE)).into(),
                compressed_size: COMPRESSED_CHUNK_SIZE.into(),
                decompressed_offset: (n * DECOMPRESSED_CHUNK_SIZE).into(),
                decompressed_size: DECOMPRESSED_CHUNK_SIZE.into(),
            });
        }
        let header = ChunkedArchiveHeader::new(&seek_table).unwrap();
        let archive_length: u64 = header_length + (num_entries as u64 * COMPRESSED_CHUNK_SIZE);
        (header, seek_table, archive_length)
    }

    // A well-formed generated table must pass validation.
    #[test]
    fn should_validate_self() {
        let (header, seek_table, archive_length) = generate_archive(4);
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(header.decode_seek_table(serialized_table, archive_length).unwrap().is_some());
    }

    // An archive with zero entries is valid.
    #[test]
    fn should_validate_empty() {
        let (header, _, archive_length) = generate_archive(0);
        assert!(header.decode_seek_table(&[], archive_length).unwrap().is_some());
    }

    // Flipping a magic byte must yield BadMagic (magic is checked before the checksum).
    #[test]
    fn should_detect_bad_magic() {
        let (header, seek_table, archive_length) = generate_archive(4);
        let mut corrupt_magic = ChunkedArchiveHeader::CHUNKED_ARCHIVE_MAGIC;
        corrupt_magic[0] = !corrupt_magic[0];
        let bad_magic = ChunkedArchiveHeader { magic: corrupt_magic, ..header };
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            bad_magic.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::BadMagic
        ));
    }
    // An unsupported version number must yield InvalidVersion.
    #[test]
    fn should_detect_wrong_version() {
        let (header, seek_table, archive_length) = generate_archive(4);
        let wrong_version = ChunkedArchiveHeader {
            version: (ChunkedArchiveHeader::CHUNKED_ARCHIVE_VERSION + 1).into(),
            ..header
        };
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            wrong_version.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::InvalidVersion
        ));
    }

    // A corrupted checksum field must yield IntegrityError.
    #[test]
    fn should_detect_corrupt_checksum() {
        let (header, seek_table, archive_length) = generate_archive(4);
        let corrupt_checksum =
            ChunkedArchiveHeader { checksum: (!header.checksum.get()).into(), ..header };
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            corrupt_checksum.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    // More entries than CHUNKED_ARCHIVE_MAX_FRAMES must be rejected.
    #[test]
    fn should_reject_too_many_entries() {
        let (too_many_entries, seek_table, archive_length) =
            generate_archive(ChunkedArchiveHeader::CHUNKED_ARCHIVE_MAX_FRAMES + 1);

        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            too_many_entries.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    // NOTE(review): the invariant tests below mutate the seek table *after* the header (and
    // its checksum) is built, so the checksum comparison also fails. They still observe the
    // expected IntegrityError, but may be exercising the checksum path rather than the
    // targeted invariant check — consider rebuilding the header after mutation to confirm.

    // I0: the first entry must start at decompressed offset 0.
    #[test]
    fn invariant_i0_first_entry_zero() {
        let (header, mut seek_table, archive_length) = generate_archive(4);
        assert_eq!(seek_table[0].decompressed_offset.get(), 0);
        seek_table[0].decompressed_offset = 1.into();

        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            header.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    // I1: compressed data must not overlap the header/seek-table region.
    #[test]
    fn invariant_i1_no_header_overlap() {
        let (header, mut seek_table, archive_length) = generate_archive(4);
        let header_end = ChunkedArchiveHeader::header_length(seek_table.len()) as u64;
        assert!(seek_table[0].compressed_offset.get() >= header_end);
        seek_table[0].compressed_offset = (header_end - 1).into();
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            header.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    // I2: decompressed ranges must be contiguous.
    #[test]
    fn invariant_i2_decompressed_monotonic() {
        let (header, mut seek_table, archive_length) = generate_archive(4);
        assert_eq!(
            seek_table[0].decompressed_offset.get() + seek_table[0].decompressed_size.get(),
            seek_table[1].decompressed_offset.get()
        );
        seek_table[1].decompressed_offset = (seek_table[1].decompressed_offset.get() - 1).into();
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            header.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    // I3: compressed ranges must not overlap.
    #[test]
    fn invariant_i3_compressed_monotonic() {
        let (header, mut seek_table, archive_length) = generate_archive(4);
        assert!(
            (seek_table[0].compressed_offset.get() + seek_table[0].compressed_size.get())
                <= seek_table[1].compressed_offset.get()
        );
        seek_table[1].compressed_offset = (seek_table[1].compressed_offset.get() - 1).into();
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            header.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    // I4: compressed sizes must be nonzero.
    #[test]
    fn invariant_i4_nonzero_compressed_size() {
        let (header, mut seek_table, archive_length) = generate_archive(4);
        assert!(seek_table[0].compressed_size.get() > 0);
        seek_table[0].compressed_size = 0.into();
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            header.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    // I4: decompressed sizes must be nonzero.
    #[test]
    fn invariant_i4_nonzero_decompressed_size() {
        let (header, mut seek_table, archive_length) = generate_archive(4);
        assert!(seek_table[0].decompressed_size.get() > 0);
        seek_table[0].decompressed_size = 0.into();
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            header.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    // I5: every chunk must fit inside the archive.
    #[test]
    fn invariant_i5_within_archive() {
        let (header, mut seek_table, archive_length) = generate_archive(4);
        let last_entry = seek_table.last_mut().unwrap();
        assert!(
            (last_entry.compressed_offset.get() + last_entry.compressed_size.get())
                <= archive_length
        );
        last_entry.compressed_offset = (archive_length + 1).into();
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            header.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    // chunk_size_for keeps TARGET_CHUNK_SIZE at the MAX_CHUNKS boundary and grows (aligned)
    // once the input exceeds MAX_CHUNKS * TARGET_CHUNK_SIZE.
    #[test]
    fn max_chunks() {
        assert_eq!(
            ChunkedArchive::chunk_size_for(
                ChunkedArchive::MAX_CHUNKS * ChunkedArchive::TARGET_CHUNK_SIZE,
                BLOCK_SIZE,
            ),
            ChunkedArchive::TARGET_CHUNK_SIZE
        );
        assert_eq!(
            ChunkedArchive::chunk_size_for(
                ChunkedArchive::MAX_CHUNKS * ChunkedArchive::TARGET_CHUNK_SIZE + 1,
                BLOCK_SIZE,
            ),
            ChunkedArchive::TARGET_CHUNK_SIZE + BLOCK_SIZE
        );
    }

    // An empty archive round-trips: decoding yields an empty seek table and the callback
    // must never fire.
    #[test]
    fn test_decompressor_empty_archive() {
        let mut compressed: Vec<u8> = vec![];
        ChunkedArchive::new(&[], BLOCK_SIZE)
            .expect("compress")
            .write(&mut compressed)
            .expect("write archive");
        let (seek_table, chunk_data) =
            decode_archive(&compressed, compressed.len()).unwrap().unwrap();
        assert!(seek_table.is_empty());
        let mut decompressor = ChunkedDecompressor::new(seek_table).unwrap();
        let mut chunk_callback = |_chunk: &[u8]| panic!("Archive doesn't have any chunks.");
        chunk_data
            .chunks(4)
            .for_each(|data| decompressor.update(data, &mut chunk_callback).unwrap());
    }

    // Full round-trip with random (incompressible) data, fed to the decompressor in tiny
    // 4-byte pieces to exercise the partial-chunk buffering path.
    #[test]
    fn test_decompressor() {
        const UNCOMPRESSED_LENGTH: usize = 3_000_000;
        let data: Vec<u8> = {
            let range = rand::distr::Uniform::<u8>::new_inclusive(0, 255).unwrap();
            rand::rng().sample_iter(&range).take(UNCOMPRESSED_LENGTH).collect()
        };
        let mut compressed: Vec<u8> = vec![];
        ChunkedArchive::new(&data, BLOCK_SIZE)
            .expect("compress")
            .write(&mut compressed)
            .expect("write archive");
        let (seek_table, chunk_data) =
            decode_archive(&compressed, compressed.len()).unwrap().unwrap();

        let num_chunks = seek_table.len();
        assert!(num_chunks > 1);

        let mut decompressor = ChunkedDecompressor::new(seek_table).unwrap();

        let mut decoded_chunks: usize = 0;
        let mut decompressed_offset: usize = 0;
        let mut chunk_callback = |decompressed_chunk: &[u8]| {
            // Each decompressed chunk must match the corresponding slice of the input.
            assert!(
                decompressed_chunk
                    == &data[decompressed_offset..decompressed_offset + decompressed_chunk.len()]
            );
            decompressed_offset += decompressed_chunk.len();
            decoded_chunks += 1;
        };

        chunk_data
            .chunks(4)
            .for_each(|data| decompressor.update(data, &mut chunk_callback).unwrap());
        assert_eq!(decoded_chunks, num_chunks);
    }

    // A seek table whose decompressed size disagrees with the actual chunk contents must
    // surface as IntegrityError.
    #[test]
    fn test_decompressor_corrupt_decompressed_size() {
        let data = vec![0; 3_000_000];
        let mut compressed: Vec<u8> = vec![];
        ChunkedArchive::new(&data, BLOCK_SIZE)
            .expect("compress")
            .write(&mut compressed)
            .expect("write archive");
        let (mut seek_table, chunk_data) =
            decode_archive(&compressed, compressed.len()).unwrap().unwrap();

        seek_table[0].decompressed_range =
            seek_table[0].decompressed_range.start..seek_table[0].decompressed_range.end + 1;

        let mut decompressor = ChunkedDecompressor::new(seek_table).unwrap();
        assert!(matches!(
            decompressor.update(&chunk_data, &mut |_chunk| {}),
            Err(ChunkedArchiveError::IntegrityError)
        ));
    }

    // Truncating a chunk's compressed range must fail decompression and invoke the error
    // handler with the offending chunk's index, info, and bytes.
    #[test]
    fn test_decompressor_corrupt_compressed_size() {
        let data = vec![0; 3_000_000];
        let mut compressed: Vec<u8> = vec![];
        ChunkedArchive::new(&data, BLOCK_SIZE)
            .expect("compress")
            .write(&mut compressed)
            .expect("write archive");
        let (mut seek_table, chunk_data) =
            decode_archive(&compressed, compressed.len()).unwrap().unwrap();

        seek_table[0].compressed_range =
            seek_table[0].compressed_range.start..seek_table[0].compressed_range.end - 1;
        let first_chunk_info = seek_table[0].clone();
        let error_handler = move |chunk_index: usize, chunk_info: ChunkInfo, chunk_data: &[u8]| {
            assert_eq!(chunk_index, 0);
            assert_eq!(chunk_info, first_chunk_info);
            assert_eq!(chunk_data.len(), chunk_info.compressed_range.len());
        };

        let mut decompressor =
            ChunkedDecompressor::new_with_error_handler(seek_table, Box::new(error_handler))
                .unwrap();
        assert!(matches!(
            decompressor.update(&chunk_data, &mut |_chunk| {}),
            Err(ChunkedArchiveError::DecompressionError { index: 0, .. })
        ));
    }
}