use crc::Hasher32;
use itertools::Itertools;
use rayon::prelude::*;
use std::ops::Range;
use thiserror::Error;
use zerocopy::byteorder::{LE, U16, U32, U64};
use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Ref, Unaligned};
/// Errors reported while building, decoding, or decompressing a chunked archive.
#[derive(Debug, Error)]
pub enum ChunkedArchiveError {
/// The header's version field does not match the version this module supports.
#[error("Invalid or unsupported archive version.")]
InvalidVersion,
/// The header does not start with the expected magic bytes.
#[error("Archive header has incorrect magic.")]
BadMagic,
/// The header checksum or a seek table invariant did not hold, or a chunk decompressed to a
/// different size than the seek table declares.
#[error("Integrity checks failed (e.g. incorrect CRC, inconsistent header fields).")]
IntegrityError,
/// A size or offset overflowed, did not fit in the target integer type, or more data was
/// supplied than the seek table accounts for.
#[error("Value is out of range or cannot be represented in specified type.")]
OutOfRange,
/// A Zstd call failed outside of per-chunk work (e.g. creating the decompressor).
#[error("Error invoking Zstd function: `{0:?}`.")]
ZstdError(std::io::Error),
/// Zstd failed to decompress the chunk at position `index` of the seek table.
#[error("Error decompressing chunk {index}: `{error}`.")]
DecompressionError { index: usize, error: std::io::Error },
/// Zstd failed to compress the chunk at position `index` of the input.
#[error("Error compressing chunk {index}: `{error}`.")]
CompressionError { index: usize, error: std::io::Error },
}
/// Location of a single chunk, as decoded from one seek table entry.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct ChunkInfo {
/// Byte range this chunk occupies in the decompressed data.
pub decompressed_range: Range<usize>,
/// Byte range of the compressed chunk, measured from the end of the header and seek table
/// (i.e. an index into the chunk data returned by `decode_archive`).
pub compressed_range: Range<usize>,
}
/// Attempts to decode the header and seek table found at the start of `data`.
///
/// `archive_length` is the total length of the archive; every chunk described by the seek table
/// must end at or before it.
///
/// Returns `Ok(None)` when `data` is too short to hold the header or the complete seek table,
/// and `Ok(Some((seek_table, chunk_data)))` on success, where `chunk_data` is whatever follows
/// the seek table in `data`.
///
/// # Errors
/// Returns an error if the header or seek table is present but fails validation.
pub fn decode_archive(
    data: &[u8],
    archive_length: usize,
) -> Result<Option<(Vec<ChunkInfo>, &[u8])>, ChunkedArchiveError> {
    // The header type is `Unaligned`, so the only way this parse can fail is `data` being too
    // short; report that as "not enough data yet".
    let Ok((header, after_header)) = Ref::<_, ChunkedArchiveHeader>::from_prefix(data) else {
        return Ok(None);
    };
    header.decode_seek_table(after_header, archive_length as u64)
}
impl ChunkInfo {
fn from_entry(
entry: &SeekTableEntry,
header_length: usize,
) -> Result<Self, ChunkedArchiveError> {
let decompressed_start = entry.decompressed_offset.get() as usize;
let decompressed_size = entry.decompressed_size.get() as usize;
let decompressed_range = decompressed_start
..decompressed_start
.checked_add(decompressed_size)
.ok_or(ChunkedArchiveError::OutOfRange)?;
let compressed_offset = entry.compressed_offset.get() as usize;
let compressed_start = compressed_offset
.checked_sub(header_length)
.ok_or(ChunkedArchiveError::IntegrityError)?;
let compressed_size = entry.compressed_size.get() as usize;
let compressed_range = compressed_start
..compressed_start
.checked_add(compressed_size)
.ok_or(ChunkedArchiveError::OutOfRange)?;
Ok(Self { decompressed_range, compressed_range })
}
}
/// Fixed-size header at the start of a chunked archive.
///
/// All integer fields are little-endian; the fields add up to 32 bytes. The seek table
/// (`num_entries` entries of `SeekTableEntry`) follows the header.
#[derive(IntoBytes, KnownLayout, FromBytes, Immutable, Unaligned, Clone, Copy, Debug)]
#[repr(C)]
struct ChunkedArchiveHeader {
// Must equal `CHUNKED_ARCHIVE_MAGIC`.
magic: [u8; 8],
// Must equal `CHUNKED_ARCHIVE_VERSION`.
version: U16<LE>,
// Written as zero by `new`.
reserved_0: U16<LE>,
// Number of seek table entries that follow the header.
num_entries: U32<LE>,
// CRC32 over the header bytes (excluding this field) followed by the seek table bytes.
checksum: U32<LE>,
// Written as zero by `new`.
reserved_1: U32<LE>,
// Written as zero by `new`.
reserved_2: U64<LE>,
}
/// On-disk seek table entry describing one compressed chunk.
///
/// All fields are little-endian 64-bit integers (32 bytes per entry).
#[derive(IntoBytes, KnownLayout, FromBytes, Immutable, Unaligned, Clone, Copy, Debug)]
#[repr(C)]
struct SeekTableEntry {
// Offset of this chunk within the decompressed data.
decompressed_offset: U64<LE>,
// Size of this chunk once decompressed.
decompressed_size: U64<LE>,
// Offset of the compressed chunk from the start of the archive (header included).
compressed_offset: U64<LE>,
// Size of the compressed chunk.
compressed_size: U64<LE>,
}
impl ChunkedArchiveHeader {
    /// Magic bytes identifying a chunked archive.
    const CHUNKED_ARCHIVE_MAGIC: [u8; 8] = [0x46, 0x9b, 0x78, 0xef, 0x0f, 0xd0, 0xb2, 0x03];
    /// The only archive version this module writes and accepts.
    const CHUNKED_ARCHIVE_VERSION: u16 = 2;
    /// Maximum number of seek table entries accepted when decoding.
    const CHUNKED_ARCHIVE_MAX_FRAMES: usize = 1023;
    /// Byte offset of the `checksum` field within the serialized header.
    const CHUNKED_ARCHIVE_CHECKSUM_OFFSET: usize = 16;

    /// Builds a header for `seek_table`, with the checksum field filled in.
    ///
    /// # Errors
    /// Returns `OutOfRange` if the number of entries does not fit in a `u32`.
    fn new(seek_table: &[SeekTableEntry]) -> Result<Self, ChunkedArchiveError> {
        let num_entries =
            u32::try_from(seek_table.len()).map_err(|_| ChunkedArchiveError::OutOfRange)?;
        let header = Self {
            magic: Self::CHUNKED_ARCHIVE_MAGIC,
            version: Self::CHUNKED_ARCHIVE_VERSION.into(),
            reserved_0: 0.into(),
            num_entries: num_entries.into(),
            // Placeholder; the checksum calculation skips this field.
            checksum: 0.into(),
            reserved_1: 0.into(),
            reserved_2: 0.into(),
        };
        Ok(Self { checksum: header.checksum(seek_table).into(), ..header })
    }

    /// Computes the CRC32 of the header (skipping the checksum field itself) followed by the
    /// serialized seek table `entries`.
    fn checksum(&self, entries: &[SeekTableEntry]) -> u32 {
        let header_bytes = self.as_bytes();
        let after_checksum =
            Self::CHUNKED_ARCHIVE_CHECKSUM_OFFSET + self.checksum.as_bytes().len();

        // Digest of everything before the checksum field...
        let mut first_crc = crc::crc32::Digest::new(crc::crc32::IEEE);
        first_crc.write(&header_bytes[..Self::CHUNKED_ARCHIVE_CHECKSUM_OFFSET]);
        // ...which seeds a second digest over the rest of the header and the seek table.
        let mut crc = crc::crc32::Digest::new_with_initial(crc::crc32::IEEE, first_crc.sum32());
        crc.write(&header_bytes[after_checksum..]);
        crc.write(entries.as_bytes());
        crc.sum32()
    }

    /// Size in bytes of the header plus a seek table with `num_entries` entries.
    fn header_length(num_entries: usize) -> usize {
        std::mem::size_of::<ChunkedArchiveHeader>()
            + (std::mem::size_of::<SeekTableEntry>() * num_entries)
    }

    /// Returns `offset + size`, or `IntegrityError` if the sum does not fit in a `u64`.
    ///
    /// The operands come straight from the archive, so a plain `+` could panic (with overflow
    /// checks) or wrap around and slip past the range checks (without them).
    fn checked_end(offset: u64, size: u64) -> Result<u64, ChunkedArchiveError> {
        offset.checked_add(size).ok_or(ChunkedArchiveError::IntegrityError)
    }

    /// Validates this header against the seek table at the start of `data` and decodes it.
    ///
    /// `data` must begin immediately after the header; `archive_length` is the total length of
    /// the archive. Returns `Ok(None)` if `data` is too short to contain `num_entries` entries,
    /// otherwise the decoded seek table and the bytes following it.
    ///
    /// # Errors
    /// * `BadMagic` / `InvalidVersion` if the corresponding header fields are wrong.
    /// * `IntegrityError` if the checksum does not match or any seek table invariant fails.
    /// * `OutOfRange` if an entry cannot be represented as `usize` ranges.
    fn decode_seek_table(
        self,
        data: &[u8],
        archive_length: u64,
    ) -> Result<Option<(Vec<ChunkInfo>, &[u8])>, ChunkedArchiveError> {
        let num_entries = self.num_entries.get() as usize;
        let Ok((entries, chunk_data)) =
            Ref::<_, [SeekTableEntry]>::from_prefix_with_elems(data, num_entries)
        else {
            return Ok(None);
        };
        let entries: &[SeekTableEntry] = Ref::into_ref(entries);

        if self.magic != Self::CHUNKED_ARCHIVE_MAGIC {
            return Err(ChunkedArchiveError::BadMagic);
        }
        if self.version.get() != Self::CHUNKED_ARCHIVE_VERSION {
            return Err(ChunkedArchiveError::InvalidVersion);
        }
        if self.checksum.get() != self.checksum(entries) {
            return Err(ChunkedArchiveError::IntegrityError);
        }
        if entries.len() > Self::CHUNKED_ARCHIVE_MAX_FRAMES {
            return Err(ChunkedArchiveError::IntegrityError);
        }

        // The first chunk must start at decompressed offset zero.
        if entries.first().is_some_and(|first| first.decompressed_offset.get() != 0) {
            return Err(ChunkedArchiveError::IntegrityError);
        }

        // No chunk may overlap the header or seek table.
        let header_length = Self::header_length(entries.len());
        if entries.iter().any(|entry| entry.compressed_offset.get() < header_length as u64) {
            return Err(ChunkedArchiveError::IntegrityError);
        }

        // Decompressed chunks must be contiguous; compressed chunks must not overlap.
        for (prev, curr) in entries.iter().tuple_windows() {
            let prev_decompressed_end =
                Self::checked_end(prev.decompressed_offset.get(), prev.decompressed_size.get())?;
            let prev_compressed_end =
                Self::checked_end(prev.compressed_offset.get(), prev.compressed_size.get())?;
            if prev_decompressed_end != curr.decompressed_offset.get()
                || prev_compressed_end > curr.compressed_offset.get()
            {
                return Err(ChunkedArchiveError::IntegrityError);
            }
        }

        // Every chunk must be non-empty and end within the archive.
        for entry in entries {
            if entry.decompressed_size.get() == 0 || entry.compressed_size.get() == 0 {
                return Err(ChunkedArchiveError::IntegrityError);
            }
            let compressed_end =
                Self::checked_end(entry.compressed_offset.get(), entry.compressed_size.get())?;
            if compressed_end > archive_length {
                return Err(ChunkedArchiveError::IntegrityError);
            }
        }

        let seek_table = entries
            .iter()
            .map(|entry| ChunkInfo::from_entry(entry, header_length))
            .collect::<Result<Vec<_>, _>>()?;
        Ok(Some((seek_table, chunk_data)))
    }
}
/// One compressed chunk of a `ChunkedArchive`.
pub struct CompressedChunk {
/// Compressed bytes of the chunk.
pub compressed_data: Vec<u8>,
/// Length of the chunk before compression.
pub decompressed_size: usize,
}
/// In-memory chunked archive: the compressed chunks plus the chunk size used to split the input.
pub struct ChunkedArchive {
// Compressed chunks, in input order.
chunks: Vec<CompressedChunk>,
// Number of uncompressed bytes per chunk (the final chunk may be shorter).
chunk_size: usize,
}
impl ChunkedArchive {
    /// Upper bound on the number of chunks in an archive.
    const MAX_CHUNKS: usize = ChunkedArchiveHeader::CHUNKED_ARCHIVE_MAX_FRAMES;
    /// Chunk size used whenever the input fits in `MAX_CHUNKS` chunks of this size.
    const TARGET_CHUNK_SIZE: usize = 32 * 1024;
    /// Zstd compression level used for every chunk.
    const COMPRESSION_LEVEL: i32 = 14;

    /// Splits `data` into chunks and compresses them in parallel.
    ///
    /// `chunk_alignment` is only consulted when `data` is too large for `TARGET_CHUNK_SIZE`
    /// chunks; the enlarged chunk size is then rounded up to a multiple of it.
    ///
    /// # Errors
    /// Returns `CompressionError` for the lowest-indexed chunk that failed to compress.
    ///
    /// # Panics
    /// Panics if a Zstd compressor cannot be created or configured.
    pub fn new(data: &[u8], chunk_alignment: usize) -> Result<Self, ChunkedArchiveError> {
        let chunk_size = Self::chunk_size_for(data.len(), chunk_alignment);
        let mut results: Vec<Result<CompressedChunk, ChunkedArchiveError>> = vec![];
        data.par_chunks(chunk_size)
            .enumerate()
            .map(|(index, chunk)| {
                // One compressor per worker thread, reused across chunks.
                thread_local! {
                    static COMPRESSOR: std::cell::RefCell<zstd::bulk::Compressor<'static>> =
                        std::cell::RefCell::new({
                            let mut compressor =
                                zstd::bulk::Compressor::new(ChunkedArchive::COMPRESSION_LEVEL)
                                    .unwrap();
                            compressor
                                .set_parameter(zstd::zstd_safe::CParameter::ChecksumFlag(true))
                                .unwrap();
                            compressor
                        });
                }
                let compressed_data = COMPRESSOR.with(|compressor| {
                    compressor
                        .borrow_mut()
                        .compress(chunk)
                        .map_err(|error| ChunkedArchiveError::CompressionError { index, error })
                })?;
                Ok(CompressedChunk { compressed_data, decompressed_size: chunk.len() })
            })
            .collect_into_vec(&mut results);
        // Results are in chunk order, so the first error seen is the lowest-indexed one.
        let chunks = results.into_iter().collect::<Result<Vec<_>, _>>()?;
        Ok(ChunkedArchive { chunks, chunk_size })
    }

    /// Compressed chunks, in input order.
    pub fn chunks(&self) -> &Vec<CompressedChunk> {
        &self.chunks
    }

    /// Number of uncompressed bytes per chunk (the final chunk may be shorter).
    pub fn chunk_size(&self) -> usize {
        self.chunk_size
    }

    /// Total size of all compressed chunks, excluding the header and seek table.
    pub fn compressed_data_size(&self) -> usize {
        self.chunks.iter().map(|chunk| chunk.compressed_data.len()).sum()
    }

    /// Size of the archive as produced by `write`: header, seek table, and compressed chunks.
    pub fn serialized_size(&self) -> usize {
        ChunkedArchiveHeader::header_length(self.chunks.len()) + self.compressed_data_size()
    }

    /// Serializes the archive (header, seek table, then every chunk) into `writer`.
    ///
    /// # Errors
    /// Returns any error from `writer`, or an `InvalidInput` error if the header cannot be
    /// built for this archive.
    pub fn write(self, mut writer: impl std::io::Write) -> Result<(), std::io::Error> {
        let seek_table = self.make_seek_table();
        // Surface header construction failures to the caller instead of panicking.
        let header = ChunkedArchiveHeader::new(&seek_table)
            .map_err(|error| std::io::Error::new(std::io::ErrorKind::InvalidInput, error))?;
        writer.write_all(header.as_bytes())?;
        writer.write_all(seek_table.as_slice().as_bytes())?;
        for chunk in self.chunks {
            writer.write_all(&chunk.compressed_data)?;
        }
        Ok(())
    }

    /// Picks the chunk size for an input of `uncompressed_length` bytes.
    ///
    /// Uses `TARGET_CHUNK_SIZE` when that keeps the chunk count within `MAX_CHUNKS`; otherwise
    /// the smallest size that fits in `MAX_CHUNKS` chunks, rounded up to `chunk_alignment`.
    fn chunk_size_for(uncompressed_length: usize, chunk_alignment: usize) -> usize {
        if uncompressed_length <= Self::MAX_CHUNKS * Self::TARGET_CHUNK_SIZE {
            return Self::TARGET_CHUNK_SIZE;
        }
        // Ceiling division of the length by the maximum chunk count.
        let chunk_size = round_up(uncompressed_length, Self::MAX_CHUNKS) / Self::MAX_CHUNKS;
        round_up(chunk_size, chunk_alignment)
    }

    /// Builds the on-disk seek table, laying the chunks out back to back after the header.
    fn make_seek_table(&self) -> Vec<SeekTableEntry> {
        let mut seek_table = Vec::with_capacity(self.chunks.len());
        // Compressed offsets are measured from the start of the archive.
        let mut compressed_offset = ChunkedArchiveHeader::header_length(self.chunks.len());
        let mut decompressed_offset: usize = 0;
        for chunk in &self.chunks {
            seek_table.push(SeekTableEntry {
                decompressed_offset: (decompressed_offset as u64).into(),
                decompressed_size: (chunk.decompressed_size as u64).into(),
                compressed_offset: (compressed_offset as u64).into(),
                compressed_size: (chunk.compressed_data.len() as u64).into(),
            });
            compressed_offset += chunk.compressed_data.len();
            decompressed_offset += chunk.decompressed_size;
        }
        seek_table
    }
}
/// Streaming decompressor that accepts compressed chunk data in arbitrarily sized pieces and
/// hands each fully decompressed chunk to a callback.
pub struct ChunkedDecompressor {
// Chunks expected in the stream, in order.
seek_table: Vec<ChunkInfo>,
// Compressed bytes of the current chunk received so far, when it spans several `update` calls.
buffer: Vec<u8>,
// Total number of compressed bytes passed to `update` so far.
data_written: usize,
// Index into `seek_table` of the next chunk to decompress.
curr_chunk: usize,
// End of the last chunk's compressed range; `update` rejects data beyond this.
total_compressed_size: usize,
// Zstd context reused for every chunk.
decompressor: zstd::bulk::Decompressor<'static>,
// Scratch space each chunk is decompressed into before being passed to the callback.
decompressed_buffer: Vec<u8>,
// Optional hook invoked when a chunk fails to decompress.
error_handler: Option<ErrorHandler>,
}
/// Hook invoked when a chunk fails to decompress. It receives the chunk's index in the seek
/// table, its `ChunkInfo`, and the compressed bytes that were handed to Zstd.
type ErrorHandler = Box<dyn Fn(usize, ChunkInfo, &[u8]) + Send + 'static>;
impl ChunkedDecompressor {
pub fn new(seek_table: Vec<ChunkInfo>) -> Result<Self, ChunkedArchiveError> {
let total_compressed_size =
seek_table.last().map(|last_chunk| last_chunk.compressed_range.end).unwrap_or(0);
let decompressed_buffer =
vec![0u8; seek_table.first().map(|c| c.decompressed_range.len()).unwrap_or(0)];
let decompressor =
zstd::bulk::Decompressor::new().map_err(ChunkedArchiveError::ZstdError)?;
Ok(Self {
seek_table,
buffer: vec![],
data_written: 0,
curr_chunk: 0,
total_compressed_size,
decompressor,
decompressed_buffer,
error_handler: None,
})
}
pub fn new_with_error_handler(
seek_table: Vec<ChunkInfo>,
error_handler: ErrorHandler,
) -> Result<Self, ChunkedArchiveError> {
Ok(Self { error_handler: Some(error_handler), ..Self::new(seek_table)? })
}
pub fn seek_table(&self) -> &Vec<ChunkInfo> {
&self.seek_table
}
fn finish_chunk(
&mut self,
data: &[u8],
chunk_callback: &mut impl FnMut(&[u8]) -> (),
) -> Result<(), ChunkedArchiveError> {
debug_assert_eq!(data.len(), self.seek_table[self.curr_chunk].compressed_range.len());
let chunk = &self.seek_table[self.curr_chunk];
let decompressed_size = self
.decompressor
.decompress_to_buffer(data, self.decompressed_buffer.as_mut_slice())
.map_err(|error| {
if let Some(ref error_handler) = self.error_handler {
error_handler(self.curr_chunk, chunk.clone(), data.as_bytes());
}
ChunkedArchiveError::DecompressionError { index: self.curr_chunk, error }
})?;
if decompressed_size != chunk.decompressed_range.len() {
return Err(ChunkedArchiveError::IntegrityError);
}
chunk_callback(&self.decompressed_buffer[..decompressed_size]);
self.curr_chunk += 1;
Ok(())
}
pub fn update(
&mut self,
mut data: &[u8],
chunk_callback: &mut impl FnMut(&[u8]) -> (),
) -> Result<(), ChunkedArchiveError> {
if self.data_written + data.len() > self.total_compressed_size {
return Err(ChunkedArchiveError::OutOfRange);
}
self.data_written += data.len();
if !self.buffer.is_empty() {
let to_read = std::cmp::min(
data.len(),
self.seek_table[self.curr_chunk]
.compressed_range
.len()
.checked_sub(self.buffer.len())
.unwrap(),
);
self.buffer.extend_from_slice(&data[..to_read]);
if self.buffer.len() == self.seek_table[self.curr_chunk].compressed_range.len() {
let full_chunk = std::mem::take(&mut self.buffer);
self.finish_chunk(&full_chunk[..], chunk_callback)?;
self.buffer = full_chunk;
self.buffer.drain(..);
}
data = &data[to_read..];
}
while !data.is_empty()
&& self.curr_chunk < self.seek_table.len()
&& self.seek_table[self.curr_chunk].compressed_range.len() <= data.len()
{
let len = self.seek_table[self.curr_chunk].compressed_range.len();
self.finish_chunk(&data[..len], chunk_callback)?;
data = &data[len..];
}
if !data.is_empty() {
debug_assert!(self.curr_chunk < self.seek_table.len());
debug_assert!(self.data_written < self.total_compressed_size);
self.buffer.extend_from_slice(data);
}
debug_assert!(
self.data_written < self.total_compressed_size
|| self.curr_chunk == self.seek_table.len()
);
Ok(())
}
}
/// Rounds `value` up to the nearest multiple of `multiple`.
///
/// A `multiple` of zero is treated as "no alignment" and returns `value` unchanged, rather than
/// panicking on a remainder-by-zero.
///
/// # Panics
/// Panics if the rounded value does not fit in a `usize`.
fn round_up(value: usize, multiple: usize) -> usize {
    if multiple == 0 {
        return value;
    }
    match value % multiple {
        0 => value,
        remainder => value
            .checked_add(multiple - remainder)
            .expect("rounded value overflows usize"),
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use rand::Rng;
    use std::matches;

    /// Chunk alignment passed to `ChunkedArchive::new` throughout these tests.
    const BLOCK_SIZE: usize = 8192;

    #[test]
    fn compress_simple() {
        let data: Vec<u8> = vec![0; 32 * 1024 * 16];
        let archive = ChunkedArchive::new(&data, BLOCK_SIZE).unwrap();
        let mut compressed: Vec<u8> = vec![];
        archive.write(&mut compressed).unwrap();
        assert!(compressed.len() <= data.len());
        assert!(decode_archive(&compressed, compressed.len()).unwrap().is_some());
    }

    /// Builds a valid header and seek table with `num_entries` back-to-back chunks, and returns
    /// them with the length of the archive they describe.
    fn generate_archive(
        num_entries: usize,
    ) -> (ChunkedArchiveHeader, Vec<SeekTableEntry>, u64) {
        const COMPRESSED_CHUNK_SIZE: u64 = 1024;
        const DECOMPRESSED_CHUNK_SIZE: u64 = 2048;
        let header_length = ChunkedArchiveHeader::header_length(num_entries) as u64;
        let seek_table: Vec<SeekTableEntry> = (0..num_entries as u64)
            .map(|n| SeekTableEntry {
                compressed_offset: (header_length + (n * COMPRESSED_CHUNK_SIZE)).into(),
                compressed_size: COMPRESSED_CHUNK_SIZE.into(),
                decompressed_offset: (n * DECOMPRESSED_CHUNK_SIZE).into(),
                decompressed_size: DECOMPRESSED_CHUNK_SIZE.into(),
            })
            .collect();
        let header = ChunkedArchiveHeader::new(&seek_table).unwrap();
        let archive_length: u64 = header_length + (num_entries as u64 * COMPRESSED_CHUNK_SIZE);
        (header, seek_table, archive_length)
    }

    /// Asserts that `seek_table` is rejected with `IntegrityError` even though the header's
    /// checksum matches it.
    ///
    /// The header is rebuilt from the (already corrupted) table. Reusing a header computed
    /// before the corruption would fail the checksum check first and never reach the
    /// invariant under test.
    fn assert_invariant_violation(seek_table: &[SeekTableEntry], archive_length: u64) {
        let header = ChunkedArchiveHeader::new(seek_table).unwrap();
        assert!(matches!(
            header.decode_seek_table(seek_table.as_bytes(), archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    #[test]
    fn should_validate_self() {
        let (header, seek_table, archive_length) = generate_archive(4);
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(header.decode_seek_table(serialized_table, archive_length).unwrap().is_some());
    }

    #[test]
    fn should_validate_empty() {
        let (header, _, archive_length) = generate_archive(0);
        assert!(header.decode_seek_table(&[], archive_length).unwrap().is_some());
    }

    #[test]
    fn should_detect_bad_magic() {
        let (header, seek_table, archive_length) = generate_archive(4);
        let mut corrupt_magic = ChunkedArchiveHeader::CHUNKED_ARCHIVE_MAGIC;
        corrupt_magic[0] = !corrupt_magic[0];
        let bad_magic = ChunkedArchiveHeader { magic: corrupt_magic, ..header };
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            bad_magic.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::BadMagic
        ));
    }

    #[test]
    fn should_detect_wrong_version() {
        let (header, seek_table, archive_length) = generate_archive(4);
        let wrong_version = ChunkedArchiveHeader {
            version: (ChunkedArchiveHeader::CHUNKED_ARCHIVE_VERSION + 1).into(),
            ..header
        };
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            wrong_version.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::InvalidVersion
        ));
    }

    #[test]
    fn should_detect_corrupt_checksum() {
        let (header, seek_table, archive_length) = generate_archive(4);
        let corrupt_checksum =
            ChunkedArchiveHeader { checksum: (!header.checksum.get()).into(), ..header };
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            corrupt_checksum.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    #[test]
    fn should_reject_too_many_entries() {
        let (too_many_entries, seek_table, archive_length) =
            generate_archive(ChunkedArchiveHeader::CHUNKED_ARCHIVE_MAX_FRAMES + 1);
        let serialized_table = seek_table.as_slice().as_bytes();
        assert!(matches!(
            too_many_entries.decode_seek_table(serialized_table, archive_length).unwrap_err(),
            ChunkedArchiveError::IntegrityError
        ));
    }

    #[test]
    fn invariant_i0_first_entry_zero() {
        let (_, mut seek_table, archive_length) = generate_archive(4);
        assert_eq!(seek_table[0].decompressed_offset.get(), 0);
        seek_table[0].decompressed_offset = 1.into();
        assert_invariant_violation(&seek_table, archive_length);
    }

    #[test]
    fn invariant_i1_no_header_overlap() {
        let (_, mut seek_table, archive_length) = generate_archive(4);
        let header_end = ChunkedArchiveHeader::header_length(seek_table.len()) as u64;
        assert!(seek_table[0].compressed_offset.get() >= header_end);
        seek_table[0].compressed_offset = (header_end - 1).into();
        assert_invariant_violation(&seek_table, archive_length);
    }

    #[test]
    fn invariant_i2_decompressed_monotonic() {
        let (_, mut seek_table, archive_length) = generate_archive(4);
        assert_eq!(
            seek_table[0].decompressed_offset.get() + seek_table[0].decompressed_size.get(),
            seek_table[1].decompressed_offset.get()
        );
        seek_table[1].decompressed_offset = (seek_table[1].decompressed_offset.get() - 1).into();
        assert_invariant_violation(&seek_table, archive_length);
    }

    #[test]
    fn invariant_i3_compressed_monotonic() {
        let (_, mut seek_table, archive_length) = generate_archive(4);
        assert!(
            (seek_table[0].compressed_offset.get() + seek_table[0].compressed_size.get())
                <= seek_table[1].compressed_offset.get()
        );
        seek_table[1].compressed_offset = (seek_table[1].compressed_offset.get() - 1).into();
        assert_invariant_violation(&seek_table, archive_length);
    }

    #[test]
    fn invariant_i4_nonzero_compressed_size() {
        let (_, mut seek_table, archive_length) = generate_archive(4);
        assert!(seek_table[0].compressed_size.get() > 0);
        seek_table[0].compressed_size = 0.into();
        assert_invariant_violation(&seek_table, archive_length);
    }

    #[test]
    fn invariant_i4_nonzero_decompressed_size() {
        let (_, mut seek_table, archive_length) = generate_archive(4);
        // Corrupt the last entry: shrinking an earlier one would also break the contiguity
        // invariant, which would mask the check exercised here.
        let last_entry = seek_table.last_mut().unwrap();
        assert!(last_entry.decompressed_size.get() > 0);
        last_entry.decompressed_size = 0.into();
        assert_invariant_violation(&seek_table, archive_length);
    }

    #[test]
    fn invariant_i5_within_archive() {
        let (_, mut seek_table, archive_length) = generate_archive(4);
        let last_entry = seek_table.last_mut().unwrap();
        assert!(
            (last_entry.compressed_offset.get() + last_entry.compressed_size.get())
                <= archive_length
        );
        last_entry.compressed_offset = (archive_length + 1).into();
        assert_invariant_violation(&seek_table, archive_length);
    }

    #[test]
    fn max_chunks() {
        assert_eq!(
            ChunkedArchive::chunk_size_for(
                ChunkedArchive::MAX_CHUNKS * ChunkedArchive::TARGET_CHUNK_SIZE,
                BLOCK_SIZE,
            ),
            ChunkedArchive::TARGET_CHUNK_SIZE
        );
        assert_eq!(
            ChunkedArchive::chunk_size_for(
                ChunkedArchive::MAX_CHUNKS * ChunkedArchive::TARGET_CHUNK_SIZE + 1,
                BLOCK_SIZE,
            ),
            ChunkedArchive::TARGET_CHUNK_SIZE + BLOCK_SIZE
        );
    }

    #[test]
    fn test_decompressor_empty_archive() {
        let mut compressed: Vec<u8> = vec![];
        ChunkedArchive::new(&[], BLOCK_SIZE)
            .expect("compress")
            .write(&mut compressed)
            .expect("write archive");
        let (seek_table, chunk_data) =
            decode_archive(&compressed, compressed.len()).unwrap().unwrap();
        assert!(seek_table.is_empty());
        let mut decompressor = ChunkedDecompressor::new(seek_table).unwrap();
        let mut chunk_callback = |_chunk: &[u8]| panic!("Archive doesn't have any chunks.");
        chunk_data
            .chunks(4)
            .for_each(|data| decompressor.update(data, &mut chunk_callback).unwrap());
    }

    #[test]
    fn test_decompressor() {
        const UNCOMPRESSED_LENGTH: usize = 3_000_000;
        let data: Vec<u8> = {
            let range = rand::distributions::Uniform::<u8>::new_inclusive(0, 255);
            rand::thread_rng().sample_iter(&range).take(UNCOMPRESSED_LENGTH).collect()
        };
        let mut compressed: Vec<u8> = vec![];
        ChunkedArchive::new(&data, BLOCK_SIZE)
            .expect("compress")
            .write(&mut compressed)
            .expect("write archive");
        let (seek_table, chunk_data) =
            decode_archive(&compressed, compressed.len()).unwrap().unwrap();
        let num_chunks = seek_table.len();
        assert!(num_chunks > 1);
        let mut decompressor = ChunkedDecompressor::new(seek_table).unwrap();
        let mut decoded_chunks: usize = 0;
        let mut decompressed_offset: usize = 0;
        let mut chunk_callback = |decompressed_chunk: &[u8]| {
            assert!(
                decompressed_chunk
                    == &data[decompressed_offset..decompressed_offset + decompressed_chunk.len()]
            );
            decompressed_offset += decompressed_chunk.len();
            decoded_chunks += 1;
        };
        // Feed the stream in tiny pieces so chunks straddle `update` calls.
        chunk_data
            .chunks(4)
            .for_each(|data| decompressor.update(data, &mut chunk_callback).unwrap());
        assert_eq!(decoded_chunks, num_chunks);
    }

    #[test]
    fn test_decompressor_corrupt_decompressed_size() {
        let data = vec![0; 3_000_000];
        let mut compressed: Vec<u8> = vec![];
        ChunkedArchive::new(&data, BLOCK_SIZE)
            .expect("compress")
            .write(&mut compressed)
            .expect("write archive");
        let (mut seek_table, chunk_data) =
            decode_archive(&compressed, compressed.len()).unwrap().unwrap();
        // Claim the first chunk decompresses to one byte more than it really does.
        seek_table[0].decompressed_range =
            seek_table[0].decompressed_range.start..seek_table[0].decompressed_range.end + 1;
        let mut decompressor = ChunkedDecompressor::new(seek_table).unwrap();
        assert!(matches!(
            decompressor.update(&chunk_data, &mut |_chunk| {}),
            Err(ChunkedArchiveError::IntegrityError)
        ));
    }

    #[test]
    fn test_decompressor_corrupt_compressed_size() {
        let data = vec![0; 3_000_000];
        let mut compressed: Vec<u8> = vec![];
        ChunkedArchive::new(&data, BLOCK_SIZE)
            .expect("compress")
            .write(&mut compressed)
            .expect("write archive");
        let (mut seek_table, chunk_data) =
            decode_archive(&compressed, compressed.len()).unwrap().unwrap();
        // Truncate the first chunk's compressed range by one byte.
        seek_table[0].compressed_range =
            seek_table[0].compressed_range.start..seek_table[0].compressed_range.end - 1;
        let first_chunk_info = seek_table[0].clone();
        let error_handler = move |chunk_index: usize, chunk_info: ChunkInfo, chunk_data: &[u8]| {
            assert_eq!(chunk_index, 0);
            assert_eq!(chunk_info, first_chunk_info);
            assert_eq!(chunk_data.len(), chunk_info.compressed_range.len());
        };
        let mut decompressor =
            ChunkedDecompressor::new_with_error_handler(seek_table, Box::new(error_handler))
                .unwrap();
        assert!(matches!(
            decompressor.update(&chunk_data, &mut |_chunk| {}),
            Err(ChunkedArchiveError::DecompressionError { index: 0, .. })
        ));
    }
}