storage_stress_test_utils/data.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
// Copyright 2020 The Fuchsia Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
use rand::rngs::SmallRng;
use rand::seq::SliceRandom;
use rand::Rng;
use std::cmp::min;
/// Controls the compressibility of data generated for a file.
///
/// This parameter is only meaningful for filesystems that support transparent compression.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Compressibility {
    /// Generate data containing repeated-byte runs so that it can be compressed.
    Compressible,
    /// Generate data made only of randomly sampled bytes, which compresses badly.
    Uncompressible,
}
/// Controls the uncompressed size, in bytes, of a generated file.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum UncompressedSize {
    /// The file is exactly this many bytes long.
    Exact(u64),
    /// The size is sampled from the half-open range `first..second`:
    /// the first bound is inclusive and the second is exclusive.
    InRange(u64, u64),
}
/// Builds `size_bytes` bytes of data that a compressor can shrink partially.
///
/// The output is a sequence of runs. Each run length is sampled uniformly from `10..1024`
/// (clamped to the number of bytes still needed), and each run is, with equal probability,
/// either a single repeated byte or independently sampled random bytes. The target is
/// roughly a 50% size reduction after compression.
fn generate_compressible_data_bytes(rng: &mut SmallRng, size_bytes: u64) -> Vec<u8> {
    // Bytes that a repeated-byte (compressible) run may be filled with.
    let fill_candidates: [u8; 6] = [0xde, 0xad, 0xbe, 0xef, 0x42, 0x0];
    let mut bytes: Vec<u8> = Vec::with_capacity(size_bytes as usize);
    let mut remaining = size_bytes;

    while remaining > 0 {
        // Sample a run length, then clamp it so the last run ends exactly at the target size.
        let sampled: u64 = rng.gen_range(10..1024);
        let run_length = min(sampled, remaining);

        // Coin flip: repeated-byte run or random run.
        let repeat_single_byte = rng.gen_bool(0.5);
        if repeat_single_byte {
            // One fill byte is chosen per run and repeated for the whole run.
            let fill = *fill_candidates.choose(rng).unwrap();
            bytes.resize(bytes.len() + run_length as usize, fill);
        } else {
            // Every byte of the run is sampled individually.
            bytes.extend((0..run_length).map(|_| rng.gen::<u8>()));
        }

        remaining -= run_length;
    }

    // The generated data must have exactly the requested length.
    assert!(bytes.len() == size_bytes as usize);
    bytes
}
/// Builds `size_bytes` bytes in which every byte is sampled independently from `rng`.
///
/// Data produced this way is expected to compress badly.
fn generate_uncompressible_data_bytes(rng: &mut SmallRng, size_bytes: u64) -> Vec<u8> {
    let mut bytes: Vec<u8> = Vec::with_capacity(size_bytes as usize);
    // One RNG draw per output byte.
    bytes.extend((0..size_bytes).map(|_| rng.gen::<u8>()));
    bytes
}
/// A compact in-memory representation of data that can be stored in a file.
///
/// Bundles the random number generator and the size/compressibility settings used to
/// generate file contents, together with a counter used to generate filenames.
pub struct FileFactory {
/// Source of randomness used when sampling a file size and when generating bytes.
pub rng: SmallRng,
/// How the uncompressed size of each generated file is chosen.
pub uncompressed_size: UncompressedSize,
/// Whether generated data should be compressible or not.
pub compressibility: Compressibility,
/// Number of filenames generated so far; used as the numeric suffix of the next filename.
pub file_name_count: u64,
}
impl FileFactory {
#[must_use]
pub fn new(
rng: SmallRng,
uncompressed_size: UncompressedSize,
compressibility: Compressibility,
) -> Self {
Self { rng, uncompressed_size, compressibility, file_name_count: 0 }
}
/// Generate a unique filename that can be used for a file created by this factory.
pub fn generate_filename(&mut self) -> String {
self.file_name_count += 1;
format!("file_{}", self.file_name_count)
}
/// Generate all the bytes for this file in memory.
/// For a given FileData, this function is deterministic.
pub fn generate_bytes(&mut self) -> Vec<u8> {
let size_bytes = match self.uncompressed_size {
UncompressedSize::Exact(size_bytes) => size_bytes,
UncompressedSize::InRange(min, max) => self.rng.gen_range(min..max),
};
match self.compressibility {
Compressibility::Compressible => {
generate_compressible_data_bytes(&mut self.rng, size_bytes)
}
Compressibility::Uncompressible => {
generate_uncompressible_data_bytes(&mut self.rng, size_bytes)
}
}
}
}