zvault/src/cli/algotest.rs

261 lines
8.0 KiB
Rust
Raw Normal View History

2017-07-21 09:21:59 +00:00
use prelude::*;
2017-03-21 10:28:11 +00:00
2017-03-17 10:03:07 +00:00
use std::io::{self, Cursor, Read, Write};
2017-03-10 11:43:32 +00:00
use std::fs::File;
2017-03-17 10:03:07 +00:00
use std::collections::HashSet;
use chrono::Duration;
2017-03-10 11:43:32 +00:00
2017-03-17 10:03:07 +00:00
/// Collects chunk boundaries produced by a chunker without storing the data.
///
/// Each finished chunk is recorded as a `(position, length)` pair; the bytes
/// themselves are discarded, since only the boundaries matter here.
struct ChunkSink {
    chunks: Vec<(usize, usize)>,
    pos: usize,
    written: usize
}

impl ChunkSink {
    /// Records the bytes written since the last call as one `(pos, len)`
    /// entry and resets the byte counter for the next chunk.
    fn end_chunk(&mut self) {
        let len = self.written;
        self.chunks.push((self.pos, len));
        self.pos += len;
        self.written = 0;
    }
}

impl Write for ChunkSink {
    /// Accepts all bytes, counting them instead of storing them.
    fn write(&mut self, data: &[u8]) -> Result<usize, io::Error> {
        let n = data.len();
        self.written += n;
        Ok(n)
    }

    /// Nothing is buffered, so flushing is a no-op.
    fn flush(&mut self) -> Result<(), io::Error> {
        Ok(())
    }
}
fn chunk(data: &[u8], mut chunker: Box<Chunker>, sink: &mut ChunkSink) {
2017-03-17 10:03:07 +00:00
let mut cursor = Cursor::new(data);
while chunker.chunk(&mut cursor, sink).unwrap() == ChunkerStatus::Continue {
sink.end_chunk();
2017-03-10 11:43:32 +00:00
}
2017-03-17 10:03:07 +00:00
sink.end_chunk();
2017-03-10 11:43:32 +00:00
}
#[allow(dead_code)]
2017-07-21 09:21:59 +00:00
pub fn run(
path: &str,
bundle_size: usize,
chunker: ChunkerType,
compression: Option<Compression>,
encrypt: bool,
hash: HashMethod,
) {
2017-03-18 16:22:11 +00:00
let mut total_write_time = 0.0;
let mut total_read_time = 0.0;
2017-03-17 10:03:07 +00:00
println!("Reading input file ...");
2017-03-10 11:43:32 +00:00
let mut file = File::open(path).unwrap();
2017-03-17 10:03:07 +00:00
let total_size = file.metadata().unwrap().len();
let mut size = total_size;
let mut data = Vec::with_capacity(size as usize);
2017-07-21 09:21:59 +00:00
let read_time = Duration::span(|| { file.read_to_end(&mut data).unwrap(); })
.num_milliseconds() as f32 / 1_000.0;
println!(
"- {}, {}",
to_duration(read_time),
to_speed(size, read_time)
);
2017-03-17 10:03:07 +00:00
2017-03-10 11:43:32 +00:00
println!();
2017-03-17 10:03:07 +00:00
2017-07-21 09:21:59 +00:00
println!(
"Chunking data with {}, avg chunk size {} ...",
chunker.name(),
to_file_size(chunker.avg_size() as u64)
);
2017-03-17 10:03:07 +00:00
let mut chunk_sink = ChunkSink {
2017-07-21 09:21:59 +00:00
chunks: Vec::with_capacity(2 * size as usize / chunker.avg_size()),
2017-03-17 10:03:07 +00:00
written: 0,
pos: 0
};
let chunker = chunker.create();
2017-07-21 09:21:59 +00:00
let chunk_time = Duration::span(|| chunk(&data, chunker, &mut chunk_sink))
.num_milliseconds() as f32 / 1_000.0;
2017-03-18 16:22:11 +00:00
total_write_time += chunk_time;
2017-07-21 09:21:59 +00:00
println!(
"- {}, {}",
to_duration(chunk_time),
to_speed(size, chunk_time)
);
2017-03-17 10:03:07 +00:00
let mut chunks = chunk_sink.chunks;
assert_eq!(chunks.iter().map(|c| c.1).sum::<usize>(), size as usize);
let chunk_size_avg = size as f32 / chunks.len() as f32;
2017-07-21 09:21:59 +00:00
let chunk_size_stddev = (chunks
.iter()
.map(|c| (c.1 as f32 - chunk_size_avg).powi(2))
.sum::<f32>() /
(chunks.len() as f32 - 1.0))
.sqrt();
println!(
"- {} chunks, avg size: {} ±{}",
chunks.len(),
to_file_size(chunk_size_avg as u64),
to_file_size(chunk_size_stddev as u64)
);
2017-03-17 10:03:07 +00:00
2017-03-10 11:43:32 +00:00
println!();
2017-03-17 10:03:07 +00:00
println!("Hashing chunks with {} ...", hash.name());
let mut hashes = Vec::with_capacity(chunks.len());
2017-07-21 09:21:59 +00:00
let hash_time = Duration::span(|| for &(pos, len) in &chunks {
hashes.push(hash.hash(&data[pos..pos + len]))
2017-03-17 10:03:07 +00:00
}).num_milliseconds() as f32 / 1_000.0;
2017-03-18 16:22:11 +00:00
total_write_time += hash_time;
2017-07-21 09:21:59 +00:00
println!(
"- {}, {}",
to_duration(hash_time),
to_speed(size, hash_time)
);
2017-03-17 10:03:07 +00:00
let mut seen_hashes = HashSet::with_capacity(hashes.len());
let mut dups = Vec::new();
for (i, hash) in hashes.into_iter().enumerate() {
if !seen_hashes.insert(hash) {
dups.push(i);
}
2017-03-10 11:43:32 +00:00
}
2017-03-17 10:03:07 +00:00
let mut dup_size = 0;
dups.reverse();
for i in &dups {
let (_, len) = chunks.remove(*i);
dup_size += len;
2017-03-10 11:43:32 +00:00
}
2017-07-21 09:21:59 +00:00
println!(
"- {} duplicate chunks, {}, {:.1}% saved",
dups.len(),
to_file_size(dup_size as u64),
dup_size as f32 / size as f32 * 100.0
);
2017-03-17 10:03:07 +00:00
size -= dup_size as u64;
2017-03-18 16:22:11 +00:00
let mut bundles = Vec::new();
if let Some(compression) = compression.clone() {
2017-03-17 10:03:07 +00:00
println!();
println!("Compressing chunks with {} ...", compression.to_string());
let compress_time = Duration::span(|| {
2017-07-21 09:21:59 +00:00
let mut bundle = Vec::with_capacity(bundle_size + 2 * chunk_size_avg as usize);
2017-03-17 10:03:07 +00:00
let mut c = compression.compress_stream().unwrap();
for &(pos, len) in &chunks {
2017-07-21 09:21:59 +00:00
c.process(&data[pos..pos + len], &mut bundle).unwrap();
2017-03-17 10:03:07 +00:00
if bundle.len() >= bundle_size {
c.finish(&mut bundle).unwrap();
bundles.push(bundle);
2017-07-21 09:21:59 +00:00
bundle = Vec::with_capacity(bundle_size + 2 * chunk_size_avg as usize);
2017-03-17 10:03:07 +00:00
c = compression.compress_stream().unwrap();
}
}
c.finish(&mut bundle).unwrap();
bundles.push(bundle);
}).num_milliseconds() as f32 / 1_000.0;
2017-03-18 16:22:11 +00:00
total_write_time += compress_time;
2017-07-21 09:21:59 +00:00
println!(
"- {}, {}",
to_duration(compress_time),
to_speed(size, compress_time)
);
2017-03-17 10:03:07 +00:00
let compressed_size = bundles.iter().map(|b| b.len()).sum::<usize>();
2017-07-21 09:21:59 +00:00
println!(
"- {} bundles, {}, {:.1}% saved",
bundles.len(),
to_file_size(compressed_size as u64),
(size as f32 - compressed_size as f32) / size as f32 * 100.0
);
2017-03-17 10:03:07 +00:00
size = compressed_size as u64;
2017-03-18 16:22:11 +00:00
} else {
2017-07-21 09:21:59 +00:00
let mut bundle = Vec::with_capacity(bundle_size + 2 * chunk_size_avg as usize);
2017-03-18 16:22:11 +00:00
for &(pos, len) in &chunks {
2017-07-21 09:21:59 +00:00
bundle.extend_from_slice(&data[pos..pos + len]);
2017-03-18 16:22:11 +00:00
if bundle.len() >= bundle_size {
bundles.push(bundle);
2017-07-21 09:21:59 +00:00
bundle = Vec::with_capacity(bundle_size + 2 * chunk_size_avg as usize);
2017-03-18 16:22:11 +00:00
}
}
bundles.push(bundle);
}
2017-03-17 10:03:07 +00:00
2017-03-18 16:22:11 +00:00
if encrypt {
println!();
2017-04-12 06:30:42 +00:00
let (public, secret) = Crypto::gen_keypair();
2017-03-20 17:11:03 +00:00
let mut crypto = Crypto::dummy();
2017-03-18 16:22:11 +00:00
crypto.add_secret_key(public, secret);
2017-03-20 17:11:03 +00:00
let encryption = (EncryptionMethod::Sodium, public[..].to_vec().into());
2017-03-18 16:22:11 +00:00
println!("Encrypting bundles...");
let mut encrypted_bundles = Vec::with_capacity(bundles.len());
2017-07-21 09:21:59 +00:00
let encrypt_time = Duration::span(|| for bundle in bundles {
encrypted_bundles.push(crypto.encrypt(&encryption, &bundle).unwrap());
2017-03-18 16:22:11 +00:00
}).num_milliseconds() as f32 / 1_000.0;
2017-07-21 09:21:59 +00:00
println!(
"- {}, {}",
to_duration(encrypt_time),
to_speed(size, encrypt_time)
);
2017-03-18 16:22:11 +00:00
total_write_time += encrypt_time;
println!();
println!("Decrypting bundles...");
bundles = Vec::with_capacity(encrypted_bundles.len());
2017-07-21 09:21:59 +00:00
let decrypt_time = Duration::span(|| for bundle in encrypted_bundles {
bundles.push(crypto.decrypt(&encryption, &bundle).unwrap());
2017-03-18 16:22:11 +00:00
}).num_milliseconds() as f32 / 1_000.0;
2017-07-21 09:21:59 +00:00
println!(
"- {}, {}",
to_duration(decrypt_time),
to_speed(size, decrypt_time)
);
2017-03-18 16:22:11 +00:00
total_read_time += decrypt_time;
}
if let Some(compression) = compression {
2017-03-17 10:03:07 +00:00
println!();
println!("Decompressing bundles with {} ...", compression.to_string());
2017-07-21 09:21:59 +00:00
let mut dummy = ChunkSink {
chunks: vec![],
written: 0,
pos: 0
};
let decompress_time = Duration::span(|| for bundle in &bundles {
let mut c = compression.decompress_stream().unwrap();
c.process(bundle, &mut dummy).unwrap();
c.finish(&mut dummy).unwrap();
2017-03-17 10:03:07 +00:00
}).num_milliseconds() as f32 / 1_000.0;
2017-07-21 09:21:59 +00:00
println!(
"- {}, {}",
to_duration(decompress_time),
to_speed(total_size - dup_size as u64, decompress_time)
);
2017-03-18 16:22:11 +00:00
total_read_time += decompress_time;
2017-03-10 11:43:32 +00:00
}
2017-03-17 10:03:07 +00:00
println!();
2017-07-21 09:21:59 +00:00
println!(
"Total storage size: {} / {}, ratio: {:.1}%",
to_file_size(size as u64),
to_file_size(total_size as u64),
size as f32 / total_size as f32 * 100.0
);
println!(
"Total processing speed: {}",
to_speed(total_size, total_write_time)
);
println!(
"Total read speed: {}",
to_speed(total_size, total_read_time)
);
2017-03-10 11:43:32 +00:00
}