2017-03-21 11:28:11 +01:00
use ::prelude::*;
2017-03-17 11:03:07 +01:00
use std::io::{self, Cursor, Read, Write};
2017-03-10 12:43:32 +01:00
use std::fs::File;
2017-03-17 11:03:07 +01:00
use std::collections::HashSet;
use chrono::Duration;
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
/// Sink that tracks chunk boundaries by counting the bytes written to it.
///
/// `write` adds to `written`; `end_chunk` turns the running count into a
/// `(pos, written)` entry in `chunks`.
// NOTE(review): the closing brace of this struct is not visible in this view.
struct ChunkSink {
// Finished chunks as `(start offset, length)` pairs, in the order they were ended.
chunks: Vec<(usize, usize)>,
// Sum of the lengths of all chunks ended so far, i.e. the start of the current chunk.
pos: usize,
// Bytes counted since the last `end_chunk` call.
written: usize
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
impl ChunkSink {
/// Closes the current chunk.
///
/// Records `(pos, written)` in `chunks`, advances `pos` by `written`, and
/// resets `written` to zero so the next chunk starts counting afresh.
// NOTE(review): the closing braces of this method and impl are not visible in this view.
fn end_chunk(&mut self) {
self.chunks.push((self.pos, self.written));
self.pos += self.written;
self.written = 0;
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
// `Write` implementation so that a `ChunkSink` can be passed wherever a writer is expected.
impl Write for ChunkSink {
/// Adds `data.len()` to the byte count of the current chunk.
// NOTE(review): the remainder of this method (its return expression) is not
// visible in this view — presumably it reports all bytes as written; confirm.
fn write(&mut self, data: &[u8]) -> Result<usize, io::Error> {
self.written += data.len();
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
/// Flush hook required by `Write`.
// NOTE(review): the body of this method is not visible in this view.
fn flush(&mut self) -> Result<(), io::Error> {
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
/// Runs `chunker` over `data`, writing into `sink`.
///
/// `data` is wrapped in an in-memory `Cursor` and `chunker.chunk` is called
/// repeatedly for as long as it returns `ChunkerStatus::Continue`.
///
/// # Panics
/// Panics (via `unwrap`) if `chunker.chunk` returns an error.
// NOTE(review): the loop body is not visible in this view — presumably it ends
// the current chunk on `sink`; confirm against the full file.
fn chunk(data: &[u8], mut chunker: Chunker, sink: &mut ChunkSink) {
let mut cursor = Cursor::new(data);
while chunker.chunk(&mut cursor, sink).unwrap() == ChunkerStatus::Continue {
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
2017-03-10 12:43:32 +01:00
2017-03-18 17:22:11 +01:00
/// Runs an end-to-end benchmark of the storage pipeline on the file at `path`
/// and prints timings and size statistics to stdout.
///
/// Visible steps, in order: read the file into memory, chunk it, hash the
/// chunks, drop duplicate chunks, pack the remaining chunks into bundles
/// (compressed when `compression` is `Some`), optionally encrypt and decrypt
/// the bundles, decompress them again, and print totals.
///
/// * `path` - input file to read.
/// * `bundle_size` - a new bundle is started once the current one reaches this length.
/// * `chunker` - chunker type; provides `name()`, `avg_size()` and `create()`.
/// * `compression` - optional compression method used for the bundles.
/// * `encrypt` - when true, bundles are encrypted and then decrypted again.
/// * `hash` - hash method applied to every chunk.
///
/// # Panics
/// Every fallible step visible here is `unwrap`ped, and the chunk lengths are
/// asserted to sum to the input size.
// NOTE(review): several lines of this function (closing braces and some loop
// bodies) are not visible in this view; comments below only describe what is shown.
pub fn run(path: &str, bundle_size: usize, chunker: ChunkerType, compression: Option<Compression>, encrypt: bool,hash: HashMethod) {
// Accumulated seconds: chunk/hash/compress/encrypt go into the write total,
// decrypt/decompress into the read total.
let mut total_write_time = 0.0;
let mut total_read_time = 0.0;
2017-03-17 11:03:07 +01:00
println!("Reading input file ...");
2017-03-10 12:43:32 +01:00
let mut file = File::open(path).unwrap();
2017-03-17 11:03:07 +01:00
// `total_size` stays fixed for the final report; `size` is reduced as
// duplicates are dropped and replaced by the compressed size later on.
let total_size = file.metadata().unwrap().len();
let mut size = total_size;
let mut data = Vec::with_capacity(size as usize);
// Each step is timed with `Duration::span`; milliseconds are converted to seconds.
let read_time = Duration::span(|| {
file.read_to_end(&mut data).unwrap();
}).num_milliseconds() as f32 / 1_000.0;
println!("- {}, {}", to_duration(read_time), to_speed(size, read_time));
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
println!("Chunking data with {}, avg chunk size {} ...", chunker.name(), to_file_size(chunker.avg_size() as u64));
// Capacity is pre-sized to twice the chunk count expected from the average chunk size.
let mut chunk_sink = ChunkSink {
chunks: Vec::with_capacity(2*size as usize/chunker.avg_size()),
written: 0,
pos: 0
// The `ChunkerType` binding is shadowed by the chunker instance it creates.
let chunker = chunker.create();
let chunk_time = Duration::span(|| {
chunk(&data, chunker, &mut chunk_sink)
}).num_milliseconds() as f32 / 1_000.0;
2017-03-18 17:22:11 +01:00
total_write_time += chunk_time;
2017-03-17 11:03:07 +01:00
println!("- {}, {}", to_duration(chunk_time), to_speed(size, chunk_time));
let mut chunks = chunk_sink.chunks;
// Sanity check: the recorded chunk lengths must cover the whole input.
assert_eq!(chunks.iter().map(|c| c.1).sum::<usize>(), size as usize);
let chunk_size_avg = size as f32 / chunks.len() as f32;
// Standard deviation of the chunk lengths with an `n - 1` divisor.
// NOTE(review): the divisor is zero when there is exactly one chunk.
let chunk_size_stddev = (chunks.iter().map(|c| (c.1 as f32 - chunk_size_avg).powi(2)).sum::<f32>() / (chunks.len() as f32 - 1.0)).sqrt();
2017-03-18 16:37:45 +01:00
println!("- {} chunks, avg size: {} ±{}", chunks.len(), to_file_size(chunk_size_avg as u64), to_file_size(chunk_size_stddev as u64));
2017-03-17 11:03:07 +01:00
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
println!("Hashing chunks with {} ...", hash.name());
let mut hashes = Vec::with_capacity(chunks.len());
let hash_time = Duration::span(|| {
// NOTE(review): the loop body is not visible in this view — presumably it
// hashes `data[pos..pos+len]` and pushes the result into `hashes`; confirm.
for &(pos, len) in &chunks {
}).num_milliseconds() as f32 / 1_000.0;
2017-03-18 17:22:11 +01:00
total_write_time += hash_time;
2017-03-17 11:03:07 +01:00
println!("- {}, {}", to_duration(hash_time), to_speed(size, hash_time));
// Duplicate detection: `insert` returns false for a hash that was already seen.
let mut seen_hashes = HashSet::with_capacity(hashes.len());
let mut dups = Vec::new();
for (i, hash) in hashes.into_iter().enumerate() {
// NOTE(review): the branch body is not visible — presumably it records `i` in `dups`; confirm.
if !seen_hashes.insert(hash) {
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
// Remove the duplicate chunks and add up how many bytes they accounted for.
// NOTE(review): `Vec::remove` shifts every later element down by one, so this
// loop is only correct if `dups` is visited from the highest index to the
// lowest. No reordering of `dups` is visible here — confirm against the full file.
let mut dup_size = 0;
for i in &dups {
let (_, len) = chunks.remove(*i);
dup_size += len;
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
println!("- {} duplicate chunks, {}, {:.1}% saved", dups.len(), to_file_size(dup_size as u64), dup_size as f32 / size as f32*100.0);
size -= dup_size as u64;
2017-03-18 17:22:11 +01:00
let mut bundles = Vec::new();
// `compression` is cloned here because it is matched again by value for decompression below.
if let Some(compression) = compression.clone() {
2017-03-17 11:03:07 +01:00
println!("Compressing chunks with {} ...", compression.to_string());
let compress_time = Duration::span(|| {
// Bundle buffers get headroom of two average chunks beyond `bundle_size`,
// since the size check happens only after a chunk has been appended.
let mut bundle = Vec::with_capacity(bundle_size + 2*chunk_size_avg as usize);
let mut c = compression.compress_stream().unwrap();
for &(pos, len) in &chunks {
c.process(&data[pos..pos+len], &mut bundle).unwrap();
// Once the bundle is full, finish the stream and start a new bundle and stream.
// NOTE(review): the line that stores the finished bundle in `bundles` is
// not visible in this view.
if bundle.len() >= bundle_size {
c.finish(&mut bundle).unwrap();
bundle = Vec::with_capacity(bundle_size + 2*chunk_size_avg as usize);
c = compression.compress_stream().unwrap();
// Finish the stream of the last, possibly partial, bundle.
c.finish(&mut bundle).unwrap();
}).num_milliseconds() as f32 / 1_000.0;
2017-03-18 17:22:11 +01:00
total_write_time += compress_time;
2017-03-17 11:03:07 +01:00
println!("- {}, {}", to_duration(compress_time), to_speed(size, compress_time));
let compressed_size = bundles.iter().map(|b| b.len()).sum::<usize>();
println!("- {} bundles, {}, {:.1}% saved", bundles.len(), to_file_size(compressed_size as u64), (size as f32 - compressed_size as f32)/size as f32*100.0);
// From here on `size` is the compressed size.
size = compressed_size as u64;
2017-03-18 17:22:11 +01:00
} else {
// Without compression, bundles are still split at `bundle_size`.
// NOTE(review): the lines that append chunk data and store finished bundles
// are not visible in this view.
let mut bundle = Vec::with_capacity(bundle_size + 2*chunk_size_avg as usize);
for &(pos, len) in &chunks {
if bundle.len() >= bundle_size {
bundle = Vec::with_capacity(bundle_size + 2*chunk_size_avg as usize);
2017-03-17 11:03:07 +01:00
2017-03-18 17:22:11 +01:00
if encrypt {
// Generate a fresh key pair and register it with a dummy `Crypto` instance.
let (public, secret) = gen_keypair();
2017-03-20 18:11:03 +01:00
let mut crypto = Crypto::dummy();
2017-03-18 17:22:11 +01:00
crypto.add_secret_key(public, secret);
2017-03-20 18:11:03 +01:00
// Encryption descriptor: the method paired with the public key bytes.
let encryption = (EncryptionMethod::Sodium, public[..].to_vec().into());
2017-03-18 17:22:11 +01:00
println!("Encrypting bundles...");
let mut encrypted_bundles = Vec::with_capacity(bundles.len());
let encrypt_time = Duration::span(|| {
// Consumes `bundles`; it is rebuilt from the decrypted output below.
for bundle in bundles {
encrypted_bundles.push(crypto.encrypt(&encryption, &bundle).unwrap());
}).num_milliseconds() as f32 / 1_000.0;
println!("- {}, {}", to_duration(encrypt_time), to_speed(size, encrypt_time));
total_write_time += encrypt_time;
println!("Decrypting bundles...");
bundles = Vec::with_capacity(encrypted_bundles.len());
let decrypt_time = Duration::span(|| {
for bundle in encrypted_bundles {
bundles.push(crypto.decrypt(&encryption, &bundle).unwrap());
}).num_milliseconds() as f32 / 1_000.0;
println!("- {}, {}", to_duration(decrypt_time), to_speed(size, decrypt_time));
total_read_time += decrypt_time;
if let Some(compression) = compression {
2017-03-17 11:03:07 +01:00
println!("Decompressing bundles with {} ...", compression.to_string());
// Decompressed output goes into a throwaway `ChunkSink`; only the time is measured.
let mut dummy = ChunkSink { chunks: vec![], written: 0, pos: 0 };
let decompress_time = Duration::span(|| {
// Each bundle is decompressed with its own stream.
for bundle in &bundles {
let mut c = compression.decompress_stream().unwrap();
c.process(bundle, &mut dummy).unwrap();
c.finish(&mut dummy).unwrap();
}).num_milliseconds() as f32 / 1_000.0;
println!("- {}, {}", to_duration(decompress_time), to_speed(size, decompress_time));
2017-03-18 17:22:11 +01:00
total_read_time += decompress_time;
2017-03-10 12:43:32 +01:00
2017-03-17 11:03:07 +01:00
2017-03-18 17:22:11 +01:00
// Final report: stored size against the original size, and speeds computed
// from the original size and the accumulated write and read times.
println!("Total storage size: {} / {}, ratio: {:.1}%", to_file_size(size as u64), to_file_size(total_size as u64), size as f32/total_size as f32*100.0);
println!("Total processing speed: {}", to_speed(total_size, total_write_time));
println!("Total read speed: {}", to_speed(total_size, total_read_time));
2017-03-10 12:43:32 +01:00