From 56c916f585d02781ffe1886a7f7c00a42c344f33 Mon Sep 17 00:00:00 2001 From: Dennis Schwerdel Date: Wed, 7 Mar 2018 00:36:44 +0100 Subject: [PATCH] more stats --- src/bundledb/db.rs | 17 ++++++++++- src/bundledb/mod.rs | 15 ++++++++-- src/cli/args.rs | 17 +++++++++++ src/cli/mod.rs | 62 ++++++++++++++++++++++++++++++++++++++++- src/util/compression.rs | 4 +-- src/util/hash.rs | 2 +- 6 files changed, 109 insertions(+), 8 deletions(-) diff --git a/src/bundledb/db.rs b/src/bundledb/db.rs index c2ded25..4d76399 100644 --- a/src/bundledb/db.rs +++ b/src/bundledb/db.rs @@ -525,10 +525,25 @@ impl BundleDb { pub fn statistics(&self) -> BundleStatistics { let bundles = self.list_bundles(); + let bundles_meta: Vec<_> = bundles.iter().filter(|b| b.mode == BundleMode::Meta).collect(); + let bundles_data: Vec<_> = bundles.iter().filter(|b| b.mode == BundleMode::Data).collect(); + let mut hash_methods = HashMap::new(); + let mut compressions = HashMap::new(); + for bundle in &bundles { + *hash_methods.entry(bundle.hash_method).or_insert(0) += 1; + *compressions.entry(bundle.compression.clone()).or_insert(0) += 1; + } BundleStatistics { + hash_methods, compressions, raw_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.raw_size as f32)), encoded_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.encoded_size as f32)), - chunk_count: ValueStats::from_iter(|| bundles.iter().map(|b| b.chunk_count as f32)) + chunk_count: ValueStats::from_iter(|| bundles.iter().map(|b| b.chunk_count as f32)), + raw_size_meta: ValueStats::from_iter(|| bundles_meta.iter().map(|b| b.raw_size as f32)), + encoded_size_meta: ValueStats::from_iter(|| bundles_meta.iter().map(|b| b.encoded_size as f32)), + chunk_count_meta: ValueStats::from_iter(|| bundles_meta.iter().map(|b| b.chunk_count as f32)), + raw_size_data: ValueStats::from_iter(|| bundles_data.iter().map(|b| b.raw_size as f32)), + encoded_size_data: ValueStats::from_iter(|| bundles_data.iter().map(|b| b.encoded_size as f32)), + 
chunk_count_data: ValueStats::from_iter(|| bundles_data.iter().map(|b| b.chunk_count as f32)) } } } diff --git a/src/bundledb/mod.rs b/src/bundledb/mod.rs index 14f7887..acb4aa0 100644 --- a/src/bundledb/mod.rs +++ b/src/bundledb/mod.rs @@ -13,6 +13,7 @@ pub use self::uploader::BundleUploader; use prelude::*; use std::fmt; +use std::collections::HashMap; use serde; use rand; @@ -137,7 +138,15 @@ impl Default for BundleInfo { #[derive(Debug)] pub struct BundleStatistics { - raw_size: ValueStats, - encoded_size: ValueStats, - chunk_count: ValueStats + pub raw_size: ValueStats, + pub encoded_size: ValueStats, + pub chunk_count: ValueStats, + pub raw_size_meta: ValueStats, + pub encoded_size_meta: ValueStats, + pub chunk_count_meta: ValueStats, + pub raw_size_data: ValueStats, + pub encoded_size_data: ValueStats, + pub chunk_count_data: ValueStats, + pub hash_methods: HashMap<HashMethod, usize>, + pub compressions: HashMap<Option<Compression>, usize> } \ No newline at end of file diff --git a/src/cli/args.rs b/src/cli/args.rs index 647cd49..93379b8 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -75,6 +75,9 @@ pub enum Arguments { backup_name: Option<String>, inode: Option<String> }, + Stats { + repo_path: PathBuf + }, Copy { repo_path_src: PathBuf, backup_name_src: String, @@ -464,6 +467,11 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> { .arg(Arg::from_usage("<REPO>") .help(tr!("Path of the repository")) .validator(|val| validate_repo_path(val, true, Some(false), Some(false))))) + .subcommand(SubCommand::with_name("stats") + .about(tr!("Display statistics on a repository")) + .arg(Arg::from_usage("<REPO>") + .help(tr!("Path of the repository")) + .validator(|val| validate_repo_path(val, true, Some(false), Some(false))))) .subcommand(SubCommand::with_name("bundleinfo") .about(tr!("Display information on a bundle")) .arg(Arg::from_usage("") @@ -739,6 +747,15 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> { inode: inode.map(|v| v.to_string()) } } + ("stats", Some(args)) => { + let (repository, 
_backup, _inode) = parse_repo_path( + args.value_of("REPO").unwrap(), + true, + Some(false), + Some(false) + ).unwrap(); + Arguments::Stats { repo_path: repository } + } ("copy", Some(args)) => { let (repository_src, backup_src, _inode) = parse_repo_path(args.value_of("SRC").unwrap(), true, Some(true), Some(false)) diff --git a/src/cli/mod.rs b/src/cli/mod.rs index f1331b2..b2a2d9a 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -313,6 +313,61 @@ fn print_repoinfo(info: &RepositoryInfo) { ); } +fn print_repostats(stats: &RepositoryStatistics) { + tr_println!("Index\n====="); + let index_usage = stats.index.count as f32 / stats.index.capacity as f32; + tr_println!("Size: {}", to_file_size(stats.index.size as u64)); + tr_println!("Entries: {} / {}, {:.0}%", stats.index.count, stats.index.capacity, index_usage*100.0); + let disp = &stats.index.displacement; + tr_println!("Displacement:\n - average: {:.1}\n - stddev: {:.1}\n - over {:.1}: {:.0}, {:.1}%\n - maximum: {:.0}", + disp.avg, disp.stddev, disp.avg + 2.0 * disp.stddev, disp.count_xl, disp.count_xl as f32 / disp.count as f32 * 100.0, disp.max); + println!(""); + tr_println!("Bundles (all)\n============="); + let rsize = &stats.bundles.raw_size; + tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}", + to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64)); + let esize = &stats.bundles.encoded_size; + tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}", + to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64)); + let ccount = &stats.bundles.chunk_count; + tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max); + println!(""); + tr_println!("Meta bundles\n============"); + let rsize = &stats.bundles.raw_size_meta; + tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}", 
+ to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64)); + let esize = &stats.bundles.encoded_size_meta; + tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}", + to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64)); + let ccount = &stats.bundles.chunk_count_meta; + tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max); + println!(""); + tr_println!("Data bundles\n============"); + let rsize = &stats.bundles.raw_size_data; + tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}", + to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64)); + let esize = &stats.bundles.encoded_size_data; + tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}", + to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64)); + let ccount = &stats.bundles.chunk_count_data; + tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max); + println!(""); + tr_println!("Bundle methods\n=============="); + tr_println!("Compression:"); + for (compr, &count) in &stats.bundles.compressions { + let compr_name = if let &Some(ref compr) = compr { + compr.to_string() + } else { + tr!("none").to_string() + }; + tr_println!(" - {}: {}, {:.1}%", compr_name, count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0); + } + tr_println!("Hash:"); + for (hash, &count) in &stats.bundles.hash_methods { + tr_println!(" - {}: {}, {:.1}%", hash.name(), count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0); + } +} + fn print_bundle(bundle: &StoredBundle) { tr_println!("Bundle {}", bundle.info.id); tr_println!(" - Mode: {:?}", bundle.info.mode); @@ -859,10 +914,15 @@ pub fn 
run() -> Result<(), ErrorCode> { print_backup(&backup); } } else { - println!("{:?}", repo.statistics()); print_repoinfo(&repo.info()); } } + Arguments::Stats { + repo_path + } => { + let mut repo = try!(open_repository(&repo_path, false)); + print_repostats(&repo.statistics()); + } Arguments::Mount { repo_path, backup_name, diff --git a/src/util/compression.rs b/src/util/compression.rs index d2b605b..ead9732 100644 --- a/src/util/compression.rs +++ b/src/util/compression.rs @@ -34,7 +34,7 @@ quick_error!{ } } -#[derive(Clone, Debug, Copy, Eq, PartialEq)] +#[derive(Clone, Debug, Copy, Eq, PartialEq, Hash)] pub enum CompressionMethod { Deflate, // Standardized Brotli, // Good speed and ratio @@ -49,7 +49,7 @@ serde_impl!(CompressionMethod(u8) { }); -#[derive(Clone, Debug, Eq, PartialEq)] +#[derive(Clone, Debug, Eq, PartialEq, Hash)] pub struct Compression { method: CompressionMethod, level: u8 diff --git a/src/util/hash.rs b/src/util/hash.rs index 5c6fca0..ea95e6b 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -105,7 +105,7 @@ impl<'a> Deserialize<'a> for Hash { } -#[derive(Debug, Clone, Copy, Eq, PartialEq)] +#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] pub enum HashMethod { Blake2, Murmur3