mirror of https://github.com/dswd/zvault
stats & dups
This commit is contained in:
parent
56c916f585
commit
2f3c97a043
|
@ -21,7 +21,9 @@ rust:
|
||||||
- nightly
|
- nightly
|
||||||
matrix:
|
matrix:
|
||||||
allow_failures:
|
allow_failures:
|
||||||
- rust: nightly
|
- rust:
|
||||||
|
- beta
|
||||||
|
- stable
|
||||||
script:
|
script:
|
||||||
- cargo clean
|
- cargo clean
|
||||||
- cargo build
|
- cargo build
|
||||||
|
|
|
@ -5,9 +5,15 @@ This project follows [semantic versioning](http://semver.org).
|
||||||
|
|
||||||
### UNRELEASED
|
### UNRELEASED
|
||||||
* [added] Translation infrastructure (**requires nightly rust**)
|
* [added] Translation infrastructure (**requires nightly rust**)
|
||||||
|
* [added] Checking hashes of chunks in check --bundle-data
|
||||||
|
* [added] Debian package for libsodium23
|
||||||
* [modified] Updated dependencies
|
* [modified] Updated dependencies
|
||||||
* [modified] Updated copyright date
|
* [modified] Updated copyright date
|
||||||
|
* [modified] Moved all code into one crate for easier translation
|
||||||
|
* [modified] Compression ratio is now displayed in a clearer format
|
||||||
* [fixed] Also including the first min_size bytes in hash
|
* [fixed] Also including the first min_size bytes in hash
|
||||||
|
* [fixed] Fixed some texts in manpages
|
||||||
|
* [fixed] Calling strip on final binaries
|
||||||
|
|
||||||
|
|
||||||
### v0.4.0 (2017-07-21)
|
### v0.4.0 (2017-07-21)
|
||||||
|
|
|
@ -24,7 +24,7 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "atty"
|
name = "atty"
|
||||||
version = "0.2.6"
|
version = "0.2.8"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"libc 0.2.39 (registry+https://github.com/rust-lang/crates.io-index)",
|
"libc 0.2.39 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
@ -76,7 +76,7 @@ version = "2.31.1"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"atty 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
"atty 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
"bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
|
@ -611,7 +611,7 @@ dependencies = [
|
||||||
"checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4"
|
"checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4"
|
||||||
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||||
"checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef"
|
"checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef"
|
||||||
"checksum atty 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8352656fd42c30a0c3c89d26dea01e3b77c0ab2af18230835c15e2e13cd51859"
|
"checksum atty 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "af80143d6f7608d746df1520709e5d141c96f240b0e62b0aa41bdfb53374d9d4"
|
||||||
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
|
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
|
||||||
"checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf"
|
"checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf"
|
||||||
"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
|
"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
|
||||||
|
|
|
@ -529,12 +529,14 @@ impl BundleDb {
|
||||||
let bundles_data: Vec<_> = bundles.iter().filter(|b| b.mode == BundleMode::Data).collect();
|
let bundles_data: Vec<_> = bundles.iter().filter(|b| b.mode == BundleMode::Data).collect();
|
||||||
let mut hash_methods = HashMap::new();
|
let mut hash_methods = HashMap::new();
|
||||||
let mut compressions = HashMap::new();
|
let mut compressions = HashMap::new();
|
||||||
|
let mut encryptions = HashMap::new();
|
||||||
for bundle in &bundles {
|
for bundle in &bundles {
|
||||||
*hash_methods.entry(bundle.hash_method).or_insert(0) += 1;
|
*hash_methods.entry(bundle.hash_method).or_insert(0) += 1;
|
||||||
*compressions.entry(bundle.compression.clone()).or_insert(0) += 1;
|
*compressions.entry(bundle.compression.clone()).or_insert(0) += 1;
|
||||||
|
*encryptions.entry(bundle.encryption.clone()).or_insert(0) += 1;
|
||||||
}
|
}
|
||||||
BundleStatistics {
|
BundleStatistics {
|
||||||
hash_methods, compressions,
|
hash_methods, compressions, encryptions,
|
||||||
raw_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.raw_size as f32)),
|
raw_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.raw_size as f32)),
|
||||||
encoded_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.encoded_size as f32)),
|
encoded_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.encoded_size as f32)),
|
||||||
chunk_count: ValueStats::from_iter(|| bundles.iter().map(|b| b.chunk_count as f32)),
|
chunk_count: ValueStats::from_iter(|| bundles.iter().map(|b| b.chunk_count as f32)),
|
||||||
|
|
|
@ -148,5 +148,6 @@ pub struct BundleStatistics {
|
||||||
pub encoded_size_data: ValueStats,
|
pub encoded_size_data: ValueStats,
|
||||||
pub chunk_count_data: ValueStats,
|
pub chunk_count_data: ValueStats,
|
||||||
pub hash_methods: HashMap<HashMethod, usize>,
|
pub hash_methods: HashMap<HashMethod, usize>,
|
||||||
pub compressions: HashMap<Option<Compression>, usize>
|
pub compressions: HashMap<Option<Compression>, usize>,
|
||||||
|
pub encryptions: HashMap<Option<Encryption>, usize>
|
||||||
}
|
}
|
|
@ -41,6 +41,12 @@ pub enum Arguments {
|
||||||
inode: Option<String>,
|
inode: Option<String>,
|
||||||
force: bool
|
force: bool
|
||||||
},
|
},
|
||||||
|
Duplicates {
|
||||||
|
repo_path: PathBuf,
|
||||||
|
backup_name: String,
|
||||||
|
inode: Option<String>,
|
||||||
|
min_size: u64
|
||||||
|
},
|
||||||
Prune {
|
Prune {
|
||||||
repo_path: PathBuf,
|
repo_path: PathBuf,
|
||||||
prefix: String,
|
prefix: String,
|
||||||
|
@ -75,7 +81,7 @@ pub enum Arguments {
|
||||||
backup_name: Option<String>,
|
backup_name: Option<String>,
|
||||||
inode: Option<String>
|
inode: Option<String>
|
||||||
},
|
},
|
||||||
Stats {
|
Statistics {
|
||||||
repo_path: PathBuf
|
repo_path: PathBuf
|
||||||
},
|
},
|
||||||
Copy {
|
Copy {
|
||||||
|
@ -206,6 +212,31 @@ fn validate_repo_path(
|
||||||
parse_repo_path(&repo_path, existing, backup_restr, path_restr).map(|_| ())
|
parse_repo_path(&repo_path, existing, backup_restr, path_restr).map(|_| ())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fn parse_filesize(num: &str) -> Result<u64, String> {
|
||||||
|
let (num, suffix) = if num.len() > 0 {
|
||||||
|
num.split_at(num.len() - 1)
|
||||||
|
} else {
|
||||||
|
(num, "b")
|
||||||
|
};
|
||||||
|
let factor = match suffix {
|
||||||
|
"b" | "B" => 1,
|
||||||
|
"k" | "K" => 1024,
|
||||||
|
"m" | "M" => 1024*1024,
|
||||||
|
"g" | "G" => 1024*1024*1024,
|
||||||
|
"t" | "T" => 1024*1024*1024*1024,
|
||||||
|
_ => return Err(tr!("Unknown suffix").to_string())
|
||||||
|
};
|
||||||
|
let num = try!(parse_num(num));
|
||||||
|
Ok(num * factor)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[allow(unknown_lints, needless_pass_by_value)]
|
||||||
|
fn validate_filesize(val: String) -> Result<(), String> {
|
||||||
|
parse_filesize(&val).map(|_| ())
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
fn parse_num(num: &str) -> Result<u64, String> {
|
fn parse_num(num: &str) -> Result<u64, String> {
|
||||||
if let Ok(num) = num.parse::<u64>() {
|
if let Ok(num) = num.parse::<u64>() {
|
||||||
Ok(num)
|
Ok(num)
|
||||||
|
@ -467,7 +498,8 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
|
||||||
.arg(Arg::from_usage("<REPO>")
|
.arg(Arg::from_usage("<REPO>")
|
||||||
.help(tr!("Path of the repository"))
|
.help(tr!("Path of the repository"))
|
||||||
.validator(|val| validate_repo_path(val, true, Some(false), Some(false)))))
|
.validator(|val| validate_repo_path(val, true, Some(false), Some(false)))))
|
||||||
.subcommand(SubCommand::with_name("stats")
|
.subcommand(SubCommand::with_name("statistics")
|
||||||
|
.alias("stats")
|
||||||
.about(tr!("Display statistics on a repository"))
|
.about(tr!("Display statistics on a repository"))
|
||||||
.arg(Arg::from_usage("<REPO>")
|
.arg(Arg::from_usage("<REPO>")
|
||||||
.help(tr!("Path of the repository"))
|
.help(tr!("Path of the repository"))
|
||||||
|
@ -514,6 +546,16 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
|
||||||
.arg(Arg::from_usage("<NEW>")
|
.arg(Arg::from_usage("<NEW>")
|
||||||
.help(tr!("New version, [repository]::backup[::subpath]"))
|
.help(tr!("New version, [repository]::backup[::subpath]"))
|
||||||
.validator(|val| validate_repo_path(val, true, Some(true), None))))
|
.validator(|val| validate_repo_path(val, true, Some(true), None))))
|
||||||
|
.subcommand(SubCommand::with_name("duplicates")
|
||||||
|
.aliases(&["dups"])
|
||||||
|
.about(tr!("Find duplicate files in a backup"))
|
||||||
|
.arg(Arg::from_usage("[min_size] --min-size [SIZE]")
|
||||||
|
.help(tr!("Set the minimum file size"))
|
||||||
|
.default_value(DEFAULT_DUPLICATES_MIN_SIZE_STR)
|
||||||
|
.validator(validate_filesize))
|
||||||
|
.arg(Arg::from_usage("<BACKUP>")
|
||||||
|
.help(tr!("The backup/subtree path, [repository]::backup[::subtree]"))
|
||||||
|
.validator(|val| validate_repo_path(val, true, Some(true), None))))
|
||||||
.subcommand(SubCommand::with_name("copy")
|
.subcommand(SubCommand::with_name("copy")
|
||||||
.alias("cp")
|
.alias("cp")
|
||||||
.about(tr!("Create a copy of a backup"))
|
.about(tr!("Create a copy of a backup"))
|
||||||
|
@ -747,14 +789,14 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
|
||||||
inode: inode.map(|v| v.to_string())
|
inode: inode.map(|v| v.to_string())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
("stats", Some(args)) => {
|
("statistics", Some(args)) => {
|
||||||
let (repository, _backup, _inode) = parse_repo_path(
|
let (repository, _backup, _inode) = parse_repo_path(
|
||||||
args.value_of("REPO").unwrap(),
|
args.value_of("REPO").unwrap(),
|
||||||
true,
|
true,
|
||||||
Some(false),
|
Some(false),
|
||||||
Some(false)
|
Some(false)
|
||||||
).unwrap();
|
).unwrap();
|
||||||
Arguments::Stats { repo_path: repository }
|
Arguments::Statistics { repo_path: repository }
|
||||||
}
|
}
|
||||||
("copy", Some(args)) => {
|
("copy", Some(args)) => {
|
||||||
let (repository_src, backup_src, _inode) =
|
let (repository_src, backup_src, _inode) =
|
||||||
|
@ -830,6 +872,18 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
|
||||||
.unwrap_or_else(|| vec![])
|
.unwrap_or_else(|| vec![])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
("duplicates", Some(args)) => {
|
||||||
|
let (repository, backup, inode) =
|
||||||
|
parse_repo_path(args.value_of("BACKUP").unwrap(), true, Some(true), None).unwrap();
|
||||||
|
Arguments::Duplicates {
|
||||||
|
repo_path: repository,
|
||||||
|
backup_name: backup.unwrap().to_string(),
|
||||||
|
inode: inode.map(|v| v.to_string()),
|
||||||
|
min_size: args.value_of("min_size").map(|v| {
|
||||||
|
parse_filesize(v).unwrap()
|
||||||
|
}).unwrap()
|
||||||
|
}
|
||||||
|
}
|
||||||
("config", Some(args)) => {
|
("config", Some(args)) => {
|
||||||
let (repository, _backup, _inode) = parse_repo_path(
|
let (repository, _backup, _inode) = parse_repo_path(
|
||||||
args.value_of("REPO").unwrap(),
|
args.value_of("REPO").unwrap(),
|
||||||
|
|
115
src/cli/mod.rs
115
src/cli/mod.rs
|
@ -45,7 +45,8 @@ pub enum ErrorCode {
|
||||||
DiffRun,
|
DiffRun,
|
||||||
VersionsRun,
|
VersionsRun,
|
||||||
ImportRun,
|
ImportRun,
|
||||||
FuseMount
|
FuseMount,
|
||||||
|
DuplicatesRun
|
||||||
}
|
}
|
||||||
impl ErrorCode {
|
impl ErrorCode {
|
||||||
pub fn code(&self) -> i32 {
|
pub fn code(&self) -> i32 {
|
||||||
|
@ -81,6 +82,7 @@ impl ErrorCode {
|
||||||
ErrorCode::VersionsRun => 22,
|
ErrorCode::VersionsRun => 22,
|
||||||
ErrorCode::ImportRun => 23,
|
ErrorCode::ImportRun => 23,
|
||||||
ErrorCode::FuseMount => 24,
|
ErrorCode::FuseMount => 24,
|
||||||
|
ErrorCode::DuplicatesRun => 27,
|
||||||
//
|
//
|
||||||
ErrorCode::NoSuchBackup => 25,
|
ErrorCode::NoSuchBackup => 25,
|
||||||
ErrorCode::BackupAlreadyExists => 26,
|
ErrorCode::BackupAlreadyExists => 26,
|
||||||
|
@ -94,6 +96,7 @@ pub const DEFAULT_HASH: &str = "blake2";
|
||||||
pub const DEFAULT_COMPRESSION: &str = "brotli/3";
|
pub const DEFAULT_COMPRESSION: &str = "brotli/3";
|
||||||
pub const DEFAULT_BUNDLE_SIZE_STR: &str = "25";
|
pub const DEFAULT_BUNDLE_SIZE_STR: &str = "25";
|
||||||
pub const DEFAULT_VACUUM_RATIO_STR: &str = "0";
|
pub const DEFAULT_VACUUM_RATIO_STR: &str = "0";
|
||||||
|
pub const DEFAULT_DUPLICATES_MIN_SIZE_STR: &str = "1b";
|
||||||
lazy_static! {
|
lazy_static! {
|
||||||
pub static ref ZVAULT_FOLDER: PathBuf = {
|
pub static ref ZVAULT_FOLDER: PathBuf = {
|
||||||
env::home_dir().unwrap().join(".zvault")
|
env::home_dir().unwrap().join(".zvault")
|
||||||
|
@ -132,6 +135,22 @@ fn get_backup(repo: &Repository, backup_name: &str) -> Result<Backup, ErrorCode>
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn get_inode(repo: &mut Repository, backup: &Backup, inode: Option<&String>) -> Result<Inode, ErrorCode> {
|
||||||
|
Ok(if let Some(inode) = inode {
|
||||||
|
checked!(
|
||||||
|
repo.get_backup_inode(&backup, &inode),
|
||||||
|
"load subpath inode",
|
||||||
|
ErrorCode::LoadInode
|
||||||
|
)
|
||||||
|
} else {
|
||||||
|
checked!(
|
||||||
|
repo.get_inode(&backup.root),
|
||||||
|
"load root inode",
|
||||||
|
ErrorCode::LoadInode
|
||||||
|
)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
fn find_reference_backup(
|
fn find_reference_backup(
|
||||||
repo: &Repository,
|
repo: &Repository,
|
||||||
path: &str,
|
path: &str,
|
||||||
|
@ -322,37 +341,37 @@ fn print_repostats(stats: &RepositoryStatistics) {
|
||||||
tr_println!("Displacement:\n - average: {:.1}\n - stddev: {:.1}\n - over {:.1}: {:.0}, {:.1}%\n - maximum: {:.0}",
|
tr_println!("Displacement:\n - average: {:.1}\n - stddev: {:.1}\n - over {:.1}: {:.0}, {:.1}%\n - maximum: {:.0}",
|
||||||
disp.avg, disp.stddev, disp.avg + 2.0 * disp.stddev, disp.count_xl, disp.count_xl as f32 / disp.count as f32 * 100.0, disp.max);
|
disp.avg, disp.stddev, disp.avg + 2.0 * disp.stddev, disp.count_xl, disp.count_xl as f32 / disp.count as f32 * 100.0, disp.max);
|
||||||
println!("");
|
println!("");
|
||||||
tr_println!("Bundles (all)\n=============");
|
tr_println!("Bundles\n=======");
|
||||||
|
let tsize = (stats.bundles.raw_size.count as f32 * stats.bundles.encoded_size.avg) as u64;
|
||||||
|
tr_println!("All bundles: {} in {} bundles", to_file_size(tsize), stats.bundles.raw_size.count);
|
||||||
let rsize = &stats.bundles.raw_size;
|
let rsize = &stats.bundles.raw_size;
|
||||||
tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
tr_println!(" - raw size: ø = {}, maximum: {}", to_file_size(rsize.avg as u64), to_file_size(rsize.max as u64));
|
||||||
to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64));
|
|
||||||
let esize = &stats.bundles.encoded_size;
|
let esize = &stats.bundles.encoded_size;
|
||||||
tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
tr_println!(" - encoded size: ø = {}, maximum: {}", to_file_size(esize.avg as u64), to_file_size(esize.max as u64));
|
||||||
to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64));
|
|
||||||
let ccount = &stats.bundles.chunk_count;
|
let ccount = &stats.bundles.chunk_count;
|
||||||
tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max);
|
tr_println!(" - chunk count: ø = {:.1}, maximum: {:.0}", ccount.avg, ccount.max);
|
||||||
println!("");
|
let tsize = (stats.bundles.raw_size_meta.count as f32 * stats.bundles.encoded_size_meta.avg) as u64;
|
||||||
tr_println!("Meta bundles\n============");
|
tr_println!("Meta bundles: {} in {} bundles", to_file_size(tsize), stats.bundles.raw_size_meta.count);
|
||||||
let rsize = &stats.bundles.raw_size_meta;
|
let rsize = &stats.bundles.raw_size_meta;
|
||||||
tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
tr_println!(" - raw size: ø = {}, maximum: {}", to_file_size(rsize.avg as u64), to_file_size(rsize.max as u64));
|
||||||
to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64));
|
|
||||||
let esize = &stats.bundles.encoded_size_meta;
|
let esize = &stats.bundles.encoded_size_meta;
|
||||||
tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
tr_println!(" - encoded size: ø = {}, maximum: {}", to_file_size(esize.avg as u64), to_file_size(esize.max as u64));
|
||||||
to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64));
|
|
||||||
let ccount = &stats.bundles.chunk_count_meta;
|
let ccount = &stats.bundles.chunk_count_meta;
|
||||||
tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max);
|
tr_println!(" - chunk count: ø = {:.1}, maximum: {:.0}", ccount.avg, ccount.max);
|
||||||
println!("");
|
let tsize = (stats.bundles.raw_size_data.count as f32 * stats.bundles.encoded_size_data.avg) as u64;
|
||||||
tr_println!("Data bundles\n============");
|
tr_println!("Data bundles: {} in {} bundles", to_file_size(tsize), stats.bundles.raw_size_data.count);
|
||||||
let rsize = &stats.bundles.raw_size_data;
|
let rsize = &stats.bundles.raw_size_data;
|
||||||
tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
tr_println!(" - raw size: ø = {}, maximum: {}", to_file_size(rsize.avg as u64), to_file_size(rsize.max as u64));
|
||||||
to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64));
|
|
||||||
let esize = &stats.bundles.encoded_size_data;
|
let esize = &stats.bundles.encoded_size_data;
|
||||||
tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
tr_println!(" - encoded size: ø = {}, maximum: {}", to_file_size(esize.avg as u64), to_file_size(esize.max as u64));
|
||||||
to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64));
|
|
||||||
let ccount = &stats.bundles.chunk_count_data;
|
let ccount = &stats.bundles.chunk_count_data;
|
||||||
tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max);
|
tr_println!(" - chunk count: ø = {:.1}, maximum: {:.0}", ccount.avg, ccount.max);
|
||||||
println!("");
|
println!("");
|
||||||
tr_println!("Bundle methods\n==============");
|
tr_println!("Bundle methods\n==============");
|
||||||
|
tr_println!("Hash:");
|
||||||
|
for (hash, &count) in &stats.bundles.hash_methods {
|
||||||
|
tr_println!(" - {}: {}, {:.1}%", hash.name(), count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
|
||||||
|
}
|
||||||
tr_println!("Compression:");
|
tr_println!("Compression:");
|
||||||
for (compr, &count) in &stats.bundles.compressions {
|
for (compr, &count) in &stats.bundles.compressions {
|
||||||
let compr_name = if let &Some(ref compr) = compr {
|
let compr_name = if let &Some(ref compr) = compr {
|
||||||
|
@ -362,9 +381,14 @@ fn print_repostats(stats: &RepositoryStatistics) {
|
||||||
};
|
};
|
||||||
tr_println!(" - {}: {}, {:.1}%", compr_name, count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
|
tr_println!(" - {}: {}, {:.1}%", compr_name, count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
|
||||||
}
|
}
|
||||||
tr_println!("Hash:");
|
tr_println!("Encryption:");
|
||||||
for (hash, &count) in &stats.bundles.hash_methods {
|
for (encr, &count) in &stats.bundles.encryptions {
|
||||||
tr_println!(" - {}: {}, {:.1}%", hash.name(), count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
|
let encr_name = if let &Some(ref encr) = encr {
|
||||||
|
to_hex(&encr.1[..])
|
||||||
|
} else {
|
||||||
|
tr!("none").to_string()
|
||||||
|
};
|
||||||
|
tr_println!(" - {}: {}, {:.1}%", encr_name, count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -465,6 +489,17 @@ fn print_analysis(analysis: &HashMap<u32, BundleAnalysis>) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn print_duplicates(dups: Vec<(Vec<PathBuf>, u64)>) {
|
||||||
|
for (group, size) in dups {
|
||||||
|
tr_println!("{} duplicates found, size: {}", group.len(), to_file_size(size));
|
||||||
|
for dup in group {
|
||||||
|
println!(" - {}", dup.to_string_lossy());
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#[allow(unknown_lints, cyclomatic_complexity)]
|
#[allow(unknown_lints, cyclomatic_complexity)]
|
||||||
pub fn run() -> Result<(), ErrorCode> {
|
pub fn run() -> Result<(), ErrorCode> {
|
||||||
|
@ -652,19 +687,7 @@ pub fn run() -> Result<(), ErrorCode> {
|
||||||
} => {
|
} => {
|
||||||
let mut repo = try!(open_repository(&repo_path, true));
|
let mut repo = try!(open_repository(&repo_path, true));
|
||||||
let backup = try!(get_backup(&repo, &backup_name));
|
let backup = try!(get_backup(&repo, &backup_name));
|
||||||
let inode = if let Some(inode) = inode {
|
let inode = try!(get_inode(&mut repo, &backup, inode.as_ref()));
|
||||||
checked!(
|
|
||||||
repo.get_backup_inode(&backup, &inode),
|
|
||||||
"load subpath inode",
|
|
||||||
ErrorCode::LoadInode
|
|
||||||
)
|
|
||||||
} else {
|
|
||||||
checked!(
|
|
||||||
repo.get_inode(&backup.root),
|
|
||||||
"load root inode",
|
|
||||||
ErrorCode::LoadInode
|
|
||||||
)
|
|
||||||
};
|
|
||||||
if tar {
|
if tar {
|
||||||
checked!(
|
checked!(
|
||||||
repo.export_tarfile(&backup, inode, &dst_path),
|
repo.export_tarfile(&backup, inode, &dst_path),
|
||||||
|
@ -917,12 +940,28 @@ pub fn run() -> Result<(), ErrorCode> {
|
||||||
print_repoinfo(&repo.info());
|
print_repoinfo(&repo.info());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Arguments::Stats {
|
Arguments::Statistics {
|
||||||
repo_path
|
repo_path
|
||||||
} => {
|
} => {
|
||||||
let mut repo = try!(open_repository(&repo_path, false));
|
let mut repo = try!(open_repository(&repo_path, false));
|
||||||
print_repostats(&repo.statistics());
|
print_repostats(&repo.statistics());
|
||||||
}
|
}
|
||||||
|
Arguments::Duplicates {
|
||||||
|
repo_path,
|
||||||
|
backup_name,
|
||||||
|
inode,
|
||||||
|
min_size
|
||||||
|
} => {
|
||||||
|
let mut repo = try!(open_repository(&repo_path, true));
|
||||||
|
let backup = try!(get_backup(&repo, &backup_name));
|
||||||
|
let inode = try!(get_inode(&mut repo, &backup, inode.as_ref()));
|
||||||
|
let dups = checked!(
|
||||||
|
repo.find_duplicates(&inode, min_size),
|
||||||
|
"find duplicates",
|
||||||
|
ErrorCode::DuplicatesRun
|
||||||
|
);
|
||||||
|
print_duplicates(dups);
|
||||||
|
}
|
||||||
Arguments::Mount {
|
Arguments::Mount {
|
||||||
repo_path,
|
repo_path,
|
||||||
backup_name,
|
backup_name,
|
||||||
|
|
|
@ -542,4 +542,49 @@ impl Repository {
|
||||||
));
|
));
|
||||||
Ok(diffs)
|
Ok(diffs)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn count_sizes_recursive(&mut self, inode: &Inode, sizes: &mut HashMap<u64, usize>, min_size: u64) -> Result<(), RepositoryError> {
|
||||||
|
if inode.size >= min_size {
|
||||||
|
*sizes.entry(inode.size).or_insert(0) += 1;
|
||||||
|
}
|
||||||
|
if let Some(ref children) = inode.children {
|
||||||
|
for chunks in children.values() {
|
||||||
|
let ch = try!(self.get_inode(&chunks));
|
||||||
|
try!(self.count_sizes_recursive(&ch, sizes, min_size));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn find_duplicates_recursive(&mut self, inode: &Inode, path: &Path, sizes: &HashMap<u64, usize>, hashes: &mut HashMap<Hash, (Vec<PathBuf>, u64)>) -> Result<(), RepositoryError> {
|
||||||
|
let path = path.join(&inode.name);
|
||||||
|
if sizes.get(&inode.size).cloned().unwrap_or(0) > 1 {
|
||||||
|
if let Some(ref data) = inode.data {
|
||||||
|
let chunk_data = try!(msgpack::encode(data).map_err(InodeError::from));
|
||||||
|
let hash = HashMethod::Blake2.hash(&chunk_data);
|
||||||
|
hashes.entry(hash).or_insert((Vec::new(), inode.size)).0.push(path.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if let Some(ref children) = inode.children {
|
||||||
|
for chunks in children.values() {
|
||||||
|
let ch = try!(self.get_inode(&chunks));
|
||||||
|
try!(self.find_duplicates_recursive(&ch, &path, sizes, hashes));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn find_duplicates(&mut self, inode: &Inode, min_size: u64) -> Result<Vec<(Vec<PathBuf>, u64)>, RepositoryError> {
|
||||||
|
let mut sizes = HashMap::new();
|
||||||
|
try!(self.count_sizes_recursive(inode, &mut sizes, min_size));
|
||||||
|
let mut hashes = HashMap::new();
|
||||||
|
if let Some(ref children) = inode.children {
|
||||||
|
for chunks in children.values() {
|
||||||
|
let ch = try!(self.get_inode(&chunks));
|
||||||
|
try!(self.find_duplicates_recursive(&ch, Path::new(""), &sizes, &mut hashes));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let dups = hashes.into_iter().map(|(_,v)| v).filter(|&(ref v, _)| v.len() > 1).collect();
|
||||||
|
Ok(dups)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue