stats & dups

This commit is contained in:
Dennis Schwerdel 2018-03-08 15:20:20 +01:00
parent 56c916f585
commit 2f3c97a043
8 changed files with 197 additions and 48 deletions

View File

@ -21,7 +21,9 @@ rust:
- nightly - nightly
matrix: matrix:
allow_failures: allow_failures:
- rust: nightly - rust:
- beta
- stable
script: script:
- cargo clean - cargo clean
- cargo build - cargo build

View File

@ -5,9 +5,15 @@ This project follows [semantic versioning](http://semver.org).
### UNRELEASED ### UNRELEASED
* [added] Translation infrastructure (**requires nightly rust**) * [added] Translation infrastructure (**requires nightly rust**)
* [added] Checking hashes of chunks in check --bundle-data
* [added] Debian package for libsodium23
* [modified] Updated dependencies * [modified] Updated dependencies
* [modified] Updated copyright date * [modified] Updated copyright date
* [modified] Moved all code into one crate for easier translation
* [modified] Compression ratio is now displayed in a clearer format
* [fixed] Also including the first min_size bytes in hash * [fixed] Also including the first min_size bytes in hash
* [fixed] Fixed some texts in manpages
* [fixed] Calling strip on final binaries
### v0.4.0 (2017-07-21) ### v0.4.0 (2017-07-21)

6
Cargo.lock generated
View File

@ -24,7 +24,7 @@ dependencies = [
[[package]] [[package]]
name = "atty" name = "atty"
version = "0.2.6" version = "0.2.8"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"libc 0.2.39 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.39 (registry+https://github.com/rust-lang/crates.io-index)",
@ -76,7 +76,7 @@ version = "2.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [ dependencies = [
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
"atty 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)", "atty 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
"bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
"strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
@ -611,7 +611,7 @@ dependencies = [
"checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4" "checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4"
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b" "checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
"checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef" "checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef"
"checksum atty 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8352656fd42c30a0c3c89d26dea01e3b77c0ab2af18230835c15e2e13cd51859" "checksum atty 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "af80143d6f7608d746df1520709e5d141c96f240b0e62b0aa41bdfb53374d9d4"
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" "checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
"checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf" "checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf"
"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400" "checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"

View File

@ -529,12 +529,14 @@ impl BundleDb {
let bundles_data: Vec<_> = bundles.iter().filter(|b| b.mode == BundleMode::Data).collect(); let bundles_data: Vec<_> = bundles.iter().filter(|b| b.mode == BundleMode::Data).collect();
let mut hash_methods = HashMap::new(); let mut hash_methods = HashMap::new();
let mut compressions = HashMap::new(); let mut compressions = HashMap::new();
let mut encryptions = HashMap::new();
for bundle in &bundles { for bundle in &bundles {
*hash_methods.entry(bundle.hash_method).or_insert(0) += 1; *hash_methods.entry(bundle.hash_method).or_insert(0) += 1;
*compressions.entry(bundle.compression.clone()).or_insert(0) += 1; *compressions.entry(bundle.compression.clone()).or_insert(0) += 1;
*encryptions.entry(bundle.encryption.clone()).or_insert(0) += 1;
} }
BundleStatistics { BundleStatistics {
hash_methods, compressions, hash_methods, compressions, encryptions,
raw_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.raw_size as f32)), raw_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.raw_size as f32)),
encoded_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.encoded_size as f32)), encoded_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.encoded_size as f32)),
chunk_count: ValueStats::from_iter(|| bundles.iter().map(|b| b.chunk_count as f32)), chunk_count: ValueStats::from_iter(|| bundles.iter().map(|b| b.chunk_count as f32)),

View File

@ -148,5 +148,6 @@ pub struct BundleStatistics {
pub encoded_size_data: ValueStats, pub encoded_size_data: ValueStats,
pub chunk_count_data: ValueStats, pub chunk_count_data: ValueStats,
pub hash_methods: HashMap<HashMethod, usize>, pub hash_methods: HashMap<HashMethod, usize>,
pub compressions: HashMap<Option<Compression>, usize> pub compressions: HashMap<Option<Compression>, usize>,
pub encryptions: HashMap<Option<Encryption>, usize>
} }

View File

@ -41,6 +41,12 @@ pub enum Arguments {
inode: Option<String>, inode: Option<String>,
force: bool force: bool
}, },
Duplicates {
repo_path: PathBuf,
backup_name: String,
inode: Option<String>,
min_size: u64
},
Prune { Prune {
repo_path: PathBuf, repo_path: PathBuf,
prefix: String, prefix: String,
@ -75,7 +81,7 @@ pub enum Arguments {
backup_name: Option<String>, backup_name: Option<String>,
inode: Option<String> inode: Option<String>
}, },
Stats { Statistics {
repo_path: PathBuf repo_path: PathBuf
}, },
Copy { Copy {
@ -206,6 +212,31 @@ fn validate_repo_path(
parse_repo_path(&repo_path, existing, backup_restr, path_restr).map(|_| ()) parse_repo_path(&repo_path, existing, backup_restr, path_restr).map(|_| ())
} }
fn parse_filesize(num: &str) -> Result<u64, String> {
let (num, suffix) = if num.len() > 0 {
num.split_at(num.len() - 1)
} else {
(num, "b")
};
let factor = match suffix {
"b" | "B" => 1,
"k" | "K" => 1024,
"m" | "M" => 1024*1024,
"g" | "G" => 1024*1024*1024,
"t" | "T" => 1024*1024*1024*1024,
_ => return Err(tr!("Unknown suffix").to_string())
};
let num = try!(parse_num(num));
Ok(num * factor)
}
/// Argument validator for file sizes: accepts `val` exactly when
/// `parse_filesize` can parse it and forwards its error message otherwise.
/// The parsed value itself is discarded.
#[allow(unknown_lints, needless_pass_by_value)]
fn validate_filesize(val: String) -> Result<(), String> {
    match parse_filesize(&val) {
        Ok(_) => Ok(()),
        Err(message) => Err(message),
    }
}
fn parse_num(num: &str) -> Result<u64, String> { fn parse_num(num: &str) -> Result<u64, String> {
if let Ok(num) = num.parse::<u64>() { if let Ok(num) = num.parse::<u64>() {
Ok(num) Ok(num)
@ -467,7 +498,8 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
.arg(Arg::from_usage("<REPO>") .arg(Arg::from_usage("<REPO>")
.help(tr!("Path of the repository")) .help(tr!("Path of the repository"))
.validator(|val| validate_repo_path(val, true, Some(false), Some(false))))) .validator(|val| validate_repo_path(val, true, Some(false), Some(false)))))
.subcommand(SubCommand::with_name("stats") .subcommand(SubCommand::with_name("statistics")
.alias("stats")
.about(tr!("Display statistics on a repository")) .about(tr!("Display statistics on a repository"))
.arg(Arg::from_usage("<REPO>") .arg(Arg::from_usage("<REPO>")
.help(tr!("Path of the repository")) .help(tr!("Path of the repository"))
@ -514,6 +546,16 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
.arg(Arg::from_usage("<NEW>") .arg(Arg::from_usage("<NEW>")
.help(tr!("New version, [repository]::backup[::subpath]")) .help(tr!("New version, [repository]::backup[::subpath]"))
.validator(|val| validate_repo_path(val, true, Some(true), None)))) .validator(|val| validate_repo_path(val, true, Some(true), None))))
.subcommand(SubCommand::with_name("duplicates")
.aliases(&["dups"])
.about(tr!("Find duplicate files in a backup"))
.arg(Arg::from_usage("[min_size] --min-size [SIZE]")
.help(tr!("Set the minimum file size"))
.default_value(DEFAULT_DUPLICATES_MIN_SIZE_STR)
.validator(validate_filesize))
.arg(Arg::from_usage("<BACKUP>")
.help(tr!("The backup/subtree path, [repository]::backup[::subtree]"))
.validator(|val| validate_repo_path(val, true, Some(true), None))))
.subcommand(SubCommand::with_name("copy") .subcommand(SubCommand::with_name("copy")
.alias("cp") .alias("cp")
.about(tr!("Create a copy of a backup")) .about(tr!("Create a copy of a backup"))
@ -747,14 +789,14 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
inode: inode.map(|v| v.to_string()) inode: inode.map(|v| v.to_string())
} }
} }
("stats", Some(args)) => { ("statistics", Some(args)) => {
let (repository, _backup, _inode) = parse_repo_path( let (repository, _backup, _inode) = parse_repo_path(
args.value_of("REPO").unwrap(), args.value_of("REPO").unwrap(),
true, true,
Some(false), Some(false),
Some(false) Some(false)
).unwrap(); ).unwrap();
Arguments::Stats { repo_path: repository } Arguments::Statistics { repo_path: repository }
} }
("copy", Some(args)) => { ("copy", Some(args)) => {
let (repository_src, backup_src, _inode) = let (repository_src, backup_src, _inode) =
@ -830,6 +872,18 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
.unwrap_or_else(|| vec![]) .unwrap_or_else(|| vec![])
} }
} }
("duplicates", Some(args)) => {
let (repository, backup, inode) =
parse_repo_path(args.value_of("BACKUP").unwrap(), true, Some(true), None).unwrap();
Arguments::Duplicates {
repo_path: repository,
backup_name: backup.unwrap().to_string(),
inode: inode.map(|v| v.to_string()),
min_size: args.value_of("min_size").map(|v| {
parse_filesize(v).unwrap()
}).unwrap()
}
}
("config", Some(args)) => { ("config", Some(args)) => {
let (repository, _backup, _inode) = parse_repo_path( let (repository, _backup, _inode) = parse_repo_path(
args.value_of("REPO").unwrap(), args.value_of("REPO").unwrap(),

View File

@ -45,7 +45,8 @@ pub enum ErrorCode {
DiffRun, DiffRun,
VersionsRun, VersionsRun,
ImportRun, ImportRun,
FuseMount FuseMount,
DuplicatesRun
} }
impl ErrorCode { impl ErrorCode {
pub fn code(&self) -> i32 { pub fn code(&self) -> i32 {
@ -81,6 +82,7 @@ impl ErrorCode {
ErrorCode::VersionsRun => 22, ErrorCode::VersionsRun => 22,
ErrorCode::ImportRun => 23, ErrorCode::ImportRun => 23,
ErrorCode::FuseMount => 24, ErrorCode::FuseMount => 24,
ErrorCode::DuplicatesRun => 27,
// //
ErrorCode::NoSuchBackup => 25, ErrorCode::NoSuchBackup => 25,
ErrorCode::BackupAlreadyExists => 26, ErrorCode::BackupAlreadyExists => 26,
@ -94,6 +96,7 @@ pub const DEFAULT_HASH: &str = "blake2";
pub const DEFAULT_COMPRESSION: &str = "brotli/3"; pub const DEFAULT_COMPRESSION: &str = "brotli/3";
pub const DEFAULT_BUNDLE_SIZE_STR: &str = "25"; pub const DEFAULT_BUNDLE_SIZE_STR: &str = "25";
pub const DEFAULT_VACUUM_RATIO_STR: &str = "0"; pub const DEFAULT_VACUUM_RATIO_STR: &str = "0";
pub const DEFAULT_DUPLICATES_MIN_SIZE_STR: &str = "1b";
lazy_static! { lazy_static! {
pub static ref ZVAULT_FOLDER: PathBuf = { pub static ref ZVAULT_FOLDER: PathBuf = {
env::home_dir().unwrap().join(".zvault") env::home_dir().unwrap().join(".zvault")
@ -132,6 +135,22 @@ fn get_backup(repo: &Repository, backup_name: &str) -> Result<Backup, ErrorCode>
)) ))
} }
/// Loads the inode a command should operate on.
///
/// With `Some(path)` the inode is looked up inside `backup` through
/// `get_backup_inode`; with `None` the root inode of the backup is loaded.
/// Both lookups go through the `checked!` macro with
/// `ErrorCode::LoadInode` as the error code.
fn get_inode(repo: &mut Repository, backup: &Backup, inode: Option<&String>) -> Result<Inode, ErrorCode> {
    let loaded = match inode {
        Some(subpath) => checked!(
            repo.get_backup_inode(&backup, &subpath),
            "load subpath inode",
            ErrorCode::LoadInode
        ),
        None => checked!(
            repo.get_inode(&backup.root),
            "load root inode",
            ErrorCode::LoadInode
        ),
    };
    Ok(loaded)
}
fn find_reference_backup( fn find_reference_backup(
repo: &Repository, repo: &Repository,
path: &str, path: &str,
@ -322,37 +341,37 @@ fn print_repostats(stats: &RepositoryStatistics) {
tr_println!("Displacement:\n - average: {:.1}\n - stddev: {:.1}\n - over {:.1}: {:.0}, {:.1}%\n - maximum: {:.0}", tr_println!("Displacement:\n - average: {:.1}\n - stddev: {:.1}\n - over {:.1}: {:.0}, {:.1}%\n - maximum: {:.0}",
disp.avg, disp.stddev, disp.avg + 2.0 * disp.stddev, disp.count_xl, disp.count_xl as f32 / disp.count as f32 * 100.0, disp.max); disp.avg, disp.stddev, disp.avg + 2.0 * disp.stddev, disp.count_xl, disp.count_xl as f32 / disp.count as f32 * 100.0, disp.max);
println!(""); println!("");
tr_println!("Bundles (all)\n============="); tr_println!("Bundles\n=======");
let tsize = (stats.bundles.raw_size.count as f32 * stats.bundles.encoded_size.avg) as u64;
tr_println!("All bundles: {} in {} bundles", to_file_size(tsize), stats.bundles.raw_size.count);
let rsize = &stats.bundles.raw_size; let rsize = &stats.bundles.raw_size;
tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}", tr_println!(" - raw size: ø = {}, maximum: {}", to_file_size(rsize.avg as u64), to_file_size(rsize.max as u64));
to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64));
let esize = &stats.bundles.encoded_size; let esize = &stats.bundles.encoded_size;
tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}", tr_println!(" - encoded size: ø = {}, maximum: {}", to_file_size(esize.avg as u64), to_file_size(esize.max as u64));
to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64));
let ccount = &stats.bundles.chunk_count; let ccount = &stats.bundles.chunk_count;
tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max); tr_println!(" - chunk count: ø = {:.1}, maximum: {:.0}", ccount.avg, ccount.max);
println!(""); let tsize = (stats.bundles.raw_size_meta.count as f32 * stats.bundles.encoded_size_meta.avg) as u64;
tr_println!("Meta bundles\n============"); tr_println!("Meta bundles: {} in {} bundles", to_file_size(tsize), stats.bundles.raw_size_meta.count);
let rsize = &stats.bundles.raw_size_meta; let rsize = &stats.bundles.raw_size_meta;
tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}", tr_println!(" - raw size: ø = {}, maximum: {}", to_file_size(rsize.avg as u64), to_file_size(rsize.max as u64));
to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64));
let esize = &stats.bundles.encoded_size_meta; let esize = &stats.bundles.encoded_size_meta;
tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}", tr_println!(" - encoded size: ø = {}, maximum: {}", to_file_size(esize.avg as u64), to_file_size(esize.max as u64));
to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64));
let ccount = &stats.bundles.chunk_count_meta; let ccount = &stats.bundles.chunk_count_meta;
tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max); tr_println!(" - chunk count: ø = {:.1}, maximum: {:.0}", ccount.avg, ccount.max);
println!(""); let tsize = (stats.bundles.raw_size_data.count as f32 * stats.bundles.encoded_size_data.avg) as u64;
tr_println!("Data bundles\n============"); tr_println!("Data bundles: {} in {} bundles", to_file_size(tsize), stats.bundles.raw_size_data.count);
let rsize = &stats.bundles.raw_size_data; let rsize = &stats.bundles.raw_size_data;
tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}", tr_println!(" - raw size: ø = {}, maximum: {}", to_file_size(rsize.avg as u64), to_file_size(rsize.max as u64));
to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64));
let esize = &stats.bundles.encoded_size_data; let esize = &stats.bundles.encoded_size_data;
tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}", tr_println!(" - encoded size: ø = {}, maximum: {}", to_file_size(esize.avg as u64), to_file_size(esize.max as u64));
to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64));
let ccount = &stats.bundles.chunk_count_data; let ccount = &stats.bundles.chunk_count_data;
tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max); tr_println!(" - chunk count: ø = {:.1}, maximum: {:.0}", ccount.avg, ccount.max);
println!(""); println!("");
tr_println!("Bundle methods\n=============="); tr_println!("Bundle methods\n==============");
tr_println!("Hash:");
for (hash, &count) in &stats.bundles.hash_methods {
tr_println!(" - {}: {}, {:.1}%", hash.name(), count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
}
tr_println!("Compression:"); tr_println!("Compression:");
for (compr, &count) in &stats.bundles.compressions { for (compr, &count) in &stats.bundles.compressions {
let compr_name = if let &Some(ref compr) = compr { let compr_name = if let &Some(ref compr) = compr {
@ -362,9 +381,14 @@ fn print_repostats(stats: &RepositoryStatistics) {
}; };
tr_println!(" - {}: {}, {:.1}%", compr_name, count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0); tr_println!(" - {}: {}, {:.1}%", compr_name, count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
} }
tr_println!("Hash:"); tr_println!("Encryption:");
for (hash, &count) in &stats.bundles.hash_methods { for (encr, &count) in &stats.bundles.encryptions {
tr_println!(" - {}: {}, {:.1}%", hash.name(), count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0); let encr_name = if let &Some(ref encr) = encr {
to_hex(&encr.1[..])
} else {
tr!("none").to_string()
};
tr_println!(" - {}: {}, {:.1}%", encr_name, count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
} }
} }
@ -465,6 +489,17 @@ fn print_analysis(analysis: &HashMap<u32, BundleAnalysis>) {
} }
} }
fn print_duplicates(dups: Vec<(Vec<PathBuf>, u64)>) {
for (group, size) in dups {
tr_println!("{} duplicates found, size: {}", group.len(), to_file_size(size));
for dup in group {
println!(" - {}", dup.to_string_lossy());
}
println!();
}
}
#[allow(unknown_lints, cyclomatic_complexity)] #[allow(unknown_lints, cyclomatic_complexity)]
pub fn run() -> Result<(), ErrorCode> { pub fn run() -> Result<(), ErrorCode> {
@ -652,19 +687,7 @@ pub fn run() -> Result<(), ErrorCode> {
} => { } => {
let mut repo = try!(open_repository(&repo_path, true)); let mut repo = try!(open_repository(&repo_path, true));
let backup = try!(get_backup(&repo, &backup_name)); let backup = try!(get_backup(&repo, &backup_name));
let inode = if let Some(inode) = inode { let inode = try!(get_inode(&mut repo, &backup, inode.as_ref()));
checked!(
repo.get_backup_inode(&backup, &inode),
"load subpath inode",
ErrorCode::LoadInode
)
} else {
checked!(
repo.get_inode(&backup.root),
"load root inode",
ErrorCode::LoadInode
)
};
if tar { if tar {
checked!( checked!(
repo.export_tarfile(&backup, inode, &dst_path), repo.export_tarfile(&backup, inode, &dst_path),
@ -917,12 +940,28 @@ pub fn run() -> Result<(), ErrorCode> {
print_repoinfo(&repo.info()); print_repoinfo(&repo.info());
} }
} }
Arguments::Stats { Arguments::Statistics {
repo_path repo_path
} => { } => {
let mut repo = try!(open_repository(&repo_path, false)); let mut repo = try!(open_repository(&repo_path, false));
print_repostats(&repo.statistics()); print_repostats(&repo.statistics());
} }
Arguments::Duplicates {
repo_path,
backup_name,
inode,
min_size
} => {
let mut repo = try!(open_repository(&repo_path, true));
let backup = try!(get_backup(&repo, &backup_name));
let inode = try!(get_inode(&mut repo, &backup, inode.as_ref()));
let dups = checked!(
repo.find_duplicates(&inode, min_size),
"find duplicates",
ErrorCode::DuplicatesRun
);
print_duplicates(dups);
}
Arguments::Mount { Arguments::Mount {
repo_path, repo_path,
backup_name, backup_name,

View File

@ -542,4 +542,49 @@ impl Repository {
)); ));
Ok(diffs) Ok(diffs)
} }
fn count_sizes_recursive(&mut self, inode: &Inode, sizes: &mut HashMap<u64, usize>, min_size: u64) -> Result<(), RepositoryError> {
if inode.size >= min_size {
*sizes.entry(inode.size).or_insert(0) += 1;
}
if let Some(ref children) = inode.children {
for chunks in children.values() {
let ch = try!(self.get_inode(&chunks));
try!(self.count_sizes_recursive(&ch, sizes, min_size));
}
}
Ok(())
}
fn find_duplicates_recursive(&mut self, inode: &Inode, path: &Path, sizes: &HashMap<u64, usize>, hashes: &mut HashMap<Hash, (Vec<PathBuf>, u64)>) -> Result<(), RepositoryError> {
let path = path.join(&inode.name);
if sizes.get(&inode.size).cloned().unwrap_or(0) > 1 {
if let Some(ref data) = inode.data {
let chunk_data = try!(msgpack::encode(data).map_err(InodeError::from));
let hash = HashMethod::Blake2.hash(&chunk_data);
hashes.entry(hash).or_insert((Vec::new(), inode.size)).0.push(path.clone());
}
}
if let Some(ref children) = inode.children {
for chunks in children.values() {
let ch = try!(self.get_inode(&chunks));
try!(self.find_duplicates_recursive(&ch, &path, sizes, hashes));
}
}
Ok(())
}
pub fn find_duplicates(&mut self, inode: &Inode, min_size: u64) -> Result<Vec<(Vec<PathBuf>, u64)>, RepositoryError> {
let mut sizes = HashMap::new();
try!(self.count_sizes_recursive(inode, &mut sizes, min_size));
let mut hashes = HashMap::new();
if let Some(ref children) = inode.children {
for chunks in children.values() {
let ch = try!(self.get_inode(&chunks));
try!(self.find_duplicates_recursive(&ch, Path::new(""), &sizes, &mut hashes));
}
}
let dups = hashes.into_iter().map(|(_,v)| v).filter(|&(ref v, _)| v.len() > 1).collect();
Ok(dups)
}
} }