mirror of https://github.com/dswd/zvault
stats & dups
This commit is contained in:
parent
56c916f585
commit
2f3c97a043
|
@ -21,7 +21,9 @@ rust:
|
|||
- nightly
|
||||
matrix:
|
||||
allow_failures:
|
||||
- rust: nightly
|
||||
- rust:
|
||||
- beta
|
||||
- stable
|
||||
script:
|
||||
- cargo clean
|
||||
- cargo build
|
||||
|
|
|
@ -5,9 +5,15 @@ This project follows [semantic versioning](http://semver.org).
|
|||
|
||||
### UNRELEASED
|
||||
* [added] Translation infrastructure (**requires nightly rust**)
|
||||
* [added] Checking hashes of chunks in check --bundle-data
|
||||
* [added] Debian package for libsodium23
|
||||
* [modified] Updated dependencies
|
||||
* [modified] Updated copyright date
|
||||
* [modified] Moved all code into one crate for easier translation
|
||||
* [modified] Compression ratio is now displayed in a clearer format
|
||||
* [fixed] Also including the first min_size bytes in hash
|
||||
* [fixed] Fixed some texts in manpages
|
||||
* [fixed] Calling strip on final binaries
|
||||
|
||||
|
||||
### v0.4.0 (2017-07-21)
|
||||
|
|
|
@ -24,7 +24,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "atty"
|
||||
version = "0.2.6"
|
||||
version = "0.2.8"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"libc 0.2.39 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -76,7 +76,7 @@ version = "2.31.1"
|
|||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"atty 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"atty 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"strsim 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"textwrap 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
|
@ -611,7 +611,7 @@ dependencies = [
|
|||
"checksum aho-corasick 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "d6531d44de723825aa81398a6415283229725a00fa30713812ab9323faa82fc4"
|
||||
"checksum ansi_term 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
|
||||
"checksum arrayvec 0.4.7 (registry+https://github.com/rust-lang/crates.io-index)" = "a1e964f9e24d588183fcb43503abda40d288c8657dfc27311516ce2f05675aef"
|
||||
"checksum atty 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "8352656fd42c30a0c3c89d26dea01e3b77c0ab2af18230835c15e2e13cd51859"
|
||||
"checksum atty 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "af80143d6f7608d746df1520709e5d141c96f240b0e62b0aa41bdfb53374d9d4"
|
||||
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
|
||||
"checksum bitflags 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b3c30d3802dfb7281680d6285f2ccdaa8c2d8fee41f93805dba5c4cf50dc23cf"
|
||||
"checksum blake2-rfc 0.2.18 (registry+https://github.com/rust-lang/crates.io-index)" = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400"
|
||||
|
|
|
@ -529,12 +529,14 @@ impl BundleDb {
|
|||
let bundles_data: Vec<_> = bundles.iter().filter(|b| b.mode == BundleMode::Data).collect();
|
||||
let mut hash_methods = HashMap::new();
|
||||
let mut compressions = HashMap::new();
|
||||
let mut encryptions = HashMap::new();
|
||||
for bundle in &bundles {
|
||||
*hash_methods.entry(bundle.hash_method).or_insert(0) += 1;
|
||||
*compressions.entry(bundle.compression.clone()).or_insert(0) += 1;
|
||||
*encryptions.entry(bundle.encryption.clone()).or_insert(0) += 1;
|
||||
}
|
||||
BundleStatistics {
|
||||
hash_methods, compressions,
|
||||
hash_methods, compressions, encryptions,
|
||||
raw_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.raw_size as f32)),
|
||||
encoded_size: ValueStats::from_iter(|| bundles.iter().map(|b| b.encoded_size as f32)),
|
||||
chunk_count: ValueStats::from_iter(|| bundles.iter().map(|b| b.chunk_count as f32)),
|
||||
|
|
|
@ -148,5 +148,6 @@ pub struct BundleStatistics {
|
|||
pub encoded_size_data: ValueStats,
|
||||
pub chunk_count_data: ValueStats,
|
||||
pub hash_methods: HashMap<HashMethod, usize>,
|
||||
pub compressions: HashMap<Option<Compression>, usize>
|
||||
pub compressions: HashMap<Option<Compression>, usize>,
|
||||
pub encryptions: HashMap<Option<Encryption>, usize>
|
||||
}
|
|
@ -41,6 +41,12 @@ pub enum Arguments {
|
|||
inode: Option<String>,
|
||||
force: bool
|
||||
},
|
||||
Duplicates {
|
||||
repo_path: PathBuf,
|
||||
backup_name: String,
|
||||
inode: Option<String>,
|
||||
min_size: u64
|
||||
},
|
||||
Prune {
|
||||
repo_path: PathBuf,
|
||||
prefix: String,
|
||||
|
@ -75,7 +81,7 @@ pub enum Arguments {
|
|||
backup_name: Option<String>,
|
||||
inode: Option<String>
|
||||
},
|
||||
Stats {
|
||||
Statistics {
|
||||
repo_path: PathBuf
|
||||
},
|
||||
Copy {
|
||||
|
@ -206,6 +212,31 @@ fn validate_repo_path(
|
|||
parse_repo_path(&repo_path, existing, backup_restr, path_restr).map(|_| ())
|
||||
}
|
||||
|
||||
|
||||
fn parse_filesize(num: &str) -> Result<u64, String> {
|
||||
let (num, suffix) = if num.len() > 0 {
|
||||
num.split_at(num.len() - 1)
|
||||
} else {
|
||||
(num, "b")
|
||||
};
|
||||
let factor = match suffix {
|
||||
"b" | "B" => 1,
|
||||
"k" | "K" => 1024,
|
||||
"m" | "M" => 1024*1024,
|
||||
"g" | "G" => 1024*1024*1024,
|
||||
"t" | "T" => 1024*1024*1024*1024,
|
||||
_ => return Err(tr!("Unknown suffix").to_string())
|
||||
};
|
||||
let num = try!(parse_num(num));
|
||||
Ok(num * factor)
|
||||
}
|
||||
|
||||
#[allow(unknown_lints, needless_pass_by_value)]
|
||||
fn validate_filesize(val: String) -> Result<(), String> {
|
||||
parse_filesize(&val).map(|_| ())
|
||||
}
|
||||
|
||||
|
||||
fn parse_num(num: &str) -> Result<u64, String> {
|
||||
if let Ok(num) = num.parse::<u64>() {
|
||||
Ok(num)
|
||||
|
@ -467,7 +498,8 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
|
|||
.arg(Arg::from_usage("<REPO>")
|
||||
.help(tr!("Path of the repository"))
|
||||
.validator(|val| validate_repo_path(val, true, Some(false), Some(false)))))
|
||||
.subcommand(SubCommand::with_name("stats")
|
||||
.subcommand(SubCommand::with_name("statistics")
|
||||
.alias("stats")
|
||||
.about(tr!("Display statistics on a repository"))
|
||||
.arg(Arg::from_usage("<REPO>")
|
||||
.help(tr!("Path of the repository"))
|
||||
|
@ -514,6 +546,16 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
|
|||
.arg(Arg::from_usage("<NEW>")
|
||||
.help(tr!("New version, [repository]::backup[::subpath]"))
|
||||
.validator(|val| validate_repo_path(val, true, Some(true), None))))
|
||||
.subcommand(SubCommand::with_name("duplicates")
|
||||
.aliases(&["dups"])
|
||||
.about(tr!("Find duplicate files in a backup"))
|
||||
.arg(Arg::from_usage("[min_size] --min-size [SIZE]")
|
||||
.help(tr!("Set the minimum file size"))
|
||||
.default_value(DEFAULT_DUPLICATES_MIN_SIZE_STR)
|
||||
.validator(validate_filesize))
|
||||
.arg(Arg::from_usage("<BACKUP>")
|
||||
.help(tr!("The backup/subtree path, [repository]::backup[::subtree]"))
|
||||
.validator(|val| validate_repo_path(val, true, Some(true), None))))
|
||||
.subcommand(SubCommand::with_name("copy")
|
||||
.alias("cp")
|
||||
.about(tr!("Create a copy of a backup"))
|
||||
|
@ -747,14 +789,14 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
|
|||
inode: inode.map(|v| v.to_string())
|
||||
}
|
||||
}
|
||||
("stats", Some(args)) => {
|
||||
("statistics", Some(args)) => {
|
||||
let (repository, _backup, _inode) = parse_repo_path(
|
||||
args.value_of("REPO").unwrap(),
|
||||
true,
|
||||
Some(false),
|
||||
Some(false)
|
||||
).unwrap();
|
||||
Arguments::Stats { repo_path: repository }
|
||||
Arguments::Statistics { repo_path: repository }
|
||||
}
|
||||
("copy", Some(args)) => {
|
||||
let (repository_src, backup_src, _inode) =
|
||||
|
@ -830,6 +872,18 @@ pub fn parse() -> Result<(log::Level, Arguments), ErrorCode> {
|
|||
.unwrap_or_else(|| vec![])
|
||||
}
|
||||
}
|
||||
("duplicates", Some(args)) => {
|
||||
let (repository, backup, inode) =
|
||||
parse_repo_path(args.value_of("BACKUP").unwrap(), true, Some(true), None).unwrap();
|
||||
Arguments::Duplicates {
|
||||
repo_path: repository,
|
||||
backup_name: backup.unwrap().to_string(),
|
||||
inode: inode.map(|v| v.to_string()),
|
||||
min_size: args.value_of("min_size").map(|v| {
|
||||
parse_filesize(v).unwrap()
|
||||
}).unwrap()
|
||||
}
|
||||
}
|
||||
("config", Some(args)) => {
|
||||
let (repository, _backup, _inode) = parse_repo_path(
|
||||
args.value_of("REPO").unwrap(),
|
||||
|
|
115
src/cli/mod.rs
115
src/cli/mod.rs
|
@ -45,7 +45,8 @@ pub enum ErrorCode {
|
|||
DiffRun,
|
||||
VersionsRun,
|
||||
ImportRun,
|
||||
FuseMount
|
||||
FuseMount,
|
||||
DuplicatesRun
|
||||
}
|
||||
impl ErrorCode {
|
||||
pub fn code(&self) -> i32 {
|
||||
|
@ -81,6 +82,7 @@ impl ErrorCode {
|
|||
ErrorCode::VersionsRun => 22,
|
||||
ErrorCode::ImportRun => 23,
|
||||
ErrorCode::FuseMount => 24,
|
||||
ErrorCode::DuplicatesRun => 27,
|
||||
//
|
||||
ErrorCode::NoSuchBackup => 25,
|
||||
ErrorCode::BackupAlreadyExists => 26,
|
||||
|
@ -94,6 +96,7 @@ pub const DEFAULT_HASH: &str = "blake2";
|
|||
pub const DEFAULT_COMPRESSION: &str = "brotli/3";
|
||||
pub const DEFAULT_BUNDLE_SIZE_STR: &str = "25";
|
||||
pub const DEFAULT_VACUUM_RATIO_STR: &str = "0";
|
||||
pub const DEFAULT_DUPLICATES_MIN_SIZE_STR: &str = "1b";
|
||||
lazy_static! {
|
||||
pub static ref ZVAULT_FOLDER: PathBuf = {
|
||||
env::home_dir().unwrap().join(".zvault")
|
||||
|
@ -132,6 +135,22 @@ fn get_backup(repo: &Repository, backup_name: &str) -> Result<Backup, ErrorCode>
|
|||
))
|
||||
}
|
||||
|
||||
fn get_inode(repo: &mut Repository, backup: &Backup, inode: Option<&String>) -> Result<Inode, ErrorCode> {
|
||||
Ok(if let Some(inode) = inode {
|
||||
checked!(
|
||||
repo.get_backup_inode(&backup, &inode),
|
||||
"load subpath inode",
|
||||
ErrorCode::LoadInode
|
||||
)
|
||||
} else {
|
||||
checked!(
|
||||
repo.get_inode(&backup.root),
|
||||
"load root inode",
|
||||
ErrorCode::LoadInode
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
fn find_reference_backup(
|
||||
repo: &Repository,
|
||||
path: &str,
|
||||
|
@ -322,37 +341,37 @@ fn print_repostats(stats: &RepositoryStatistics) {
|
|||
tr_println!("Displacement:\n - average: {:.1}\n - stddev: {:.1}\n - over {:.1}: {:.0}, {:.1}%\n - maximum: {:.0}",
|
||||
disp.avg, disp.stddev, disp.avg + 2.0 * disp.stddev, disp.count_xl, disp.count_xl as f32 / disp.count as f32 * 100.0, disp.max);
|
||||
println!("");
|
||||
tr_println!("Bundles (all)\n=============");
|
||||
tr_println!("Bundles\n=======");
|
||||
let tsize = (stats.bundles.raw_size.count as f32 * stats.bundles.encoded_size.avg) as u64;
|
||||
tr_println!("All bundles: {} in {} bundles", to_file_size(tsize), stats.bundles.raw_size.count);
|
||||
let rsize = &stats.bundles.raw_size;
|
||||
tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
||||
to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64));
|
||||
tr_println!(" - raw size: ø = {}, maximum: {}", to_file_size(rsize.avg as u64), to_file_size(rsize.max as u64));
|
||||
let esize = &stats.bundles.encoded_size;
|
||||
tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
||||
to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64));
|
||||
tr_println!(" - encoded size: ø = {}, maximum: {}", to_file_size(esize.avg as u64), to_file_size(esize.max as u64));
|
||||
let ccount = &stats.bundles.chunk_count;
|
||||
tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max);
|
||||
println!("");
|
||||
tr_println!("Meta bundles\n============");
|
||||
tr_println!(" - chunk count: ø = {:.1}, maximum: {:.0}", ccount.avg, ccount.max);
|
||||
let tsize = (stats.bundles.raw_size_meta.count as f32 * stats.bundles.encoded_size_meta.avg) as u64;
|
||||
tr_println!("Meta bundles: {} in {} bundles", to_file_size(tsize), stats.bundles.raw_size_meta.count);
|
||||
let rsize = &stats.bundles.raw_size_meta;
|
||||
tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
||||
to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64));
|
||||
tr_println!(" - raw size: ø = {}, maximum: {}", to_file_size(rsize.avg as u64), to_file_size(rsize.max as u64));
|
||||
let esize = &stats.bundles.encoded_size_meta;
|
||||
tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
||||
to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64));
|
||||
tr_println!(" - encoded size: ø = {}, maximum: {}", to_file_size(esize.avg as u64), to_file_size(esize.max as u64));
|
||||
let ccount = &stats.bundles.chunk_count_meta;
|
||||
tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max);
|
||||
println!("");
|
||||
tr_println!("Data bundles\n============");
|
||||
tr_println!(" - chunk count: ø = {:.1}, maximum: {:.0}", ccount.avg, ccount.max);
|
||||
let tsize = (stats.bundles.raw_size_data.count as f32 * stats.bundles.encoded_size_data.avg) as u64;
|
||||
tr_println!("Data bundles: {} in {} bundles", to_file_size(tsize), stats.bundles.raw_size_data.count);
|
||||
let rsize = &stats.bundles.raw_size_data;
|
||||
tr_println!("Raw size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
||||
to_file_size(rsize.avg as u64), to_file_size(rsize.stddev as u64), to_file_size(rsize.max as u64));
|
||||
tr_println!(" - raw size: ø = {}, maximum: {}", to_file_size(rsize.avg as u64), to_file_size(rsize.max as u64));
|
||||
let esize = &stats.bundles.encoded_size_data;
|
||||
tr_println!("Encoded size:\n - average: {}\n - stddev: {}\n - maximum: {}",
|
||||
to_file_size(esize.avg as u64), to_file_size(esize.stddev as u64), to_file_size(esize.max as u64));
|
||||
tr_println!(" - encoded size: ø = {}, maximum: {}", to_file_size(esize.avg as u64), to_file_size(esize.max as u64));
|
||||
let ccount = &stats.bundles.chunk_count_data;
|
||||
tr_println!("Chunk count:\n - average: {:.1}\n - stddev: {:.1}\n - minimum: {:.0}\n - maximum: {:.0}", ccount.avg, ccount.stddev, ccount.min, ccount.max);
|
||||
tr_println!(" - chunk count: ø = {:.1}, maximum: {:.0}", ccount.avg, ccount.max);
|
||||
println!("");
|
||||
tr_println!("Bundle methods\n==============");
|
||||
tr_println!("Hash:");
|
||||
for (hash, &count) in &stats.bundles.hash_methods {
|
||||
tr_println!(" - {}: {}, {:.1}%", hash.name(), count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
|
||||
}
|
||||
tr_println!("Compression:");
|
||||
for (compr, &count) in &stats.bundles.compressions {
|
||||
let compr_name = if let &Some(ref compr) = compr {
|
||||
|
@ -362,9 +381,14 @@ fn print_repostats(stats: &RepositoryStatistics) {
|
|||
};
|
||||
tr_println!(" - {}: {}, {:.1}%", compr_name, count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
|
||||
}
|
||||
tr_println!("Hash:");
|
||||
for (hash, &count) in &stats.bundles.hash_methods {
|
||||
tr_println!(" - {}: {}, {:.1}%", hash.name(), count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
|
||||
tr_println!("Encryption:");
|
||||
for (encr, &count) in &stats.bundles.encryptions {
|
||||
let encr_name = if let &Some(ref encr) = encr {
|
||||
to_hex(&encr.1[..])
|
||||
} else {
|
||||
tr!("none").to_string()
|
||||
};
|
||||
tr_println!(" - {}: {}, {:.1}%", encr_name, count, count as f32 / stats.bundles.raw_size.count as f32 * 100.0);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -465,6 +489,17 @@ fn print_analysis(analysis: &HashMap<u32, BundleAnalysis>) {
|
|||
}
|
||||
}
|
||||
|
||||
fn print_duplicates(dups: Vec<(Vec<PathBuf>, u64)>) {
|
||||
for (group, size) in dups {
|
||||
tr_println!("{} duplicates found, size: {}", group.len(), to_file_size(size));
|
||||
for dup in group {
|
||||
println!(" - {}", dup.to_string_lossy());
|
||||
}
|
||||
println!();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
#[allow(unknown_lints, cyclomatic_complexity)]
|
||||
pub fn run() -> Result<(), ErrorCode> {
|
||||
|
@ -652,19 +687,7 @@ pub fn run() -> Result<(), ErrorCode> {
|
|||
} => {
|
||||
let mut repo = try!(open_repository(&repo_path, true));
|
||||
let backup = try!(get_backup(&repo, &backup_name));
|
||||
let inode = if let Some(inode) = inode {
|
||||
checked!(
|
||||
repo.get_backup_inode(&backup, &inode),
|
||||
"load subpath inode",
|
||||
ErrorCode::LoadInode
|
||||
)
|
||||
} else {
|
||||
checked!(
|
||||
repo.get_inode(&backup.root),
|
||||
"load root inode",
|
||||
ErrorCode::LoadInode
|
||||
)
|
||||
};
|
||||
let inode = try!(get_inode(&mut repo, &backup, inode.as_ref()));
|
||||
if tar {
|
||||
checked!(
|
||||
repo.export_tarfile(&backup, inode, &dst_path),
|
||||
|
@ -917,12 +940,28 @@ pub fn run() -> Result<(), ErrorCode> {
|
|||
print_repoinfo(&repo.info());
|
||||
}
|
||||
}
|
||||
Arguments::Stats {
|
||||
Arguments::Statistics {
|
||||
repo_path
|
||||
} => {
|
||||
let mut repo = try!(open_repository(&repo_path, false));
|
||||
print_repostats(&repo.statistics());
|
||||
}
|
||||
Arguments::Duplicates {
|
||||
repo_path,
|
||||
backup_name,
|
||||
inode,
|
||||
min_size
|
||||
} => {
|
||||
let mut repo = try!(open_repository(&repo_path, true));
|
||||
let backup = try!(get_backup(&repo, &backup_name));
|
||||
let inode = try!(get_inode(&mut repo, &backup, inode.as_ref()));
|
||||
let dups = checked!(
|
||||
repo.find_duplicates(&inode, min_size),
|
||||
"find duplicates",
|
||||
ErrorCode::DuplicatesRun
|
||||
);
|
||||
print_duplicates(dups);
|
||||
}
|
||||
Arguments::Mount {
|
||||
repo_path,
|
||||
backup_name,
|
||||
|
|
|
@ -542,4 +542,49 @@ impl Repository {
|
|||
));
|
||||
Ok(diffs)
|
||||
}
|
||||
|
||||
fn count_sizes_recursive(&mut self, inode: &Inode, sizes: &mut HashMap<u64, usize>, min_size: u64) -> Result<(), RepositoryError> {
|
||||
if inode.size >= min_size {
|
||||
*sizes.entry(inode.size).or_insert(0) += 1;
|
||||
}
|
||||
if let Some(ref children) = inode.children {
|
||||
for chunks in children.values() {
|
||||
let ch = try!(self.get_inode(&chunks));
|
||||
try!(self.count_sizes_recursive(&ch, sizes, min_size));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn find_duplicates_recursive(&mut self, inode: &Inode, path: &Path, sizes: &HashMap<u64, usize>, hashes: &mut HashMap<Hash, (Vec<PathBuf>, u64)>) -> Result<(), RepositoryError> {
|
||||
let path = path.join(&inode.name);
|
||||
if sizes.get(&inode.size).cloned().unwrap_or(0) > 1 {
|
||||
if let Some(ref data) = inode.data {
|
||||
let chunk_data = try!(msgpack::encode(data).map_err(InodeError::from));
|
||||
let hash = HashMethod::Blake2.hash(&chunk_data);
|
||||
hashes.entry(hash).or_insert((Vec::new(), inode.size)).0.push(path.clone());
|
||||
}
|
||||
}
|
||||
if let Some(ref children) = inode.children {
|
||||
for chunks in children.values() {
|
||||
let ch = try!(self.get_inode(&chunks));
|
||||
try!(self.find_duplicates_recursive(&ch, &path, sizes, hashes));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn find_duplicates(&mut self, inode: &Inode, min_size: u64) -> Result<Vec<(Vec<PathBuf>, u64)>, RepositoryError> {
|
||||
let mut sizes = HashMap::new();
|
||||
try!(self.count_sizes_recursive(inode, &mut sizes, min_size));
|
||||
let mut hashes = HashMap::new();
|
||||
if let Some(ref children) = inode.children {
|
||||
for chunks in children.values() {
|
||||
let ch = try!(self.get_inode(&chunks));
|
||||
try!(self.find_duplicates_recursive(&ch, Path::new(""), &sizes, &mut hashes));
|
||||
}
|
||||
}
|
||||
let dups = hashes.into_iter().map(|(_,v)| v).filter(|&(ref v, _)| v.len() > 1).collect();
|
||||
Ok(dups)
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue