mirror of https://github.com/dswd/zvault
Analysis
This commit is contained in:
parent
d80c8ffb69
commit
4945615620
|
@ -98,8 +98,11 @@ Recommended: Brotli/2-7
|
||||||
## TODO
|
## TODO
|
||||||
|
|
||||||
### Core functionality
|
### Core functionality
|
||||||
|
- Fix vacuum inconsistencies (either index-related or bundle-syncing-related)
|
||||||
|
- Proper bundle usage analysis with compressed size estimation
|
||||||
- Recompress & combine bundles
|
- Recompress & combine bundles
|
||||||
- Allow to use tar files for backup and restore (--tar, http://alexcrichton.com/tar-rs/tar/index.html)
|
- Allow to use tar files for backup and restore (--tar, http://alexcrichton.com/tar-rs/tar/index.html)
|
||||||
|
- Allow mounting backups (inode id == position in index, LRU cache)
|
||||||
- File attributes
|
- File attributes
|
||||||
- xattrs https://crates.io/crates/xattr
|
- xattrs https://crates.io/crates/xattr
|
||||||
|
|
||||||
|
|
|
@ -65,6 +65,9 @@ pub enum Arguments {
|
||||||
backup_name: Option<String>,
|
backup_name: Option<String>,
|
||||||
inode: Option<String>
|
inode: Option<String>
|
||||||
},
|
},
|
||||||
|
Analyze {
|
||||||
|
repo_path: String
|
||||||
|
},
|
||||||
BundleList {
|
BundleList {
|
||||||
repo_path: String
|
repo_path: String
|
||||||
},
|
},
|
||||||
|
@ -176,7 +179,7 @@ fn parse_bundle_id(val: &str) -> BundleId {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(unknown_lints,cyclomatic_complexity)]
|
||||||
pub fn parse() -> Arguments {
|
pub fn parse() -> Arguments {
|
||||||
let args = clap_app!(zvault =>
|
let args = clap_app!(zvault =>
|
||||||
(version: crate_version!())
|
(version: crate_version!())
|
||||||
|
@ -259,6 +262,10 @@ pub fn parse() -> Arguments {
|
||||||
(about: "displays information on a repository, a backup or a path in a backup")
|
(about: "displays information on a repository, a backup or a path in a backup")
|
||||||
(@arg PATH: +required "repository[::backup[::subpath]] path")
|
(@arg PATH: +required "repository[::backup[::subpath]] path")
|
||||||
)
|
)
|
||||||
|
(@subcommand analyze =>
|
||||||
|
(about: "analyze the used and reclaimable space of bundles")
|
||||||
|
(@arg REPO: +required "repository path")
|
||||||
|
)
|
||||||
(@subcommand configure =>
|
(@subcommand configure =>
|
||||||
(about: "changes the configuration")
|
(about: "changes the configuration")
|
||||||
(@arg REPO: +required "path of the repository")
|
(@arg REPO: +required "path of the repository")
|
||||||
|
@ -425,6 +432,16 @@ pub fn parse() -> Arguments {
|
||||||
inode: inode.map(|v| v.to_string())
|
inode: inode.map(|v| v.to_string())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if let Some(args) = args.subcommand_matches("analyze") {
|
||||||
|
let (repository, backup, inode) = split_repo_path(args.value_of("REPO").unwrap());
|
||||||
|
if backup.is_some() || inode.is_some() {
|
||||||
|
println!("No backups or subpaths may be given here");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
return Arguments::Analyze {
|
||||||
|
repo_path: repository.to_string()
|
||||||
|
}
|
||||||
|
}
|
||||||
if let Some(args) = args.subcommand_matches("import") {
|
if let Some(args) = args.subcommand_matches("import") {
|
||||||
let (repository, backup, inode) = split_repo_path(args.value_of("REPO").unwrap());
|
let (repository, backup, inode) = split_repo_path(args.value_of("REPO").unwrap());
|
||||||
if backup.is_some() || inode.is_some() {
|
if backup.is_some() || inode.is_some() {
|
||||||
|
|
|
@ -169,6 +169,28 @@ fn print_config(config: &Config) {
|
||||||
println!("Hash method: {}", config.hash.name());
|
println!("Hash method: {}", config.hash.name());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn print_analysis(analysis: &HashMap<u32, BundleAnalysis>) {
|
||||||
|
let mut reclaim_space = [0; 11];
|
||||||
|
let mut data_total = 0;
|
||||||
|
for bundle in analysis.values() {
|
||||||
|
data_total += bundle.info.encoded_size;
|
||||||
|
#[allow(unknown_lints,needless_range_loop)]
|
||||||
|
for i in 0..11 {
|
||||||
|
if bundle.get_usage_ratio() <= i as f32 * 0.1 {
|
||||||
|
reclaim_space[i] += bundle.get_unused_size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!("Total bundle size: {}", to_file_size(data_total as u64));
|
||||||
|
let used = data_total - reclaim_space[10];
|
||||||
|
println!("Space used: {}, {:.1} %", to_file_size(used as u64), used as f32 / data_total as f32 * 100.0);
|
||||||
|
println!("Reclaimable space (depending on vacuum ratio)");
|
||||||
|
#[allow(unknown_lints,needless_range_loop)]
|
||||||
|
for i in 0..11 {
|
||||||
|
println!(" - ratio={:3}: {:6}, {:4.1} %", i*10, to_file_size(reclaim_space[i] as u64), reclaim_space[i] as f32 / data_total as f32 * 100.0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
#[allow(unknown_lints,cyclomatic_complexity)]
|
#[allow(unknown_lints,cyclomatic_complexity)]
|
||||||
pub fn run() {
|
pub fn run() {
|
||||||
|
@ -346,6 +368,10 @@ pub fn run() {
|
||||||
print_repoinfo(&repo.info());
|
print_repoinfo(&repo.info());
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
Arguments::Analyze{repo_path} => {
|
||||||
|
let mut repo = open_repository(&repo_path);
|
||||||
|
print_analysis(&checked(repo.analyze_usage(), "analyze repository"));
|
||||||
|
},
|
||||||
Arguments::BundleList{repo_path} => {
|
Arguments::BundleList{repo_path} => {
|
||||||
let repo = open_repository(&repo_path);
|
let repo = open_repository(&repo_path);
|
||||||
for bundle in repo.list_bundles() {
|
for bundle in repo.list_bundles() {
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
pub use ::util::*;
|
pub use ::util::*;
|
||||||
pub use ::bundledb::{BundleReader, BundleMode, BundleWriter, BundleInfo, BundleId, BundleDbError, BundleDb, BundleWriterError};
|
pub use ::bundledb::{BundleReader, BundleMode, BundleWriter, BundleInfo, BundleId, BundleDbError, BundleDb, BundleWriterError};
|
||||||
pub use ::chunker::{ChunkerType, Chunker, ChunkerStatus, IChunker, ChunkerError};
|
pub use ::chunker::{ChunkerType, Chunker, ChunkerStatus, IChunker, ChunkerError};
|
||||||
pub use ::repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType, RepositoryIntegrityError, BackupFileError, BackupError, BackupOptions};
|
pub use ::repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType, RepositoryIntegrityError, BackupFileError, BackupError, BackupOptions, BundleAnalysis};
|
||||||
pub use ::index::{Index, Location, IndexError};
|
pub use ::index::{Index, Location, IndexError};
|
||||||
|
|
||||||
pub use serde::{Serialize, Deserialize};
|
pub use serde::{Serialize, Deserialize};
|
||||||
|
|
|
@ -1,5 +1,32 @@
|
||||||
use ::prelude::*;
|
use ::prelude::*;
|
||||||
|
|
||||||
|
use super::metadata::FileContents;
|
||||||
|
|
||||||
|
use std::collections::{HashMap, VecDeque};
|
||||||
|
|
||||||
|
|
||||||
|
pub struct BundleAnalysis {
|
||||||
|
pub info: BundleInfo,
|
||||||
|
pub chunk_usage: Bitmap,
|
||||||
|
pub used_raw_size: usize
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BundleAnalysis {
|
||||||
|
#[inline]
|
||||||
|
pub fn get_usage_ratio(&self) -> f32 {
|
||||||
|
self.used_raw_size as f32 / self.info.raw_size as f32
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn get_used_size(&self) -> usize {
|
||||||
|
(self.get_usage_ratio() * self.info.encoded_size as f32) as usize
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn get_unused_size(&self) -> usize {
|
||||||
|
((1.0 - self.get_usage_ratio()) * self.info.encoded_size as f32) as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub struct RepositoryInfo {
|
pub struct RepositoryInfo {
|
||||||
pub bundle_count: usize,
|
pub bundle_count: usize,
|
||||||
|
@ -15,6 +42,70 @@ pub struct RepositoryInfo {
|
||||||
|
|
||||||
|
|
||||||
impl Repository {
|
impl Repository {
|
||||||
|
fn mark_used(&self, bundles: &mut HashMap<u32, BundleAnalysis>, chunks: &[Chunk]) -> Result<bool, RepositoryError> {
|
||||||
|
let mut new = false;
|
||||||
|
for &(hash, len) in chunks {
|
||||||
|
if let Some(pos) = self.index.get(&hash) {
|
||||||
|
if let Some(bundle) = bundles.get_mut(&pos.bundle) {
|
||||||
|
if !bundle.chunk_usage.get(pos.chunk as usize) {
|
||||||
|
new = true;
|
||||||
|
bundle.chunk_usage.set(pos.chunk as usize);
|
||||||
|
bundle.used_raw_size += len as usize;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Err(RepositoryIntegrityError::MissingBundleId(pos.bundle).into());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Err(RepositoryIntegrityError::MissingChunk(hash).into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(new)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn analyze_usage(&mut self) -> Result<HashMap<u32, BundleAnalysis>, RepositoryError> {
|
||||||
|
let mut usage = HashMap::new();
|
||||||
|
for (id, bundle) in self.bundle_map.bundles() {
|
||||||
|
let bundle = try!(self.bundles.get_bundle_info(&bundle).ok_or_else(|| RepositoryIntegrityError::MissingBundle(bundle)));
|
||||||
|
usage.insert(id, BundleAnalysis {
|
||||||
|
chunk_usage: Bitmap::new(bundle.chunk_count),
|
||||||
|
info: bundle.clone(),
|
||||||
|
used_raw_size: 0
|
||||||
|
});
|
||||||
|
}
|
||||||
|
let backups = try!(self.get_backups());
|
||||||
|
let mut todo = VecDeque::new();
|
||||||
|
for (_name, backup) in backups {
|
||||||
|
todo.push_back(backup.root);
|
||||||
|
}
|
||||||
|
while let Some(chunks) = todo.pop_back() {
|
||||||
|
if !try!(self.mark_used(&mut usage, &chunks)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
let inode = try!(self.get_inode(&chunks));
|
||||||
|
// Mark the content chunks as used
|
||||||
|
match inode.contents {
|
||||||
|
None | Some(FileContents::Inline(_)) => (),
|
||||||
|
Some(FileContents::ChunkedDirect(chunks)) => {
|
||||||
|
try!(self.mark_used(&mut usage, &chunks));
|
||||||
|
},
|
||||||
|
Some(FileContents::ChunkedIndirect(chunks)) => {
|
||||||
|
if try!(self.mark_used(&mut usage, &chunks)) {
|
||||||
|
let chunk_data = try!(self.get_data(&chunks));
|
||||||
|
let chunks = ChunkList::read_from(&chunk_data);
|
||||||
|
try!(self.mark_used(&mut usage, &chunks));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Put children in todo
|
||||||
|
if let Some(children) = inode.children {
|
||||||
|
for (_name, chunks) in children {
|
||||||
|
todo.push_back(chunks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(usage)
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn list_bundles(&self) -> Vec<&BundleInfo> {
|
pub fn list_bundles(&self) -> Vec<&BundleInfo> {
|
||||||
self.bundles.list_bundles()
|
self.bundles.list_bundles()
|
||||||
|
|
|
@ -24,7 +24,7 @@ pub use self::metadata::{Inode, FileType};
|
||||||
pub use self::backup::{BackupError, BackupOptions};
|
pub use self::backup::{BackupError, BackupOptions};
|
||||||
pub use self::backup_file::{Backup, BackupFileError};
|
pub use self::backup_file::{Backup, BackupFileError};
|
||||||
pub use self::integrity::RepositoryIntegrityError;
|
pub use self::integrity::RepositoryIntegrityError;
|
||||||
pub use self::info::RepositoryInfo;
|
pub use self::info::{RepositoryInfo, BundleAnalysis};
|
||||||
use self::bundle_map::BundleMap;
|
use self::bundle_map::BundleMap;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,86 +1,9 @@
|
||||||
use ::prelude::*;
|
use ::prelude::*;
|
||||||
|
|
||||||
use super::metadata::FileContents;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use std::collections::{HashMap, HashSet, VecDeque};
|
|
||||||
|
|
||||||
|
|
||||||
pub struct BundleUsage {
|
|
||||||
pub used: Bitmap,
|
|
||||||
pub mode: Bitmap,
|
|
||||||
pub chunk_count: usize,
|
|
||||||
pub total_size: usize,
|
|
||||||
pub used_size: usize
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Repository {
|
impl Repository {
|
||||||
fn mark_used(&self, bundles: &mut HashMap<u32, BundleUsage>, chunks: &[Chunk], mode: BundleMode) -> Result<bool, RepositoryError> {
|
|
||||||
let mut new = false;
|
|
||||||
for chunk in chunks {
|
|
||||||
if let Some(pos) = self.index.get(&chunk.0) {
|
|
||||||
if let Some(bundle) = bundles.get_mut(&pos.bundle) {
|
|
||||||
if !bundle.used.get(pos.chunk as usize) {
|
|
||||||
new = true;
|
|
||||||
bundle.used.set(pos.chunk as usize);
|
|
||||||
bundle.used_size += chunk.1 as usize;
|
|
||||||
if mode == BundleMode::Meta {
|
|
||||||
bundle.mode.set(pos.chunk as usize);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
return Err(RepositoryIntegrityError::MissingChunk(chunk.0).into());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(new)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn analyze_usage(&mut self) -> Result<HashMap<u32, BundleUsage>, RepositoryError> {
|
|
||||||
let mut usage = HashMap::new();
|
|
||||||
for (id, bundle) in self.bundle_map.bundles() {
|
|
||||||
let bundle = try!(self.bundles.get_bundle_info(&bundle).ok_or_else(|| RepositoryIntegrityError::MissingBundle(bundle)));
|
|
||||||
usage.insert(id, BundleUsage {
|
|
||||||
used: Bitmap::new(bundle.chunk_count),
|
|
||||||
mode: Bitmap::new(bundle.chunk_count),
|
|
||||||
chunk_count: bundle.chunk_count,
|
|
||||||
total_size: bundle.raw_size,
|
|
||||||
used_size: 0
|
|
||||||
});
|
|
||||||
}
|
|
||||||
let backups = try!(self.get_backups());
|
|
||||||
for (_name, backup) in backups {
|
|
||||||
let mut todo = VecDeque::new();
|
|
||||||
todo.push_back(backup.root);
|
|
||||||
while let Some(chunks) = todo.pop_front() {
|
|
||||||
if !try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
let inode = try!(self.get_inode(&chunks));
|
|
||||||
// Mark the content chunks as used
|
|
||||||
match inode.contents {
|
|
||||||
Some(FileContents::ChunkedDirect(chunks)) => {
|
|
||||||
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
|
|
||||||
},
|
|
||||||
Some(FileContents::ChunkedIndirect(chunks)) => {
|
|
||||||
if try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
|
|
||||||
let chunk_data = try!(self.get_data(&chunks));
|
|
||||||
let chunks = ChunkList::read_from(&chunk_data);
|
|
||||||
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => ()
|
|
||||||
}
|
|
||||||
// Put children in todo
|
|
||||||
if let Some(children) = inode.children {
|
|
||||||
for (_name, chunks) in children {
|
|
||||||
todo.push_back(chunks);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(usage)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn delete_bundle(&mut self, id: u32) -> Result<(), RepositoryError> {
|
fn delete_bundle(&mut self, id: u32) -> Result<(), RepositoryError> {
|
||||||
if let Some(bundle) = self.bundle_map.remove(id) {
|
if let Some(bundle) = self.bundle_map.remove(id) {
|
||||||
try!(self.bundles.delete_bundle(&bundle));
|
try!(self.bundles.delete_bundle(&bundle));
|
||||||
|
@ -96,15 +19,19 @@ impl Repository {
|
||||||
let _lock = try!(self.lock(true));
|
let _lock = try!(self.lock(true));
|
||||||
info!("Analyzing chunk usage");
|
info!("Analyzing chunk usage");
|
||||||
let usage = try!(self.analyze_usage());
|
let usage = try!(self.analyze_usage());
|
||||||
let total = usage.values().map(|b| b.total_size).sum::<usize>();
|
let mut data_total = 0;
|
||||||
let used = usage.values().map(|b| b.used_size).sum::<usize>();
|
let mut data_used = 0;
|
||||||
info!("Usage: {} of {}, {:.1}%", to_file_size(used as u64), to_file_size(total as u64), used as f32/total as f32*100.0);
|
for bundle in usage.values() {
|
||||||
|
data_total += bundle.info.encoded_size;
|
||||||
|
data_used += bundle.get_used_size();
|
||||||
|
}
|
||||||
|
info!("Usage: {} of {}, {:.1}%", to_file_size(data_used as u64), to_file_size(data_total as u64), data_used as f32/data_total as f32*100.0);
|
||||||
let mut rewrite_bundles = HashSet::new();
|
let mut rewrite_bundles = HashSet::new();
|
||||||
let mut reclaim_space = 0;
|
let mut reclaim_space = 0;
|
||||||
for (id, bundle) in &usage {
|
for (id, bundle) in &usage {
|
||||||
if bundle.used_size as f32 / bundle.total_size as f32 <= ratio {
|
if bundle.get_usage_ratio() <= ratio {
|
||||||
rewrite_bundles.insert(*id);
|
rewrite_bundles.insert(*id);
|
||||||
reclaim_space += bundle.total_size - bundle.used_size;
|
reclaim_space += bundle.get_unused_size();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
info!("Reclaiming {} by rewriting {} bundles", to_file_size(reclaim_space as u64), rewrite_bundles.len());
|
info!("Reclaiming {} by rewriting {} bundles", to_file_size(reclaim_space as u64), rewrite_bundles.len());
|
||||||
|
@ -115,35 +42,24 @@ impl Repository {
|
||||||
let bundle = &usage[id];
|
let bundle = &usage[id];
|
||||||
let bundle_id = self.bundle_map.get(*id).unwrap();
|
let bundle_id = self.bundle_map.get(*id).unwrap();
|
||||||
let chunks = try!(self.bundles.get_chunk_list(&bundle_id));
|
let chunks = try!(self.bundles.get_chunk_list(&bundle_id));
|
||||||
|
let mode = usage[id].info.mode;
|
||||||
for (chunk, &(hash, _len)) in chunks.into_iter().enumerate() {
|
for (chunk, &(hash, _len)) in chunks.into_iter().enumerate() {
|
||||||
if !bundle.used.get(chunk) {
|
if !bundle.chunk_usage.get(chunk) {
|
||||||
try!(self.index.delete(&hash));
|
try!(self.index.delete(&hash));
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
let data = try!(self.bundles.get_chunk(&bundle_id, chunk));
|
let data = try!(self.bundles.get_chunk(&bundle_id, chunk));
|
||||||
let mode = if bundle.mode.get(chunk) {
|
|
||||||
BundleMode::Meta
|
|
||||||
} else {
|
|
||||||
BundleMode::Content
|
|
||||||
};
|
|
||||||
try!(self.put_chunk_override(mode, hash, &data));
|
try!(self.put_chunk_override(mode, hash, &data));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
try!(self.flush());
|
try!(self.flush());
|
||||||
info!("Checking index");
|
info!("Checking index");
|
||||||
let mut pos = 0;
|
self.index.walk::<_, ()>(|_hash, location| {
|
||||||
loop {
|
if rewrite_bundles.contains(&location.bundle) {
|
||||||
pos = if let Some(pos) = self.index.next_entry(pos) {
|
|
||||||
pos
|
|
||||||
} else {
|
|
||||||
break
|
|
||||||
};
|
|
||||||
let entry = self.index.get_entry(pos).unwrap();
|
|
||||||
if rewrite_bundles.contains(&entry.data.bundle) {
|
|
||||||
panic!("Removed bundle is still referenced in index");
|
panic!("Removed bundle is still referenced in index");
|
||||||
}
|
}
|
||||||
pos += 1;
|
Ok(())
|
||||||
}
|
}).ok();
|
||||||
info!("Deleting {} bundles", rewrite_bundles.len());
|
info!("Deleting {} bundles", rewrite_bundles.len());
|
||||||
for id in rewrite_bundles {
|
for id in rewrite_bundles {
|
||||||
try!(self.delete_bundle(id));
|
try!(self.delete_bundle(id));
|
||||||
|
|
Loading…
Reference in New Issue