mirror of https://github.com/dswd/zvault
Analysis
This commit is contained in:
parent
d80c8ffb69
commit
4945615620
|
@ -98,8 +98,11 @@ Recommended: Brotli/2-7
|
|||
## TODO
|
||||
|
||||
### Core functionality
|
||||
- Fix vacuum inconsistencies (either index related, or bundle syncing related)
|
||||
- Proper bundle usage analysis with compressed size estimation
|
||||
- Recompress & combine bundles
|
||||
- Allow using tar files for backup and restore (--tar, http://alexcrichton.com/tar-rs/tar/index.html)
|
||||
- Allow mounting backups (inode id == position in index, lru cache)
|
||||
- File attributes
|
||||
- xattrs https://crates.io/crates/xattr
|
||||
|
||||
|
|
|
@ -65,6 +65,9 @@ pub enum Arguments {
|
|||
backup_name: Option<String>,
|
||||
inode: Option<String>
|
||||
},
|
||||
Analyze {
|
||||
repo_path: String
|
||||
},
|
||||
BundleList {
|
||||
repo_path: String
|
||||
},
|
||||
|
@ -176,7 +179,7 @@ fn parse_bundle_id(val: &str) -> BundleId {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
#[allow(unknown_lints,cyclomatic_complexity)]
|
||||
pub fn parse() -> Arguments {
|
||||
let args = clap_app!(zvault =>
|
||||
(version: crate_version!())
|
||||
|
@ -259,6 +262,10 @@ pub fn parse() -> Arguments {
|
|||
(about: "displays information on a repository, a backup or a path in a backup")
|
||||
(@arg PATH: +required "repository[::backup[::subpath]] path")
|
||||
)
|
||||
(@subcommand analyze =>
|
||||
(about: "analyze the used and reclaimable space of bundles")
|
||||
(@arg REPO: +required "repository path")
|
||||
)
|
||||
(@subcommand configure =>
|
||||
(about: "changes the configuration")
|
||||
(@arg REPO: +required "path of the repository")
|
||||
|
@ -425,6 +432,16 @@ pub fn parse() -> Arguments {
|
|||
inode: inode.map(|v| v.to_string())
|
||||
}
|
||||
}
|
||||
if let Some(args) = args.subcommand_matches("analyze") {
|
||||
let (repository, backup, inode) = split_repo_path(args.value_of("REPO").unwrap());
|
||||
if backup.is_some() || inode.is_some() {
|
||||
println!("No backups or subpaths may be given here");
|
||||
exit(1);
|
||||
}
|
||||
return Arguments::Analyze {
|
||||
repo_path: repository.to_string()
|
||||
}
|
||||
}
|
||||
if let Some(args) = args.subcommand_matches("import") {
|
||||
let (repository, backup, inode) = split_repo_path(args.value_of("REPO").unwrap());
|
||||
if backup.is_some() || inode.is_some() {
|
||||
|
|
|
@ -169,6 +169,28 @@ fn print_config(config: &Config) {
|
|||
println!("Hash method: {}", config.hash.name());
|
||||
}
|
||||
|
||||
fn print_analysis(analysis: &HashMap<u32, BundleAnalysis>) {
|
||||
let mut reclaim_space = [0; 11];
|
||||
let mut data_total = 0;
|
||||
for bundle in analysis.values() {
|
||||
data_total += bundle.info.encoded_size;
|
||||
#[allow(unknown_lints,needless_range_loop)]
|
||||
for i in 0..11 {
|
||||
if bundle.get_usage_ratio() <= i as f32 * 0.1 {
|
||||
reclaim_space[i] += bundle.get_unused_size();
|
||||
}
|
||||
}
|
||||
}
|
||||
println!("Total bundle size: {}", to_file_size(data_total as u64));
|
||||
let used = data_total - reclaim_space[10];
|
||||
println!("Space used: {}, {:.1} %", to_file_size(used as u64), used as f32 / data_total as f32 * 100.0);
|
||||
println!("Reclaimable space (depending on vacuum ratio)");
|
||||
#[allow(unknown_lints,needless_range_loop)]
|
||||
for i in 0..11 {
|
||||
println!(" - ratio={:3}: {:6}, {:4.1} %", i*10, to_file_size(reclaim_space[i] as u64), reclaim_space[i] as f32 / data_total as f32 * 100.0);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#[allow(unknown_lints,cyclomatic_complexity)]
|
||||
pub fn run() {
|
||||
|
@ -346,6 +368,10 @@ pub fn run() {
|
|||
print_repoinfo(&repo.info());
|
||||
}
|
||||
},
|
||||
Arguments::Analyze{repo_path} => {
|
||||
let mut repo = open_repository(&repo_path);
|
||||
print_analysis(&checked(repo.analyze_usage(), "analyze repository"));
|
||||
},
|
||||
Arguments::BundleList{repo_path} => {
|
||||
let repo = open_repository(&repo_path);
|
||||
for bundle in repo.list_bundles() {
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
pub use ::util::*;
|
||||
pub use ::bundledb::{BundleReader, BundleMode, BundleWriter, BundleInfo, BundleId, BundleDbError, BundleDb, BundleWriterError};
|
||||
pub use ::chunker::{ChunkerType, Chunker, ChunkerStatus, IChunker, ChunkerError};
|
||||
pub use ::repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType, RepositoryIntegrityError, BackupFileError, BackupError, BackupOptions};
|
||||
pub use ::repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType, RepositoryIntegrityError, BackupFileError, BackupError, BackupOptions, BundleAnalysis};
|
||||
pub use ::index::{Index, Location, IndexError};
|
||||
|
||||
pub use serde::{Serialize, Deserialize};
|
||||
|
|
|
@ -1,5 +1,32 @@
|
|||
use ::prelude::*;
|
||||
|
||||
use super::metadata::FileContents;
|
||||
|
||||
use std::collections::{HashMap, VecDeque};
|
||||
|
||||
|
||||
pub struct BundleAnalysis {
|
||||
pub info: BundleInfo,
|
||||
pub chunk_usage: Bitmap,
|
||||
pub used_raw_size: usize
|
||||
}
|
||||
|
||||
impl BundleAnalysis {
|
||||
#[inline]
|
||||
pub fn get_usage_ratio(&self) -> f32 {
|
||||
self.used_raw_size as f32 / self.info.raw_size as f32
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn get_used_size(&self) -> usize {
|
||||
(self.get_usage_ratio() * self.info.encoded_size as f32) as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn get_unused_size(&self) -> usize {
|
||||
((1.0 - self.get_usage_ratio()) * self.info.encoded_size as f32) as usize
|
||||
}
|
||||
}
|
||||
|
||||
pub struct RepositoryInfo {
|
||||
pub bundle_count: usize,
|
||||
|
@ -15,6 +42,70 @@ pub struct RepositoryInfo {
|
|||
|
||||
|
||||
impl Repository {
|
||||
fn mark_used(&self, bundles: &mut HashMap<u32, BundleAnalysis>, chunks: &[Chunk]) -> Result<bool, RepositoryError> {
|
||||
let mut new = false;
|
||||
for &(hash, len) in chunks {
|
||||
if let Some(pos) = self.index.get(&hash) {
|
||||
if let Some(bundle) = bundles.get_mut(&pos.bundle) {
|
||||
if !bundle.chunk_usage.get(pos.chunk as usize) {
|
||||
new = true;
|
||||
bundle.chunk_usage.set(pos.chunk as usize);
|
||||
bundle.used_raw_size += len as usize;
|
||||
}
|
||||
} else {
|
||||
return Err(RepositoryIntegrityError::MissingBundleId(pos.bundle).into());
|
||||
}
|
||||
} else {
|
||||
return Err(RepositoryIntegrityError::MissingChunk(hash).into());
|
||||
}
|
||||
}
|
||||
Ok(new)
|
||||
}
|
||||
|
||||
pub fn analyze_usage(&mut self) -> Result<HashMap<u32, BundleAnalysis>, RepositoryError> {
|
||||
let mut usage = HashMap::new();
|
||||
for (id, bundle) in self.bundle_map.bundles() {
|
||||
let bundle = try!(self.bundles.get_bundle_info(&bundle).ok_or_else(|| RepositoryIntegrityError::MissingBundle(bundle)));
|
||||
usage.insert(id, BundleAnalysis {
|
||||
chunk_usage: Bitmap::new(bundle.chunk_count),
|
||||
info: bundle.clone(),
|
||||
used_raw_size: 0
|
||||
});
|
||||
}
|
||||
let backups = try!(self.get_backups());
|
||||
let mut todo = VecDeque::new();
|
||||
for (_name, backup) in backups {
|
||||
todo.push_back(backup.root);
|
||||
}
|
||||
while let Some(chunks) = todo.pop_back() {
|
||||
if !try!(self.mark_used(&mut usage, &chunks)) {
|
||||
continue
|
||||
}
|
||||
let inode = try!(self.get_inode(&chunks));
|
||||
// Mark the content chunks as used
|
||||
match inode.contents {
|
||||
None | Some(FileContents::Inline(_)) => (),
|
||||
Some(FileContents::ChunkedDirect(chunks)) => {
|
||||
try!(self.mark_used(&mut usage, &chunks));
|
||||
},
|
||||
Some(FileContents::ChunkedIndirect(chunks)) => {
|
||||
if try!(self.mark_used(&mut usage, &chunks)) {
|
||||
let chunk_data = try!(self.get_data(&chunks));
|
||||
let chunks = ChunkList::read_from(&chunk_data);
|
||||
try!(self.mark_used(&mut usage, &chunks));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Put children in todo
|
||||
if let Some(children) = inode.children {
|
||||
for (_name, chunks) in children {
|
||||
todo.push_back(chunks);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(usage)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn list_bundles(&self) -> Vec<&BundleInfo> {
|
||||
self.bundles.list_bundles()
|
||||
|
|
|
@ -24,7 +24,7 @@ pub use self::metadata::{Inode, FileType};
|
|||
pub use self::backup::{BackupError, BackupOptions};
|
||||
pub use self::backup_file::{Backup, BackupFileError};
|
||||
pub use self::integrity::RepositoryIntegrityError;
|
||||
pub use self::info::RepositoryInfo;
|
||||
pub use self::info::{RepositoryInfo, BundleAnalysis};
|
||||
use self::bundle_map::BundleMap;
|
||||
|
||||
|
||||
|
|
|
@ -1,86 +1,9 @@
|
|||
use ::prelude::*;
|
||||
|
||||
use super::metadata::FileContents;
|
||||
use std::collections::HashSet;
|
||||
|
||||
use std::collections::{HashMap, HashSet, VecDeque};
|
||||
|
||||
|
||||
/// Chunk usage information of a single bundle, as collected by `analyze_usage`.
pub struct BundleUsage {
|
||||
/// One bit per chunk; set when the chunk has been marked as used.
pub used: Bitmap,
|
||||
/// One bit per chunk; set when the chunk was marked with `BundleMode::Meta`.
pub mode: Bitmap,
|
||||
/// Number of chunks in the bundle (taken from the bundle info).
pub chunk_count: usize,
|
||||
/// Raw size of the bundle (taken from `raw_size` of the bundle info).
pub total_size: usize,
|
||||
/// Sum of the lengths of all chunks marked as used.
pub used_size: usize
|
||||
}
|
||||
|
||||
impl Repository {
|
||||
fn mark_used(&self, bundles: &mut HashMap<u32, BundleUsage>, chunks: &[Chunk], mode: BundleMode) -> Result<bool, RepositoryError> {
|
||||
let mut new = false;
|
||||
for chunk in chunks {
|
||||
if let Some(pos) = self.index.get(&chunk.0) {
|
||||
if let Some(bundle) = bundles.get_mut(&pos.bundle) {
|
||||
if !bundle.used.get(pos.chunk as usize) {
|
||||
new = true;
|
||||
bundle.used.set(pos.chunk as usize);
|
||||
bundle.used_size += chunk.1 as usize;
|
||||
if mode == BundleMode::Meta {
|
||||
bundle.mode.set(pos.chunk as usize);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
return Err(RepositoryIntegrityError::MissingChunk(chunk.0).into());
|
||||
}
|
||||
}
|
||||
Ok(new)
|
||||
}
|
||||
|
||||
pub fn analyze_usage(&mut self) -> Result<HashMap<u32, BundleUsage>, RepositoryError> {
|
||||
let mut usage = HashMap::new();
|
||||
for (id, bundle) in self.bundle_map.bundles() {
|
||||
let bundle = try!(self.bundles.get_bundle_info(&bundle).ok_or_else(|| RepositoryIntegrityError::MissingBundle(bundle)));
|
||||
usage.insert(id, BundleUsage {
|
||||
used: Bitmap::new(bundle.chunk_count),
|
||||
mode: Bitmap::new(bundle.chunk_count),
|
||||
chunk_count: bundle.chunk_count,
|
||||
total_size: bundle.raw_size,
|
||||
used_size: 0
|
||||
});
|
||||
}
|
||||
let backups = try!(self.get_backups());
|
||||
for (_name, backup) in backups {
|
||||
let mut todo = VecDeque::new();
|
||||
todo.push_back(backup.root);
|
||||
while let Some(chunks) = todo.pop_front() {
|
||||
if !try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
|
||||
continue
|
||||
}
|
||||
let inode = try!(self.get_inode(&chunks));
|
||||
// Mark the content chunks as used
|
||||
match inode.contents {
|
||||
Some(FileContents::ChunkedDirect(chunks)) => {
|
||||
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
|
||||
},
|
||||
Some(FileContents::ChunkedIndirect(chunks)) => {
|
||||
if try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
|
||||
let chunk_data = try!(self.get_data(&chunks));
|
||||
let chunks = ChunkList::read_from(&chunk_data);
|
||||
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
|
||||
}
|
||||
}
|
||||
_ => ()
|
||||
}
|
||||
// Put children in todo
|
||||
if let Some(children) = inode.children {
|
||||
for (_name, chunks) in children {
|
||||
todo.push_back(chunks);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(usage)
|
||||
}
|
||||
|
||||
fn delete_bundle(&mut self, id: u32) -> Result<(), RepositoryError> {
|
||||
if let Some(bundle) = self.bundle_map.remove(id) {
|
||||
try!(self.bundles.delete_bundle(&bundle));
|
||||
|
@ -96,15 +19,19 @@ impl Repository {
|
|||
let _lock = try!(self.lock(true));
|
||||
info!("Analyzing chunk usage");
|
||||
let usage = try!(self.analyze_usage());
|
||||
let total = usage.values().map(|b| b.total_size).sum::<usize>();
|
||||
let used = usage.values().map(|b| b.used_size).sum::<usize>();
|
||||
info!("Usage: {} of {}, {:.1}%", to_file_size(used as u64), to_file_size(total as u64), used as f32/total as f32*100.0);
|
||||
let mut data_total = 0;
|
||||
let mut data_used = 0;
|
||||
for bundle in usage.values() {
|
||||
data_total += bundle.info.encoded_size;
|
||||
data_used += bundle.get_used_size();
|
||||
}
|
||||
info!("Usage: {} of {}, {:.1}%", to_file_size(data_used as u64), to_file_size(data_total as u64), data_used as f32/data_total as f32*100.0);
|
||||
let mut rewrite_bundles = HashSet::new();
|
||||
let mut reclaim_space = 0;
|
||||
for (id, bundle) in &usage {
|
||||
if bundle.used_size as f32 / bundle.total_size as f32 <= ratio {
|
||||
if bundle.get_usage_ratio() <= ratio {
|
||||
rewrite_bundles.insert(*id);
|
||||
reclaim_space += bundle.total_size - bundle.used_size;
|
||||
reclaim_space += bundle.get_unused_size();
|
||||
}
|
||||
}
|
||||
info!("Reclaiming {} by rewriting {} bundles", to_file_size(reclaim_space as u64), rewrite_bundles.len());
|
||||
|
@ -115,35 +42,24 @@ impl Repository {
|
|||
let bundle = &usage[id];
|
||||
let bundle_id = self.bundle_map.get(*id).unwrap();
|
||||
let chunks = try!(self.bundles.get_chunk_list(&bundle_id));
|
||||
let mode = usage[id].info.mode;
|
||||
for (chunk, &(hash, _len)) in chunks.into_iter().enumerate() {
|
||||
if !bundle.used.get(chunk) {
|
||||
if !bundle.chunk_usage.get(chunk) {
|
||||
try!(self.index.delete(&hash));
|
||||
continue
|
||||
}
|
||||
let data = try!(self.bundles.get_chunk(&bundle_id, chunk));
|
||||
let mode = if bundle.mode.get(chunk) {
|
||||
BundleMode::Meta
|
||||
} else {
|
||||
BundleMode::Content
|
||||
};
|
||||
try!(self.put_chunk_override(mode, hash, &data));
|
||||
}
|
||||
}
|
||||
try!(self.flush());
|
||||
info!("Checking index");
|
||||
let mut pos = 0;
|
||||
loop {
|
||||
pos = if let Some(pos) = self.index.next_entry(pos) {
|
||||
pos
|
||||
} else {
|
||||
break
|
||||
};
|
||||
let entry = self.index.get_entry(pos).unwrap();
|
||||
if rewrite_bundles.contains(&entry.data.bundle) {
|
||||
self.index.walk::<_, ()>(|_hash, location| {
|
||||
if rewrite_bundles.contains(&location.bundle) {
|
||||
panic!("Removed bundle is still referenced in index");
|
||||
}
|
||||
pos += 1;
|
||||
}
|
||||
Ok(())
|
||||
}).ok();
|
||||
info!("Deleting {} bundles", rewrite_bundles.len());
|
||||
for id in rewrite_bundles {
|
||||
try!(self.delete_bundle(id));
|
||||
|
|
Loading…
Reference in New Issue