Dennis Schwerdel 2017-03-25 12:43:49 +01:00 committed by Dennis Schwerdel
parent d80c8ffb69
commit 4945615620
7 changed files with 156 additions and 103 deletions

View File

@@ -98,8 +98,11 @@ Recommended: Brotli/2-7
## TODO
### Core functionality
- Fix vacuum inconsistencies (either index related, or bundle syncing related)
- Proper bundle usage analysis with compressed size estimation
- Recompress & combine bundles
- Allow to use tar files for backup and restore (--tar, http://alexcrichton.com/tar-rs/tar/index.html)
- Allow to mount backups (inode id == position in index, lru cache)
- File attributes
- xattrs https://crates.io/crates/xattr

View File

@@ -65,6 +65,9 @@ pub enum Arguments {
backup_name: Option<String>,
inode: Option<String>
},
Analyze {
repo_path: String
},
BundleList {
repo_path: String
},
@@ -176,7 +179,7 @@ fn parse_bundle_id(val: &str) -> BundleId {
}
}
#[allow(unknown_lints,cyclomatic_complexity)]
pub fn parse() -> Arguments {
let args = clap_app!(zvault =>
(version: crate_version!())
@@ -259,6 +262,10 @@ pub fn parse() -> Arguments {
(about: "displays information on a repository, a backup or a path in a backup")
(@arg PATH: +required "repository[::backup[::subpath]] path")
)
(@subcommand analyze =>
(about: "analyze the used and reclaimable space of bundles")
(@arg REPO: +required "repository path")
)
(@subcommand configure =>
(about: "changes the configuration")
(@arg REPO: +required "path of the repository")
@@ -425,6 +432,16 @@ pub fn parse() -> Arguments {
inode: inode.map(|v| v.to_string())
}
}
if let Some(args) = args.subcommand_matches("analyze") {
let (repository, backup, inode) = split_repo_path(args.value_of("REPO").unwrap());
if backup.is_some() || inode.is_some() {
println!("No backups or subpaths may be given here");
exit(1);
}
return Arguments::Analyze {
repo_path: repository.to_string()
}
}
if let Some(args) = args.subcommand_matches("import") {
let (repository, backup, inode) = split_repo_path(args.value_of("REPO").unwrap());
if backup.is_some() || inode.is_some() {
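The new `analyze` subcommand takes a bare repository path: `split_repo_path` breaks the `repository[::backup[::subpath]]` form apart, and the parser exits with an error if a backup or subpath component is present. A minimal sketch of that splitting, assuming it simply splits on `::` (the helper's actual implementation is not part of this diff):

```rust
// Hypothetical sketch of split_repo_path, inferred from the
// "repository[::backup[::subpath]]" help text; not the real implementation.
fn split_repo_path(path: &str) -> (&str, Option<&str>, Option<&str>) {
    let mut parts = path.splitn(3, "::");
    let repo = parts.next().unwrap_or("");
    (repo, parts.next(), parts.next())
}

fn main() {
    assert_eq!(split_repo_path("repo"), ("repo", None, None));
    // The analyze subcommand rejects anything after the repository part.
    assert_eq!(split_repo_path("repo::daily"), ("repo", Some("daily"), None));
    assert_eq!(split_repo_path("repo::daily::etc"), ("repo", Some("daily"), Some("etc")));
}
```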

View File

@@ -169,6 +169,28 @@ fn print_config(config: &Config) {
println!("Hash method: {}", config.hash.name());
}
fn print_analysis(analysis: &HashMap<u32, BundleAnalysis>) {
let mut reclaim_space = [0; 11];
let mut data_total = 0;
for bundle in analysis.values() {
data_total += bundle.info.encoded_size;
#[allow(unknown_lints,needless_range_loop)]
for i in 0..11 {
if bundle.get_usage_ratio() <= i as f32 * 0.1 {
reclaim_space[i] += bundle.get_unused_size();
}
}
}
println!("Total bundle size: {}", to_file_size(data_total as u64));
let used = data_total - reclaim_space[10];
println!("Space used: {}, {:.1} %", to_file_size(used as u64), used as f32 / data_total as f32 * 100.0);
println!("Reclaimable space (depending on vacuum ratio)");
#[allow(unknown_lints,needless_range_loop)]
for i in 0..11 {
println!(" - ratio={:3}: {:6}, {:4.1} %", i*10, to_file_size(reclaim_space[i] as u64), reclaim_space[i] as f32 / data_total as f32 * 100.0);
}
}
#[allow(unknown_lints,cyclomatic_complexity)]
pub fn run() {
@@ -346,6 +368,10 @@ pub fn run() {
print_repoinfo(&repo.info());
}
},
Arguments::Analyze{repo_path} => {
let mut repo = open_repository(&repo_path);
print_analysis(&checked(repo.analyze_usage(), "analyze repository"));
},
Arguments::BundleList{repo_path} => {
let repo = open_repository(&repo_path);
for bundle in repo.list_bundles() {
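The `print_analysis` helper added above aggregates reclaimable space into eleven buckets, one per vacuum ratio from 0% to 100% in 10% steps: a bundle's unused compressed size is counted under every ratio at or above its usage ratio. A standalone sketch of that bucketing with invented figures:

```rust
// Illustrative bucketing only; the bundle figures below are invented.
fn main() {
    // (usage_ratio, unused_compressed_size) per bundle -- hypothetical values.
    let bundles = [(0.05_f32, 900_000_usize), (0.25, 600_000), (0.80, 100_000)];
    let mut reclaim_space = [0_usize; 11];
    for &(usage_ratio, unused) in &bundles {
        for i in 0..11 {
            // A bundle counts toward every vacuum ratio at or above its usage ratio.
            if usage_ratio <= i as f32 * 0.1 {
                reclaim_space[i] += unused;
            }
        }
    }
    for i in 0..11 {
        // ratio=0 reclaims nothing, ratio=30 already covers the first two
        // bundles, ratio=100 covers all unused space.
        println!("ratio={:3}% -> {:7} bytes reclaimable", i * 10, reclaim_space[i]);
    }
}
```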

View File

@@ -1,7 +1,7 @@
pub use ::util::*;
pub use ::bundledb::{BundleReader, BundleMode, BundleWriter, BundleInfo, BundleId, BundleDbError, BundleDb, BundleWriterError};
pub use ::chunker::{ChunkerType, Chunker, ChunkerStatus, IChunker, ChunkerError};
pub use ::repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType, RepositoryIntegrityError, BackupFileError, BackupError, BackupOptions};
pub use ::repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType, RepositoryIntegrityError, BackupFileError, BackupError, BackupOptions, BundleAnalysis};
pub use ::index::{Index, Location, IndexError};
pub use serde::{Serialize, Deserialize};

View File

@@ -1,5 +1,32 @@
use ::prelude::*;
use super::metadata::FileContents;
use std::collections::{HashMap, VecDeque};
pub struct BundleAnalysis {
pub info: BundleInfo,
pub chunk_usage: Bitmap,
pub used_raw_size: usize
}
impl BundleAnalysis {
#[inline]
pub fn get_usage_ratio(&self) -> f32 {
self.used_raw_size as f32 / self.info.raw_size as f32
}
#[inline]
pub fn get_used_size(&self) -> usize {
(self.get_usage_ratio() * self.info.encoded_size as f32) as usize
}
#[inline]
pub fn get_unused_size(&self) -> usize {
((1.0 - self.get_usage_ratio()) * self.info.encoded_size as f32) as usize
}
}
pub struct RepositoryInfo {
pub bundle_count: usize,
@@ -15,6 +42,70 @@ pub struct RepositoryInfo {
impl Repository {
fn mark_used(&self, bundles: &mut HashMap<u32, BundleAnalysis>, chunks: &[Chunk]) -> Result<bool, RepositoryError> {
let mut new = false;
for &(hash, len) in chunks {
if let Some(pos) = self.index.get(&hash) {
if let Some(bundle) = bundles.get_mut(&pos.bundle) {
if !bundle.chunk_usage.get(pos.chunk as usize) {
new = true;
bundle.chunk_usage.set(pos.chunk as usize);
bundle.used_raw_size += len as usize;
}
} else {
return Err(RepositoryIntegrityError::MissingBundleId(pos.bundle).into());
}
} else {
return Err(RepositoryIntegrityError::MissingChunk(hash).into());
}
}
Ok(new)
}
pub fn analyze_usage(&mut self) -> Result<HashMap<u32, BundleAnalysis>, RepositoryError> {
let mut usage = HashMap::new();
for (id, bundle) in self.bundle_map.bundles() {
let bundle = try!(self.bundles.get_bundle_info(&bundle).ok_or_else(|| RepositoryIntegrityError::MissingBundle(bundle)));
usage.insert(id, BundleAnalysis {
chunk_usage: Bitmap::new(bundle.chunk_count),
info: bundle.clone(),
used_raw_size: 0
});
}
let backups = try!(self.get_backups());
let mut todo = VecDeque::new();
for (_name, backup) in backups {
todo.push_back(backup.root);
}
while let Some(chunks) = todo.pop_back() {
if !try!(self.mark_used(&mut usage, &chunks)) {
continue
}
let inode = try!(self.get_inode(&chunks));
// Mark the content chunks as used
match inode.contents {
None | Some(FileContents::Inline(_)) => (),
Some(FileContents::ChunkedDirect(chunks)) => {
try!(self.mark_used(&mut usage, &chunks));
},
Some(FileContents::ChunkedIndirect(chunks)) => {
if try!(self.mark_used(&mut usage, &chunks)) {
let chunk_data = try!(self.get_data(&chunks));
let chunks = ChunkList::read_from(&chunk_data);
try!(self.mark_used(&mut usage, &chunks));
}
}
}
// Put children in todo
if let Some(children) = inode.children {
for (_name, chunks) in children {
todo.push_back(chunks);
}
}
}
Ok(usage)
}
#[inline]
pub fn list_bundles(&self) -> Vec<&BundleInfo> {
self.bundles.list_bundles()
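`BundleAnalysis` tracks usage against the uncompressed data (`used_raw_size` vs `raw_size`) and converts it into an on-disk estimate by scaling the compressed `encoded_size` with the usage ratio; this is what the reclaimable-space figures are based on. A short worked example with invented numbers:

```rust
// Worked example of the BundleAnalysis size estimation; all figures are invented.
fn main() {
    let raw_size: usize = 4_000_000;      // uncompressed size of the bundle
    let encoded_size: usize = 1_000_000;  // compressed size on disk
    let used_raw_size: usize = 1_000_000; // raw bytes still referenced by some backup

    // Same arithmetic as get_usage_ratio / get_used_size / get_unused_size above.
    let usage_ratio = used_raw_size as f32 / raw_size as f32;          // 0.25
    let used = (usage_ratio * encoded_size as f32) as usize;           // 250_000
    let unused = ((1.0 - usage_ratio) * encoded_size as f32) as usize; // 750_000

    // A vacuum run with ratio >= 0.25 would rewrite this bundle and reclaim ~750 kB.
    println!("usage {:.2}: used ~{} B, reclaimable ~{} B", usage_ratio, used, unused);
}
```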

View File

@@ -24,7 +24,7 @@ pub use self::metadata::{Inode, FileType};
pub use self::backup::{BackupError, BackupOptions};
pub use self::backup_file::{Backup, BackupFileError};
pub use self::integrity::RepositoryIntegrityError;
pub use self::info::RepositoryInfo;
pub use self::info::{RepositoryInfo, BundleAnalysis};
use self::bundle_map::BundleMap;

View File

@@ -1,86 +1,9 @@
use ::prelude::*;
use super::metadata::FileContents;
use std::collections::HashSet;
use std::collections::{HashMap, HashSet, VecDeque};
pub struct BundleUsage {
pub used: Bitmap,
pub mode: Bitmap,
pub chunk_count: usize,
pub total_size: usize,
pub used_size: usize
}
impl Repository {
fn mark_used(&self, bundles: &mut HashMap<u32, BundleUsage>, chunks: &[Chunk], mode: BundleMode) -> Result<bool, RepositoryError> {
let mut new = false;
for chunk in chunks {
if let Some(pos) = self.index.get(&chunk.0) {
if let Some(bundle) = bundles.get_mut(&pos.bundle) {
if !bundle.used.get(pos.chunk as usize) {
new = true;
bundle.used.set(pos.chunk as usize);
bundle.used_size += chunk.1 as usize;
if mode == BundleMode::Meta {
bundle.mode.set(pos.chunk as usize);
}
}
}
} else {
return Err(RepositoryIntegrityError::MissingChunk(chunk.0).into());
}
}
Ok(new)
}
pub fn analyze_usage(&mut self) -> Result<HashMap<u32, BundleUsage>, RepositoryError> {
let mut usage = HashMap::new();
for (id, bundle) in self.bundle_map.bundles() {
let bundle = try!(self.bundles.get_bundle_info(&bundle).ok_or_else(|| RepositoryIntegrityError::MissingBundle(bundle)));
usage.insert(id, BundleUsage {
used: Bitmap::new(bundle.chunk_count),
mode: Bitmap::new(bundle.chunk_count),
chunk_count: bundle.chunk_count,
total_size: bundle.raw_size,
used_size: 0
});
}
let backups = try!(self.get_backups());
for (_name, backup) in backups {
let mut todo = VecDeque::new();
todo.push_back(backup.root);
while let Some(chunks) = todo.pop_front() {
if !try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
continue
}
let inode = try!(self.get_inode(&chunks));
// Mark the content chunks as used
match inode.contents {
Some(FileContents::ChunkedDirect(chunks)) => {
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
},
Some(FileContents::ChunkedIndirect(chunks)) => {
if try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
let chunk_data = try!(self.get_data(&chunks));
let chunks = ChunkList::read_from(&chunk_data);
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
}
}
_ => ()
}
// Put children in todo
if let Some(children) = inode.children {
for (_name, chunks) in children {
todo.push_back(chunks);
}
}
}
}
Ok(usage)
}
fn delete_bundle(&mut self, id: u32) -> Result<(), RepositoryError> {
if let Some(bundle) = self.bundle_map.remove(id) {
try!(self.bundles.delete_bundle(&bundle));
@@ -96,15 +19,19 @@ impl Repository {
let _lock = try!(self.lock(true));
info!("Analyzing chunk usage");
let usage = try!(self.analyze_usage());
let total = usage.values().map(|b| b.total_size).sum::<usize>();
let mut data_total = 0;
let used = usage.values().map(|b| b.used_size).sum::<usize>();
let mut data_used = 0;
info!("Usage: {} of {}, {:.1}%", to_file_size(used as u64), to_file_size(total as u64), used as f32/total as f32*100.0);
for bundle in usage.values() {
data_total += bundle.info.encoded_size;
data_used += bundle.get_used_size();
}
info!("Usage: {} of {}, {:.1}%", to_file_size(data_used as u64), to_file_size(data_total as u64), data_used as f32/data_total as f32*100.0);
let mut rewrite_bundles = HashSet::new();
let mut reclaim_space = 0;
for (id, bundle) in &usage {
if bundle.used_size as f32 / bundle.total_size as f32 <= ratio {
if bundle.get_usage_ratio() <= ratio {
rewrite_bundles.insert(*id);
reclaim_space += bundle.total_size - bundle.used_size;
reclaim_space += bundle.get_unused_size();
}
}
info!("Reclaiming {} by rewriting {} bundles", to_file_size(reclaim_space as u64), rewrite_bundles.len());
@@ -115,35 +42,24 @@ impl Repository {
let bundle = &usage[id];
let bundle_id = self.bundle_map.get(*id).unwrap();
let chunks = try!(self.bundles.get_chunk_list(&bundle_id));
let mode = usage[id].info.mode;
for (chunk, &(hash, _len)) in chunks.into_iter().enumerate() {
if !bundle.used.get(chunk) {
if !bundle.chunk_usage.get(chunk) {
try!(self.index.delete(&hash));
continue
}
let data = try!(self.bundles.get_chunk(&bundle_id, chunk));
let mode = if bundle.mode.get(chunk) {
BundleMode::Meta
} else {
BundleMode::Content
};
try!(self.put_chunk_override(mode, hash, &data));
}
}
try!(self.flush());
info!("Checking index");
let mut pos = 0;
self.index.walk::<_, ()>(|_hash, location| {
loop {
if rewrite_bundles.contains(&location.bundle) {
pos = if let Some(pos) = self.index.next_entry(pos) {
pos
} else {
break
};
let entry = self.index.get_entry(pos).unwrap();
if rewrite_bundles.contains(&entry.data.bundle) {
panic!("Removed bundle is still referenced in index");
}
pos += 1;
Ok(())
}
}).ok();
info!("Deleting {} bundles", rewrite_bundles.len());
for id in rewrite_bundles {
try!(self.delete_bundle(id));