pull/10/head
Dennis Schwerdel 2017-03-20 14:03:29 +01:00
parent c8b69ebe25
commit fc45fa4e33
14 changed files with 391 additions and 76 deletions

View File

@@ -6,7 +6,6 @@ use chrono::Duration;
use ::chunker::*;
use ::util::*;
use ::util::cli::*;
struct ChunkSink {

View File

@@ -28,11 +28,13 @@ pub enum Arguments {
Remove {
repo_path: String,
backup_name: String,
vacuum: bool,
inode: Option<String>
},
Vacuum {
repo_path: String,
ratio: f32
ratio: f32,
simulate: bool
},
Check {
repo_path: String,
@@ -204,11 +206,13 @@ pub fn parse() -> Arguments {
)
(@subcommand remove =>
(about: "removes a backup or a subpath")
(@arg vacuum: --vacuum "run vacuum afterwards to reclaim space")
(@arg BACKUP: +required "repository::backup[::subpath] path")
)
(@subcommand vacuum =>
(about: "saves space by combining and recompressing bundles")
(@arg ratio: --ratio -r "ratio of unused chunks in a bundle to rewrite that bundle")
(@arg ratio: --ratio -r +takes_value "ratio of unused chunks in a bundle to rewrite that bundle")
(@arg simulate: --simulate "only simulate the vacuum, do not remove any bundles")
(@arg REPO: +required "path of the repository")
)
(@subcommand check =>
@@ -317,6 +321,7 @@ pub fn parse() -> Arguments {
return Arguments::Remove {
repo_path: repository.to_string(),
backup_name: backup.unwrap().to_string(),
vacuum: args.is_present("vacuum"),
inode: inode.map(|v| v.to_string())
}
}
@@ -328,6 +333,7 @@ pub fn parse() -> Arguments {
}
return Arguments::Vacuum {
repo_path: repository.to_string(),
simulate: args.is_present("simulate"),
ratio: parse_float(args.value_of("ratio").unwrap_or("0.5"), "ratio") as f32
}
}

View File

@@ -6,7 +6,6 @@ use chrono::prelude::*;
use std::process::exit;
use ::repository::{Repository, Config, Backup};
use ::util::cli::*;
use ::util::*;
use self::args::Arguments;
@@ -72,8 +71,8 @@ pub fn run() {
repo.restore_backup(&backup, &dst_path).unwrap();
}
},
Arguments::Remove{repo_path, backup_name, inode} => {
let repo = open_repository(&repo_path);
Arguments::Remove{repo_path, backup_name, inode, vacuum} => {
let mut repo = open_repository(&repo_path);
if let Some(_inode) = inode {
let _backup = get_backup(&repo, &backup_name);
error!("Removing backup subtrees is not implemented yet");
@@ -82,10 +81,13 @@ pub fn run() {
repo.delete_backup(&backup_name).unwrap();
info!("The backup has been deleted, run vacuum to reclaim space");
}
if vacuum {
repo.vacuum(0.5, false).unwrap();
}
},
Arguments::Vacuum{repo_path, ..} => {
let _repo = open_repository(&repo_path);
error!("Vacuum is not implemented yet");
Arguments::Vacuum{repo_path, ratio, simulate} => {
let mut repo = open_repository(&repo_path);
repo.vacuum(ratio, simulate).unwrap();
return
},
Arguments::Check{repo_path, backup_name, inode, full} => {

View File

@@ -392,6 +392,15 @@ impl Index {
}
}
#[inline]
pub fn pos(&self, key: &Hash) -> Option<usize> {
debug_assert!(self.check().is_ok(), "Inconsistent before pos");
match self.locate(key) {
LocateResult::Found(pos) => Some(pos),
_ => None
}
}
#[inline]
pub fn get(&self, key: &Hash) -> Option<Location> {
debug_assert!(self.check().is_ok(), "Inconsistent before get");

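The new pos() complements get(): it returns the key's raw slot in the hash table rather than the stored Location, which is what lets the integrity check further down key a Bitmap by index position. A minimal sketch of that pattern (the helper name is invented; Index, Bitmap and Hash are the types from this commit):

// Hypothetical helper: record that the chunk behind `hash` has been verified,
// returning true only the first time its index slot is seen.
fn mark_checked(index: &Index, checked: &mut Bitmap, hash: &Hash) -> bool {
    match index.pos(hash) {
        Some(slot) => {
            let seen = checked.get(slot);
            checked.set(slot);
            !seen
        }
        None => false, // not in the index at all
    }
}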
View File

@@ -31,7 +31,6 @@ mod cli;
// TODO: Remove backup subtrees
// TODO: Recompress & combine bundles
// TODO: Prune backups (based on age like attic)
// TODO: Check backup integrity too
// TODO: Encrypt backup files too
// TODO: list --tree
// TODO: Partial backups

View File

@@ -32,11 +32,16 @@ impl Repository {
Ok(Some(try!(self.bundles.get_chunk(&bundle_id, found.chunk as usize))))
}
#[inline]
pub fn put_chunk(&mut self, mode: BundleMode, hash: Hash, data: &[u8]) -> Result<(), RepositoryError> {
// If this chunk is in the index, ignore it
if self.index.contains(&hash) {
return Ok(())
}
self.put_chunk_override(mode, hash, data)
}
pub fn put_chunk_override(&mut self, mode: BundleMode, hash: Hash, data: &[u8]) -> Result<(), RepositoryError> {
// Calculate the next free bundle id now (late lifetime prevents this)
let next_free_bundle_id = self.next_free_bundle_id();
// Select a bundle writer according to the mode and...

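The split between these two methods matters for the vacuum code later in this commit: put_chunk() skips anything whose hash is still in the index, so rewriting a live chunk into a fresh bundle has to go through put_chunk_override(). A rough sketch of the difference (the helper name is invented; the method signatures are the ones above):

// Hypothetical illustration: storing the same chunk twice, then rewriting it.
fn store_and_rewrite(repo: &mut Repository, hash: Hash, data: &[u8]) -> Result<(), RepositoryError> {
    try!(repo.put_chunk(BundleMode::Content, hash, data));  // written to a bundle, indexed
    try!(repo.put_chunk(BundleMode::Content, hash, data));  // no-op: hash already in the index
    // Vacuum has to bypass the dedup check, because the hash stays in the index
    // while its old bundle is being rewritten.
    repo.put_chunk_override(BundleMode::Content, hash, data)
}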
View File

@@ -89,6 +89,11 @@ impl BundleMap {
self.0.get(&id)
}
#[inline]
pub fn remove(&mut self, id: u32) -> Option<BundleData> {
self.0.remove(&id)
}
#[inline]
pub fn set(&mut self, id: u32, bundle: &Bundle) {
let data = BundleData { info: bundle.info.clone() };
@@ -96,7 +101,7 @@
}
#[inline]
pub fn bundles(&self) -> Vec<&BundleData> {
self.0.values().collect()
pub fn bundles(&self) -> Vec<(u32, &BundleData)> {
self.0.iter().map(|(id, bundle)| (*id, bundle)).collect()
}
}

View File

@@ -17,7 +17,7 @@ pub struct RepositoryInfo {
impl Repository {
#[inline]
pub fn list_bundles(&self) -> Vec<&BundleInfo> {
self.bundle_map.bundles().iter().map(|b| &b.info).collect()
self.bundle_map.bundles().into_iter().map(|(_id, b)| &b.info).collect()
}
pub fn info(&self) -> RepositoryInfo {

View File

@@ -1,7 +1,10 @@
use super::{Repository, RepositoryError};
use super::metadata::FileContents;
use ::bundle::BundleId;
use ::util::Hash;
use ::util::*;
use std::collections::VecDeque;
quick_error!{
@@ -33,34 +36,7 @@
}
impl Repository {
fn check_chunk(&self, hash: Hash) -> Result<(), RepositoryError> {
// Find bundle and chunk id in index
let found = if let Some(found) = self.index.get(&hash) {
found
} else {
return Err(RepositoryIntegrityError::MissingChunk(hash).into());
};
// Lookup bundle id from map
let bundle_id = try!(self.get_bundle_id(found.bundle));
// Get bundle object from bundledb
let bundle = if let Some(bundle) = self.bundles.get_bundle(&bundle_id) {
bundle
} else {
return Err(RepositoryIntegrityError::MissingBundle(bundle_id.clone()).into())
};
// Get chunk from bundle
if bundle.info.chunk_count > found.chunk as usize {
Ok(())
} else {
Err(RepositoryIntegrityError::NoSuchChunk(bundle_id.clone(), found.chunk).into())
}
//TODO: check that contents match their hash
}
pub fn check(&mut self, full: bool) -> Result<(), RepositoryError> {
try!(self.flush());
try!(self.bundles.check(full));
try!(self.index.check());
fn check_index_chunks(&self) -> Result<(), RepositoryError> {
let mut pos = 0;
loop {
pos = if let Some(pos) = self.index.next_entry(pos) {
@ -69,9 +45,24 @@ impl Repository {
break
};
let entry = self.index.get_entry(pos).unwrap();
try!(self.check_chunk(entry.key));
// Lookup bundle id from map
let bundle_id = try!(self.get_bundle_id(entry.data.bundle));
// Get bundle object from bundledb
let bundle = if let Some(bundle) = self.bundles.get_bundle(&bundle_id) {
bundle
} else {
return Err(RepositoryIntegrityError::MissingBundle(bundle_id.clone()).into())
};
// Get chunk from bundle
if bundle.info.chunk_count <= entry.data.chunk as usize {
return Err(RepositoryIntegrityError::NoSuchChunk(bundle_id.clone(), entry.data.chunk).into())
}
pos += 1;
}
Ok(())
}
fn check_repository(&self) -> Result<(), RepositoryError> {
if self.next_content_bundle == self.next_meta_bundle {
return Err(RepositoryIntegrityError::InvalidNextBundleId.into())
}
@ -83,4 +74,67 @@ impl Repository {
}
Ok(())
}
fn check_chunks(&self, checked: &mut Bitmap, chunks: &[Chunk]) -> Result<bool, RepositoryError> {
let mut new = false;
for &(hash, _len) in chunks {
if let Some(pos) = self.index.pos(&hash) {
new |= !checked.get(pos); // a chunk that has not been checked before
checked.set(pos);
} else {
return Err(RepositoryIntegrityError::MissingChunk(hash).into())
}
}
Ok(new)
}
fn check_backups(&mut self) -> Result<(), RepositoryError> {
let mut checked = Bitmap::new(self.index.capacity());
for name in try!(self.list_backups()) {
let backup = try!(self.get_backup(&name));
let mut todo = VecDeque::new();
todo.push_back(backup.root);
while let Some(chunks) = todo.pop_front() {
if !try!(self.check_chunks(&mut checked, &chunks)) {
continue
}
let inode = try!(self.get_inode(&chunks));
// Mark the content chunks as used
match inode.contents {
Some(FileContents::ChunkedDirect(chunks)) => {
try!(self.check_chunks(&mut checked, &chunks));
},
Some(FileContents::ChunkedIndirect(chunks)) => {
if try!(self.check_chunks(&mut checked, &chunks)) {
let chunk_data = try!(self.get_data(&chunks));
let chunks = ChunkList::read_from(&chunk_data);
try!(self.check_chunks(&mut checked, &chunks));
}
}
_ => ()
}
// Put children in todo
if let Some(children) = inode.children {
for (_name, chunks) in children {
todo.push_back(chunks);
}
}
}
}
Ok(())
}
pub fn check(&mut self, full: bool) -> Result<(), RepositoryError> {
try!(self.flush());
info!("Checking bundle integrity...");
try!(self.bundles.check(full));
info!("Checking index integrity...");
try!(self.index.check());
try!(self.check_index_chunks());
info!("Checking backup integrity...");
try!(self.check_backups());
info!("Checking repository integrity...");
try!(self.check_repository());
Ok(())
}
}
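For reference, this reworked check() is what the check subcommand ends up driving. A minimal sketch of calling it directly (open_repository is the CLI helper used in the handlers above, the path is a placeholder, and the full flag is simply forwarded to the bundle check):

// Hypothetical driver for the new integrity pipeline. In order, check() runs the
// bundle check, the index check, the index-to-bundle cross-check, the backup
// tree walk and the repository-level sanity checks.
fn run_check() {
    let mut repo = open_repository("/path/to/repo");
    repo.check(false).unwrap();
}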

View File

@@ -6,6 +6,7 @@ mod info;
mod metadata;
mod backup;
mod error;
mod vacuum;
use std::mem;
use std::cmp::max;
@@ -22,6 +23,7 @@ pub use self::error::RepositoryError;
pub use self::config::Config;
pub use self::metadata::{Inode, FileType};
pub use self::backup::Backup;
pub use self::integrity::RepositoryIntegrityError;
use self::bundle_map::BundleMap;

src/repository/vacuum.rs (new file, +153 lines)
View File

@@ -0,0 +1,153 @@
use super::{Repository, RepositoryError, RepositoryIntegrityError};
use super::metadata::FileContents;
use std::collections::{HashMap, HashSet, VecDeque};
use ::bundle::BundleMode;
use ::util::*;
pub struct BundleUsage {
pub used: Bitmap,
pub mode: Bitmap,
pub chunk_count: usize,
pub total_size: usize,
pub used_size: usize
}
impl Repository {
fn mark_used(&self, bundles: &mut HashMap<u32, BundleUsage>, chunks: &[Chunk], mode: BundleMode) -> Result<bool, RepositoryError> {
let mut new = false;
for chunk in chunks {
if let Some(pos) = self.index.get(&chunk.0) {
if let Some(bundle) = bundles.get_mut(&pos.bundle) {
if !bundle.used.get(pos.chunk as usize) {
new = true;
bundle.used.set(pos.chunk as usize);
bundle.used_size += chunk.1 as usize;
if mode == BundleMode::Meta {
bundle.mode.set(pos.chunk as usize);
}
}
}
} else {
return Err(RepositoryIntegrityError::MissingChunk(chunk.0).into());
}
}
Ok(new)
}
pub fn analyze_usage(&mut self) -> Result<HashMap<u32, BundleUsage>, RepositoryError> {
let mut usage = HashMap::new();
for (id, bundle) in self.bundle_map.bundles() {
usage.insert(id, BundleUsage {
used: Bitmap::new(bundle.info.chunk_count),
mode: Bitmap::new(bundle.info.chunk_count),
chunk_count: bundle.info.chunk_count,
total_size: bundle.info.raw_size,
used_size: 0
});
}
for name in try!(self.list_backups()) {
let backup = try!(self.get_backup(&name));
let mut todo = VecDeque::new();
todo.push_back(backup.root);
while let Some(chunks) = todo.pop_front() {
if !try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
continue
}
let inode = try!(self.get_inode(&chunks));
// Mark the content chunks as used
match inode.contents {
Some(FileContents::ChunkedDirect(chunks)) => {
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
},
Some(FileContents::ChunkedIndirect(chunks)) => {
if try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
let chunk_data = try!(self.get_data(&chunks));
let chunks = ChunkList::read_from(&chunk_data);
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
}
}
_ => ()
}
// Put children in todo
if let Some(children) = inode.children {
for (_name, chunks) in children {
todo.push_back(chunks);
}
}
}
}
Ok(usage)
}
fn delete_bundle(&mut self, id: u32) -> Result<(), RepositoryError> {
if let Some(bundle) = self.bundle_map.remove(id) {
try!(self.bundles.delete_bundle(&bundle.id()));
Ok(())
} else {
Err(RepositoryIntegrityError::MissingBundleId(id).into())
}
}
pub fn vacuum(&mut self, ratio: f32, simulate: bool) -> Result<(), RepositoryError> {
try!(self.flush());
info!("Analyzing chunk usage");
let usage = try!(self.analyze_usage());
let total = usage.values().map(|b| b.total_size).sum::<usize>();
let used = usage.values().map(|b| b.used_size).sum::<usize>();
info!("Usage: {} of {}, {:.1}%", to_file_size(used as u64), to_file_size(total as u64), used as f32/total as f32*100.0);
let mut rewrite_bundles = HashSet::new();
let mut reclaim_space = 0;
for (id, bundle) in &usage {
if bundle.used_size as f32 / bundle.total_size as f32 <= ratio {
rewrite_bundles.insert(*id);
reclaim_space += bundle.total_size - bundle.used_size;
}
}
info!("Reclaiming {} by rewriting {} bundles", to_file_size(reclaim_space as u64), rewrite_bundles.len());
if simulate {
return Ok(())
}
for id in &rewrite_bundles {
let bundle = usage.get(id).unwrap();
let bundle_id = self.bundle_map.get(*id).unwrap().id();
for chunk in 0..bundle.chunk_count {
let data = try!(self.bundles.get_chunk(&bundle_id, chunk));
let hash = self.config.hash.hash(&data);
if !bundle.used.get(chunk) {
try!(self.index.delete(&hash));
continue
}
let mode = if bundle.mode.get(chunk) {
BundleMode::Meta
} else {
BundleMode::Content
};
try!(self.put_chunk_override(mode, hash, &data));
}
}
try!(self.flush());
info!("Checking index");
let mut pos = 0;
loop {
pos = if let Some(pos) = self.index.next_entry(pos) {
pos
} else {
break
};
let entry = self.index.get_entry(pos).unwrap();
if rewrite_bundles.contains(&entry.data.bundle) {
panic!("Removed bundle is still referenced from index");
}
pos += 1;
}
info!("Deleting {} bundles", rewrite_bundles.len());
for id in rewrite_bundles {
try!(self.delete_bundle(id));
}
try!(self.bundle_map.save(self.path.join("bundles.map")));
Ok(())
}
}
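A rough sketch of how the new vacuum API is meant to be driven, mirroring the CLI handler earlier in this commit (open_repository is that handler's helper; the path and ratio are placeholders):

// Hypothetical driver: dry-run first, then reclaim space for real.
fn run_vacuum() {
    let mut repo = open_repository("/path/to/repo");
    // simulate = true only reports usage statistics and the reclaimable size.
    repo.vacuum(0.5, true).unwrap();
    // simulate = false rewrites every bundle whose used/total size ratio is at or
    // below 0.5, deletes the originals and saves the updated bundle map.
    repo.vacuum(0.5, false).unwrap();
}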

src/util/bitmap.rs (new file, +77 lines)
View File

@@ -0,0 +1,77 @@
use std::ops::Deref;
pub struct Bitmap {
bytes: Vec<u8>
}
impl Bitmap {
pub fn new(len: usize) -> Self {
let len = (len+7)/8;
let mut bytes = Vec::with_capacity(len);
bytes.resize(len, 0);
Self { bytes: bytes }
}
#[inline]
pub fn len(&self) -> usize {
self.bytes.len() * 8
}
#[inline]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[inline]
fn convert_index(&self, index: usize) -> (usize, u8) {
(index/8, 1u8<<(index%8))
}
#[inline]
pub fn set(&mut self, index: usize) {
let (byte, mask) = self.convert_index(index);
self.bytes[byte] |= mask
}
#[inline]
pub fn unset(&mut self, index: usize) {
let (byte, mask) = self.convert_index(index);
self.bytes[byte] &= !mask
}
#[inline]
pub fn flip(&mut self, index: usize) {
let (byte, mask) = self.convert_index(index);
self.bytes[byte] ^= mask
}
#[inline]
pub fn get(&self, index: usize) -> bool {
let (byte, mask) = self.convert_index(index);
self.bytes[byte] & mask != 0
}
#[inline]
pub fn into_bytes(self) -> Vec<u8> {
self.bytes
}
#[inline]
pub fn as_bytes(&self) -> &[u8] {
&self.bytes
}
#[inline]
pub fn from_bytes(bytes: Vec<u8>) -> Self {
Self { bytes: bytes }
}
}
impl Deref for Bitmap {
type Target = [u8];
#[inline]
fn deref(&self) -> &[u8] {
&self.bytes
}
}
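A small self-contained exercise of the new type, sketching the semantics that the integrity check and vacuum rely on (the test itself is not part of the commit):

#[test]
fn bitmap_basics() {
    // Capacity is rounded up to whole bytes: 1024 bits fit exactly into 128 bytes.
    let mut seen = Bitmap::new(1024);
    assert_eq!(seen.len(), 1024);
    assert!(!seen.get(42));
    seen.set(42);
    assert!(seen.get(42));
    seen.flip(42); // flipping a set bit clears it again
    assert!(!seen.get(42));
    seen.set(7);
    assert_eq!(seen.as_bytes()[0], 1 << 7); // bit 7 lives in byte 0
}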

src/util/hex.rs (new file, +31 lines)
View File

@@ -0,0 +1,31 @@
pub fn to_hex(data: &[u8]) -> String {
data.iter().map(|b| format!("{:02x}", b)).collect::<Vec<String>>().join("")
}
pub fn parse_hex(hex: &str) -> Result<Vec<u8>, ()> {
let mut b = Vec::with_capacity(hex.len() / 2);
let mut modulus = 0;
let mut buf = 0;
for (_, byte) in hex.bytes().enumerate() {
buf <<= 4;
match byte {
b'A'...b'F' => buf |= byte - b'A' + 10,
b'a'...b'f' => buf |= byte - b'a' + 10,
b'0'...b'9' => buf |= byte - b'0',
b' '|b'\r'|b'\n'|b'\t' => {
buf >>= 4;
continue
}
_ => return Err(()),
}
modulus += 1;
if modulus == 2 {
modulus = 0;
b.push(buf);
}
}
match modulus {
0 => Ok(b.into_iter().collect()),
_ => Err(()),
}
}
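A short round-trip check of the relocated helpers, illustrating the accepted input forms (again a sketch, not part of the commit):

#[test]
fn hex_roundtrip() {
    assert_eq!(to_hex(&[0xde, 0xad, 0xbe, 0xef]), "deadbeef");
    // Upper case and whitespace are tolerated when parsing.
    assert_eq!(parse_hex("DE AD\nbe ef"), Ok(vec![0xde, 0xad, 0xbe, 0xef]));
    // An odd number of hex digits is rejected.
    assert_eq!(parse_hex("abc"), Err(()));
}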

View File

@@ -4,7 +4,9 @@ mod encryption;
mod hash;
mod lru_cache;
mod chunk;
pub mod cli;
mod bitmap;
mod hex;
mod cli;
pub mod msgpack;
pub use self::chunk::*;
@@ -12,35 +14,6 @@ pub use self::compression::*;
pub use self::encryption::*;
pub use self::hash::*;
pub use self::lru_cache::*;
pub fn to_hex(data: &[u8]) -> String {
data.iter().map(|b| format!("{:02x}", b)).collect::<Vec<String>>().join("")
}
pub fn parse_hex(hex: &str) -> Result<Vec<u8>, ()> {
let mut b = Vec::with_capacity(hex.len() / 2);
let mut modulus = 0;
let mut buf = 0;
for (_, byte) in hex.bytes().enumerate() {
buf <<= 4;
match byte {
b'A'...b'F' => buf |= byte - b'A' + 10,
b'a'...b'f' => buf |= byte - b'a' + 10,
b'0'...b'9' => buf |= byte - b'0',
b' '|b'\r'|b'\n'|b'\t' => {
buf >>= 4;
continue
}
_ => return Err(()),
}
modulus += 1;
if modulus == 2 {
modulus = 0;
b.push(buf);
}
}
match modulus {
0 => Ok(b.into_iter().collect()),
_ => Err(()),
}
}
pub use self::bitmap::*;
pub use self::hex::*;
pub use self::cli::*;