mirror of https://github.com/dswd/zvault
Vacuum
This commit is contained in:
parent
c8b69ebe25
commit
fc45fa4e33
|
@ -6,7 +6,6 @@ use chrono::Duration;
|
||||||
|
|
||||||
use ::chunker::*;
|
use ::chunker::*;
|
||||||
use ::util::*;
|
use ::util::*;
|
||||||
use ::util::cli::*;
|
|
||||||
|
|
||||||
|
|
||||||
struct ChunkSink {
|
struct ChunkSink {
|
||||||
|
|
|
@ -28,11 +28,13 @@ pub enum Arguments {
|
||||||
Remove {
|
Remove {
|
||||||
repo_path: String,
|
repo_path: String,
|
||||||
backup_name: String,
|
backup_name: String,
|
||||||
|
vacuum: bool,
|
||||||
inode: Option<String>
|
inode: Option<String>
|
||||||
},
|
},
|
||||||
Vacuum {
|
Vacuum {
|
||||||
repo_path: String,
|
repo_path: String,
|
||||||
ratio: f32
|
ratio: f32,
|
||||||
|
simulate: bool
|
||||||
},
|
},
|
||||||
Check {
|
Check {
|
||||||
repo_path: String,
|
repo_path: String,
|
||||||
|
@ -204,11 +206,13 @@ pub fn parse() -> Arguments {
|
||||||
)
|
)
|
||||||
(@subcommand remove =>
|
(@subcommand remove =>
|
||||||
(about: "removes a backup or a subpath")
|
(about: "removes a backup or a subpath")
|
||||||
|
(@arg vacuum: --vacuum "run vacuum afterwards to reclaim space")
|
||||||
(@arg BACKUP: +required "repository::backup[::subpath] path")
|
(@arg BACKUP: +required "repository::backup[::subpath] path")
|
||||||
)
|
)
|
||||||
(@subcommand vacuum =>
|
(@subcommand vacuum =>
|
||||||
(about: "saves space by combining and recompressing bundles")
|
(about: "saves space by combining and recompressing bundles")
|
||||||
(@arg ratio: --ratio -r "ratio of unused chunks in a bundle to rewrite that bundle")
|
(@arg ratio: --ratio -r +takes_value "ratio of unused chunks in a bundle to rewrite that bundle")
|
||||||
|
(@arg ratio: --simulate "only simulate the vacuum, do not remove any bundles")
|
||||||
(@arg REPO: +required "path of the repository")
|
(@arg REPO: +required "path of the repository")
|
||||||
)
|
)
|
||||||
(@subcommand check =>
|
(@subcommand check =>
|
||||||
|
@ -317,6 +321,7 @@ pub fn parse() -> Arguments {
|
||||||
return Arguments::Remove {
|
return Arguments::Remove {
|
||||||
repo_path: repository.to_string(),
|
repo_path: repository.to_string(),
|
||||||
backup_name: backup.unwrap().to_string(),
|
backup_name: backup.unwrap().to_string(),
|
||||||
|
vacuum: args.is_present("vacuum"),
|
||||||
inode: inode.map(|v| v.to_string())
|
inode: inode.map(|v| v.to_string())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -328,6 +333,7 @@ pub fn parse() -> Arguments {
|
||||||
}
|
}
|
||||||
return Arguments::Vacuum {
|
return Arguments::Vacuum {
|
||||||
repo_path: repository.to_string(),
|
repo_path: repository.to_string(),
|
||||||
|
simulate: args.is_present("simulate"),
|
||||||
ratio: parse_float(args.value_of("ratio").unwrap_or("0.5"), "ratio") as f32
|
ratio: parse_float(args.value_of("ratio").unwrap_or("0.5"), "ratio") as f32
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,7 +6,6 @@ use chrono::prelude::*;
|
||||||
use std::process::exit;
|
use std::process::exit;
|
||||||
|
|
||||||
use ::repository::{Repository, Config, Backup};
|
use ::repository::{Repository, Config, Backup};
|
||||||
use ::util::cli::*;
|
|
||||||
use ::util::*;
|
use ::util::*;
|
||||||
use self::args::Arguments;
|
use self::args::Arguments;
|
||||||
|
|
||||||
|
@ -72,8 +71,8 @@ pub fn run() {
|
||||||
repo.restore_backup(&backup, &dst_path).unwrap();
|
repo.restore_backup(&backup, &dst_path).unwrap();
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
Arguments::Remove{repo_path, backup_name, inode} => {
|
Arguments::Remove{repo_path, backup_name, inode, vacuum} => {
|
||||||
let repo = open_repository(&repo_path);
|
let mut repo = open_repository(&repo_path);
|
||||||
if let Some(_inode) = inode {
|
if let Some(_inode) = inode {
|
||||||
let _backup = get_backup(&repo, &backup_name);
|
let _backup = get_backup(&repo, &backup_name);
|
||||||
error!("Removing backup subtrees is not implemented yet");
|
error!("Removing backup subtrees is not implemented yet");
|
||||||
|
@ -82,10 +81,13 @@ pub fn run() {
|
||||||
repo.delete_backup(&backup_name).unwrap();
|
repo.delete_backup(&backup_name).unwrap();
|
||||||
info!("The backup has been deleted, run vacuum to reclaim space");
|
info!("The backup has been deleted, run vacuum to reclaim space");
|
||||||
}
|
}
|
||||||
|
if vacuum {
|
||||||
|
repo.vacuum(0.5, false).unwrap();
|
||||||
|
}
|
||||||
},
|
},
|
||||||
Arguments::Vacuum{repo_path, ..} => {
|
Arguments::Vacuum{repo_path, ratio, simulate} => {
|
||||||
let _repo = open_repository(&repo_path);
|
let mut repo = open_repository(&repo_path);
|
||||||
error!("Vaccum is not implemented yet");
|
repo.vacuum(ratio, simulate).unwrap();
|
||||||
return
|
return
|
||||||
},
|
},
|
||||||
Arguments::Check{repo_path, backup_name, inode, full} => {
|
Arguments::Check{repo_path, backup_name, inode, full} => {
|
||||||
|
|
|
@ -392,6 +392,15 @@ impl Index {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn pos(&self, key: &Hash) -> Option<usize> {
|
||||||
|
debug_assert!(self.check().is_ok(), "Inconsistent before get");
|
||||||
|
match self.locate(key) {
|
||||||
|
LocateResult::Found(pos) => Some(pos),
|
||||||
|
_ => None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn get(&self, key: &Hash) -> Option<Location> {
|
pub fn get(&self, key: &Hash) -> Option<Location> {
|
||||||
debug_assert!(self.check().is_ok(), "Inconsistent before get");
|
debug_assert!(self.check().is_ok(), "Inconsistent before get");
|
||||||
|
|
|
@ -31,7 +31,6 @@ mod cli;
|
||||||
// TODO: Remove backup subtrees
|
// TODO: Remove backup subtrees
|
||||||
// TODO: Recompress & combine bundles
|
// TODO: Recompress & combine bundles
|
||||||
// TODO: Prune backups (based on age like attic)
|
// TODO: Prune backups (based on age like attic)
|
||||||
// TODO: Check backup integrity too
|
|
||||||
// TODO: Encrypt backup files too
|
// TODO: Encrypt backup files too
|
||||||
// TODO: list --tree
|
// TODO: list --tree
|
||||||
// TODO: Partial backups
|
// TODO: Partial backups
|
||||||
|
|
|
@ -32,11 +32,16 @@ impl Repository {
|
||||||
Ok(Some(try!(self.bundles.get_chunk(&bundle_id, found.chunk as usize))))
|
Ok(Some(try!(self.bundles.get_chunk(&bundle_id, found.chunk as usize))))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
pub fn put_chunk(&mut self, mode: BundleMode, hash: Hash, data: &[u8]) -> Result<(), RepositoryError> {
|
pub fn put_chunk(&mut self, mode: BundleMode, hash: Hash, data: &[u8]) -> Result<(), RepositoryError> {
|
||||||
// If this chunk is in the index, ignore it
|
// If this chunk is in the index, ignore it
|
||||||
if self.index.contains(&hash) {
|
if self.index.contains(&hash) {
|
||||||
return Ok(())
|
return Ok(())
|
||||||
}
|
}
|
||||||
|
self.put_chunk_override(mode, hash, data)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn put_chunk_override(&mut self, mode: BundleMode, hash: Hash, data: &[u8]) -> Result<(), RepositoryError> {
|
||||||
// Calculate the next free bundle id now (late lifetime prevents this)
|
// Calculate the next free bundle id now (late lifetime prevents this)
|
||||||
let next_free_bundle_id = self.next_free_bundle_id();
|
let next_free_bundle_id = self.next_free_bundle_id();
|
||||||
// Select a bundle writer according to the mode and...
|
// Select a bundle writer according to the mode and...
|
||||||
|
|
|
@ -89,6 +89,11 @@ impl BundleMap {
|
||||||
self.0.get(&id)
|
self.0.get(&id)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub fn remove(&mut self, id: u32) -> Option<BundleData> {
|
||||||
|
self.0.remove(&id)
|
||||||
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn set(&mut self, id: u32, bundle: &Bundle) {
|
pub fn set(&mut self, id: u32, bundle: &Bundle) {
|
||||||
let data = BundleData { info: bundle.info.clone() };
|
let data = BundleData { info: bundle.info.clone() };
|
||||||
|
@ -96,7 +101,7 @@ impl BundleMap {
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn bundles(&self) -> Vec<&BundleData> {
|
pub fn bundles(&self) -> Vec<(u32, &BundleData)> {
|
||||||
self.0.values().collect()
|
self.0.iter().map(|(id, bundle)| (*id, bundle)).collect()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,7 @@ pub struct RepositoryInfo {
|
||||||
impl Repository {
|
impl Repository {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn list_bundles(&self) -> Vec<&BundleInfo> {
|
pub fn list_bundles(&self) -> Vec<&BundleInfo> {
|
||||||
self.bundle_map.bundles().iter().map(|b| &b.info).collect()
|
self.bundle_map.bundles().into_iter().map(|(_id, b)| &b.info).collect()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn info(&self) -> RepositoryInfo {
|
pub fn info(&self) -> RepositoryInfo {
|
||||||
|
|
|
@ -1,7 +1,10 @@
|
||||||
use super::{Repository, RepositoryError};
|
use super::{Repository, RepositoryError};
|
||||||
|
use super::metadata::FileContents;
|
||||||
|
|
||||||
use ::bundle::BundleId;
|
use ::bundle::BundleId;
|
||||||
use ::util::Hash;
|
use ::util::*;
|
||||||
|
|
||||||
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
|
|
||||||
quick_error!{
|
quick_error!{
|
||||||
|
@ -33,34 +36,7 @@ quick_error!{
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Repository {
|
impl Repository {
|
||||||
fn check_chunk(&self, hash: Hash) -> Result<(), RepositoryError> {
|
fn check_index_chunks(&self) -> Result<(), RepositoryError> {
|
||||||
// Find bundle and chunk id in index
|
|
||||||
let found = if let Some(found) = self.index.get(&hash) {
|
|
||||||
found
|
|
||||||
} else {
|
|
||||||
return Err(RepositoryIntegrityError::MissingChunk(hash).into());
|
|
||||||
};
|
|
||||||
// Lookup bundle id from map
|
|
||||||
let bundle_id = try!(self.get_bundle_id(found.bundle));
|
|
||||||
// Get bundle object from bundledb
|
|
||||||
let bundle = if let Some(bundle) = self.bundles.get_bundle(&bundle_id) {
|
|
||||||
bundle
|
|
||||||
} else {
|
|
||||||
return Err(RepositoryIntegrityError::MissingBundle(bundle_id.clone()).into())
|
|
||||||
};
|
|
||||||
// Get chunk from bundle
|
|
||||||
if bundle.info.chunk_count > found.chunk as usize {
|
|
||||||
Ok(())
|
|
||||||
} else {
|
|
||||||
Err(RepositoryIntegrityError::NoSuchChunk(bundle_id.clone(), found.chunk).into())
|
|
||||||
}
|
|
||||||
//TODO: check that contents match their hash
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn check(&mut self, full: bool) -> Result<(), RepositoryError> {
|
|
||||||
try!(self.flush());
|
|
||||||
try!(self.bundles.check(full));
|
|
||||||
try!(self.index.check());
|
|
||||||
let mut pos = 0;
|
let mut pos = 0;
|
||||||
loop {
|
loop {
|
||||||
pos = if let Some(pos) = self.index.next_entry(pos) {
|
pos = if let Some(pos) = self.index.next_entry(pos) {
|
||||||
|
@ -69,9 +45,24 @@ impl Repository {
|
||||||
break
|
break
|
||||||
};
|
};
|
||||||
let entry = self.index.get_entry(pos).unwrap();
|
let entry = self.index.get_entry(pos).unwrap();
|
||||||
try!(self.check_chunk(entry.key));
|
// Lookup bundle id from map
|
||||||
|
let bundle_id = try!(self.get_bundle_id(entry.data.bundle));
|
||||||
|
// Get bundle object from bundledb
|
||||||
|
let bundle = if let Some(bundle) = self.bundles.get_bundle(&bundle_id) {
|
||||||
|
bundle
|
||||||
|
} else {
|
||||||
|
return Err(RepositoryIntegrityError::MissingBundle(bundle_id.clone()).into())
|
||||||
|
};
|
||||||
|
// Get chunk from bundle
|
||||||
|
if bundle.info.chunk_count <= entry.data.chunk as usize {
|
||||||
|
return Err(RepositoryIntegrityError::NoSuchChunk(bundle_id.clone(), entry.data.chunk).into())
|
||||||
|
}
|
||||||
pos += 1;
|
pos += 1;
|
||||||
}
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_repository(&self) -> Result<(), RepositoryError> {
|
||||||
if self.next_content_bundle == self.next_meta_bundle {
|
if self.next_content_bundle == self.next_meta_bundle {
|
||||||
return Err(RepositoryIntegrityError::InvalidNextBundleId.into())
|
return Err(RepositoryIntegrityError::InvalidNextBundleId.into())
|
||||||
}
|
}
|
||||||
|
@ -83,4 +74,67 @@ impl Repository {
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn check_chunks(&self, checked: &mut Bitmap, chunks: &[Chunk]) -> Result<bool, RepositoryError> {
|
||||||
|
let mut new = false;
|
||||||
|
for &(hash, _len) in chunks {
|
||||||
|
if let Some(pos) = self.index.pos(&hash) {
|
||||||
|
new |= checked.get(pos);
|
||||||
|
checked.set(pos);
|
||||||
|
} else {
|
||||||
|
return Err(RepositoryIntegrityError::MissingChunk(hash).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(new)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn check_backups(&mut self) -> Result<(), RepositoryError> {
|
||||||
|
let mut checked = Bitmap::new(self.index.capacity());
|
||||||
|
for name in try!(self.list_backups()) {
|
||||||
|
let backup = try!(self.get_backup(&name));
|
||||||
|
let mut todo = VecDeque::new();
|
||||||
|
todo.push_back(backup.root);
|
||||||
|
while let Some(chunks) = todo.pop_front() {
|
||||||
|
if !try!(self.check_chunks(&mut checked, &chunks)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
let inode = try!(self.get_inode(&chunks));
|
||||||
|
// Mark the content chunks as used
|
||||||
|
match inode.contents {
|
||||||
|
Some(FileContents::ChunkedDirect(chunks)) => {
|
||||||
|
try!(self.check_chunks(&mut checked, &chunks));
|
||||||
|
},
|
||||||
|
Some(FileContents::ChunkedIndirect(chunks)) => {
|
||||||
|
if try!(self.check_chunks(&mut checked, &chunks)) {
|
||||||
|
let chunk_data = try!(self.get_data(&chunks));
|
||||||
|
let chunks = ChunkList::read_from(&chunk_data);
|
||||||
|
try!(self.check_chunks(&mut checked, &chunks));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => ()
|
||||||
|
}
|
||||||
|
// Put children in todo
|
||||||
|
if let Some(children) = inode.children {
|
||||||
|
for (_name, chunks) in children {
|
||||||
|
todo.push_back(chunks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Flushes the repository and then runs the integrity checks in order:
/// bundles (passing `full` through), index, index chunks, backups and finally
/// the repository itself. The first failing step aborts the run and its
/// error is returned.
pub fn check(&mut self, full: bool) -> Result<(), RepositoryError> {
    try!(self.flush());

    info!("Checking bundle integrity...");
    try!(self.bundles.check(full));

    info!("Checking index integrity...");
    try!(self.index.check());
    try!(self.check_index_chunks());

    info!("Checking backup integrity...");
    try!(self.check_backups());

    info!("Checking repository integrity...");
    // Last step: its result is the result of the whole check
    self.check_repository()
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -6,6 +6,7 @@ mod info;
|
||||||
mod metadata;
|
mod metadata;
|
||||||
mod backup;
|
mod backup;
|
||||||
mod error;
|
mod error;
|
||||||
|
mod vacuum;
|
||||||
|
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::cmp::max;
|
use std::cmp::max;
|
||||||
|
@ -22,6 +23,7 @@ pub use self::error::RepositoryError;
|
||||||
pub use self::config::Config;
|
pub use self::config::Config;
|
||||||
pub use self::metadata::{Inode, FileType};
|
pub use self::metadata::{Inode, FileType};
|
||||||
pub use self::backup::Backup;
|
pub use self::backup::Backup;
|
||||||
|
pub use self::integrity::RepositoryIntegrityError;
|
||||||
use self::bundle_map::BundleMap;
|
use self::bundle_map::BundleMap;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,153 @@
|
||||||
|
use super::{Repository, RepositoryError, RepositoryIntegrityError};
|
||||||
|
use super::metadata::FileContents;
|
||||||
|
|
||||||
|
use std::collections::{HashMap, HashSet, VecDeque};
|
||||||
|
|
||||||
|
use ::bundle::BundleMode;
|
||||||
|
use ::util::*;
|
||||||
|
|
||||||
|
|
||||||
|
pub struct BundleUsage {
|
||||||
|
pub used: Bitmap,
|
||||||
|
pub mode: Bitmap,
|
||||||
|
pub chunk_count: usize,
|
||||||
|
pub total_size: usize,
|
||||||
|
pub used_size: usize
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Repository {
|
||||||
|
fn mark_used(&self, bundles: &mut HashMap<u32, BundleUsage>, chunks: &[Chunk], mode: BundleMode) -> Result<bool, RepositoryError> {
|
||||||
|
let mut new = false;
|
||||||
|
for chunk in chunks {
|
||||||
|
if let Some(pos) = self.index.get(&chunk.0) {
|
||||||
|
if let Some(bundle) = bundles.get_mut(&pos.bundle) {
|
||||||
|
if !bundle.used.get(pos.chunk as usize) {
|
||||||
|
new = true;
|
||||||
|
bundle.used.set(pos.chunk as usize);
|
||||||
|
bundle.used_size += chunk.1 as usize;
|
||||||
|
if mode == BundleMode::Meta {
|
||||||
|
bundle.mode.set(pos.chunk as usize);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return Err(RepositoryIntegrityError::MissingChunk(chunk.0).into());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(new)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn analyze_usage(&mut self) -> Result<HashMap<u32, BundleUsage>, RepositoryError> {
|
||||||
|
let mut usage = HashMap::new();
|
||||||
|
for (id, bundle) in self.bundle_map.bundles() {
|
||||||
|
usage.insert(id, BundleUsage {
|
||||||
|
used: Bitmap::new(bundle.info.chunk_count),
|
||||||
|
mode: Bitmap::new(bundle.info.chunk_count),
|
||||||
|
chunk_count: bundle.info.chunk_count,
|
||||||
|
total_size: bundle.info.raw_size,
|
||||||
|
used_size: 0
|
||||||
|
});
|
||||||
|
}
|
||||||
|
for name in try!(self.list_backups()) {
|
||||||
|
let backup = try!(self.get_backup(&name));
|
||||||
|
let mut todo = VecDeque::new();
|
||||||
|
todo.push_back(backup.root);
|
||||||
|
while let Some(chunks) = todo.pop_front() {
|
||||||
|
if !try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
let inode = try!(self.get_inode(&chunks));
|
||||||
|
// Mark the content chunks as used
|
||||||
|
match inode.contents {
|
||||||
|
Some(FileContents::ChunkedDirect(chunks)) => {
|
||||||
|
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
|
||||||
|
},
|
||||||
|
Some(FileContents::ChunkedIndirect(chunks)) => {
|
||||||
|
if try!(self.mark_used(&mut usage, &chunks, BundleMode::Meta)) {
|
||||||
|
let chunk_data = try!(self.get_data(&chunks));
|
||||||
|
let chunks = ChunkList::read_from(&chunk_data);
|
||||||
|
try!(self.mark_used(&mut usage, &chunks, BundleMode::Content));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
_ => ()
|
||||||
|
}
|
||||||
|
// Put children in todo
|
||||||
|
if let Some(children) = inode.children {
|
||||||
|
for (_name, chunks) in children {
|
||||||
|
todo.push_back(chunks);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(usage)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn delete_bundle(&mut self, id: u32) -> Result<(), RepositoryError> {
|
||||||
|
if let Some(bundle) = self.bundle_map.remove(id) {
|
||||||
|
try!(self.bundles.delete_bundle(&bundle.id()));
|
||||||
|
Ok(())
|
||||||
|
} else {
|
||||||
|
Err(RepositoryIntegrityError::MissingBundleId(id).into())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn vacuum(&mut self, ratio: f32, simulate: bool) -> Result<(), RepositoryError> {
|
||||||
|
try!(self.flush());
|
||||||
|
info!("Analyzing chunk usage");
|
||||||
|
let usage = try!(self.analyze_usage());
|
||||||
|
let total = usage.values().map(|b| b.total_size).sum::<usize>();
|
||||||
|
let used = usage.values().map(|b| b.used_size).sum::<usize>();
|
||||||
|
info!("Usage: {} of {}, {:.1}%", to_file_size(used as u64), to_file_size(total as u64), used as f32/total as f32*100.0);
|
||||||
|
let mut rewrite_bundles = HashSet::new();
|
||||||
|
let mut reclaim_space = 0;
|
||||||
|
for (id, bundle) in &usage {
|
||||||
|
if bundle.used_size as f32 / bundle.total_size as f32 <= ratio {
|
||||||
|
rewrite_bundles.insert(*id);
|
||||||
|
reclaim_space += bundle.total_size - bundle.used_size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
info!("Reclaiming {} by rewriting {} bundles", to_file_size(reclaim_space as u64), rewrite_bundles.len());
|
||||||
|
if simulate {
|
||||||
|
return Ok(())
|
||||||
|
}
|
||||||
|
for id in &rewrite_bundles {
|
||||||
|
let bundle = usage.get(id).unwrap();
|
||||||
|
let bundle_id = self.bundle_map.get(*id).unwrap().id();
|
||||||
|
for chunk in 0..bundle.chunk_count {
|
||||||
|
let data = try!(self.bundles.get_chunk(&bundle_id, chunk));
|
||||||
|
let hash = self.config.hash.hash(&data);
|
||||||
|
if !bundle.used.get(chunk) {
|
||||||
|
try!(self.index.delete(&hash));
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
let mode = if bundle.mode.get(chunk) {
|
||||||
|
BundleMode::Meta
|
||||||
|
} else {
|
||||||
|
BundleMode::Content
|
||||||
|
};
|
||||||
|
try!(self.put_chunk_override(mode, hash, &data));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try!(self.flush());
|
||||||
|
info!("Checking index");
|
||||||
|
let mut pos = 0;
|
||||||
|
loop {
|
||||||
|
pos = if let Some(pos) = self.index.next_entry(pos) {
|
||||||
|
pos
|
||||||
|
} else {
|
||||||
|
break
|
||||||
|
};
|
||||||
|
let entry = self.index.get_entry(pos).unwrap();
|
||||||
|
if rewrite_bundles.contains(&entry.data.bundle) {
|
||||||
|
panic!("Removed bundle is still referenced from index");
|
||||||
|
}
|
||||||
|
pos += 1;
|
||||||
|
}
|
||||||
|
info!("Deleting {} bundles", rewrite_bundles.len());
|
||||||
|
for id in rewrite_bundles {
|
||||||
|
try!(self.delete_bundle(id));
|
||||||
|
}
|
||||||
|
try!(self.bundle_map.save(self.path.join("bundles.map")));
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,77 @@
|
||||||
|
use std::ops::Deref;
|
||||||
|
|
||||||
|
/// A fixed-size set of bits backed by a byte vector.
///
/// Bit `i` lives in byte `i / 8` at bit position `i % 8` (least significant
/// bit first).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Bitmap {
    bytes: Vec<u8>
}

impl Bitmap {
    /// Creates a bitmap with room for at least `len` bits, all unset.
    ///
    /// The capacity is rounded up to a whole number of bytes.
    pub fn new(len: usize) -> Self {
        Bitmap { bytes: vec![0; (len + 7) / 8] }
    }

    /// Returns the number of addressable bits.
    ///
    /// This is always a multiple of 8 and may therefore be larger than the
    /// length passed to `new`.
    #[inline]
    pub fn len(&self) -> usize {
        self.bytes.len() * 8
    }

    /// Returns `true` if the bitmap has no addressable bits.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Splits a bit index into the byte offset and the mask of the bit
    /// within that byte.
    #[inline]
    fn convert_index(index: usize) -> (usize, u8) {
        (index / 8, 1u8 << (index % 8))
    }

    /// Sets the bit at `index`.
    ///
    /// # Panics
    /// Panics if `index >= self.len()`.
    #[inline]
    pub fn set(&mut self, index: usize) {
        let (byte, mask) = Self::convert_index(index);
        self.bytes[byte] |= mask
    }

    /// Clears the bit at `index`.
    ///
    /// # Panics
    /// Panics if `index >= self.len()`.
    #[inline]
    pub fn unset(&mut self, index: usize) {
        let (byte, mask) = Self::convert_index(index);
        self.bytes[byte] &= !mask
    }

    /// Inverts the bit at `index`.
    ///
    /// # Panics
    /// Panics if `index >= self.len()`.
    #[inline]
    pub fn flip(&mut self, index: usize) {
        let (byte, mask) = Self::convert_index(index);
        self.bytes[byte] ^= mask
    }

    /// Returns whether the bit at `index` is set.
    ///
    /// # Panics
    /// Panics if `index >= self.len()`.
    #[inline]
    pub fn get(&self, index: usize) -> bool {
        let (byte, mask) = Self::convert_index(index);
        self.bytes[byte] & mask != 0
    }

    /// Consumes the bitmap and returns the underlying bytes.
    #[inline]
    pub fn into_bytes(self) -> Vec<u8> {
        self.bytes
    }

    /// Returns the underlying bytes as a slice.
    #[inline]
    pub fn as_bytes(&self) -> &[u8] {
        &self.bytes
    }

    /// Wraps an existing byte vector; every byte contributes 8 bits.
    #[inline]
    pub fn from_bytes(bytes: Vec<u8>) -> Self {
        Self { bytes: bytes }
    }
}

/// Gives read-only access to the underlying bytes.
impl Deref for Bitmap {
    type Target = [u8];

    #[inline]
    fn deref(&self) -> &[u8] {
        &self.bytes
    }
}
|
|
@ -0,0 +1,31 @@
|
||||||
|
/// Encodes `data` as a lowercase hexadecimal string, two digits per byte.
pub fn to_hex(data: &[u8]) -> String {
    let digits = b"0123456789abcdef";
    // Exactly two characters per byte, so the buffer never has to grow
    let mut hex = String::with_capacity(data.len() * 2);
    for &byte in data {
        hex.push(digits[(byte >> 4) as usize] as char);
        hex.push(digits[(byte & 0x0f) as usize] as char);
    }
    hex
}
|
||||||
|
|
||||||
|
/// Decodes a hexadecimal string into bytes.
///
/// Upper- and lowercase digits are accepted. Spaces, tabs, carriage returns
/// and newlines are ignored wherever they appear, including between the two
/// digits of a byte.
///
/// # Errors
/// Returns `Err(())` if the input contains any other character or an odd
/// number of hex digits.
pub fn parse_hex(hex: &str) -> Result<Vec<u8>, ()> {
    let mut bytes = Vec::with_capacity(hex.len() / 2);
    // High nibble of the byte currently being assembled, if any
    let mut pending: Option<u8> = None;
    for byte in hex.bytes() {
        let nibble = match byte {
            b' ' | b'\r' | b'\n' | b'\t' => continue,
            _ => match (byte as char).to_digit(16) {
                // A base-16 digit is always below 16, so the cast is lossless
                Some(digit) => digit as u8,
                None => return Err(()),
            },
        };
        match pending.take() {
            Some(high) => bytes.push((high << 4) | nibble),
            None => pending = Some(nibble),
        }
    }
    match pending {
        None => Ok(bytes),
        // A dangling high nibble means an odd number of digits
        Some(_) => Err(()),
    }
}
|
|
@ -4,7 +4,9 @@ mod encryption;
|
||||||
mod hash;
|
mod hash;
|
||||||
mod lru_cache;
|
mod lru_cache;
|
||||||
mod chunk;
|
mod chunk;
|
||||||
pub mod cli;
|
mod bitmap;
|
||||||
|
mod hex;
|
||||||
|
mod cli;
|
||||||
pub mod msgpack;
|
pub mod msgpack;
|
||||||
|
|
||||||
pub use self::chunk::*;
|
pub use self::chunk::*;
|
||||||
|
@ -12,35 +14,6 @@ pub use self::compression::*;
|
||||||
pub use self::encryption::*;
|
pub use self::encryption::*;
|
||||||
pub use self::hash::*;
|
pub use self::hash::*;
|
||||||
pub use self::lru_cache::*;
|
pub use self::lru_cache::*;
|
||||||
|
pub use self::bitmap::*;
|
||||||
pub fn to_hex(data: &[u8]) -> String {
|
pub use self::hex::*;
|
||||||
data.iter().map(|b| format!("{:02x}", b)).collect::<Vec<String>>().join("")
|
pub use self::cli::*;
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse_hex(hex: &str) -> Result<Vec<u8>, ()> {
|
|
||||||
let mut b = Vec::with_capacity(hex.len() / 2);
|
|
||||||
let mut modulus = 0;
|
|
||||||
let mut buf = 0;
|
|
||||||
for (_, byte) in hex.bytes().enumerate() {
|
|
||||||
buf <<= 4;
|
|
||||||
match byte {
|
|
||||||
b'A'...b'F' => buf |= byte - b'A' + 10,
|
|
||||||
b'a'...b'f' => buf |= byte - b'a' + 10,
|
|
||||||
b'0'...b'9' => buf |= byte - b'0',
|
|
||||||
b' '|b'\r'|b'\n'|b'\t' => {
|
|
||||||
buf >>= 4;
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
_ => return Err(()),
|
|
||||||
}
|
|
||||||
modulus += 1;
|
|
||||||
if modulus == 2 {
|
|
||||||
modulus = 0;
|
|
||||||
b.push(buf);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
match modulus {
|
|
||||||
0 => Ok(b.into_iter().collect()),
|
|
||||||
_ => Err(()),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
Loading…
Reference in New Issue