Full backups

pull/10/head
Dennis Schwerdel 2017-03-16 12:33:10 +01:00
parent 0b673d145f
commit ba68fc2fda
6 changed files with 175 additions and 76 deletions

59
Cargo.lock generated
View File

@ -3,6 +3,7 @@ name = "zvault"
version = "0.1.0"
dependencies = [
"blake2-rfc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)",
"mmap 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
"murmurhash3 0.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
@ -41,6 +42,15 @@ name = "byteorder"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "chrono"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)",
"time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "constant_time_eq"
version = "0.1.2"
@ -108,6 +118,33 @@ name = "murmurhash3"
version = "0.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "num"
version = "0.1.37"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)",
"num-iter 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-integer"
version = "0.1.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-iter"
version = "0.1.33"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)",
"num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "num-traits"
version = "0.1.37"
@ -131,6 +168,11 @@ dependencies = [
"libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "redox_syscall"
version = "0.1.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "regex"
version = "0.2.1"
@ -236,6 +278,17 @@ dependencies = [
"unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "time"
version = "0.1.36"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)",
"redox_syscall 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)",
"winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "unreachable"
version = "0.1.1"
@ -277,6 +330,7 @@ dependencies = [
"checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d"
"checksum blake2-rfc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "0c6a476f32fef3402f1161f89d0d39822809627754a126f8441ff2a9d45e2d59"
"checksum byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c40977b0ee6b9885c9013cd41d9feffdd22deb3bb4dc3a71d901cc7a77de18c8"
"checksum chrono 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "158b0bd7d75cbb6bf9c25967a48a2e9f77da95876b858eadfabaa99cd069de6e"
"checksum constant_time_eq 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "07dcb7959f0f6f1cf662f9a7ff389bcb919924d99ac41cf31f10d611d8721323"
"checksum docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ab32ea6e284d87987066f21a9e809a73c14720571ef34516f0890b3d355ccfd8"
"checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d"
@ -287,10 +341,14 @@ dependencies = [
"checksum memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1dbccc0e46f1ea47b9f17e6d67c5a96bd27030519c519c9c91327e31275a47b4"
"checksum mmap 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0bc85448a6006dd2ba26a385a564a8a0f1f2c7e78c70f1a70b2e0f4af286b823"
"checksum murmurhash3 0.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a2983372caf4480544083767bf2d27defafe32af49ab4df3a0b7fc90793a3664"
"checksum num 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)" = "98b15ba84e910ea7a1973bccd3df7b31ae282bf9d8bd2897779950c9b8303d40"
"checksum num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)" = "21e4df1098d1d797d27ef0c69c178c3fab64941559b290fcae198e0825c9c8b5"
"checksum num-iter 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)" = "f7d1891bd7b936f12349b7d1403761c8a0b85a18b148e9da4429d5d102c1a41e"
"checksum num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)" = "e1cbfa3781f3fe73dc05321bed52a06d2d491eaa764c52335cf4399f046ece99"
"checksum pkg-config 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3a8b4c6b8165cd1a1cd4b9b120978131389f64bdaf456435caa41e630edba903"
"checksum quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0aad603e8d7fb67da22dbdf1f4b826ce8829e406124109e73cf1b2454b93a71c"
"checksum rand 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "022e0636ec2519ddae48154b028864bdce4eaf7d35226ab8e65c611be97b189d"
"checksum redox_syscall 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "8dd35cc9a8bdec562c757e3d43c1526b5c6d2653e23e2315065bc25556550753"
"checksum regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4278c17d0f6d62dfef0ab00028feb45bd7d2102843f80763474eeb1be8a10c01"
"checksum regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9191b1f57603095f105d317e375d19b1c9c5c3185ea9633a99a6dcbed04457"
"checksum rmp 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "e59917c01f49718a59c644a621a4848aafc6577c4a47d66270d78951a807541a"
@ -304,6 +362,7 @@ dependencies = [
"checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6"
"checksum thread-id 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4437c97558c70d129e40629a5b385b3fb1ffac301e63941335e4d354081ec14a"
"checksum thread_local 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c85048c6260d17cf486ceae3282d9fb6b90be220bf5b28c400f5485ffc29f0c7"
"checksum time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "211b63c112206356ef1ff9b19355f43740fc3f85960c598a93d3a3d3ba7beade"
"checksum unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1f2ae5ddb18e1c92664717616dd9549dde73f539f01bd7b77c2edb2446bdff91"
"checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122"
"checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"

View File

@ -15,3 +15,4 @@ blake2-rfc = "*"
murmurhash3 = "*"
docopt = "0.7"
rustc-serialize = "0.3"
chrono = "0.3"

View File

@ -9,6 +9,7 @@ extern crate serde_yaml;
#[macro_use] extern crate quick_error;
extern crate docopt;
extern crate rustc_serialize;
extern crate chrono;
pub mod util;
pub mod bundle;
@ -17,15 +18,12 @@ mod chunker;
mod repository;
mod algotest;
use std::fs::File;
use std::io::Read;
use std::time;
use docopt::Docopt;
use chrono::prelude::*;
use chunker::ChunkerType;
use repository::{Repository, Config, Mode, Inode, Backup};
use util::{ChecksumType, Compression, HashMethod, to_file_size};
use repository::{Repository, Config, Inode};
use util::{ChecksumType, Compression, HashMethod, to_file_size, to_duration};
static USAGE: &'static str = "
@ -34,14 +32,12 @@ Usage:
zvault backup [--full] <backup> <path>
zvault restore <backup> <path>
zvault check [--full] <repo>
zvault list <repo>
zvault backups <repo>
zvault info <backup>
zvault stats <repo>
zvault bundles <repo>
zvault algotest <path>
zvault test <repo> <path>
zvault stat <path>
zvault put <backup> <path>
Options:
--full Whether to verify the repository by loading all bundles
@ -59,16 +55,14 @@ struct Args {
cmd_restore: bool,
cmd_check: bool,
cmd_list: bool,
cmd_backups: bool,
cmd_info: bool,
cmd_stats: bool,
cmd_bundles: bool,
cmd_algotest: bool,
cmd_test: bool,
cmd_stat: bool,
cmd_put: bool,
arg_repo: Option<String>,
arg_path: Option<String>,
@ -134,15 +128,15 @@ fn main() {
println!("Bundles: {}", info.bundle_count);
println!("Total size: {}", to_file_size(info.encoded_data_size));
println!("Uncompressed size: {}", to_file_size(info.raw_data_size));
println!("Compression ratio: {:.1}", info.compression_ratio * 100.0);
println!("Compression ratio: {:.1}%", info.compression_ratio * 100.0);
println!("Chunk count: {}", info.chunk_count);
println!("Average chunk size: {}", to_file_size(info.avg_chunk_size as u64));
let index_usage = info.index_entries as f32 / info.index_capacity as f32;
println!("Index: {}, {}% full", to_file_size(info.index_size as u64), index_usage * 100.0);
println!("Index: {}, {:.0}% full", to_file_size(info.index_size as u64), index_usage * 100.0);
return
}
if args.cmd_list {
if args.cmd_backups {
for backup in repo.list_backups().unwrap() {
println!("{}", backup);
}
@ -167,58 +161,27 @@ fn main() {
return
}
if args.cmd_test {
print!("Integrity check before...");
repo.check(true).unwrap();
println!(" done.");
let file_path = args.arg_path.unwrap();
print!("Reading file {}...", file_path);
let mut data = Vec::new();
let mut file = File::open(file_path).unwrap();
file.read_to_end(&mut data).unwrap();
println!(" done. {} bytes", data.len());
print!("Adding data to repository...");
let start = time::Instant::now();
let chunks = repo.put_data(Mode::Content, &data).unwrap();
repo.flush().unwrap();
let elapsed = start.elapsed();
let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0;
let write_speed = data.len() as f64 / duration;
println!(" done. {} chunks, {:.1} MB/s", chunks.len(), write_speed / 1_000_000.0);
println!("Integrity check after...");
repo.check(true).unwrap();
println!(" done.");
print!("Reading data from repository...");
let start = time::Instant::now();
let data2 = repo.get_data(&chunks).unwrap();
let elapsed = start.elapsed();
let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0;
let read_speed = data.len() as f64 / duration;
assert_eq!(data.len(), data2.len());
println!(" done. {:.1} MB/s", read_speed / 1_000_000.0);
return
}
let backup_name = args.arg_backup.unwrap().splitn(2, "::").nth(1).unwrap().to_string();
if args.cmd_put {
let chunks = repo.put_inode(&args.arg_path.unwrap()).unwrap();
repo.save_backup(&Backup{root: chunks, ..Default::default()}, &backup_name).unwrap();
return
}
if args.cmd_backup {
unimplemented!()
let backup = repo.create_full_backup(&args.arg_path.unwrap()).unwrap();
repo.save_backup(&backup, &backup_name).unwrap();
return
}
let backup = repo.get_backup(&backup_name).unwrap();
if args.cmd_info {
println!("{:?}", backup.root);
println!("Date: {}", Local.timestamp(backup.date, 0).to_rfc2822());
println!("Duration: {}", to_duration(backup.duration));
println!("Entries: {} files, {} dirs", backup.file_count, backup.dir_count);
println!("Total backup size: {}", to_file_size(backup.total_data_size));
println!("Modified data size: {}", to_file_size(backup.changed_data_size));
let dedup_ratio = backup.deduplicated_data_size as f32 / backup.changed_data_size as f32;
println!("Deduplicated size: {}, {:.1}% saved", to_file_size(backup.deduplicated_data_size), (1.0 - dedup_ratio)*100.0);
let compress_ratio = backup.encoded_data_size as f32 / backup.deduplicated_data_size as f32;
println!("Compressed size: {} in {} bundles, {:.1}% saved", to_file_size(backup.encoded_data_size), backup.bundle_count, (1.0 - compress_ratio)*100.0);
println!("Chunk count: {}, avg size: {}", backup.chunk_count, to_file_size(backup.avg_chunk_size as u64));
return
}

View File

@ -1,19 +1,23 @@
use super::{Repository, Chunk, RepositoryError};
use super::metadata::FileType;
use ::util::*;
use std::fs::{self, File};
use std::path::Path;
use std::collections::HashMap;
use chrono::prelude::*;
#[derive(Default, Debug)]
pub struct Backup {
pub root: Vec<Chunk>,
pub total_data_size: u64,
pub changed_data_size: u64,
pub new_data_size: u64,
pub encoded_data_size: u64,
pub new_bundle_count: usize,
pub total_data_size: u64, // Sum of all raw sizes of all entities
pub changed_data_size: u64, // Sum of all raw sizes of all entities actively stored
pub deduplicated_data_size: u64, // Sum of all raw sizes of all new bundles
pub encoded_data_size: u64, // Sum of all encoded sizes of all new bundles
pub bundle_count: usize,
pub chunk_count: usize,
pub avg_chunk_size: f32,
pub date: i64,
@ -25,9 +29,9 @@ serde_impl!(Backup(u8) {
root: Vec<Chunk> => 0,
total_data_size: u64 => 1,
changed_data_size: u64 => 2,
new_data_size: u64 => 3,
deduplicated_data_size: u64 => 3,
encoded_data_size: u64 => 4,
new_bundle_count: usize => 5,
bundle_count: usize => 5,
chunk_count: usize => 6,
avg_chunk_size: f32 => 7,
date: i64 => 8,
@ -71,17 +75,76 @@ impl Repository {
/// Restores the root entry of `backup` to `path`.
///
/// The root inode is loaded from the chunks referenced by `backup.root` and
/// then written out at `path` via `save_inode_at`.
///
/// # Errors
///
/// Propagates any error reported while loading the inode or while saving it.
pub fn restore_backup<P: AsRef<Path>>(&mut self, backup: &Backup, path: P) -> Result<(), RepositoryError> {
    let root_inode = self.get_inode(&backup.root)?;
    self.save_inode_at(&root_inode, path)?;
    //FIXME: recurse
    Ok(())
}
#[allow(dead_code)]
/// Creates a full backup of everything at `path` and returns its metadata.
///
/// The tree is walked iteratively in two phases:
///
/// 1. *Scan*: every path gets an inode via `create_inode`. Non-directories
///    are stored immediately and their chunks are registered in the children
///    map of their parent directory. Directories are remembered (without
///    being stored yet) and their entries are queued for scanning.
/// 2. *Save*: directories are stored in reverse scan order, so that every
///    directory is written only after all of its children have been
///    registered in it. The directory without a scanned parent is the root.
///
/// If `path` itself is not a directory, its inode becomes the backup root.
///
/// Statistics (sizes, bundle and chunk counts) are computed as the difference
/// of the repository info before and after the run.
///
/// # Errors
///
/// Propagates errors from reading the file system, from creating or storing
/// inodes and from flushing the repository.
pub fn create_full_backup<P: AsRef<Path>>(&mut self, path: P) -> Result<Backup, RepositoryError> {
    let mut scan_stack = vec![path.as_ref().to_owned()];
    let mut save_stack = vec![];
    let mut directories = HashMap::new();
    let mut backup = Backup::default();
    let info_before = self.info();
    let start = Local::now();
    while let Some(entry_path) = scan_stack.pop() {
        // Create an inode for this path containing all attributes and contents
        // (for files) but no children (for directories)
        let mut inode = try!(self.create_inode(&entry_path));
        backup.total_data_size += inode.size;
        backup.changed_data_size += inode.size;
        if inode.file_type == FileType::Directory {
            backup.dir_count += 1;
            // For directories we need to put all children on the stack too, so there will be inodes created for them
            // Also we put directories on the save stack to save them in order
            save_stack.push(entry_path.clone());
            inode.children = Some(HashMap::new());
            for child in try!(fs::read_dir(&entry_path)) {
                scan_stack.push(try!(child).path());
            }
            directories.insert(entry_path, inode);
        } else {
            backup.file_count += 1;
            // Non-directories are stored directly and the chunks are put into the children map of their parents
            let chunks = try!(self.put_inode(&inode));
            let parent_dir = match entry_path.parent() {
                Some(parent) => directories.get_mut(parent),
                None => None,
            };
            match parent_dir {
                Some(parent) => {
                    parent.children.as_mut()
                        .expect("scanned directories always carry a children map")
                        .insert(inode.name.clone(), chunks);
                }
                // A parent directory is always scanned before its entries, so an
                // entry without a scanned parent can only be the backup path itself.
                None => backup.root = chunks,
            }
        }
    }
    // Now that all children have been saved the directories can be saved in
    // order, adding their chunks to their parents as well. The save stack is
    // empty if the backup path is not a directory; nothing is left to do then.
    while let Some(dir_path) = save_stack.pop() {
        let inode = directories.remove(&dir_path)
            .expect("every path on the save stack has a directory inode");
        let chunks = try!(self.put_inode(&inode));
        let parent_dir = match dir_path.parent() {
            Some(parent) => directories.get_mut(parent),
            None => None,
        };
        match parent_dir {
            Some(parent) => {
                parent.children.as_mut()
                    .expect("scanned directories always carry a children map")
                    .insert(inode.name.clone(), chunks);
            }
            // Only the top-most directory has no scanned parent; it is pushed
            // first and therefore popped last.
            None => backup.root = chunks,
        }
    }
    try!(self.flush());
    let elapsed = Local::now().signed_duration_since(start);
    backup.date = start.timestamp();
    backup.duration = elapsed.num_milliseconds() as f32 / 1_000.0;
    let info_after = self.info();
    backup.deduplicated_data_size = info_after.raw_data_size - info_before.raw_data_size;
    backup.encoded_data_size = info_after.encoded_data_size - info_before.encoded_data_size;
    backup.bundle_count = info_after.bundle_count - info_before.bundle_count;
    backup.chunk_count = info_after.chunk_count - info_before.chunk_count;
    // A fully deduplicated backup adds no chunks; avoid storing NaN (0/0) then.
    backup.avg_chunk_size = if backup.chunk_count > 0 {
        backup.deduplicated_data_size as f32 / backup.chunk_count as f32
    } else {
        0.0
    };
    Ok(backup)
}
}

View File

@ -147,7 +147,7 @@ impl Inode {
impl Repository {
pub fn put_inode<P: AsRef<Path>>(&mut self, path: P) -> Result<Vec<Chunk>, RepositoryError> {
pub fn create_inode<P: AsRef<Path>>(&mut self, path: P) -> Result<Inode, RepositoryError> {
let mut inode = try!(Inode::get_from(path.as_ref()));
if inode.file_type == FileType::File && inode.size > 0 {
let mut file = try!(File::open(path));
@ -166,7 +166,11 @@ impl Repository {
}
}
}
self.put_data(Mode::Meta, &try!(msgpack::encode(&inode)))
Ok(inode)
}
pub fn put_inode(&mut self, inode: &Inode) -> Result<Vec<Chunk>, RepositoryError> {
self.put_data(Mode::Meta, &try!(msgpack::encode(inode)))
}
#[inline]

View File

@ -36,3 +36,12 @@ pub fn to_file_size(size: u64) -> String {
}
format!("{:.1} TiB", size)
}
/// Formats a duration given in seconds as `H:MM:SS.s`.
///
/// The value is rounded to tenths of a second *before* it is split into
/// hours, minutes and seconds, so a carry propagates into the larger fields
/// (`59.96` becomes `0:01:00.0` rather than `0:00:60.0`).
///
/// Negative and NaN inputs are treated as a zero duration.
pub fn to_duration(dur: f32) -> String {
    // `NaN > 0.0` is false, so this also maps NaN to zero.
    let seconds = if dur > 0.0 { f64::from(dur) } else { 0.0 };
    let tenths = (seconds * 10.0).round() as u64;
    let hours = tenths / 36_000;
    let mins = (tenths / 600) % 60;
    let secs = (tenths / 10) % 60;
    format!("{}:{:02}:{:02}.{}", hours, mins, secs, tenths % 10)
}