From ba68fc2fda626f4257315320c47cb5a791deb482 Mon Sep 17 00:00:00 2001 From: Dennis Schwerdel Date: Thu, 16 Mar 2017 12:33:10 +0100 Subject: [PATCH] Full backups --- Cargo.lock | 59 ++++++++++++++++++++++++ Cargo.toml | 1 + src/main.rs | 81 +++++++++------------------------ src/repository/backup.rs | 93 ++++++++++++++++++++++++++++++++------ src/repository/metadata.rs | 8 +++- src/util/mod.rs | 9 ++++ 6 files changed, 175 insertions(+), 76 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9591426..d8e3566 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,6 +3,7 @@ name = "zvault" version = "0.1.0" dependencies = [ "blake2-rfc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", + "chrono 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)", "mmap 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "murmurhash3 0.0.5 (registry+https://github.com/rust-lang/crates.io-index)", @@ -41,6 +42,15 @@ name = "byteorder" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "chrono" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", + "time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "constant_time_eq" version = "0.1.2" @@ -108,6 +118,33 @@ name = "murmurhash3" version = "0.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "num" +version = "0.1.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)", + "num-iter 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-integer" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "num-iter" +version = "0.1.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)", + "num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "num-traits" version = "0.1.37" @@ -131,6 +168,11 @@ dependencies = [ "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "redox_syscall" +version = "0.1.16" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "regex" version = "0.2.1" @@ -236,6 +278,17 @@ dependencies = [ "unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "time" +version = "0.1.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.21 (registry+https://github.com/rust-lang/crates.io-index)", + "redox_syscall 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "unreachable" version = "0.1.1" @@ -277,6 +330,7 @@ dependencies = [ "checksum bitflags 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "aad18937a628ec6abcd26d1489012cc0e18c21798210f491af69ded9b881106d" "checksum blake2-rfc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)" = "0c6a476f32fef3402f1161f89d0d39822809627754a126f8441ff2a9d45e2d59" "checksum byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c40977b0ee6b9885c9013cd41d9feffdd22deb3bb4dc3a71d901cc7a77de18c8" +"checksum chrono 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "158b0bd7d75cbb6bf9c25967a48a2e9f77da95876b858eadfabaa99cd069de6e" "checksum constant_time_eq 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "07dcb7959f0f6f1cf662f9a7ff389bcb919924d99ac41cf31f10d611d8721323" "checksum docopt 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ab32ea6e284d87987066f21a9e809a73c14720571ef34516f0890b3d355ccfd8" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" @@ -287,10 +341,14 @@ dependencies = [ "checksum memchr 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1dbccc0e46f1ea47b9f17e6d67c5a96bd27030519c519c9c91327e31275a47b4" "checksum mmap 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "0bc85448a6006dd2ba26a385a564a8a0f1f2c7e78c70f1a70b2e0f4af286b823" "checksum murmurhash3 0.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "a2983372caf4480544083767bf2d27defafe32af49ab4df3a0b7fc90793a3664" +"checksum num 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)" = "98b15ba84e910ea7a1973bccd3df7b31ae282bf9d8bd2897779950c9b8303d40" +"checksum num-integer 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)" = "21e4df1098d1d797d27ef0c69c178c3fab64941559b290fcae198e0825c9c8b5" +"checksum num-iter 0.1.33 (registry+https://github.com/rust-lang/crates.io-index)" = "f7d1891bd7b936f12349b7d1403761c8a0b85a18b148e9da4429d5d102c1a41e" "checksum num-traits 0.1.37 (registry+https://github.com/rust-lang/crates.io-index)" = "e1cbfa3781f3fe73dc05321bed52a06d2d491eaa764c52335cf4399f046ece99" "checksum pkg-config 0.3.9 (registry+https://github.com/rust-lang/crates.io-index)" = "3a8b4c6b8165cd1a1cd4b9b120978131389f64bdaf456435caa41e630edba903" "checksum quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0aad603e8d7fb67da22dbdf1f4b826ce8829e406124109e73cf1b2454b93a71c" "checksum rand 0.3.15 (registry+https://github.com/rust-lang/crates.io-index)" = "022e0636ec2519ddae48154b028864bdce4eaf7d35226ab8e65c611be97b189d" +"checksum redox_syscall 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "8dd35cc9a8bdec562c757e3d43c1526b5c6d2653e23e2315065bc25556550753" "checksum regex 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4278c17d0f6d62dfef0ab00028feb45bd7d2102843f80763474eeb1be8a10c01" "checksum regex-syntax 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2f9191b1f57603095f105d317e375d19b1c9c5c3185ea9633a99a6dcbed04457" "checksum rmp 0.8.4 (registry+https://github.com/rust-lang/crates.io-index)" = "e59917c01f49718a59c644a621a4848aafc6577c4a47d66270d78951a807541a" @@ -304,6 +362,7 @@ dependencies = [ "checksum tempdir 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "87974a6f5c1dfb344d733055601650059a3363de2a6104819293baff662132d6" "checksum thread-id 3.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4437c97558c70d129e40629a5b385b3fb1ffac301e63941335e4d354081ec14a" "checksum thread_local 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "c85048c6260d17cf486ceae3282d9fb6b90be220bf5b28c400f5485ffc29f0c7" +"checksum time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)" = "211b63c112206356ef1ff9b19355f43740fc3f85960c598a93d3a3d3ba7beade" "checksum unreachable 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1f2ae5ddb18e1c92664717616dd9549dde73f539f01bd7b77c2edb2446bdff91" "checksum utf8-ranges 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "662fab6525a98beff2921d7f61a39e7d59e0b425ebc7d0d9e66d316e55124122" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" diff --git a/Cargo.toml b/Cargo.toml index c7eaec0..db4024e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ blake2-rfc = "*" murmurhash3 = "*" docopt = "0.7" rustc-serialize = "0.3" +chrono = "0.3" diff --git a/src/main.rs b/src/main.rs index 3d0c8c7..f3da830 100644 --- a/src/main.rs +++ b/src/main.rs @@ -9,6 +9,7 @@ extern crate serde_yaml; #[macro_use] extern crate quick_error; extern crate docopt; extern crate rustc_serialize; +extern crate chrono; pub mod util; pub mod bundle; @@ -17,15 +18,12 @@ mod chunker; mod repository; mod algotest; -use std::fs::File; -use std::io::Read; -use std::time; - use docopt::Docopt; +use chrono::prelude::*; use chunker::ChunkerType; -use repository::{Repository, Config, Mode, Inode, Backup}; -use util::{ChecksumType, Compression, HashMethod, to_file_size}; +use repository::{Repository, Config, Inode}; +use util::{ChecksumType, Compression, HashMethod, to_file_size, to_duration}; static USAGE: &'static str = " @@ -34,14 +32,12 @@ Usage: zvault backup [--full] zvault restore zvault check [--full] - zvault list + zvault backups zvault info zvault stats zvault bundles zvault algotest - zvault test zvault stat - zvault put Options: --full Whether to verify the repository by loading all bundles @@ -59,16 +55,14 @@ struct Args { cmd_restore: bool, cmd_check: bool, - cmd_list: bool, + cmd_backups: bool, cmd_info: bool, cmd_stats: bool, cmd_bundles: bool, cmd_algotest: bool, - cmd_test: bool, cmd_stat: bool, - cmd_put: bool, arg_repo: Option, arg_path: Option, @@ -134,15 +128,15 @@ fn main() { println!("Bundles: {}", info.bundle_count); println!("Total size: {}", to_file_size(info.encoded_data_size)); println!("Uncompressed size: {}", to_file_size(info.raw_data_size)); - println!("Compression ratio: {:.1}", info.compression_ratio * 100.0); + println!("Compression ratio: {:.1}%", info.compression_ratio * 100.0); println!("Chunk count: {}", info.chunk_count); println!("Average chunk size: {}", to_file_size(info.avg_chunk_size as u64)); let index_usage = info.index_entries as f32 / info.index_capacity as f32; - println!("Index: {}, {}% full", to_file_size(info.index_size as u64), index_usage * 100.0); + println!("Index: {}, {:.0}% full", to_file_size(info.index_size as u64), index_usage * 100.0); return } - if args.cmd_list { + if args.cmd_backups { for backup in repo.list_backups().unwrap() { println!("{}", backup); } @@ -167,58 +161,27 @@ fn main() { return } - if args.cmd_test { - print!("Integrity check before..."); - repo.check(true).unwrap(); - println!(" done."); - - let file_path = args.arg_path.unwrap(); - print!("Reading file {}...", file_path); - let mut data = Vec::new(); - let mut file = File::open(file_path).unwrap(); - file.read_to_end(&mut data).unwrap(); - println!(" done. {} bytes", data.len()); - - print!("Adding data to repository..."); - let start = time::Instant::now(); - let chunks = repo.put_data(Mode::Content, &data).unwrap(); - repo.flush().unwrap(); - let elapsed = start.elapsed(); - let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0; - let write_speed = data.len() as f64 / duration; - println!(" done. {} chunks, {:.1} MB/s", chunks.len(), write_speed / 1_000_000.0); - - println!("Integrity check after..."); - repo.check(true).unwrap(); - println!(" done."); - - print!("Reading data from repository..."); - let start = time::Instant::now(); - let data2 = repo.get_data(&chunks).unwrap(); - let elapsed = start.elapsed(); - let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0; - let read_speed = data.len() as f64 / duration; - assert_eq!(data.len(), data2.len()); - println!(" done. {:.1} MB/s", read_speed / 1_000_000.0); - return - } - let backup_name = args.arg_backup.unwrap().splitn(2, "::").nth(1).unwrap().to_string(); - if args.cmd_put { - let chunks = repo.put_inode(&args.arg_path.unwrap()).unwrap(); - repo.save_backup(&Backup{root: chunks, ..Default::default()}, &backup_name).unwrap(); - return - } - if args.cmd_backup { - unimplemented!() + let backup = repo.create_full_backup(&args.arg_path.unwrap()).unwrap(); + repo.save_backup(&backup, &backup_name).unwrap(); + return } let backup = repo.get_backup(&backup_name).unwrap(); if args.cmd_info { - println!("{:?}", backup.root); + println!("Date: {}", Local.timestamp(backup.date, 0).to_rfc2822()); + println!("Duration: {}", to_duration(backup.duration)); + println!("Entries: {} files, {} dirs", backup.file_count, backup.dir_count); + println!("Total backup size: {}", to_file_size(backup.total_data_size)); + println!("Modified data size: {}", to_file_size(backup.changed_data_size)); + let dedup_ratio = backup.deduplicated_data_size as f32 / backup.changed_data_size as f32; + println!("Deduplicated size: {}, {:.1}% saved", to_file_size(backup.deduplicated_data_size), (1.0 - dedup_ratio)*100.0); + let compress_ratio = backup.encoded_data_size as f32 / backup.deduplicated_data_size as f32; + println!("Compressed size: {} in {} bundles, {:.1}% saved", to_file_size(backup.encoded_data_size), backup.bundle_count, (1.0 - compress_ratio)*100.0); + println!("Chunk count: {}, avg size: {}", backup.chunk_count, to_file_size(backup.avg_chunk_size as u64)); return } diff --git a/src/repository/backup.rs b/src/repository/backup.rs index 40526ed..59ab761 100644 --- a/src/repository/backup.rs +++ b/src/repository/backup.rs @@ -1,19 +1,23 @@ use super::{Repository, Chunk, RepositoryError}; +use super::metadata::FileType; use ::util::*; use std::fs::{self, File}; use std::path::Path; +use std::collections::HashMap; + +use chrono::prelude::*; #[derive(Default, Debug)] pub struct Backup { pub root: Vec, - pub total_data_size: u64, - pub changed_data_size: u64, - pub new_data_size: u64, - pub encoded_data_size: u64, - pub new_bundle_count: usize, + pub total_data_size: u64, // Sum of all raw sizes of all entities + pub changed_data_size: u64, // Sum of all raw sizes of all entities actively stored + pub deduplicated_data_size: u64, // Sum of all raw sizes of all new bundles + pub encoded_data_size: u64, // Sum al all encoded sizes of all new bundles + pub bundle_count: usize, pub chunk_count: usize, pub avg_chunk_size: f32, pub date: i64, @@ -25,9 +29,9 @@ serde_impl!(Backup(u8) { root: Vec => 0, total_data_size: u64 => 1, changed_data_size: u64 => 2, - new_data_size: u64 => 3, + deduplicated_data_size: u64 => 3, encoded_data_size: u64 => 4, - new_bundle_count: usize => 5, + bundle_count: usize => 5, chunk_count: usize => 6, avg_chunk_size: f32 => 7, date: i64 => 8, @@ -71,17 +75,76 @@ impl Repository { pub fn restore_backup>(&mut self, backup: &Backup, path: P) -> Result<(), RepositoryError> { let inode = try!(self.get_inode(&backup.root)); try!(self.save_inode_at(&inode, path)); + //FIXME: recurse Ok(()) } + #[allow(dead_code)] pub fn create_full_backup>(&mut self, path: P) -> Result { - // Maintain a stack of folders still todo - // Maintain a map of path->inode entries - // Work on topmost stack entry - // If it is a file, create inode for it and put it in the map - // If it is a folder, list contents and put entries not in the map on the stack, folders last - // If it is a folder with no missing entries, create a directory inode, add it to the map, and remove all children from the map - // If stack is empty create a backup with the last inode as root - unimplemented!() + let mut scan_stack = vec![path.as_ref().to_owned()]; + let mut save_stack = vec![]; + let mut directories = HashMap::new(); + let mut backup = Backup::default(); + let info_before = self.info(); + let start = Local::now(); + while let Some(path) = scan_stack.pop() { + // Create an inode for this path containing all attributes and contents + // (for files) but no children (for directories) + let mut inode = try!(self.create_inode(&path)); + backup.total_data_size += inode.size; + backup.changed_data_size += inode.size; + if inode.file_type == FileType::Directory { + backup.dir_count +=1; + // For directories we need to put all children on the stack too, so there will be inodes created for them + // Also we put directories on the save stack to save them in order + save_stack.push(path.clone()); + inode.children = Some(HashMap::new()); + directories.insert(path.clone(), inode); + for ch in try!(fs::read_dir(&path)) { + scan_stack.push(try!(ch).path()); + } + } else { + backup.file_count +=1; + // Non-directories are stored directly and the chunks are put into the children map of their parents + let chunks = try!(self.put_inode(&inode)); + if let Some(parent) = path.parent() { + let parent = parent.to_owned(); + if let Some(ref mut parent) = directories.get_mut(&parent) { + let children = parent.children.as_mut().unwrap(); + children.insert(inode.name.clone(), chunks); + } + } + } + } + loop { + let path = save_stack.pop().unwrap(); + // Now that all children have been saved the directories can be saved in order, adding their chunks to their parents as well + let inode = directories.remove(&path).unwrap(); + let chunks = try!(self.put_inode(&inode)); + if let Some(parent) = path.parent() { + let parent = parent.to_owned(); + if let Some(ref mut parent) = directories.get_mut(&parent) { + let children = parent.children.as_mut().unwrap(); + children.insert(inode.name.clone(), chunks); + } else if save_stack.is_empty() { + backup.root = chunks; + break + } + } else if save_stack.is_empty() { + backup.root = chunks; + break + } + } + try!(self.flush()); + let elapsed = Local::now().signed_duration_since(start); + backup.date = start.timestamp(); + backup.duration = elapsed.num_milliseconds() as f32 / 1_000.0; + let info_after = self.info(); + backup.deduplicated_data_size = info_after.raw_data_size - info_before.raw_data_size; + backup.encoded_data_size = info_after.encoded_data_size - info_before.encoded_data_size; + backup.bundle_count = info_after.bundle_count - info_before.bundle_count; + backup.chunk_count = info_after.chunk_count - info_before.chunk_count; + backup.avg_chunk_size = backup.deduplicated_data_size as f32 / backup.chunk_count as f32; + Ok(backup) } } diff --git a/src/repository/metadata.rs b/src/repository/metadata.rs index 9d67722..58eced1 100644 --- a/src/repository/metadata.rs +++ b/src/repository/metadata.rs @@ -147,7 +147,7 @@ impl Inode { impl Repository { - pub fn put_inode>(&mut self, path: P) -> Result, RepositoryError> { + pub fn create_inode>(&mut self, path: P) -> Result { let mut inode = try!(Inode::get_from(path.as_ref())); if inode.file_type == FileType::File && inode.size > 0 { let mut file = try!(File::open(path)); @@ -166,7 +166,11 @@ impl Repository { } } } - self.put_data(Mode::Meta, &try!(msgpack::encode(&inode))) + Ok(inode) + } + + pub fn put_inode(&mut self, inode: &Inode) -> Result, RepositoryError> { + self.put_data(Mode::Meta, &try!(msgpack::encode(inode))) } #[inline] diff --git a/src/util/mod.rs b/src/util/mod.rs index 7b8a6b5..f5673db 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -36,3 +36,12 @@ pub fn to_file_size(size: u64) -> String { } format!("{:.1} TiB", size) } + +pub fn to_duration(dur: f32) -> String { + let secs = dur.floor() as u64; + let subsecs = dur - dur.floor(); + let hours = secs / 3600; + let mins = (secs / 60) % 60; + let secs = (secs % 60) as f32 + subsecs; + format!("{}:{:02}:{:04.1}", hours, mins, secs) +}