From 657952d4d705505b1b69c97835e44c5662a443b1 Mon Sep 17 00:00:00 2001 From: Dennis Schwerdel Date: Mon, 20 Mar 2017 22:24:53 +0100 Subject: [PATCH] Partial backups --- Cargo.lock | 1 + Cargo.toml | 1 + src/bundle.rs | 2 +- src/cli/args.rs | 7 +++++-- src/cli/mod.rs | 31 ++++++++++++++++++++++++++++--- src/main.rs | 2 +- src/repository/backup.rs | 35 +++++++++++++++++++++++++++-------- src/repository/integrity.rs | 2 +- src/repository/metadata.rs | 19 ++++++++++++++++--- src/util/hostname.rs | 18 ++++++++++++++++++ src/util/mod.rs | 2 ++ 11 files changed, 101 insertions(+), 19 deletions(-) create mode 100644 src/util/hostname.rs diff --git a/Cargo.lock b/Cargo.lock index 5127559..3adadc8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7,6 +7,7 @@ dependencies = [ "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.21.1 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", "mmap 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "murmurhash3 0.0.5 (registry+https://github.com/rust-lang/crates.io-index)", diff --git a/Cargo.toml b/Cargo.toml index d3694ab..c881e0b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -21,6 +21,7 @@ log = "0.3" byteorder = "1.0" ansi_term = "0.9" sodiumoxide = "*" +libc = "*" [build-dependencies] pkg-config = "0.3" diff --git a/src/bundle.rs b/src/bundle.rs index d5eb80b..5d6ac48 100644 --- a/src/bundle.rs +++ b/src/bundle.rs @@ -254,7 +254,7 @@ impl Bundle { #[inline] fn load_encoded_contents(&self) -> Result, BundleError> { - debug!("Load bundle data {}", self.info.id); + debug!("Load bundle data {} ({:?})", self.info.id, self.info.mode); let mut file = BufReader::new(try!(File::open(&self.path).context(&self.path as &Path))); try!(file.seek(SeekFrom::Start(self.content_start as u64)).context(&self.path as &Path)); let mut data = Vec::with_capacity(max(self.info.encoded_size, self.info.raw_size)+1024); diff --git a/src/cli/args.rs b/src/cli/args.rs index 127e27c..bca43cc 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -17,7 +17,8 @@ pub enum Arguments { repo_path: String, backup_name: String, src_path: String, - full: bool + full: bool, + reference: Option }, Restore { repo_path: String, @@ -204,6 +205,7 @@ pub fn parse() -> Arguments { (@subcommand backup => (about: "creates a new backup") (@arg full: --full "create a full backup") + (@arg reference: --ref +takes_value "the reference backup to use for partial backup") (@arg SRC: +required "source path to backup") (@arg BACKUP: +required "repository::backup path") ) @@ -313,7 +315,8 @@ pub fn parse() -> Arguments { repo_path: repository.to_string(), backup_name: backup.unwrap().to_string(), full: args.is_present("full"), - src_path: args.value_of("SRC").unwrap().to_string() + src_path: args.value_of("SRC").unwrap().to_string(), + reference: args.value_of("reference").map(|v| v.to_string()) } } if let Some(args) = args.subcommand_matches("restore") { diff --git a/src/cli/mod.rs b/src/cli/mod.rs index b55ee3b..2bdfd35 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -30,6 +30,22 @@ fn get_backup(repo: &Repository, backup_name: &str) -> Backup { } } +fn find_reference_backup(repo: &Repository, path: &str) -> Option { + let mut matching = Vec::new(); + let hostname = match get_hostname() { + Ok(hostname) => hostname, + Err(_) => return None + }; + for (_, backup) in repo.list_backups().unwrap() { + if backup.host == hostname && backup.path == path { + matching.push(backup); + } + } + matching.sort_by_key(|b| b.date); + matching.pop() +} + + #[allow(unknown_lints,cyclomatic_complexity)] pub fn run() { if let Err(err) = logger::init() { @@ -54,12 +70,21 @@ pub fn run() { repo.save_config().unwrap(); } }, - Arguments::Backup{repo_path, backup_name, src_path, full} => { + Arguments::Backup{repo_path, backup_name, src_path, full, reference} => { let mut repo = open_repository(&repo_path); + let mut reference_backup = None; if !full { - warn!("Partial backups are not implemented yet, creating full backup"); + reference_backup = reference.map(|r| get_backup(&repo, &r)); + if reference_backup.is_none() { + reference_backup = find_reference_backup(&repo, &src_path); + } + if let Some(ref backup) = reference_backup { + info!("Using backup from {} as reference", Local.timestamp(backup.date, 0).to_rfc2822()); + } else { + info!("No reference backup found, doing a full scan instead"); + } } - let backup = repo.create_full_backup(&src_path).unwrap(); + let backup = repo.create_backup(&src_path, reference_backup.as_ref()).unwrap(); repo.save_backup(&backup, &backup_name).unwrap(); }, Arguments::Restore{repo_path, backup_name, inode, dst_path} => { diff --git a/src/main.rs b/src/main.rs index a7e939e..ea6cb9c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -14,6 +14,7 @@ extern crate chrono; extern crate byteorder; extern crate sodiumoxide; extern crate ansi_term; +extern crate libc; pub mod util; @@ -33,7 +34,6 @@ mod cli; // TODO: Recompress & combine bundles // TODO: Encrypt backup files too // TODO: list --tree -// TODO: Partial backups via reference inode // TODO: Import repository from remote folder // TODO: Continue on errors diff --git a/src/repository/backup.rs b/src/repository/backup.rs index b9448ea..0921339 100644 --- a/src/repository/backup.rs +++ b/src/repository/backup.rs @@ -23,7 +23,9 @@ pub struct Backup { pub date: i64, pub duration: f32, pub file_count: usize, - pub dir_count: usize + pub dir_count: usize, + pub host: String, + pub path: String } serde_impl!(Backup(u8) { root: Vec => 0, @@ -37,7 +39,9 @@ serde_impl!(Backup(u8) { date: i64 => 8, duration: f32 => 9, file_count: usize => 10, - dir_count: usize => 11 + dir_count: usize => 11, + host: String => 12, + path: String => 13 }); @@ -168,19 +172,28 @@ impl Repository { } #[allow(dead_code)] - pub fn create_full_backup>(&mut self, path: P) -> Result { - let mut scan_stack = vec![path.as_ref().to_owned()]; + pub fn create_backup>(&mut self, path: P, reference: Option<&Backup>) -> Result { + let reference_inode = reference.and_then(|b| self.get_inode(&b.root).ok()); + let mut scan_stack = vec![(path.as_ref().to_owned(), reference_inode)]; let mut save_stack = vec![]; let mut directories = HashMap::new(); let mut backup = Backup::default(); + backup.host = get_hostname().unwrap_or_else(|_| "".to_string()); + backup.path = path.as_ref().to_string_lossy().to_string(); let info_before = self.info(); let start = Local::now(); - while let Some(path) = scan_stack.pop() { + while let Some((path, reference_inode)) = scan_stack.pop() { // Create an inode for this path containing all attributes and contents // (for files) but no children (for directories) - let mut inode = try!(self.create_inode(&path)); + let mut inode = try!(self.create_inode(&path, reference_inode.as_ref())); backup.total_data_size += inode.size; - backup.changed_data_size += inode.size; + if let Some(ref ref_inode) = reference_inode { + if !ref_inode.is_unchanged(&inode) { + backup.changed_data_size += inode.size; + } + } else { + backup.changed_data_size += inode.size; + } if inode.file_type == FileType::Directory { backup.dir_count +=1; // For directories we need to put all children on the stack too, so there will be inodes created for them @@ -189,7 +202,13 @@ impl Repository { inode.children = Some(HashMap::new()); directories.insert(path.clone(), inode); for ch in try!(fs::read_dir(&path)) { - scan_stack.push(try!(ch).path()); + let child = try!(ch); + let name = child.file_name().to_string_lossy().to_string(); + let ref_child = reference_inode.as_ref() + .and_then(|inode| inode.children.as_ref()) + .and_then(|map| map.get(&name)) + .and_then(|chunks| self.get_inode(chunks).ok()); + scan_stack.push((child.path(), ref_child)); } } else { backup.file_count +=1; diff --git a/src/repository/integrity.rs b/src/repository/integrity.rs index 5078858..3f9e541 100644 --- a/src/repository/integrity.rs +++ b/src/repository/integrity.rs @@ -79,7 +79,7 @@ impl Repository { let mut new = false; for &(hash, _len) in chunks { if let Some(pos) = self.index.pos(&hash) { - new |= checked.get(pos); + new |= !checked.get(pos); checked.set(pos); } else { return Err(RepositoryIntegrityError::MissingChunk(hash).into()) diff --git a/src/repository/metadata.rs b/src/repository/metadata.rs index eaa54d8..d93eff4 100644 --- a/src/repository/metadata.rs +++ b/src/repository/metadata.rs @@ -24,7 +24,7 @@ serde_impl!(FileType(u8) { }); -#[derive(Debug)] +#[derive(Debug, Clone)] pub enum FileContents { Inline(msgpack::Bytes), ChunkedDirect(ChunkList), @@ -85,6 +85,7 @@ serde_impl!(Inode(u8) { children: HashMap => 11 }); + impl Inode { #[inline] fn get_extended_attrs_from(&mut self, meta: &Metadata) -> Result<(), RepositoryError> { @@ -145,13 +146,25 @@ impl Inode { // https://crates.io/crates/filetime Ok(file) } + + pub fn is_unchanged(&self, other: &Inode) -> bool { + self.modify_time == other.modify_time + && self.create_time == other.create_time + && self.file_type == other.file_type + } } impl Repository { - pub fn create_inode>(&mut self, path: P) -> Result { + pub fn create_inode>(&mut self, path: P, reference: Option<&Inode>) -> Result { let mut inode = try!(Inode::get_from(path.as_ref())); if inode.file_type == FileType::File && inode.size > 0 { + if let Some(reference) = reference { + if reference.is_unchanged(&inode) { + inode.contents = reference.contents.clone(); + return Ok(inode) + } + } let mut file = try!(File::open(path)); if inode.size < 100 { let mut data = Vec::with_capacity(inode.size as usize); @@ -164,7 +177,7 @@ impl Repository { } else { let mut chunk_data = Vec::with_capacity(chunks.encoded_size()); chunks.write_to(&mut chunk_data).unwrap(); - chunks = try!(self.put_data(BundleMode::Content, &chunk_data)); + chunks = try!(self.put_data(BundleMode::Meta, &chunk_data)); inode.contents = Some(FileContents::ChunkedIndirect(chunks)); } } diff --git a/src/util/hostname.rs b/src/util/hostname.rs new file mode 100644 index 0000000..00cd607 --- /dev/null +++ b/src/util/hostname.rs @@ -0,0 +1,18 @@ +use libc; +use std::ffi; + +extern { + fn gethostname(name: *mut libc::c_char, size: libc::size_t) -> libc::c_int; +} + +pub fn get_hostname() -> Result { + let mut buf = Vec::with_capacity(255); + buf.resize(255, 0u8); + if unsafe { gethostname(buf.as_mut_ptr() as *mut libc::c_char, buf.len() as libc::size_t) } == 0 { + buf[254] = 0; //enforce null-termination + let name = unsafe { ffi::CStr::from_ptr(buf.as_ptr() as *const libc::c_char) }; + name.to_str().map(|s| s.to_string()).map_err(|_| ()) + } else { + Err(()) + } +} diff --git a/src/util/mod.rs b/src/util/mod.rs index a7a9e3b..adf72ee 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -7,6 +7,7 @@ mod chunk; mod bitmap; mod hex; mod cli; +mod hostname; pub mod msgpack; pub use self::chunk::*; @@ -17,3 +18,4 @@ pub use self::lru_cache::*; pub use self::bitmap::*; pub use self::hex::*; pub use self::cli::*; +pub use self::hostname::*;