diff --git a/README.md b/README.md index 1cff9fd..0200479 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ data size and is compressed as a whole to save space ("solid archive"). ### Independent backups All backups share common data in form of chunks but are independent on a higher -level. Backups can be delete and chunks that are not used by any backup can be +level. Backups can be deleted and chunks that are not used by any backup can be removed. Other backup solutions use differential backups organized in chains. This makes @@ -94,87 +94,33 @@ Recommended: Brotli/2-7 ## Design -- Use rolling checksum to create content-dependent chunks -- Use sha3-shake128 to hash chunks -- Use mmapped hashtable to find duplicate chunks -- Serialize metadata into chunks -- Store small file data within metadata -- Store directory metadata to avoid calculating checksums of unchanged files (same mtime and size) -- Store full directory tree in each backup (use cached metadata and checksums for unchanged entries) -- Compress data chunks in blocks of ~10MB to improve compression ("solid archive") -- Store metadata in separate data chunks to enable metadata caching on client -- Encrypt archive -- Sort new files by file extension to improve compression - -## Configurable parameters - -- Rolling chunker algorithm -- Minimal chunk size [default: 1 KiB] -- Maximal chunk size [default: 64 KiB] -- Maximal file size for inlining [default: 128 Bytes] -- Block size [default: 10 MiB] -- Block compression algorithm [default: Brotli 6] -- Encryption algorithm [default: chacha20+poly1305] ## TODO -- Remove old data -- Locking / Multiple clients +### Core functionality +- Keep backup files also remotely and sync them +- Lock during backup and vacuum +- Options for creating backups (same filesystem, exclude/include patterns) +- Recompress & combine bundles +- Allow to use tar files for backup and restore (--tar, http://alexcrichton.com/tar-rs/tar/index.html) -## Modules +### CLI functionality 
+- Remove backup subtrees +- list --tree +- More detailed errors with nicer text -- Rolling checksum chunker - - Also creates hashes -- Mmapped hashtable that stores existing chunks hashes -- Remote block writing and compression/encryption -- Inode data serialization -- Recursive directory scanning, difference calculation, new entry sorting - - -### ChunkDB - -- Stores data in chunks -- A chunk is a file -- Per Chunk properties - - Format version - - Encryption method - - Encryption key - - Compression method / level -- Chunk ID is the hash of the contents - - No locks needed on shared chunk repository !!! - - Chunk ID is calculated after compression and encryption -- Chunk header - - "zvault01" - - Chunk size compressed / raw - - Content hash method / value - - Encryption method / options / key hash - - Compression method / options -- Chunks are write-once read-often -- Chunks are prepared outside the repository -- Only one chunk is being prepared at a time -- Adding data to the chunk returns starting position in raw data -- Operations: - - List available chunks - - Add data - - Flush chunk - - Delete chunk - - Get data - - Check chunk -- Chunk path is `checksum.chunk` or `chec/ksum.chunk` -- Data is added to current chunk and compressed in memory -- Operations on chunk files are just sequencial read/write and delete -- Ability to recompress chunks - - -### Index - -16 Bytes per hash key -8 Bytes data per entry (4 bytes bundle id, 4 bytes chunk id) -=> 24 Bytes per entry - -Average chunk sizes - 8 Kib => 3 MiB / 1 GiB -16 Kib => 1.5 MiB / 1 GiB -24 Kib => 1.0 MiB / 1 GiB -32 Kib => 750 Kib / 1 GiB -64 Kib => 375 Kib / 1 GiB +### Other +- Stability +- Tests & benchmarks + - Chunker + - Index + - BundleDB + - Bundle map + - Config files + - Backup files + - Backup + - Prune + - Vacuum +- Documentation + - All file formats + - Design diff --git a/src/cli/args.rs b/src/cli/args.rs index 5c4a1b4..ab91412 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -67,7 +67,8 
@@ pub enum Arguments { }, Import { repo_path: String, - remote_path: String + remote_path: String, + key_files: Vec }, Configure { repo_path: String, @@ -78,10 +79,11 @@ pub enum Arguments { hash: Option }, GenKey { + file: Option }, AddKey { repo_path: String, - key_pair: Option<(PublicKey, SecretKey)>, + file: Option, set_default: bool }, AlgoTest { @@ -159,22 +161,6 @@ fn parse_public_key(val: &str) -> PublicKey { } } -fn parse_secret_key(val: &str) -> SecretKey { - let bytes = match parse_hex(val) { - Ok(bytes) => bytes, - Err(_) => { - error!("Invalid key: {}", val); - exit(1); - } - }; - if let Some(key) = SecretKey::from_slice(&bytes) { - key - } else { - error!("Invalid key: {}", val); - exit(1); - } -} - fn parse_hash(val: &str) -> HashMethod { if let Ok(hash) = HashMethod::from(val) { hash @@ -251,6 +237,7 @@ pub fn parse() -> Arguments { ) (@subcommand import => (about: "reconstruct a repository from the remote files") + (@arg key: --key -k ... +takes_value "a file with a needed to read the bundles") (@arg REMOTE: +required "remote repository path") (@arg REPO: +required "path of the local repository to create") ) @@ -269,14 +256,14 @@ pub fn parse() -> Arguments { ) (@subcommand genkey => (about: "generates a new key pair") + (@arg FILE: +takes_value "the destination file for the keypair") ) (@subcommand addkey => (about: "adds a key to the respository") (@arg REPO: +required "path of the repository") (@arg generate: --generate "generate a new key") (@arg set_default: --default "set this key as default") - (@arg PUBLIC: +takes_value "the public key") - (@arg SECRET: +takes_value "the secret key") + (@arg FILE: +takes_value "the file containing the keypair") ) (@subcommand algotest => (about: "test a specific algorithm combination") @@ -418,7 +405,8 @@ pub fn parse() -> Arguments { } return Arguments::Import { repo_path: repository.to_string(), - remote_path: args.value_of("REMOTE").unwrap().to_string() + remote_path: 
args.value_of("REMOTE").unwrap().to_string(), + key_files: args.values_of("key").map(|v| v.map(|k| k.to_string()).collect()).unwrap_or_else(|| vec![]) } } if let Some(args) = args.subcommand_matches("configure") { @@ -442,8 +430,10 @@ pub fn parse() -> Arguments { repo_path: repository.to_string(), } } - if let Some(_args) = args.subcommand_matches("genkey") { - return Arguments::GenKey {} + if let Some(args) = args.subcommand_matches("genkey") { + return Arguments::GenKey { + file: args.value_of("FILE").map(|v| v.to_string()) + } } if let Some(args) = args.subcommand_matches("addkey") { let (repository, backup, inode) = split_repo_path(args.value_of("REPO").unwrap()); @@ -452,23 +442,18 @@ pub fn parse() -> Arguments { exit(1); } let generate = args.is_present("generate"); - if !generate && (!args.is_present("PUBLIC") || !args.is_present("SECRET")) { - println!("Without --generate, a public and secret key must be given"); + if !generate && !args.is_present("FILE") { + println!("Without --generate, a file containing the key pair must be given"); exit(1); } - if generate && (args.is_present("PUBLIC") || args.is_present("SECRET")) { - println!("With --generate, no public or secret key may be given"); + if generate && args.is_present("FILE") { + println!("With --generate, no file may be given"); exit(1); } - let key_pair = if generate { - None - } else { - Some((parse_public_key(args.value_of("PUBLIC").unwrap()), parse_secret_key(args.value_of("SECRET").unwrap()))) - }; return Arguments::AddKey { repo_path: repository.to_string(), set_default: args.is_present("set_default"), - key_pair: key_pair + file: args.value_of("FILE").map(|v| v.to_string()) } } if let Some(args) = args.subcommand_matches("algotest") { diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 311f819..1cde52b 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -284,8 +284,8 @@ pub fn run() { println!(); } }, - Arguments::Import{repo_path, remote_path} => { - Repository::import(repo_path, 
remote_path).unwrap(); + Arguments::Import{repo_path, remote_path, key_files} => { + Repository::import(repo_path, remote_path, key_files).unwrap(); }, Arguments::Configure{repo_path, bundle_size, chunker, compression, encryption, hash} => { let mut repo = open_repository(&repo_path); @@ -309,26 +309,29 @@ pub fn run() { repo.save_config().unwrap(); print_config(&repo.config); }, - Arguments::GenKey{} => { + Arguments::GenKey{file} => { let (public, secret) = gen_keypair(); - println!("Public key: {}", to_hex(&public[..])); - println!("Secret key: {}", to_hex(&secret[..])); + println!("public: {}", to_hex(&public[..])); + println!("secret: {}", to_hex(&secret[..])); + if let Some(file) = file { + Crypto::save_keypair_to_file(&public, &secret, file).unwrap(); + } }, - Arguments::AddKey{repo_path, set_default, key_pair} => { + Arguments::AddKey{repo_path, set_default, file} => { let mut repo = open_repository(&repo_path); - let (public, secret) = if let Some(key_pair) = key_pair { - key_pair + let (public, secret) = if let Some(file) = file { + Crypto::load_keypair_from_file(file).unwrap() } else { let (public, secret) = gen_keypair(); - println!("Public key: {}", to_hex(&public[..])); - println!("Secret key: {}", to_hex(&secret[..])); + println!("public: {}", to_hex(&public[..])); + println!("secret: {}", to_hex(&secret[..])); (public, secret) }; + repo.register_key(public, secret).unwrap(); if set_default { repo.set_encryption(Some(&public)); repo.save_config().unwrap(); } - repo.register_key(public, secret).unwrap(); }, Arguments::AlgoTest{bundle_size, chunker, compression, encrypt, hash, file} => { algotest::run(&file, bundle_size, chunker, compression, encrypt, hash); diff --git a/src/main.rs b/src/main.rs index 457a0a0..df5ed62 100644 --- a/src/main.rs +++ b/src/main.rs @@ -25,14 +25,6 @@ mod repository; mod cli; mod prelude; -// TODO: Keep backup files also remotely and sync them -// TODO: Lock during backup and vacuum -// TODO: Remove backup subtrees -// 
TODO: Recompress & combine bundles -// TODO: list --tree -// TODO: Give crypto keys for import -// TODO: More detailed errors with nicer text -// TODO: Allow to use tar files for backup and restore (--tar, http://alexcrichton.com/tar-rs/tar/index.html) fn main() { cli::run(); diff --git a/src/repository/mod.rs b/src/repository/mod.rs index 562b0d7..25333f8 100644 --- a/src/repository/mod.rs +++ b/src/repository/mod.rs @@ -109,10 +109,13 @@ impl Repository { Ok(repo) } - pub fn import, R: AsRef>(path: P, remote: R) -> Result { + pub fn import, R: AsRef>(path: P, remote: R, key_files: Vec) -> Result { let path = path.as_ref(); - try!(Repository::create(path, Config::default(), remote)); - let mut repo = try!(Repository::open(path)); + let mut repo = try!(Repository::create(path, Config::default(), remote)); + for file in key_files { + try!(repo.crypto.lock().unwrap().register_keyfile(file)); + } + repo = try!(Repository::open(path)); let mut backups: Vec = try!(repo.get_backups()).into_iter().map(|(_, v)| v).collect(); backups.sort_by_key(|b| b.date); if let Some(backup) = backups.pop() { diff --git a/src/util/encryption.rs b/src/util/encryption.rs index 9441fac..c09d98b 100644 --- a/src/util/encryption.rs +++ b/src/util/encryption.rs @@ -132,11 +132,31 @@ impl Crypto { self.keys.insert(public, secret); } + #[inline] + pub fn register_keyfile>(&mut self, path: P) -> Result<(), EncryptionError> { + let (public, secret) = try!(Self::load_keypair_from_file(path)); + self.register_secret_key(public, secret) + } + + #[inline] + pub fn load_keypair_from_file>(path: P) -> Result<(PublicKey, SecretKey), EncryptionError> { + let keyfile = try!(KeyfileYaml::load(path)); + let public = try!(parse_hex(&keyfile.public).map_err(|_| EncryptionError::InvalidKey)); + let public = try!(PublicKey::from_slice(&public).ok_or(EncryptionError::InvalidKey)); + let secret = try!(parse_hex(&keyfile.secret).map_err(|_| EncryptionError::InvalidKey)); + let secret = 
try!(SecretKey::from_slice(&secret).ok_or(EncryptionError::InvalidKey)); + Ok((public, secret)) + } + + #[inline] + pub fn save_keypair_to_file<P: AsRef<Path>>(public: &PublicKey, secret: &SecretKey, path: P) -> Result<(), EncryptionError> { + KeyfileYaml { public: to_hex(&public[..]), secret: to_hex(&secret[..]) }.save(path) + } + #[inline] pub fn register_secret_key(&mut self, public: PublicKey, secret: SecretKey) -> Result<(), EncryptionError> { - let keyfile = KeyfileYaml { public: to_hex(&public[..]), secret: to_hex(&secret[..]) }; let path = self.path.join(to_hex(&public[..]) + ".yaml"); - try!(keyfile.save(path)); + try!(Self::save_keypair_to_file(&public, &secret, path)); self.keys.insert(public, secret); Ok(()) }