diff --git a/README.md b/README.md index 8d3a494..9d0b20c 100644 --- a/README.md +++ b/README.md @@ -103,6 +103,15 @@ Recommended: Brotli/2-7 - File attributes - xattrs https://crates.io/crates/xattr +### Formats +- Bundles + - Encrypted bundle header + - Random bundle name +- Metadata + - Arbitrarily nested chunk lists + - Cumulative size, chunk count, dir/file count +- Permissive msgpack mode + ### CLI functionality - list --tree diff --git a/docs/repository_readme.md b/docs/repository_readme.md new file mode 100644 index 0000000..f437c98 --- /dev/null +++ b/docs/repository_readme.md @@ -0,0 +1,136 @@ +# zVault repository + +This folder is a zVault remote repository and contains backup data. + +The repository contains the following components: +* The backup bundles in the subfolder `bundles`. The individual files are + organized in subfolders and named after their bundle ids. The structure and + names of the files are not important as the files include the bundle id in + their headers. Thus the files can be renamed and reorganized. +* The backup anchor files in the subfolder `backups`. The names of the files + and their structure determine the backup names but are not used otherwise. +* Active locks in the subfolder `locks`. This folder only contains lock files + when the repository is currently used. If any zVault process crashes, a stale + lock file might be left behind. Those files can be safely removed once it is + certain that no process is running. + + +## Repository format + +In case the zVault software is not available for restoring the backups included +in this repository, the following sections describe the format of the repository +so that its contents can be read without zVault. 
+ + +### Bundle files +The bundle file format consists of 4 parts: +- A magic header with version +- An encoded header structure +- An encoded chunk list +- The chunk data + +The main reason for having those multiple parts is that it is expected that the +smaller front parts can be read much faster than the whole file. So +information that is needed more frequently is put into earlier parts and the +data that is needed the least frequently is put into the last part so that it does +not slow down reading the front parts. Keeping those parts in separate files +was also considered but rejected to increase the reliability of the storage. + + +#### Magic header with version +The first part of a bundle file contains an 8 byte magic header with version +information. + +The first 6 bytes of the header consist of the fixed string "zvault", followed +by one byte with the fixed value 0x01. Those 7 bytes make up the magic header of +the file and serve to identify the file type as a zvault bundle file. + +The 8th byte of the first file part is the version of the file format. This +value is currently 0x01 and is expected to be increased for any breaking changes +in the file format. + + +#### Encoded header structure +The encoded header structure is the second part of the bundle file format and +follows directly after the 8 bytes of the magic header. + +The header structure is defined in the appendix as `BundleInfo` and contains +general information on the bundle's contents and on how to decode the other two +parts of the bundle file. + +This header structure is encoded using the *MsgPack* format. It is neither +compressed (since its size is pretty small) nor encrypted (since it only +contains general information and no user data) in any way. + + +#### Encoded chunk list +The chunk list is the third part of the bundle file and follows directly after +the encoded header structure. 
+ +The chunk list contains hashes and sizes of all chunks stored in this bundle in +the order they are stored. The list is encoded efficiently as 20 bytes per chunk +(16 for the hash and 4 for the size) as defined in the appendix as `ChunkList`. + +Since the chunk list contains confidential information (the chunk hashes and +sizes can be used to identify files) the encoded chunk list is encrypted using +the encryption method specified in the header structure. The header structure +also contains the full size of the encoded and encrypted chunk list which is +needed since the encryption could add some bytes for a nonce or an +authentication code. + +The chunk list is not compressed since the hashes have a very high entropy and +do not compress significantly. + +The chunk list is not stored in the header structure because it contains +confidential data and the encryption method is stored in the header. Also, the +chunk list can be pretty big compared to the header which needs to be read more +often. + + +#### Chunk data +The chunk data is the final part of a bundle file and follows after the encoded +chunk list. The starting position can be obtained from the header as the encoded +size of the chunk list is stored there. + +The chunk data part consists of the content data of the chunks contained in this +bundle simply concatenated without any separator. The actual size (and by +summing up the sizes also the starting position) of each chunk can be obtained +from the chunk list. + +The chunk data is compressed as a whole (solid archive) and encrypted with the +methods specified in the bundle header structure. 
+ + +### Inode metadata +TODO + +### Backup format +TODO + +### Backup file +TODO + + +## Appendix + +### Constants +TODO + +### Types + +### `BundleInfo` encoding +serde_impl!(BundleInfo(u64) { + id: BundleId => 0, + mode: BundleMode => 1, + compression: Option => 2, + encryption: Option => 3, + hash_method: HashMethod => 4, + raw_size: usize => 6, + encoded_size: usize => 7, + chunk_count: usize => 8, + chunk_info_size: usize => 9 +}); + + +### `ChunkList` encoding +TODO diff --git a/src/bundledb/mod.rs b/src/bundledb/mod.rs index 8313137..a585ace 100644 --- a/src/bundledb/mod.rs +++ b/src/bundledb/mod.rs @@ -78,7 +78,7 @@ pub struct BundleInfo { pub chunk_count: usize, pub chunk_info_size: usize } -serde_impl!(BundleInfo(u64) { +serde_impl!(BundleInfo(u64?) { id: BundleId => 0, mode: BundleMode => 1, compression: Option => 2, diff --git a/src/chunker/mod.rs b/src/chunker/mod.rs index aadcf2d..75ec1b6 100644 --- a/src/chunker/mod.rs +++ b/src/chunker/mod.rs @@ -78,7 +78,7 @@ impl IChunker for Chunker { } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum ChunkerType { Ae(usize), Rabin((usize, u32)), diff --git a/src/cli/mod.rs b/src/cli/mod.rs index e6b61d0..ad742d2 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -89,7 +89,7 @@ fn print_backup(backup: &Backup) { pub fn format_inode_one_line(inode: &Inode) -> String { match inode.file_type { FileType::Directory => format!("{:25}\t{} entries", format!("{}/", inode.name), inode.children.as_ref().unwrap().len()), - FileType::File => format!("{:25}\t{:>10}\t{}", inode.name, to_file_size(inode.size), Local.timestamp(inode.modify_time, 0).to_rfc2822()), + FileType::File => format!("{:25}\t{:>10}\t{}", inode.name, to_file_size(inode.size), Local.timestamp(inode.timestamp, 0).to_rfc2822()), FileType::Symlink => format!("{:25}\t -> {}", inode.name, inode.symlink_target.as_ref().unwrap()), } } @@ -101,7 +101,7 @@ fn print_inode(inode: &Inode) { println!("Permissions: {:3o}", 
inode.mode); println!("User: {}", inode.user); println!("Group: {}", inode.group); - println!("Modification time: {}", Local.timestamp(inode.modify_time, 0).to_rfc2822()); + println!("Timestamp: {}", Local.timestamp(inode.timestamp, 0).to_rfc2822()); if let Some(ref target) = inode.symlink_target { println!("Symlink target: {}", target); } diff --git a/src/mount.rs b/src/mount.rs index b0f8465..b1321c1 100644 --- a/src/mount.rs +++ b/src/mount.rs @@ -87,8 +87,8 @@ impl FuseInode { ino: self.num, size: self.inode.size, blocks: self.inode.size / 512, - atime: Timespec::new(self.inode.modify_time, 0), - mtime: Timespec::new(self.inode.modify_time, 0), + atime: Timespec::new(self.inode.timestamp, 0), + mtime: Timespec::new(self.inode.timestamp, 0), ctime: Timespec::new(0, 0), crtime: Timespec::new(0, 0), kind: convert_file_type(self.inode.file_type), diff --git a/src/repository/backup.rs b/src/repository/backup.rs index 220b4d7..0fff9a2 100644 --- a/src/repository/backup.rs +++ b/src/repository/backup.rs @@ -290,14 +290,14 @@ impl Repository { for (name, backup) in try!(self.get_backups()) { match self.get_backup_inode(&backup, path) { Ok(inode) => { - versions.insert((inode.file_type, inode.modify_time, inode.size), (name, inode)); + versions.insert((inode.file_type, inode.timestamp, inode.size), (name, inode)); }, Err(RepositoryError::NoSuchFileInBackup(..)) => continue, Err(err) => return Err(err) } } let mut versions: Vec<_> = versions.into_iter().map(|(_, v)| v).collect(); - versions.sort_by_key(|v| v.1.modify_time); + versions.sort_by_key(|v| v.1.timestamp); Ok(versions) } diff --git a/src/repository/backup_file.rs b/src/repository/backup_file.rs index 3cf2ff4..3369876 100644 --- a/src/repository/backup_file.rs +++ b/src/repository/backup_file.rs @@ -89,7 +89,7 @@ pub struct Backup { pub path: String, pub config: Config, } -serde_impl!(Backup(u8) { +serde_impl!(Backup(u8?) 
{ root: Vec => 0, total_data_size: u64 => 1, changed_data_size: u64 => 2, diff --git a/src/repository/config.rs b/src/repository/config.rs index b8fffaf..d8df65c 100644 --- a/src/repository/config.rs +++ b/src/repository/config.rs @@ -149,7 +149,7 @@ serde_impl!(ConfigYaml(String) { -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Eq, PartialEq)] pub struct Config { pub compression: Option, pub encryption: Option, diff --git a/src/repository/metadata.rs b/src/repository/metadata.rs index 5b356cd..dc10eea 100644 --- a/src/repository/metadata.rs +++ b/src/repository/metadata.rs @@ -112,7 +112,7 @@ pub struct Inode { pub user: u32, pub group: u32, pub __old_access_time: i64, - pub modify_time: i64, + pub timestamp: i64, pub __old_create_time: i64, pub symlink_target: Option, pub contents: Option, @@ -128,7 +128,7 @@ impl Default for Inode { user: 1000, group: 1000, __old_access_time: 0, - modify_time: 0, + timestamp: 0, __old_create_time: 0, symlink_target: None, contents: None, @@ -136,7 +136,7 @@ impl Default for Inode { } } } -serde_impl!(Inode(u8) { +serde_impl!(Inode(u8?) 
{ name: String => 0, size: u64 => 1, file_type: FileType => 2, @@ -144,7 +144,7 @@ serde_impl!(Inode(u8) { user: u32 => 4, group: u32 => 5, __old_access_time: i64 => 6, - modify_time: i64 => 7, + timestamp: i64 => 7, __old_create_time: i64 => 8, symlink_target: Option => 9, contents: Option => 10, @@ -177,7 +177,7 @@ impl Inode { inode.mode = meta.st_mode(); inode.user = meta.st_uid(); inode.group = meta.st_gid(); - inode.modify_time = meta.st_mtime(); + inode.timestamp = meta.st_mtime(); Ok(inode) } @@ -204,11 +204,8 @@ impl Inode { &full_path, Permissions::from_mode(self.mode) ).map_err(|e| InodeError::SetPermissions(e, full_path.clone(), self.mode))); - try!(filetime::set_file_times( - &full_path, - FileTime::from_seconds_since_1970(self.modify_time as u64, 0), - FileTime::from_seconds_since_1970(self.modify_time as u64, 0) - ).map_err(|e| InodeError::SetTimes(e, full_path.clone()))); + let time = FileTime::from_seconds_since_1970(self.timestamp as u64, 0); + try!(filetime::set_file_times(&full_path, time, time).map_err(|e| InodeError::SetTimes(e, full_path.clone()))); try!(chown(&full_path, self.user, self.group).map_err(|e| InodeError::SetOwnership(e, full_path.clone()))); Ok(file) } @@ -216,11 +213,11 @@ impl Inode { pub fn is_same_meta(&self, other: &Inode) -> bool { self.file_type == other.file_type && self.size == other.size && self.mode == other.mode && self.user == other.user && self.group == other.group && self.name == other.name - && self.modify_time == other.modify_time && self.symlink_target == other.symlink_target + && self.timestamp == other.timestamp && self.symlink_target == other.symlink_target } pub fn is_same_meta_quick(&self, other: &Inode) -> bool { - self.modify_time == other.modify_time + self.timestamp == other.timestamp && self.file_type == other.file_type && self.size == other.size } diff --git a/src/repository/mod.rs b/src/repository/mod.rs index 3febbd2..dbe69ce 100644 --- a/src/repository/mod.rs +++ b/src/repository/mod.rs @@ -29,6 
+29,7 @@ pub use self::info::{RepositoryInfo, BundleAnalysis}; use self::bundle_map::BundleMap; +const REPOSITORY_README: &'static [u8] = include_bytes!("../../docs/repository_readme.md"); const DEFAULT_EXCLUDES: &'static [u8] = include_bytes!("../../excludes.default"); @@ -59,6 +60,8 @@ impl Repository { try!(fs::create_dir(path.join("keys"))); let crypto = Arc::new(Mutex::new(try!(Crypto::open(path.join("keys"))))); try!(symlink(remote, path.join("remote"))); + let mut remote_readme = try!(File::create(path.join("remote/README.md"))); + try!(remote_readme.write_all(REPOSITORY_README)); try!(fs::create_dir_all(path.join("remote/locks"))); let locks = LockFolder::new(path.join("remote/locks")); let bundles = try!(BundleDb::create( diff --git a/src/util/compression.rs b/src/util/compression.rs index 1d9c780..4657c78 100644 --- a/src/util/compression.rs +++ b/src/util/compression.rs @@ -35,7 +35,7 @@ quick_error!{ } } -#[derive(Clone, Debug, Copy)] +#[derive(Clone, Debug, Copy, Eq, PartialEq)] pub enum CompressionAlgo { Deflate, // Standardized Brotli, // Good speed and ratio @@ -50,7 +50,7 @@ serde_impl!(CompressionAlgo(u8) { }); -#[derive(Clone, Debug)] +#[derive(Clone, Debug, Eq, PartialEq)] pub struct Compression { algo: CompressionAlgo, level: u8 diff --git a/src/util/hash.rs b/src/util/hash.rs index 526332c..494e527 100644 --- a/src/util/hash.rs +++ b/src/util/hash.rs @@ -94,7 +94,7 @@ impl Deserialize for Hash { } -#[derive(Debug, Clone, Copy)] +#[derive(Debug, Clone, Copy, Eq, PartialEq)] pub enum HashMethod { Blake2, Murmur3