From e67ddbb27541b1875b563b301a98a4f85b87689f Mon Sep 17 00:00:00 2001
From: Dennis Schwerdel
Date: Sat, 18 Mar 2017 15:41:59 +0100
Subject: [PATCH] More efficient chunk list encoding

---
 Cargo.lock                 |   1 +
 Cargo.toml                 |   1 +
 src/bundle.rs              | 131 ++++++++++++++-----------------
 src/cli/args.rs            |  12 +---
 src/cli/mod.rs             |   2 -
 src/main.rs                |   2 +
 src/repository/backup.rs   |   4 +-
 src/repository/basic_io.rs |  17 ++---
 src/repository/config.rs   |  19 ------
 src/repository/metadata.rs |  19 +++---
 src/repository/mod.rs      |   3 -
 src/util/chunk.rs          | 125 +++++++++++++++++++++++++++++++++++
 src/util/hash.rs           |  33 ++++++++--
 src/util/mod.rs            |   5 +-
 14 files changed, 228 insertions(+), 146 deletions(-)
 create mode 100644 src/util/chunk.rs

diff --git a/Cargo.lock b/Cargo.lock
index 309f16f..217c5fb 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,6 +3,7 @@ name = "zvault"
 version = "0.1.0"
 dependencies = [
  "blake2-rfc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
+ "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "chrono 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "clap 2.21.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
diff --git a/Cargo.toml b/Cargo.toml
index 05a29b9..c7d21d3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -17,3 +17,4 @@ rustc-serialize = "0.3"
 chrono = "0.3"
 clap = "2.19"
 log = "0.3"
+byteorder = "1.0"
diff --git a/src/bundle.rs b/src/bundle.rs
index e82831b..1ff0993 100644
--- a/src/bundle.rs
+++ b/src/bundle.rs
@@ -1,9 +1,9 @@
 use std::path::{Path, PathBuf};
 use std::collections::HashMap;
 use std::fs::{self, File};
-use std::io::{self, Read, Write, Seek, SeekFrom, BufWriter, BufReader, Cursor};
+use std::io::{self, Read, Write, Seek, SeekFrom, BufWriter, BufReader};
 use std::cmp::max;
-use std::fmt::{self, Debug, Write as FmtWrite};
+use std::fmt::{self, Debug};
 use std::sync::{Arc, Mutex};
 
 use serde::{self, Serialize, Deserialize};
@@ -86,29 +86,25 @@ quick_error!{
 
 
 #[derive(Hash, PartialEq, Eq, Clone, Default)]
-pub struct BundleId(pub Vec);
+pub struct BundleId(pub Hash);
 
 impl Serialize for BundleId {
     fn serialize(&self, ser: S) -> Result {
-        ser.serialize_bytes(&self.0)
+        self.0.serialize(ser)
     }
 }
 
 impl Deserialize for BundleId {
     fn deserialize(de: D) -> Result {
-        let bytes = try!(msgpack::Bytes::deserialize(de));
-        Ok(BundleId(bytes.into()))
+        let hash = try!(Hash::deserialize(de));
+        Ok(BundleId(hash))
     }
 }
 
 impl BundleId {
     #[inline]
     fn to_string(&self) -> String {
-        let mut buf = String::with_capacity(self.0.len()*2);
-        for b in &self.0 {
-            write!(&mut buf, "{:02x}", b).unwrap()
-        }
-        buf
+        self.0.to_string()
     }
 }
@@ -144,11 +140,10 @@ pub struct BundleInfo {
     pub compression: Option,
     pub encryption: Option,
     pub hash_method: HashMethod,
-    pub checksum: Checksum,
     pub raw_size: usize,
     pub encoded_size: usize,
     pub chunk_count: usize,
-    pub contents_info_size: usize
+    pub chunk_info_size: usize
 }
 serde_impl!(BundleInfo(u64) {
     id: BundleId => 0,
@@ -156,45 +151,32 @@ serde_impl!(BundleInfo(u64) {
     compression: Option => 2,
     encryption: Option => 3,
     hash_method: HashMethod => 4,
-    checksum: Checksum => 5,
     raw_size: usize => 6,
     encoded_size: usize => 7,
     chunk_count: usize => 8,
-    contents_info_size: usize => 9
+    chunk_info_size: usize => 9
 });
 
 impl Default for BundleInfo {
     fn default() -> Self {
         BundleInfo {
-            id: BundleId(vec![]),
+            id: BundleId(Hash::empty()),
             compression: None,
             encryption: None,
             hash_method: HashMethod::Blake2,
-            checksum: (ChecksumType::Blake2_256, msgpack::Bytes::new()),
             raw_size: 0,
             encoded_size: 0,
             chunk_count: 0,
             mode: BundleMode::Content,
-            contents_info_size: 0
+            chunk_info_size: 0
         }
     }
 }
 
-#[derive(Clone, Default)]
-pub struct BundleContentInfo {
-    pub chunk_sizes: Vec,
-    pub chunk_hashes: Vec
-}
-serde_impl!(BundleContentInfo(u64) {
-    chunk_sizes: Vec => 0,
-    chunk_hashes: Vec => 1
-});
-
 
 pub struct Bundle {
     pub info: BundleInfo,
-    pub contents: BundleContentInfo,
+    pub chunks: ChunkList,
     pub version: u8,
     pub path: PathBuf,
     crypto: Arc>,
@@ -203,16 +185,16 @@ pub struct Bundle {
 }
 
 impl Bundle {
-    fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc>, info: BundleInfo, contents: BundleContentInfo) -> Self {
-        let mut chunk_positions = Vec::with_capacity(contents.chunk_sizes.len());
+    fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc>, info: BundleInfo, chunks: ChunkList) -> Self {
+        let mut chunk_positions = Vec::with_capacity(chunks.len());
         let mut pos = 0;
-        for len in &contents.chunk_sizes {
+        for &(_, len) in (&chunks).iter() {
             chunk_positions.push(pos);
-            pos += *len;
+            pos += len as usize;
         }
         Bundle {
             info: info,
-            contents: contents,
+            chunks: chunks,
             version: version,
             path: path,
             crypto: crypto,
@@ -239,16 +221,15 @@ impl Bundle {
         }
         let header: BundleInfo = try!(msgpack::decode_from_stream(&mut file)
            .map_err(|e| BundleError::Decode(e, path.clone())));
-        let mut contents_data = Vec::with_capacity(header.contents_info_size);
-        contents_data.resize(header.contents_info_size, 0);
-        try!(file.read_exact(&mut contents_data).map_err(|e| BundleError::Read(e, path.clone())));
+        let mut chunk_data = Vec::with_capacity(header.chunk_info_size);
+        chunk_data.resize(header.chunk_info_size, 0);
+        try!(file.read_exact(&mut chunk_data).map_err(|e| BundleError::Read(e, path.clone())));
         if let Some(ref encryption) = header.encryption {
-            contents_data = try!(crypto.lock().unwrap().decrypt(encryption.clone(), &contents_data));
+            chunk_data = try!(crypto.lock().unwrap().decrypt(encryption.clone(), &chunk_data));
         }
-        let contents = try!(msgpack::decode_from_stream(&mut Cursor::new(&contents_data))
-            .map_err(|e| BundleError::Decode(e, path.clone())));
+        let chunks = ChunkList::read_from(&chunk_data);
         let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize;
-        Ok(Bundle::new(path, version, content_start, crypto, header, contents))
+        Ok(Bundle::new(path, version, content_start, crypto, header, chunks))
     }
 
     #[inline]
@@ -281,16 +262,16 @@ impl Bundle {
         if id >= self.info.chunk_count {
             return Err(BundleError::NoSuchChunk(self.id(), id))
         }
-        Ok((self.chunk_positions[id], self.contents.chunk_sizes[id]))
+        Ok((self.chunk_positions[id], self.chunks[id].1 as usize))
     }
 
     pub fn check(&self, full: bool) -> Result<(), BundleError> {
         //FIXME: adapt to new format
-        if self.info.chunk_count != self.contents.chunk_sizes.len() {
+        if self.info.chunk_count != self.chunks.len() {
             return Err(BundleError::Integrity(self.id(),
                 "Chunk list size does not match chunk count"))
         }
-        if self.contents.chunk_sizes.iter().sum::() != self.info.raw_size {
+        if self.chunks.iter().map(|c| c.1 as usize).sum::() != self.info.raw_size {
             return Err(BundleError::Integrity(self.id(),
                 "Individual chunk sizes do not add up to total size"))
         }
@@ -331,16 +312,14 @@ impl Debug for Bundle {
 
 pub struct BundleWriter {
     mode: BundleMode,
     hash_method: HashMethod,
-    hashes: Vec,
     data: Vec,
     compression: Option,
     compression_stream: Option,
     encryption: Option,
     crypto: Arc>,
-    checksum: ChecksumCreator,
     raw_size: usize,
     chunk_count: usize,
-    chunk_sizes: Vec
+    chunks: ChunkList,
 }
 
 impl BundleWriter {
@@ -349,8 +328,7 @@ impl BundleWriter {
     pub fn new(
         mode: BundleMode,
         hash_method: HashMethod,
         compression: Option,
         encryption: Option,
-        crypto: Arc>,
-        checksum: ChecksumType
+        crypto: Arc>
     ) -> Result {
         let compression_stream = match compression {
             Some(ref compression) => Some(try!(compression.compress_stream())),
@@ -359,16 +337,14 @@ impl BundleWriter {
         Ok(BundleWriter {
             mode: mode,
             hash_method: hash_method,
-            hashes: vec![],
             data: vec![],
             compression: compression,
             compression_stream: compression_stream,
             encryption: encryption,
             crypto: crypto,
-            checksum: ChecksumCreator::new(checksum),
             raw_size: 0,
             chunk_count: 0,
-            chunk_sizes: vec![]
+            chunks: ChunkList::new()
         })
     }
 
@@ -378,11 +354,9 @@ impl BundleWriter {
         } else {
             self.data.extend_from_slice(chunk)
         }
-        self.checksum.update(chunk);
         self.raw_size += chunk.len();
         self.chunk_count += 1;
-        self.chunk_sizes.push(chunk.len());
-        self.hashes.push(hash);
+        self.chunks.push((hash, chunk.len() as u32));
         Ok(self.chunk_count-1)
     }
 
@@ -394,42 +368,35 @@ impl BundleWriter {
             self.data = try!(self.crypto.lock().unwrap().encrypt(encryption.clone(), &self.data));
         }
         let encoded_size = self.data.len();
-        let checksum = self.checksum.finish();
-        let id = BundleId(checksum.1.to_vec());
+        let mut chunk_data = Vec::with_capacity(self.chunks.encoded_size());
+        self.chunks.write_to(&mut chunk_data).unwrap();
+        let id = BundleId(self.hash_method.hash(&chunk_data));
+        if let Some(ref encryption) = self.encryption {
+            chunk_data = try!(self.crypto.lock().unwrap().encrypt(encryption.clone(), &chunk_data));
+        }
         let (folder, file) = db.bundle_path(&id);
         let path = folder.join(file);
         try!(fs::create_dir_all(&folder).map_err(|e| BundleError::Write(e, path.clone())));
         let mut file = BufWriter::new(try!(File::create(&path).map_err(|e| BundleError::Write(e, path.clone())))); 
         try!(file.write_all(&HEADER_STRING).map_err(|e| BundleError::Write(e, path.clone())));
         try!(file.write_all(&[HEADER_VERSION]).map_err(|e| BundleError::Write(e, path.clone())));
-        let contents = BundleContentInfo {
-            chunk_sizes: self.chunk_sizes,
-            chunk_hashes: self.hashes
-        };
-        let mut contents_data = Vec::new();
-        try!(msgpack::encode_to_stream(&contents, &mut contents_data)
-            .map_err(|e| BundleError::Encode(e, path.clone())));
-        if let Some(ref encryption) = self.encryption {
-            contents_data = try!(self.crypto.lock().unwrap().encrypt(encryption.clone(), &contents_data));
-        }
         let header = BundleInfo {
             mode: self.mode,
             hash_method: self.hash_method,
-            checksum: checksum,
             compression: self.compression,
             encryption: self.encryption,
             chunk_count: self.chunk_count,
             id: id.clone(),
             raw_size: self.raw_size,
             encoded_size: encoded_size,
-            contents_info_size: contents_data.len()
+            chunk_info_size: chunk_data.len()
         };
         try!(msgpack::encode_to_stream(&header, &mut file)
            .map_err(|e| BundleError::Encode(e, path.clone())));
-        try!(file.write_all(&contents_data).map_err(|e| BundleError::Write(e, path.clone())));
+        try!(file.write_all(&chunk_data).map_err(|e| BundleError::Write(e, path.clone())));
         let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize;
         try!(file.write_all(&self.data).map_err(|e| BundleError::Write(e, path.clone())));
-        Ok(Bundle::new(path, HEADER_VERSION, content_start, self.crypto, header, contents))
+        Ok(Bundle::new(path, HEADER_VERSION, content_start, self.crypto, header, self.chunks))
     }
 
     #[inline]
@@ -449,21 +416,19 @@ pub struct BundleDb {
     path: PathBuf,
     compression: Option,
     encryption: Option,
     crypto: Arc>,
-    checksum: ChecksumType,
     bundles: HashMap,
     bundle_cache: LruCache>
 }
 
 impl BundleDb {
-    fn new(path: PathBuf, compression: Option, encryption: Option, checksum: ChecksumType) -> Self {
+    fn new(path: PathBuf, compression: Option, encryption: Option) -> Self {
         BundleDb {
             path: path,
             compression: compression,
             crypto: Arc::new(Mutex::new(Crypto::new())),
             encryption: encryption,
-            checksum: checksum,
             bundles: HashMap::new(),
             bundle_cache: LruCache::new(5, 10)
         }
     }
@@ -504,33 +469,33 @@ impl BundleDb {
     }
 
     #[inline]
-    pub fn open>(path: P, compression: Option, encryption: Option, checksum: ChecksumType) -> Result {
+    pub fn open>(path: P, compression: Option, encryption: Option) -> Result {
         let path = path.as_ref().to_owned();
-        let mut self_ = Self::new(path, compression, encryption, checksum);
+        let mut self_ = Self::new(path, compression, encryption);
         try!(self_.load_bundle_list());
         Ok(self_)
     }
 
     #[inline]
-    pub fn create>(path: P, compression: Option, encryption: Option, checksum: ChecksumType) -> Result {
+    pub fn create>(path: P, compression: Option, encryption: Option) -> Result {
         let path = path.as_ref().to_owned();
         try!(fs::create_dir_all(&path)
            .map_err(|e| BundleError::Write(e, path.clone())));
-        Ok(Self::new(path, compression, encryption, checksum))
+        Ok(Self::new(path, compression, encryption))
     }
 
     #[inline]
-    pub fn open_or_create>(path: P, compression: Option, encryption: Option, checksum: ChecksumType) -> Result {
+    pub fn open_or_create>(path: P, compression: Option, encryption: Option) -> Result {
         if path.as_ref().exists() {
-            Self::open(path, compression, encryption, checksum)
+            Self::open(path, compression, encryption)
         } else {
-            Self::create(path, compression, encryption, checksum)
+            Self::create(path, compression, encryption)
         }
     }
 
     #[inline]
     pub fn create_bundle(&self, mode: BundleMode, hash_method: HashMethod) -> Result {
-        BundleWriter::new(mode, hash_method, self.compression.clone(), self.encryption.clone(), self.crypto.clone(), self.checksum)
+        BundleWriter::new(mode, hash_method, self.compression.clone(), self.encryption.clone(), self.crypto.clone())
     }
 
     pub fn get_chunk(&mut self, bundle_id: &BundleId, id: usize) -> Result, BundleError> {
diff --git a/src/cli/args.rs b/src/cli/args.rs
index 6286b23..9d8886d 100644
--- a/src/cli/args.rs
+++ b/src/cli/args.rs
@@ -1,5 +1,5 @@
 use ::chunker::ChunkerType;
-use ::util::{Compression, HashMethod, ChecksumType};
+use ::util::{Compression, HashMethod};
 
 use std::process::exit;
 
@@ -115,16 +115,6 @@ fn parse_compression(val: Option<&str>) -> Option {
     }
 }
 
-#[allow(dead_code)]
-fn parse_checksum(val: Option<&str>) -> ChecksumType {
-    if let Ok(checksum) = ChecksumType::from(val.unwrap_or("blake2")) {
-        checksum
-    } else {
-        error!("Invalid checksum method: {}", val.unwrap());
-        exit(1);
-    }
-}
-
 fn parse_hash(val: Option<&str>) -> HashMethod {
     if let Ok(hash) = HashMethod::from(val.unwrap_or("blake2")) {
         hash
diff --git a/src/cli/mod.rs b/src/cli/mod.rs
index fde3929..19f26fe 100644
--- a/src/cli/mod.rs
+++ b/src/cli/mod.rs
@@ -6,7 +6,6 @@ use chrono::prelude::*;
 use std::process::exit;
 
 use ::repository::{Repository, Config, Backup};
-use ::util::ChecksumType;
 use ::util::cli::*;
 use self::args::Arguments;
 
@@ -40,7 +39,6 @@ pub fn run() {
         Arguments::Init{repo_path, bundle_size, chunker, compression, hash} => {
             Repository::create(repo_path, Config {
                 bundle_size: bundle_size,
-                checksum: ChecksumType::Blake2_256,
                 chunker: chunker,
                 compression: compression,
                 hash: hash
diff --git a/src/main.rs b/src/main.rs
index 9cb46e1..424f60c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -11,6 +11,8 @@ extern crate rustc_serialize;
 extern crate chrono;
 #[macro_use] extern crate clap;
 #[macro_use] extern crate log;
+extern crate byteorder;
+
 
 pub mod util;
 pub mod bundle;
diff --git a/src/repository/backup.rs b/src/repository/backup.rs
index 378f8e0..3181d1b 100644
--- a/src/repository/backup.rs
+++ b/src/repository/backup.rs
@@ -1,4 +1,4 @@
-use super::{Repository, Chunk, RepositoryError};
+use super::{Repository, RepositoryError};
 use super::metadata::{FileType, Inode};
 use ::util::*;
 
@@ -12,7 +12,7 @@ use chrono::prelude::*;
 
 #[derive(Default, Debug, Clone)]
 pub struct Backup {
-    pub root: Vec,
+    pub root: ChunkList,
     pub total_data_size: u64, // Sum of all raw sizes of all entities
     pub changed_data_size: u64, // Sum of all raw sizes of all entities actively stored
     pub deduplicated_data_size: u64, // Sum of all raw sizes of all new bundles
diff --git a/src/repository/basic_io.rs b/src/repository/basic_io.rs
index 4c81c13..15a4c68 100644
--- a/src/repository/basic_io.rs
+++ b/src/repository/basic_io.rs
@@ -6,13 +6,10 @@
 use ::index::Location;
 use ::bundle::{BundleId, BundleMode};
 use super::integrity::RepositoryIntegrityError;
-use ::util::Hash;
+use ::util::*;
 use ::chunker::{IChunker, ChunkerStatus};
 
 
-pub type Chunk = (Hash, usize);
-
-
 impl Repository {
     pub fn get_bundle_id(&self, id: u32) -> Result {
         if let Some(bundle_info) = self.bundle_map.get(id) {
@@ -86,12 +83,12 @@ impl Repository {
     }
 
     #[inline]
-    pub fn put_data(&mut self, mode: BundleMode, data: &[u8]) -> Result, RepositoryError> {
+    pub fn put_data(&mut self, mode: BundleMode, data: &[u8]) -> Result {
         let mut input = Cursor::new(data);
         self.put_stream(mode, &mut input)
     }
 
-    pub fn put_stream(&mut self, mode: BundleMode, data: &mut R) -> Result, RepositoryError> {
+    pub fn put_stream(&mut self, mode: BundleMode, data: &mut R) -> Result {
         let avg_size = self.config.chunker.avg_size();
         let mut chunks = Vec::new();
         let mut chunk = Vec::with_capacity(avg_size * 2);
@@ -102,17 +99,17 @@ impl Repository {
             chunk = output.into_inner();
             let hash = self.config.hash.hash(&chunk);
             try!(self.put_chunk(mode, hash, &chunk));
-            chunks.push((hash, chunk.len()));
+            chunks.push((hash, chunk.len() as u32));
             if res == ChunkerStatus::Finished {
                 break
             }
         }
-        Ok(chunks)
+        Ok(chunks.into())
     }
 
     #[inline]
     pub fn get_data(&mut self, chunks: &[Chunk]) -> Result, RepositoryError> {
-        let mut data = Vec::with_capacity(chunks.iter().map(|&(_, size)| size).sum());
+        let mut data = Vec::with_capacity(chunks.iter().map(|&(_, size)| size).sum::() as usize);
         try!(self.get_stream(chunks, &mut data));
         Ok(data)
     }
@@ -121,7 +118,7 @@ impl Repository {
     pub fn get_stream(&mut self, chunks: &[Chunk], w: &mut W) -> Result<(), RepositoryError> {
         for &(ref hash, len) in chunks {
             let data = try!(try!(self.get_chunk(*hash)).ok_or_else(|| RepositoryIntegrityError::MissingChunk(hash.clone())));
-            debug_assert_eq!(data.len(), len);
+            debug_assert_eq!(data.len() as u32, len);
             try!(w.write_all(&data));
         }
         Ok(())
diff --git a/src/repository/config.rs b/src/repository/config.rs
index ac04966..40a4a6c 100644
--- a/src/repository/config.rs
+++ b/src/repository/config.rs
@@ -40,19 +40,6 @@ impl HashMethod {
 }
 
 
-
-impl ChecksumType {
-    fn from_yaml(yaml: String) -> Result {
-        ChecksumType::from(&yaml).map_err(ConfigError::Parse)
-    }
-
-    fn to_yaml(&self) -> String {
-        self.name().to_string()
-    }
-}
-
-
-
 struct ChunkerYaml {
     method: String,
     avg_size: usize,
@@ -107,7 +94,6 @@ struct ConfigYaml {
     compression: Option,
     bundle_size: usize,
     chunker: ChunkerYaml,
-    checksum: String,
     hash: String,
 }
 impl Default for ConfigYaml {
@@ -116,7 +102,6 @@ impl Default for ConfigYaml {
             compression: Some("brotli/5".to_string()),
             bundle_size: 25*1024*1024,
             chunker: ChunkerYaml::default(),
-            checksum: "blake2_256".to_string(),
             hash: "blake2".to_string()
         }
     }
@@ -125,7 +110,6 @@ serde_impl!(ConfigYaml(String) {
     compression: Option => "compression",
     bundle_size: usize => "bundle_size",
     chunker: ChunkerYaml => "chunker",
-    checksum: String => "checksum",
     hash: String => "hash"
 });
 
@@ -136,7 +120,6 @@ pub struct Config {
     pub compression: Option,
     pub bundle_size: usize,
     pub chunker: ChunkerType,
-    pub checksum: ChecksumType,
     pub hash: HashMethod
 }
 impl Config {
@@ -150,7 +133,6 @@ impl Config {
             compression: compression,
             bundle_size: yaml.bundle_size,
             chunker: try!(ChunkerType::from_yaml(yaml.chunker)),
-            checksum: try!(ChecksumType::from_yaml(yaml.checksum)),
             hash: try!(HashMethod::from_yaml(yaml.hash))
         })
     }
@@ -160,7 +142,6 @@ impl Config {
             compression: self.compression.as_ref().map(|c| c.to_yaml()),
             bundle_size: self.bundle_size,
             chunker: self.chunker.to_yaml(),
-            checksum: self.checksum.to_yaml(),
             hash: self.hash.to_yaml()
         }
     }
diff --git a/src/repository/metadata.rs b/src/repository/metadata.rs
index 50a5882..eaa54d8 100644
--- a/src/repository/metadata.rs
+++ b/src/repository/metadata.rs
@@ -6,7 +6,7 @@ use std::os::unix::fs::{PermissionsExt, symlink};
 use std::io::{Read, Write};
 
 use ::util::*;
-use super::{Repository, RepositoryError, Chunk};
+use super::{Repository, RepositoryError};
 use super::integrity::RepositoryIntegrityError;
 use ::bundle::BundleMode;
 
@@ -27,8 +27,8 @@ serde_impl!(FileType(u8) {
 #[derive(Debug)]
 pub enum FileContents {
     Inline(msgpack::Bytes),
-    ChunkedDirect(Vec),
-    ChunkedIndirect(Vec)
+    ChunkedDirect(ChunkList),
+    ChunkedIndirect(ChunkList)
 }
 serde_impl!(FileContents(u8) {
     Inline(ByteBuf) => 0,
@@ -50,7 +50,7 @@ pub struct Inode {
     pub create_time: i64,
     pub symlink_target: Option,
     pub contents: Option,
-    pub children: Option>>
+    pub children: Option>
 }
 impl Default for Inode {
     fn default() -> Self {
@@ -82,7 +82,7 @@ serde_impl!(Inode(u8) {
     create_time: i64 => 8,
     symlink_target: Option => 9,
     contents: Option => 10,
-    children: HashMap> => 11
+    children: HashMap => 11
 });
 
 impl Inode {
@@ -162,8 +162,9 @@ impl Repository {
             if chunks.len() < 10 {
                 inode.contents = Some(FileContents::ChunkedDirect(chunks));
             } else {
-                let chunks_data = try!(msgpack::encode(&chunks));
-                chunks = try!(self.put_data(BundleMode::Content, &chunks_data));
+                let mut chunk_data = Vec::with_capacity(chunks.encoded_size());
+                chunks.write_to(&mut chunk_data).unwrap();
+                chunks = try!(self.put_data(BundleMode::Content, &chunk_data));
                 inode.contents = Some(FileContents::ChunkedIndirect(chunks));
             }
         }
@@ -172,7 +173,7 @@ impl Repository {
     }
 
     #[inline]
-    pub fn put_inode(&mut self, inode: &Inode) -> Result, RepositoryError> {
+    pub fn put_inode(&mut self, inode: &Inode) -> Result {
         self.put_data(BundleMode::Meta, &try!(msgpack::encode(inode)))
     }
 
@@ -194,7 +195,7 @@ impl Repository {
             },
             FileContents::ChunkedIndirect(ref chunks) => {
                 let chunk_data = try!(self.get_data(chunks));
-                let chunks: Vec = try!(msgpack::decode(&chunk_data));
+                let chunks = ChunkList::read_from(&chunk_data);
                 try!(self.get_stream(&chunks, &mut file));
             }
         }
diff --git a/src/repository/mod.rs b/src/repository/mod.rs
index 7654265..ac5d119 100644
--- a/src/repository/mod.rs
+++ b/src/repository/mod.rs
@@ -19,7 +19,6 @@ use super::chunker::Chunker;
 pub use self::error::RepositoryError;
 pub use self::config::Config;
 pub use self::metadata::{Inode, FileType};
-pub use self::basic_io::Chunk;
 pub use self::backup::Backup;
 use self::bundle_map::BundleMap;
 
@@ -46,7 +45,6 @@ impl Repository {
             path.join("bundles"),
             config.compression.clone(),
             None, //FIXME: store encryption in config
-            config.checksum
         ));
         let index = try!(Index::create(&path.join("index")));
         try!(config.save(path.join("config.yaml")));
@@ -74,7 +72,6 @@ impl Repository {
             path.join("bundles"),
             config.compression.clone(),
             None, //FIXME: load encryption from config
-            config.checksum
         ));
         let index = try!(Index::open(&path.join("index")));
         let bundle_map = try!(BundleMap::load(path.join("bundles.map")));
diff --git a/src/util/chunk.rs b/src/util/chunk.rs
new file mode 100644
index 0000000..50d8642
--- /dev/null
+++ b/src/util/chunk.rs
@@ -0,0 +1,125 @@
+use std::io::{self, Write, Read, Cursor};
+use std::ops::{Deref, DerefMut};
+
+use serde::{self, Serialize, Deserialize};
+use serde::bytes::{Bytes, ByteBuf};
+use serde::de::Error;
+
+use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
+
+use super::Hash;
+
+
+pub type Chunk = (Hash, u32);
+
+
+#[derive(Debug, PartialEq, Eq, Hash, Clone)]
+pub struct ChunkList(Vec);
+
+impl ChunkList {
+    #[inline]
+    pub fn new() -> Self {
+        ChunkList(Vec::new())
+    }
+
+    #[inline]
+    pub fn with_capacity(num: usize) -> Self {
+        ChunkList(Vec::with_capacity(num))
+    }
+
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    #[inline]
+    pub fn push(&mut self, chunk: Chunk) {
+        self.0.push(chunk)
+    }
+
+    #[inline]
+    pub fn write_to(&self, dst: &mut Write) -> Result<(), io::Error> {
+        for chunk in &self.0 {
+            try!(chunk.0.write_to(dst));
+            try!(dst.write_u32::(chunk.1));
+        }
+        Ok(())
+    }
+
+    #[inline]
+    pub fn read_n_from(n: usize, src: &mut Read) -> Result {
+        let mut chunks = Vec::with_capacity(n);
+        for _ in 0..n {
+            let hash = try!(Hash::read_from(src));
+            let len = try!(src.read_u32::());
+            chunks.push((hash, len));
+        }
+        Ok(ChunkList(chunks))
+    }
+
+    #[inline]
+    pub fn read_from(src: &[u8]) -> Self {
+        if src.len() % 20 != 0 {
+            warn!("Reading truncated chunk list");
+        }
+        ChunkList::read_n_from(src.len()/20, &mut Cursor::new(src)).unwrap()
+    }
+
+    #[inline]
+    pub fn encoded_size(&self) -> usize {
+        self.0.len() * 20
+    }
+}
+
+impl Default for ChunkList {
+    #[inline]
+    fn default() -> Self {
+        ChunkList(Vec::new())
+    }
+}
+
+impl From> for ChunkList {
+    fn from(val: Vec) -> Self {
+        ChunkList(val)
+    }
+}
+
+impl Into> for ChunkList {
+    fn into(self) -> Vec {
+        self.0
+    }
+}
+
+impl Deref for ChunkList {
+    type Target = [Chunk];
+    fn deref(&self) -> &[Chunk] {
+        &self.0
+    }
+}
+
+impl DerefMut for ChunkList {
+    fn deref_mut(&mut self) -> &mut [Chunk] {
+        &mut self.0
+    }
+}
+
+impl Serialize for ChunkList {
+    fn serialize(&self, serializer: S) -> Result where S: serde::Serializer {
+        let mut buf = Vec::with_capacity(self.encoded_size());
+        self.write_to(&mut buf).unwrap();
+        Bytes::from(&buf as &[u8]).serialize(serializer)
+    }
+}
+
+impl Deserialize for ChunkList {
+    fn deserialize(deserializer: D) -> Result where D: serde::Deserializer {
+        let data: Vec = try!(ByteBuf::deserialize(deserializer)).into();
+        if data.len() % 20 != 0 {
+            return Err(D::Error::custom("Invalid chunk list length"));
+        }
+        Ok(ChunkList::read_n_from(data.len()/20, &mut Cursor::new(data)).unwrap())
+    }
+}
diff --git a/src/util/hash.rs b/src/util/hash.rs
index cc9a193..b21a65f 100644
--- a/src/util/hash.rs
+++ b/src/util/hash.rs
@@ -4,15 +4,17 @@
 use serde::bytes::{ByteBuf, Bytes};
 use murmurhash3::murmurhash3_x64_128;
 use blake2::blake2b::blake2b;
+use byteorder::{LittleEndian, ByteOrder, WriteBytesExt, ReadBytesExt};
 
 use std::mem;
 use std::fmt;
 use std::u64;
+use std::io::{self, Read, Write};
 
 
 #[repr(packed)]
-#[derive(Clone, Copy, PartialEq, Hash, Eq)]
+#[derive(Clone, Copy, PartialEq, Hash, Eq, Default)]
 pub struct Hash {
     pub high: u64,
     pub low: u64
@@ -28,6 +30,24 @@ impl Hash {
     pub fn empty() -> Self {
         Hash{high: 0, low: 0}
     }
+
+    #[inline]
+    pub fn to_string(&self) -> String {
+        format!("{:016x}{:016x}", self.high, self.low)
+    }
+
+    #[inline]
+    pub fn write_to(&self, dst: &mut Write) -> Result<(), io::Error> {
+        try!(dst.write_u64::(self.high));
+        dst.write_u64::(self.low)
+    }
+
+    #[inline]
+    pub fn read_from(src: &mut Read) -> Result {
+        let high = try!(src.read_u64::());
+        let low = try!(src.read_u64::());
+        Ok(Hash { high: high, low: low })
+    }
 }
 
 impl fmt::Display for Hash {
@@ -47,8 +67,9 @@ impl fmt::Debug for Hash {
 
 impl Serialize for Hash {
     fn serialize(&self, serializer: S) -> Result where S: serde::Serializer {
-        let hash = Hash{high: u64::to_le(self.high), low: u64::to_le(self.low)};
-        let dat: [u8; 16] = unsafe { mem::transmute(hash) };
+        let mut dat = [0u8; 16];
+        LittleEndian::write_u64(&mut dat[..8], self.high);
+        LittleEndian::write_u64(&mut dat[8..], self.low);
         Bytes::from(&dat as &[u8]).serialize(serializer)
     }
 }
@@ -59,8 +80,10 @@ impl Deserialize for Hash {
         if dat.len() != 16 {
             return Err(D::Error::custom("Invalid key length"));
         }
-        let hash = unsafe { &*(dat.as_ptr() as *const Hash) };
-        Ok(Hash{high: u64::from_le(hash.high), low: u64::from_le(hash.low)})
+        Ok(Hash{
+            high: LittleEndian::read_u64(&dat[..8]),
+            low: LittleEndian::read_u64(&dat[8..])
+        })
     }
 }
diff --git a/src/util/mod.rs b/src/util/mod.rs
index e560fda..1ba2c4b 100644
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@@ -1,12 +1,13 @@
-mod checksum;
+//mod checksum; not used
 mod compression;
 mod encryption;
 mod hash;
 mod lru_cache;
+mod chunk;
 
 pub mod cli;
 pub mod msgpack;
 
-pub use self::checksum::*;
+pub use self::chunk::*;
 pub use self::compression::*;
 pub use self::encryption::*;
 pub use self::hash::*;
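
The core of this change is the fixed-width chunk table in src/util/chunk.rs: every chunk is stored as a 20-byte record, the 128-bit hash as two little-endian u64 halves followed by a little-endian u32 length, replacing the msgpack-encoded BundleContentInfo with its two separate arrays. The following is a minimal standalone sketch of that record layout, assuming only the byteorder crate; the helper names are illustrative and are not zvault API, only the 16+4 byte framing mirrors ChunkList::write_to/read_from.

use std::io::{self, Cursor, Read, Write};
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};

// Simplified stand-in for zvault's Hash (two u64 halves).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct Hash { high: u64, low: u64 }

type Chunk = (Hash, u32);

// Encode chunks as consecutive 20-byte records: high (LE u64), low (LE u64), length (LE u32).
fn write_chunks(chunks: &[Chunk], dst: &mut Write) -> io::Result<()> {
    for &(hash, len) in chunks {
        try!(dst.write_u64::<LittleEndian>(hash.high));
        try!(dst.write_u64::<LittleEndian>(hash.low));
        try!(dst.write_u32::<LittleEndian>(len));
    }
    Ok(())
}

// Decode n records back into (hash, length) pairs.
fn read_chunks(n: usize, src: &mut Read) -> io::Result<Vec<Chunk>> {
    let mut chunks = Vec::with_capacity(n);
    for _ in 0..n {
        let high = try!(src.read_u64::<LittleEndian>());
        let low = try!(src.read_u64::<LittleEndian>());
        let len = try!(src.read_u32::<LittleEndian>());
        chunks.push((Hash { high: high, low: low }, len));
    }
    Ok(chunks)
}

fn main() {
    let chunks = vec![(Hash { high: 1, low: 2 }, 4096), (Hash { high: 3, low: 4 }, 8192)];
    let mut buf = Vec::new();
    write_chunks(&chunks, &mut buf).unwrap();
    assert_eq!(buf.len(), chunks.len() * 20);                 // fixed 20 bytes per chunk
    let decoded = read_chunks(buf.len() / 20, &mut Cursor::new(&buf)).unwrap();
    assert_eq!(decoded, chunks);
}

Because the size is exactly 20 bytes per chunk, the bundle header only needs chunk_info_size to know how many bytes to read_exact, and the writer can derive the bundle id by hashing the encoded table, which is why the separate running checksum is dropped.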
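
Only chunk lengths are stored on disk; Bundle::new rebuilds the start offset of each chunk with a running sum so that get_chunk_position can answer lookups without scanning. A small sketch of that bookkeeping, with a plain u64 standing in for the hash:

// Derive per-chunk start offsets from the (hash, length) list, as Bundle::new does.
type Chunk = (u64, u32); // (hash stand-in, length) for illustration only

fn chunk_positions(chunks: &[Chunk]) -> Vec<usize> {
    let mut positions = Vec::with_capacity(chunks.len());
    let mut pos = 0usize;
    for &(_, len) in chunks {
        positions.push(pos);      // chunk i starts at the sum of lengths 0..i
        pos += len as usize;
    }
    positions
}

fn main() {
    let chunks = vec![(0, 10), (1, 20), (2, 5)];
    assert_eq!(chunk_positions(&chunks), vec![0, 10, 30]);
}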
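
In src/util/hash.rs the serde implementation now packs the two u64 halves explicitly with byteorder instead of transmuting the struct, so the on-disk byte order no longer depends on unsafe layout assumptions. A minimal round-trip sketch of that packing, assuming only byteorder's ByteOrder trait:

use byteorder::{ByteOrder, LittleEndian};

// Pack the two u64 halves of a 128-bit hash into 16 little-endian bytes.
fn hash_to_bytes(high: u64, low: u64) -> [u8; 16] {
    let mut dat = [0u8; 16];
    LittleEndian::write_u64(&mut dat[..8], high);
    LittleEndian::write_u64(&mut dat[8..], low);
    dat
}

// Recover the halves from the 16-byte representation.
fn hash_from_bytes(dat: &[u8; 16]) -> (u64, u64) {
    (LittleEndian::read_u64(&dat[..8]), LittleEndian::read_u64(&dat[8..]))
}

fn main() {
    let dat = hash_to_bytes(0x0123_4567_89ab_cdef, 42);
    assert_eq!(hash_from_bytes(&dat), (0x0123_4567_89ab_cdef, 42));
}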