Browse Source

More efficient chunk list encoding

pull/10/head
Dennis Schwerdel 5 years ago committed by Dennis Schwerdel
parent
commit
e67ddbb275
  1. 1
      Cargo.lock
  2. 1
      Cargo.toml
  3. 131
      src/bundle.rs
  4. 12
      src/cli/args.rs
  5. 2
      src/cli/mod.rs
  6. 2
      src/main.rs
  7. 4
      src/repository/backup.rs
  8. 17
      src/repository/basic_io.rs
  9. 19
      src/repository/config.rs
  10. 19
      src/repository/metadata.rs
  11. 3
      src/repository/mod.rs
  12. 125
      src/util/chunk.rs
  13. 33
      src/util/hash.rs
  14. 5
      src/util/mod.rs

1
Cargo.lock generated

@ -3,6 +3,7 @@ name = "zvault"
version = "0.1.0"
dependencies = [
"blake2-rfc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"clap 2.21.1 (registry+https://github.com/rust-lang/crates.io-index)",
"log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",

1
Cargo.toml

@ -17,3 +17,4 @@ rustc-serialize = "0.3"
chrono = "0.3"
clap = "2.19"
log = "0.3"
byteorder = "1.0"

131
src/bundle.rs

@ -1,9 +1,9 @@
use std::path::{Path, PathBuf};
use std::collections::HashMap;
use std::fs::{self, File};
use std::io::{self, Read, Write, Seek, SeekFrom, BufWriter, BufReader, Cursor};
use std::io::{self, Read, Write, Seek, SeekFrom, BufWriter, BufReader};
use std::cmp::max;
use std::fmt::{self, Debug, Write as FmtWrite};
use std::fmt::{self, Debug};
use std::sync::{Arc, Mutex};
use serde::{self, Serialize, Deserialize};
@ -86,29 +86,25 @@ quick_error!{
#[derive(Hash, PartialEq, Eq, Clone, Default)]
pub struct BundleId(pub Vec<u8>);
pub struct BundleId(pub Hash);
impl Serialize for BundleId {
fn serialize<S: serde::Serializer>(&self, ser: S) -> Result<S::Ok, S::Error> {
ser.serialize_bytes(&self.0)
self.0.serialize(ser)
}
}
impl Deserialize for BundleId {
fn deserialize<D: serde::Deserializer>(de: D) -> Result<Self, D::Error> {
let bytes = try!(msgpack::Bytes::deserialize(de));
Ok(BundleId(bytes.into()))
let hash = try!(Hash::deserialize(de));
Ok(BundleId(hash))
}
}
impl BundleId {
#[inline]
fn to_string(&self) -> String {
let mut buf = String::with_capacity(self.0.len()*2);
for b in &self.0 {
write!(&mut buf, "{:02x}", b).unwrap()
}
buf
self.0.to_string()
}
}
@ -144,11 +140,10 @@ pub struct BundleInfo {
pub compression: Option<Compression>,
pub encryption: Option<Encryption>,
pub hash_method: HashMethod,
pub checksum: Checksum,
pub raw_size: usize,
pub encoded_size: usize,
pub chunk_count: usize,
pub contents_info_size: usize
pub chunk_info_size: usize
}
serde_impl!(BundleInfo(u64) {
id: BundleId => 0,
@ -156,45 +151,32 @@ serde_impl!(BundleInfo(u64) {
compression: Option<Compression> => 2,
encryption: Option<Encryption> => 3,
hash_method: HashMethod => 4,
checksum: Checksum => 5,
raw_size: usize => 6,
encoded_size: usize => 7,
chunk_count: usize => 8,
contents_info_size: usize => 9
chunk_info_size: usize => 9
});
impl Default for BundleInfo {
fn default() -> Self {
BundleInfo {
id: BundleId(vec![]),
id: BundleId(Hash::empty()),
compression: None,
encryption: None,
hash_method: HashMethod::Blake2,
checksum: (ChecksumType::Blake2_256, msgpack::Bytes::new()),
raw_size: 0,
encoded_size: 0,
chunk_count: 0,
mode: BundleMode::Content,
contents_info_size: 0
chunk_info_size: 0
}
}
}
#[derive(Clone, Default)]
pub struct BundleContentInfo {
pub chunk_sizes: Vec<usize>,
pub chunk_hashes: Vec<Hash>
}
serde_impl!(BundleContentInfo(u64) {
chunk_sizes: Vec<usize> => 0,
chunk_hashes: Vec<Hash> => 1
});
pub struct Bundle {
pub info: BundleInfo,
pub contents: BundleContentInfo,
pub chunks: ChunkList,
pub version: u8,
pub path: PathBuf,
crypto: Arc<Mutex<Crypto>>,
@ -203,16 +185,16 @@ pub struct Bundle {
}
impl Bundle {
fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc<Mutex<Crypto>>, info: BundleInfo, contents: BundleContentInfo) -> Self {
let mut chunk_positions = Vec::with_capacity(contents.chunk_sizes.len());
fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc<Mutex<Crypto>>, info: BundleInfo, chunks: ChunkList) -> Self {
let mut chunk_positions = Vec::with_capacity(chunks.len());
let mut pos = 0;
for len in &contents.chunk_sizes {
for &(_, len) in (&chunks).iter() {
chunk_positions.push(pos);
pos += *len;
pos += len as usize;
}
Bundle {
info: info,
contents: contents,
chunks: chunks,
version: version,
path: path,
crypto: crypto,
@ -239,16 +221,15 @@ impl Bundle {
}
let header: BundleInfo = try!(msgpack::decode_from_stream(&mut file)
.map_err(|e| BundleError::Decode(e, path.clone())));
let mut contents_data = Vec::with_capacity(header.contents_info_size);
contents_data.resize(header.contents_info_size, 0);
try!(file.read_exact(&mut contents_data).map_err(|e| BundleError::Read(e, path.clone())));
let mut chunk_data = Vec::with_capacity(header.chunk_info_size);
chunk_data.resize(header.chunk_info_size, 0);
try!(file.read_exact(&mut chunk_data).map_err(|e| BundleError::Read(e, path.clone())));
if let Some(ref encryption) = header.encryption {
contents_data = try!(crypto.lock().unwrap().decrypt(encryption.clone(), &contents_data));
chunk_data = try!(crypto.lock().unwrap().decrypt(encryption.clone(), &chunk_data));
}
let contents = try!(msgpack::decode_from_stream(&mut Cursor::new(&contents_data))
.map_err(|e| BundleError::Decode(e, path.clone())));
let chunks = ChunkList::read_from(&chunk_data);
let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize;
Ok(Bundle::new(path, version, content_start, crypto, header, contents))
Ok(Bundle::new(path, version, content_start, crypto, header, chunks))
}
#[inline]
@ -281,16 +262,16 @@ impl Bundle {
if id >= self.info.chunk_count {
return Err(BundleError::NoSuchChunk(self.id(), id))
}
Ok((self.chunk_positions[id], self.contents.chunk_sizes[id]))
Ok((self.chunk_positions[id], self.chunks[id].1 as usize))
}
pub fn check(&self, full: bool) -> Result<(), BundleError> {
//FIXME: adapt to new format
if self.info.chunk_count != self.contents.chunk_sizes.len() {
if self.info.chunk_count != self.chunks.len() {
return Err(BundleError::Integrity(self.id(),
"Chunk list size does not match chunk count"))
}
if self.contents.chunk_sizes.iter().sum::<usize>() != self.info.raw_size {
if self.chunks.iter().map(|c| c.1 as usize).sum::<usize>() != self.info.raw_size {
return Err(BundleError::Integrity(self.id(),
"Individual chunk sizes do not add up to total size"))
}
@ -331,16 +312,14 @@ impl Debug for Bundle {
pub struct BundleWriter {
mode: BundleMode,
hash_method: HashMethod,
hashes: Vec<Hash>,
data: Vec<u8>,
compression: Option<Compression>,
compression_stream: Option<CompressionStream>,
encryption: Option<Encryption>,
crypto: Arc<Mutex<Crypto>>,
checksum: ChecksumCreator,
raw_size: usize,
chunk_count: usize,
chunk_sizes: Vec<usize>
chunks: ChunkList,
}
impl BundleWriter {
@ -349,8 +328,7 @@ impl BundleWriter {
hash_method: HashMethod,
compression: Option<Compression>,
encryption: Option<Encryption>,
crypto: Arc<Mutex<Crypto>>,
checksum: ChecksumType
crypto: Arc<Mutex<Crypto>>
) -> Result<Self, BundleError> {
let compression_stream = match compression {
Some(ref compression) => Some(try!(compression.compress_stream())),
@ -359,16 +337,14 @@ impl BundleWriter {
Ok(BundleWriter {
mode: mode,
hash_method: hash_method,
hashes: vec![],
data: vec![],
compression: compression,
compression_stream: compression_stream,
encryption: encryption,
crypto: crypto,
checksum: ChecksumCreator::new(checksum),
raw_size: 0,
chunk_count: 0,
chunk_sizes: vec![]
chunks: ChunkList::new()
})
}
@ -378,11 +354,9 @@ impl BundleWriter {
} else {
self.data.extend_from_slice(chunk)
}
self.checksum.update(chunk);
self.raw_size += chunk.len();
self.chunk_count += 1;
self.chunk_sizes.push(chunk.len());
self.hashes.push(hash);
self.chunks.push((hash, chunk.len() as u32));
Ok(self.chunk_count-1)
}
@ -394,42 +368,35 @@ impl BundleWriter {
self.data = try!(self.crypto.lock().unwrap().encrypt(encryption.clone(), &self.data));
}
let encoded_size = self.data.len();
let checksum = self.checksum.finish();
let id = BundleId(checksum.1.to_vec());
let mut chunk_data = Vec::with_capacity(self.chunks.encoded_size());
self.chunks.write_to(&mut chunk_data).unwrap();
let id = BundleId(self.hash_method.hash(&chunk_data));
if let Some(ref encryption) = self.encryption {
chunk_data = try!(self.crypto.lock().unwrap().encrypt(encryption.clone(), &chunk_data));
}
let (folder, file) = db.bundle_path(&id);
let path = folder.join(file);
try!(fs::create_dir_all(&folder).map_err(|e| BundleError::Write(e, path.clone())));
let mut file = BufWriter::new(try!(File::create(&path).map_err(|e| BundleError::Write(e, path.clone()))));
try!(file.write_all(&HEADER_STRING).map_err(|e| BundleError::Write(e, path.clone())));
try!(file.write_all(&[HEADER_VERSION]).map_err(|e| BundleError::Write(e, path.clone())));
let contents = BundleContentInfo {
chunk_sizes: self.chunk_sizes,
chunk_hashes: self.hashes
};
let mut contents_data = Vec::new();
try!(msgpack::encode_to_stream(&contents, &mut contents_data)
.map_err(|e| BundleError::Encode(e, path.clone())));
if let Some(ref encryption) = self.encryption {
contents_data = try!(self.crypto.lock().unwrap().encrypt(encryption.clone(), &contents_data));
}
let header = BundleInfo {
mode: self.mode,
hash_method: self.hash_method,
checksum: checksum,
compression: self.compression,
encryption: self.encryption,
chunk_count: self.chunk_count,
id: id.clone(),
raw_size: self.raw_size,
encoded_size: encoded_size,
contents_info_size: contents_data.len()
chunk_info_size: chunk_data.len()
};
try!(msgpack::encode_to_stream(&header, &mut file)
.map_err(|e| BundleError::Encode(e, path.clone())));
try!(file.write_all(&contents_data).map_err(|e| BundleError::Write(e, path.clone())));
try!(file.write_all(&chunk_data).map_err(|e| BundleError::Write(e, path.clone())));
let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize;
try!(file.write_all(&self.data).map_err(|e| BundleError::Write(e, path.clone())));
Ok(Bundle::new(path, HEADER_VERSION, content_start, self.crypto, header, contents))
Ok(Bundle::new(path, HEADER_VERSION, content_start, self.crypto, header, self.chunks))
}
#[inline]
@ -449,21 +416,19 @@ pub struct BundleDb {
compression: Option<Compression>,
encryption: Option<Encryption>,
crypto: Arc<Mutex<Crypto>>,
checksum: ChecksumType,
bundles: HashMap<BundleId, Bundle>,
bundle_cache: LruCache<BundleId, Vec<u8>>
}
impl BundleDb {
fn new(path: PathBuf, compression: Option<Compression>, encryption: Option<Encryption>, checksum: ChecksumType) -> Self {
fn new(path: PathBuf, compression: Option<Compression>, encryption: Option<Encryption>) -> Self {
BundleDb {
path: path,
compression:
compression,
crypto: Arc::new(Mutex::new(Crypto::new())),
encryption: encryption,
checksum: checksum,
bundles: HashMap::new(),
bundle_cache: LruCache::new(5, 10)
}
@ -504,33 +469,33 @@ impl BundleDb {
}
#[inline]
pub fn open<P: AsRef<Path>>(path: P, compression: Option<Compression>, encryption: Option<Encryption>, checksum: ChecksumType) -> Result<Self, BundleError> {
pub fn open<P: AsRef<Path>>(path: P, compression: Option<Compression>, encryption: Option<Encryption>) -> Result<Self, BundleError> {
let path = path.as_ref().to_owned();
let mut self_ = Self::new(path, compression, encryption, checksum);
let mut self_ = Self::new(path, compression, encryption);
try!(self_.load_bundle_list());
Ok(self_)
}
#[inline]
pub fn create<P: AsRef<Path>>(path: P, compression: Option<Compression>, encryption: Option<Encryption>, checksum: ChecksumType) -> Result<Self, BundleError> {
pub fn create<P: AsRef<Path>>(path: P, compression: Option<Compression>, encryption: Option<Encryption>) -> Result<Self, BundleError> {
let path = path.as_ref().to_owned();
try!(fs::create_dir_all(&path)
.map_err(|e| BundleError::Write(e, path.clone())));
Ok(Self::new(path, compression, encryption, checksum))
Ok(Self::new(path, compression, encryption))
}
#[inline]
pub fn open_or_create<P: AsRef<Path>>(path: P, compression: Option<Compression>, encryption: Option<Encryption>, checksum: ChecksumType) -> Result<Self, BundleError> {
pub fn open_or_create<P: AsRef<Path>>(path: P, compression: Option<Compression>, encryption: Option<Encryption>) -> Result<Self, BundleError> {
if path.as_ref().exists() {
Self::open(path, compression, encryption, checksum)
Self::open(path, compression, encryption)
} else {
Self::create(path, compression, encryption, checksum)
Self::create(path, compression, encryption)
}
}
#[inline]
pub fn create_bundle(&self, mode: BundleMode, hash_method: HashMethod) -> Result<BundleWriter, BundleError> {
BundleWriter::new(mode, hash_method, self.compression.clone(), self.encryption.clone(), self.crypto.clone(), self.checksum)
BundleWriter::new(mode, hash_method, self.compression.clone(), self.encryption.clone(), self.crypto.clone())
}
pub fn get_chunk(&mut self, bundle_id: &BundleId, id: usize) -> Result<Vec<u8>, BundleError> {

12
src/cli/args.rs

@ -1,5 +1,5 @@
use ::chunker::ChunkerType;
use ::util::{Compression, HashMethod, ChecksumType};
use ::util::{Compression, HashMethod};
use std::process::exit;
@ -115,16 +115,6 @@ fn parse_compression(val: Option<&str>) -> Option<Compression> {
}
}
#[allow(dead_code)]
fn parse_checksum(val: Option<&str>) -> ChecksumType {
if let Ok(checksum) = ChecksumType::from(val.unwrap_or("blake2")) {
checksum
} else {
error!("Invalid checksum method: {}", val.unwrap());
exit(1);
}
}
fn parse_hash(val: Option<&str>) -> HashMethod {
if let Ok(hash) = HashMethod::from(val.unwrap_or("blake2")) {
hash

2
src/cli/mod.rs

@ -6,7 +6,6 @@ use chrono::prelude::*;
use std::process::exit;
use ::repository::{Repository, Config, Backup};
use ::util::ChecksumType;
use ::util::cli::*;
use self::args::Arguments;
@ -40,7 +39,6 @@ pub fn run() {
Arguments::Init{repo_path, bundle_size, chunker, compression, hash} => {
Repository::create(repo_path, Config {
bundle_size: bundle_size,
checksum: ChecksumType::Blake2_256,
chunker: chunker,
compression: compression,
hash: hash

2
src/main.rs

@ -11,6 +11,8 @@ extern crate rustc_serialize;
extern crate chrono;
#[macro_use] extern crate clap;
#[macro_use] extern crate log;
extern crate byteorder;
pub mod util;
pub mod bundle;

4
src/repository/backup.rs

@ -1,4 +1,4 @@
use super::{Repository, Chunk, RepositoryError};
use super::{Repository, RepositoryError};
use super::metadata::{FileType, Inode};
use ::util::*;
@ -12,7 +12,7 @@ use chrono::prelude::*;
#[derive(Default, Debug, Clone)]
pub struct Backup {
pub root: Vec<Chunk>,
pub root: ChunkList,
pub total_data_size: u64, // Sum of all raw sizes of all entities
pub changed_data_size: u64, // Sum of all raw sizes of all entities actively stored
pub deduplicated_data_size: u64, // Sum of all raw sizes of all new bundles

17
src/repository/basic_io.rs

@ -6,13 +6,10 @@ use ::index::Location;
use ::bundle::{BundleId, BundleMode};
use super::integrity::RepositoryIntegrityError;
use ::util::Hash;
use ::util::*;
use ::chunker::{IChunker, ChunkerStatus};
pub type Chunk = (Hash, usize);
impl Repository {
pub fn get_bundle_id(&self, id: u32) -> Result<BundleId, RepositoryError> {
if let Some(bundle_info) = self.bundle_map.get(id) {
@ -86,12 +83,12 @@ impl Repository {
}
#[inline]
pub fn put_data(&mut self, mode: BundleMode, data: &[u8]) -> Result<Vec<Chunk>, RepositoryError> {
pub fn put_data(&mut self, mode: BundleMode, data: &[u8]) -> Result<ChunkList, RepositoryError> {
let mut input = Cursor::new(data);
self.put_stream(mode, &mut input)
}
pub fn put_stream<R: Read>(&mut self, mode: BundleMode, data: &mut R) -> Result<Vec<Chunk>, RepositoryError> {
pub fn put_stream<R: Read>(&mut self, mode: BundleMode, data: &mut R) -> Result<ChunkList, RepositoryError> {
let avg_size = self.config.chunker.avg_size();
let mut chunks = Vec::new();
let mut chunk = Vec::with_capacity(avg_size * 2);
@ -102,17 +99,17 @@ impl Repository {
chunk = output.into_inner();
let hash = self.config.hash.hash(&chunk);
try!(self.put_chunk(mode, hash, &chunk));
chunks.push((hash, chunk.len()));
chunks.push((hash, chunk.len() as u32));
if res == ChunkerStatus::Finished {
break
}
}
Ok(chunks)
Ok(chunks.into())
}
#[inline]
pub fn get_data(&mut self, chunks: &[Chunk]) -> Result<Vec<u8>, RepositoryError> {
let mut data = Vec::with_capacity(chunks.iter().map(|&(_, size)| size).sum());
let mut data = Vec::with_capacity(chunks.iter().map(|&(_, size)| size).sum::<u32>() as usize);
try!(self.get_stream(chunks, &mut data));
Ok(data)
}
@ -121,7 +118,7 @@ impl Repository {
pub fn get_stream<W: Write>(&mut self, chunks: &[Chunk], w: &mut W) -> Result<(), RepositoryError> {
for &(ref hash, len) in chunks {
let data = try!(try!(self.get_chunk(*hash)).ok_or_else(|| RepositoryIntegrityError::MissingChunk(hash.clone())));
debug_assert_eq!(data.len(), len);
debug_assert_eq!(data.len() as u32, len);
try!(w.write_all(&data));
}
Ok(())

19
src/repository/config.rs

@ -40,19 +40,6 @@ impl HashMethod {
}
impl ChecksumType {
fn from_yaml(yaml: String) -> Result<Self, ConfigError> {
ChecksumType::from(&yaml).map_err(ConfigError::Parse)
}
fn to_yaml(&self) -> String {
self.name().to_string()
}
}
struct ChunkerYaml {
method: String,
avg_size: usize,
@ -107,7 +94,6 @@ struct ConfigYaml {
compression: Option<String>,
bundle_size: usize,
chunker: ChunkerYaml,
checksum: String,
hash: String,
}
impl Default for ConfigYaml {
@ -116,7 +102,6 @@ impl Default for ConfigYaml {
compression: Some("brotli/5".to_string()),
bundle_size: 25*1024*1024,
chunker: ChunkerYaml::default(),
checksum: "blake2_256".to_string(),
hash: "blake2".to_string()
}
}
@ -125,7 +110,6 @@ serde_impl!(ConfigYaml(String) {
compression: Option<String> => "compression",
bundle_size: usize => "bundle_size",
chunker: ChunkerYaml => "chunker",
checksum: String => "checksum",
hash: String => "hash"
});
@ -136,7 +120,6 @@ pub struct Config {
pub compression: Option<Compression>,
pub bundle_size: usize,
pub chunker: ChunkerType,
pub checksum: ChecksumType,
pub hash: HashMethod
}
impl Config {
@ -150,7 +133,6 @@ impl Config {
compression: compression,
bundle_size: yaml.bundle_size,
chunker: try!(ChunkerType::from_yaml(yaml.chunker)),
checksum: try!(ChecksumType::from_yaml(yaml.checksum)),
hash: try!(HashMethod::from_yaml(yaml.hash))
})
}
@ -160,7 +142,6 @@ impl Config {
compression: self.compression.as_ref().map(|c| c.to_yaml()),
bundle_size: self.bundle_size,
chunker: self.chunker.to_yaml(),
checksum: self.checksum.to_yaml(),
hash: self.hash.to_yaml()
}
}

19
src/repository/metadata.rs

@ -6,7 +6,7 @@ use std::os::unix::fs::{PermissionsExt, symlink};
use std::io::{Read, Write};
use ::util::*;
use super::{Repository, RepositoryError, Chunk};
use super::{Repository, RepositoryError};
use super::integrity::RepositoryIntegrityError;
use ::bundle::BundleMode;
@ -27,8 +27,8 @@ serde_impl!(FileType(u8) {
#[derive(Debug)]
pub enum FileContents {
Inline(msgpack::Bytes),
ChunkedDirect(Vec<Chunk>),
ChunkedIndirect(Vec<Chunk>)
ChunkedDirect(ChunkList),
ChunkedIndirect(ChunkList)
}
serde_impl!(FileContents(u8) {
Inline(ByteBuf) => 0,
@ -50,7 +50,7 @@ pub struct Inode {
pub create_time: i64,
pub symlink_target: Option<String>,
pub contents: Option<FileContents>,
pub children: Option<HashMap<String, Vec<Chunk>>>
pub children: Option<HashMap<String, ChunkList>>
}
impl Default for Inode {
fn default() -> Self {
@ -82,7 +82,7 @@ serde_impl!(Inode(u8) {
create_time: i64 => 8,
symlink_target: Option<String> => 9,
contents: Option<FileContents> => 10,
children: HashMap<String, Vec<Chunk>> => 11
children: HashMap<String, ChunkList> => 11
});
impl Inode {
@ -162,8 +162,9 @@ impl Repository {
if chunks.len() < 10 {
inode.contents = Some(FileContents::ChunkedDirect(chunks));
} else {
let chunks_data = try!(msgpack::encode(&chunks));
chunks = try!(self.put_data(BundleMode::Content, &chunks_data));
let mut chunk_data = Vec::with_capacity(chunks.encoded_size());
chunks.write_to(&mut chunk_data).unwrap();
chunks = try!(self.put_data(BundleMode::Content, &chunk_data));
inode.contents = Some(FileContents::ChunkedIndirect(chunks));
}
}
@ -172,7 +173,7 @@ impl Repository {
}
#[inline]
pub fn put_inode(&mut self, inode: &Inode) -> Result<Vec<Chunk>, RepositoryError> {
pub fn put_inode(&mut self, inode: &Inode) -> Result<ChunkList, RepositoryError> {
self.put_data(BundleMode::Meta, &try!(msgpack::encode(inode)))
}
@ -194,7 +195,7 @@ impl Repository {
},
FileContents::ChunkedIndirect(ref chunks) => {
let chunk_data = try!(self.get_data(chunks));
let chunks: Vec<Chunk> = try!(msgpack::decode(&chunk_data));
let chunks = ChunkList::read_from(&chunk_data);
try!(self.get_stream(&chunks, &mut file));
}
}

3
src/repository/mod.rs

@ -19,7 +19,6 @@ use super::chunker::Chunker;
pub use self::error::RepositoryError;
pub use self::config::Config;
pub use self::metadata::{Inode, FileType};
pub use self::basic_io::Chunk;
pub use self::backup::Backup;
use self::bundle_map::BundleMap;
@ -46,7 +45,6 @@ impl Repository {
path.join("bundles"),
config.compression.clone(),
None, //FIXME: store encryption in config
config.checksum
));
let index = try!(Index::create(&path.join("index")));
try!(config.save(path.join("config.yaml")));
@ -74,7 +72,6 @@ impl Repository {
path.join("bundles"),
config.compression.clone(),
None, //FIXME: load encryption from config
config.checksum
));
let index = try!(Index::open(&path.join("index")));
let bundle_map = try!(BundleMap::load(path.join("bundles.map")));

125
src/util/chunk.rs

@ -0,0 +1,125 @@
use std::io::{self, Write, Read, Cursor};
use std::ops::{Deref, DerefMut};
use serde::{self, Serialize, Deserialize};
use serde::bytes::{Bytes, ByteBuf};
use serde::de::Error;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use super::Hash;
/// A single chunk reference: the chunk's content hash and its length in bytes.
pub type Chunk = (Hash, u32);

/// An ordered list of chunk references with a compact fixed-width binary
/// encoding: 20 bytes per entry (16-byte hash + little-endian u32 length),
/// see `write_to`/`read_from` below.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct ChunkList(Vec<Chunk>);
impl ChunkList {
    /// Creates an empty chunk list.
    #[inline]
    pub fn new() -> Self {
        ChunkList(vec![])
    }

    /// Creates an empty chunk list with room for `num` entries.
    #[inline]
    pub fn with_capacity(num: usize) -> Self {
        ChunkList(Vec::with_capacity(num))
    }

    /// Number of chunks in the list.
    #[inline]
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Returns `true` when the list holds no chunks.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Appends a chunk to the end of the list.
    #[inline]
    pub fn push(&mut self, chunk: Chunk) {
        self.0.push(chunk)
    }

    /// Writes the binary encoding: for every chunk, its 16-byte hash
    /// followed by the length as a little-endian u32 (20 bytes per entry).
    #[inline]
    pub fn write_to(&self, dst: &mut Write) -> Result<(), io::Error> {
        for &(ref hash, len) in &self.0 {
            try!(hash.write_to(dst));
            try!(dst.write_u32::<LittleEndian>(len));
        }
        Ok(())
    }

    /// Reads exactly `n` chunks in the binary encoding from `src`.
    #[inline]
    pub fn read_n_from(n: usize, src: &mut Read) -> Result<Self, io::Error> {
        let mut list = ChunkList::with_capacity(n);
        for _ in 0..n {
            let hash = try!(Hash::read_from(src));
            let len = try!(src.read_u32::<LittleEndian>());
            list.push((hash, len));
        }
        Ok(list)
    }

    /// Decodes a chunk list from a byte slice. Trailing bytes that do not
    /// form a full 20-byte entry are dropped with a warning.
    #[inline]
    pub fn read_from(src: &[u8]) -> Self {
        if src.len() % 20 != 0 {
            warn!("Reading truncated chunk list");
        }
        // Reading n*20 bytes from a slice of at least that size cannot fail.
        ChunkList::read_n_from(src.len() / 20, &mut Cursor::new(src)).unwrap()
    }

    /// Size in bytes of the binary encoding (20 bytes per chunk).
    #[inline]
    pub fn encoded_size(&self) -> usize {
        20 * self.0.len()
    }
}
impl Default for ChunkList {
#[inline]
fn default() -> Self {
ChunkList(Vec::new())
}
}
impl From<Vec<Chunk>> for ChunkList {
    /// Wraps an existing vector of chunks without copying.
    fn from(chunks: Vec<Chunk>) -> Self {
        ChunkList(chunks)
    }
}
impl From<ChunkList> for Vec<Chunk> {
    /// Unwraps the list into its underlying vector without copying.
    ///
    /// Implemented as `From` rather than a hand-written `Into`: the standard
    /// blanket impl still provides `list.into()` for existing callers, and
    /// `Vec::from(list)` becomes available as well.
    fn from(list: ChunkList) -> Vec<Chunk> {
        list.0
    }
}
impl Deref for ChunkList {
    type Target = [Chunk];

    /// Lets a `ChunkList` be used wherever a chunk slice is expected.
    fn deref(&self) -> &[Chunk] {
        self.0.as_slice()
    }
}
impl DerefMut for ChunkList {
    /// Mutable slice access to the underlying chunks.
    fn deref_mut(&mut self) -> &mut [Chunk] {
        self.0.as_mut_slice()
    }
}
impl Serialize for ChunkList {
    /// Serializes the list as one byte string holding the compact binary
    /// encoding (see `write_to`), instead of a sequence of (hash, length)
    /// pairs — this is the "more efficient chunk list encoding".
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer {
        let mut buf = Vec::with_capacity(self.encoded_size());
        // Writing into a Vec cannot fail.
        self.write_to(&mut buf).unwrap();
        Bytes::from(&buf as &[u8]).serialize(serializer)
    }
}
impl Deserialize for ChunkList {
    /// Deserializes from a byte string produced by `serialize`; rejects
    /// input whose length is not a multiple of the 20-byte entry size.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error> where D: serde::Deserializer {
        let data: Vec<u8> = try!(ByteBuf::deserialize(deserializer)).into();
        if data.len() % 20 != 0 {
            return Err(D::Error::custom("Invalid chunk list length"));
        }
        // Length was validated above, so decoding cannot fail.
        Ok(ChunkList::read_n_from(data.len()/20, &mut Cursor::new(data)).unwrap())
    }
}

33
src/util/hash.rs

@ -4,15 +4,17 @@ use serde::bytes::{ByteBuf, Bytes};
use murmurhash3::murmurhash3_x64_128;
use blake2::blake2b::blake2b;
use byteorder::{LittleEndian, ByteOrder, WriteBytesExt, ReadBytesExt};
use std::mem;
use std::fmt;
use std::u64;
use std::io::{self, Read, Write};
#[repr(packed)]
#[derive(Clone, Copy, PartialEq, Hash, Eq)]
#[derive(Clone, Copy, PartialEq, Hash, Eq, Default)]
pub struct Hash {
pub high: u64,
pub low: u64
@ -28,6 +30,24 @@ impl Hash {
pub fn empty() -> Self {
Hash{high: 0, low: 0}
}
#[inline]
pub fn to_string(&self) -> String {
format!("{:016x}{:016x}", self.high, self.low)
}
#[inline]
pub fn write_to(&self, dst: &mut Write) -> Result<(), io::Error> {
try!(dst.write_u64::<LittleEndian>(self.high));
dst.write_u64::<LittleEndian>(self.low)
}
#[inline]
pub fn read_from(src: &mut Read) -> Result<Self, io::Error> {
let high = try!(src.read_u64::<LittleEndian>());
let low = try!(src.read_u64::<LittleEndian>());
Ok(Hash { high: high, low: low })
}
}
impl fmt::Display for Hash {
@ -47,8 +67,9 @@ impl fmt::Debug for Hash {
impl Serialize for Hash {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error> where S: serde::Serializer {
let hash = Hash{high: u64::to_le(self.high), low: u64::to_le(self.low)};
let dat: [u8; 16] = unsafe { mem::transmute(hash) };
let mut dat = [0u8; 16];
LittleEndian::write_u64(&mut dat[..8], self.high);
LittleEndian::write_u64(&mut dat[8..], self.low);
Bytes::from(&dat as &[u8]).serialize(serializer)
}
}
@ -59,8 +80,10 @@ impl Deserialize for Hash {
if dat.len() != 16 {
return Err(D::Error::custom("Invalid key length"));
}
let hash = unsafe { &*(dat.as_ptr() as *const Hash) };
Ok(Hash{high: u64::from_le(hash.high), low: u64::from_le(hash.low)})
Ok(Hash{
high: LittleEndian::read_u64(&dat[..8]),
low: LittleEndian::read_u64(&dat[8..])
})
}
}

5
src/util/mod.rs

@ -1,12 +1,13 @@
mod checksum;
//mod checksum; not used
mod compression;
mod encryption;
mod hash;
mod lru_cache;
mod chunk;
pub mod cli;
pub mod msgpack;
pub use self::checksum::*;
pub use self::chunk::*;
pub use self::compression::*;
pub use self::encryption::*;
pub use self::hash::*;

Loading…
Cancel
Save