use std::path::{Path, PathBuf}; use std::collections::HashMap; use std::fs::{self, File}; use std::io::{self, Read, Write, Seek, SeekFrom, BufWriter, BufReader, Cursor}; use std::cmp::max; use std::fmt::{self, Debug, Write as FmtWrite}; use std::sync::{Arc, Mutex}; use serde::{self, Serialize, Deserialize}; use util::*; static HEADER_STRING: [u8; 7] = *b"zbundle"; static HEADER_VERSION: u8 = 1; /* Bundle format - Magic header + version - Encoded header structure (contains size of next structure) - Encoded contents structure (with chunk sizes and hashes) - Chunk data */ quick_error!{ #[derive(Debug)] pub enum BundleError { List(err: io::Error) { cause(err) description("Failed to list bundles") } Read(err: io::Error, path: PathBuf) { cause(err) description("Failed to read bundle") } Decode(err: msgpack::DecodeError, path: PathBuf) { cause(err) description("Failed to decode bundle header") } Write(err: io::Error, path: PathBuf) { cause(err) description("Failed to write bundle") } Encode(err: msgpack::EncodeError, path: PathBuf) { cause(err) description("Failed to encode bundle header") } WrongHeader(path: PathBuf) { description("Wrong header") display("Wrong header on bundle {:?}", path) } WrongVersion(path: PathBuf, version: u8) { description("Wrong version") display("Wrong version on bundle {:?}: {}", path, version) } Integrity(bundle: BundleId, reason: &'static str) { description("Bundle has an integrity error") display("Bundle {:?} has an integrity error: {}", bundle, reason) } NoSuchBundle(bundle: BundleId) { description("No such bundle") display("No such bundle: {:?}", bundle) } NoSuchChunk(bundle: BundleId, id: usize) { description("Bundle has no such chunk") display("Bundle {:?} has no chunk with that id: {}", bundle, id) } Compression(err: CompressionError) { from() cause(err) } Encryption(err: EncryptionError) { from() cause(err) } Remove(err: io::Error, bundle: BundleId) { cause(err) description("Failed to remove bundle") display("Failed to remove bundle {}", bundle) } } } #[derive(Hash, PartialEq, Eq, Clone, Default)] pub struct BundleId(pub Vec); impl Serialize for BundleId { fn serialize(&self, ser: S) -> Result { ser.serialize_bytes(&self.0) } } impl Deserialize for BundleId { fn deserialize(de: D) -> Result { let bytes = try!(msgpack::Bytes::deserialize(de)); Ok(BundleId(bytes.into())) } } impl BundleId { #[inline] fn to_string(&self) -> String { let mut buf = String::with_capacity(self.0.len()*2); for b in &self.0 { write!(&mut buf, "{:02x}", b).unwrap() } buf } } impl fmt::Display for BundleId { #[inline] fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(fmt, "{}", self.to_string()) } } impl fmt::Debug for BundleId { #[inline] fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(fmt, "{}", self.to_string()) } } #[derive(Eq, Debug, PartialEq, Clone, Copy)] pub enum BundleMode { Content, Meta } serde_impl!(BundleMode(u8) { Content => 0, Meta => 1 }); #[derive(Clone)] pub struct BundleInfo { pub id: BundleId, pub mode: BundleMode, pub compression: Option, pub encryption: Option, pub hash_method: HashMethod, pub checksum: Checksum, pub raw_size: usize, pub encoded_size: usize, pub chunk_count: usize, pub contents_info_size: usize } serde_impl!(BundleInfo(u64) { id: BundleId => 0, mode: BundleMode => 1, compression: Option => 2, encryption: Option => 3, hash_method: HashMethod => 4, checksum: Checksum => 5, raw_size: usize => 6, encoded_size: usize => 7, chunk_count: usize => 8, contents_info_size: usize => 9 }); impl Default for BundleInfo { fn default() -> Self { BundleInfo { id: BundleId(vec![]), compression: None, encryption: None, hash_method: HashMethod::Blake2, checksum: (ChecksumType::Blake2_256, msgpack::Bytes::new()), raw_size: 0, encoded_size: 0, chunk_count: 0, mode: BundleMode::Content, contents_info_size: 0 } } } #[derive(Clone, Default)] pub struct BundleContentInfo { pub chunk_sizes: Vec, pub chunk_hashes: Vec } serde_impl!(BundleContentInfo(u64) { chunk_sizes: Vec => 0, chunk_hashes: Vec => 1 }); pub struct Bundle { pub info: BundleInfo, pub contents: BundleContentInfo, pub version: u8, pub path: PathBuf, crypto: Arc>, pub content_start: usize, pub chunk_positions: Vec } impl Bundle { fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc>, info: BundleInfo, contents: BundleContentInfo) -> Self { let mut chunk_positions = Vec::with_capacity(contents.chunk_sizes.len()); let mut pos = 0; for len in &contents.chunk_sizes { chunk_positions.push(pos); pos += *len; } Bundle { info: info, contents: contents, version: version, path: path, crypto: crypto, content_start: content_start, chunk_positions: chunk_positions } } #[inline] pub fn id(&self) -> BundleId { self.info.id.clone() } pub fn load(path: PathBuf, crypto: Arc>) -> Result { let mut file = BufReader::new(try!(File::open(&path).map_err(|e| BundleError::Read(e, path.clone())))); let mut header = [0u8; 8]; try!(file.read_exact(&mut header).map_err(|e| BundleError::Read(e, path.clone()))); if header[..HEADER_STRING.len()] != HEADER_STRING { return Err(BundleError::WrongHeader(path.clone())) } let version = header[HEADER_STRING.len()]; if version != HEADER_VERSION { return Err(BundleError::WrongVersion(path.clone(), version)) } let header: BundleInfo = try!(msgpack::decode_from_stream(&mut file) .map_err(|e| BundleError::Decode(e, path.clone()))); let mut contents_data = Vec::with_capacity(header.contents_info_size); contents_data.resize(header.contents_info_size, 0); try!(file.read_exact(&mut contents_data).map_err(|e| BundleError::Read(e, path.clone()))); if let Some(ref encryption) = header.encryption { contents_data = try!(crypto.lock().unwrap().decrypt(encryption.clone(), &contents_data)); } let contents = try!(msgpack::decode_from_stream(&mut Cursor::new(&contents_data)) .map_err(|e| BundleError::Decode(e, path.clone()))); let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize; Ok(Bundle::new(path, version, content_start, crypto, header, contents)) } #[inline] fn load_encoded_contents(&self) -> Result, BundleError> { let mut file = BufReader::new(try!(File::open(&self.path).map_err(|e| BundleError::Read(e, self.path.clone())))); try!(file.seek(SeekFrom::Start(self.content_start as u64)).map_err(|e| BundleError::Read(e, self.path.clone()))); let mut data = Vec::with_capacity(max(self.info.encoded_size, self.info.raw_size)+1024); try!(file.read_to_end(&mut data).map_err(|e| BundleError::Read(e, self.path.clone()))); Ok(data) } #[inline] fn decode_contents(&self, mut data: Vec) -> Result, BundleError> { if let Some(ref encryption) = self.info.encryption { data = try!(self.crypto.lock().unwrap().decrypt(encryption.clone(), &data)); } if let Some(ref compression) = self.info.compression { data = try!(compression.decompress(&data)); } Ok(data) } #[inline] pub fn load_contents(&self) -> Result, BundleError> { self.load_encoded_contents().and_then(|data| self.decode_contents(data)) } #[inline] pub fn get_chunk_position(&self, id: usize) -> Result<(usize, usize), BundleError> { if id >= self.info.chunk_count { return Err(BundleError::NoSuchChunk(self.id(), id)) } Ok((self.chunk_positions[id], self.contents.chunk_sizes[id])) } pub fn check(&self, full: bool) -> Result<(), BundleError> { //FIXME: adapt to new format if self.info.chunk_count != self.contents.chunk_sizes.len() { return Err(BundleError::Integrity(self.id(), "Chunk list size does not match chunk count")) } if self.contents.chunk_sizes.iter().sum::() != self.info.raw_size { return Err(BundleError::Integrity(self.id(), "Individual chunk sizes do not add up to total size")) } if !full { let size = try!(fs::metadata(&self.path).map_err(|e| BundleError::Read(e, self.path.clone())) ).len(); if size as usize != self.info.encoded_size + self.content_start { return Err(BundleError::Integrity(self.id(), "File size does not match size in header, truncated file")) } return Ok(()) } let encoded_contents = try!(self.load_encoded_contents()); if self.info.encoded_size != encoded_contents.len() { return Err(BundleError::Integrity(self.id(), "Encoded data size does not match size in header, truncated bundle")) } let contents = try!(self.decode_contents(encoded_contents)); if self.info.raw_size != contents.len() { return Err(BundleError::Integrity(self.id(), "Raw data size does not match size in header, truncated bundle")) } //TODO: verify checksum Ok(()) } } impl Debug for Bundle { fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(fmt, "Bundle(\n\tid: {}\n\tpath: {:?}\n\tchunks: {}\n\tsize: {}, encoded: {}\n\tcompression: {:?}\n)", self.info.id.to_string(), self.path, self.info.chunk_count, self.info.raw_size, self.info.encoded_size, self.info.compression) } } pub struct BundleWriter { mode: BundleMode, hash_method: HashMethod, hashes: Vec, data: Vec, compression: Option, compression_stream: Option, encryption: Option, crypto: Arc>, checksum: ChecksumCreator, raw_size: usize, chunk_count: usize, chunk_sizes: Vec } impl BundleWriter { fn new( mode: BundleMode, hash_method: HashMethod, compression: Option, encryption: Option, crypto: Arc>, checksum: ChecksumType ) -> Result { let compression_stream = match compression { Some(ref compression) => Some(try!(compression.compress_stream())), None => None }; Ok(BundleWriter { mode: mode, hash_method: hash_method, hashes: vec![], data: vec![], compression: compression, compression_stream: compression_stream, encryption: encryption, crypto: crypto, checksum: ChecksumCreator::new(checksum), raw_size: 0, chunk_count: 0, chunk_sizes: vec![] }) } pub fn add(&mut self, chunk: &[u8], hash: Hash) -> Result { if let Some(ref mut stream) = self.compression_stream { try!(stream.process(chunk, &mut self.data)) } else { self.data.extend_from_slice(chunk) } self.checksum.update(chunk); self.raw_size += chunk.len(); self.chunk_count += 1; self.chunk_sizes.push(chunk.len()); self.hashes.push(hash); Ok(self.chunk_count-1) } fn finish(mut self, db: &BundleDb) -> Result { if let Some(stream) = self.compression_stream { try!(stream.finish(&mut self.data)) } if let Some(ref encryption) = self.encryption { self.data = try!(self.crypto.lock().unwrap().encrypt(encryption.clone(), &self.data)); } let encoded_size = self.data.len(); let checksum = self.checksum.finish(); let id = BundleId(checksum.1.to_vec()); let (folder, file) = db.bundle_path(&id); let path = folder.join(file); try!(fs::create_dir_all(&folder).map_err(|e| BundleError::Write(e, path.clone()))); let mut file = BufWriter::new(try!(File::create(&path).map_err(|e| BundleError::Write(e, path.clone())))); try!(file.write_all(&HEADER_STRING).map_err(|e| BundleError::Write(e, path.clone()))); try!(file.write_all(&[HEADER_VERSION]).map_err(|e| BundleError::Write(e, path.clone()))); let contents = BundleContentInfo { chunk_sizes: self.chunk_sizes, chunk_hashes: self.hashes }; let mut contents_data = Vec::new(); try!(msgpack::encode_to_stream(&contents, &mut contents_data) .map_err(|e| BundleError::Encode(e, path.clone()))); if let Some(ref encryption) = self.encryption { contents_data = try!(self.crypto.lock().unwrap().encrypt(encryption.clone(), &contents_data)); } let header = BundleInfo { mode: self.mode, hash_method: self.hash_method, checksum: checksum, compression: self.compression, encryption: self.encryption, chunk_count: self.chunk_count, id: id.clone(), raw_size: self.raw_size, encoded_size: encoded_size, contents_info_size: contents_data.len() }; try!(msgpack::encode_to_stream(&header, &mut file) .map_err(|e| BundleError::Encode(e, path.clone()))); try!(file.write_all(&contents_data).map_err(|e| BundleError::Write(e, path.clone()))); let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize; try!(file.write_all(&self.data).map_err(|e| BundleError::Write(e, path.clone()))); Ok(Bundle::new(path, HEADER_VERSION, content_start, self.crypto, header, contents)) } #[inline] pub fn size(&self) -> usize { self.data.len() } #[inline] pub fn raw_size(&self) -> usize { self.raw_size } } pub struct BundleDb { path: PathBuf, compression: Option, encryption: Option, crypto: Arc>, checksum: ChecksumType, bundles: HashMap, bundle_cache: LruCache> } impl BundleDb { fn new(path: PathBuf, compression: Option, encryption: Option, checksum: ChecksumType) -> Self { BundleDb { path: path, compression: compression, crypto: Arc::new(Mutex::new(Crypto::new())), encryption: encryption, checksum: checksum, bundles: HashMap::new(), bundle_cache: LruCache::new(5, 10) } } fn bundle_path(&self, bundle: &BundleId) -> (PathBuf, PathBuf) { let mut folder = self.path.clone(); let mut file = bundle.to_string()[0..32].to_owned() + ".bundle"; let mut count = self.bundles.len(); while count >= 100 { if file.len() < 10 { break } folder = folder.join(&file[0..2]); file = file[2..].to_string(); count /= 100; } (folder, file.into()) } fn load_bundle_list(&mut self) -> Result<(), BundleError> { self.bundles.clear(); let mut paths = Vec::new(); paths.push(self.path.clone()); while let Some(path) = paths.pop() { for entry in try!(fs::read_dir(path).map_err(BundleError::List)) { let entry = try!(entry.map_err(BundleError::List)); let path = entry.path(); if path.is_dir() { paths.push(path); } else { let bundle = try!(Bundle::load(path, self.crypto.clone())); self.bundles.insert(bundle.id(), bundle); } } } Ok(()) } #[inline] pub fn open>(path: P, compression: Option, encryption: Option, checksum: ChecksumType) -> Result { let path = path.as_ref().to_owned(); let mut self_ = Self::new(path, compression, encryption, checksum); try!(self_.load_bundle_list()); Ok(self_) } #[inline] pub fn create>(path: P, compression: Option, encryption: Option, checksum: ChecksumType) -> Result { let path = path.as_ref().to_owned(); try!(fs::create_dir_all(&path) .map_err(|e| BundleError::Write(e, path.clone()))); Ok(Self::new(path, compression, encryption, checksum)) } #[inline] pub fn open_or_create>(path: P, compression: Option, encryption: Option, checksum: ChecksumType) -> Result { if path.as_ref().exists() { Self::open(path, compression, encryption, checksum) } else { Self::create(path, compression, encryption, checksum) } } #[inline] pub fn create_bundle(&self, mode: BundleMode, hash_method: HashMethod) -> Result { BundleWriter::new(mode, hash_method, self.compression.clone(), self.encryption.clone(), self.crypto.clone(), self.checksum) } pub fn get_chunk(&mut self, bundle_id: &BundleId, id: usize) -> Result, BundleError> { let bundle = try!(self.bundles.get(bundle_id).ok_or(BundleError::NoSuchBundle(bundle_id.clone()))); let (pos, len) = try!(bundle.get_chunk_position(id)); let mut chunk = Vec::with_capacity(len); if let Some(data) = self.bundle_cache.get(bundle_id) { chunk.extend_from_slice(&data[pos..pos+len]); return Ok(chunk); } let data = try!(bundle.load_contents()); chunk.extend_from_slice(&data[pos..pos+len]); self.bundle_cache.put(bundle_id.clone(), data); Ok(chunk) } #[inline] pub fn add_bundle(&mut self, bundle: BundleWriter) -> Result<&Bundle, BundleError> { let bundle = try!(bundle.finish(&self)); let id = bundle.id(); self.bundles.insert(id.clone(), bundle); Ok(self.get_bundle(&id).unwrap()) } #[inline] pub fn get_bundle(&self, bundle: &BundleId) -> Option<&Bundle> { self.bundles.get(bundle) } #[inline] pub fn list_bundles(&self) -> Vec<&Bundle> { self.bundles.values().collect() } #[inline] pub fn delete_bundle(&mut self, bundle: &BundleId) -> Result<(), BundleError> { if let Some(bundle) = self.bundles.remove(bundle) { fs::remove_file(&bundle.path).map_err(|e| BundleError::Remove(e, bundle.id())) } else { Err(BundleError::NoSuchBundle(bundle.clone())) } } #[inline] pub fn check(&self, full: bool) -> Result<(), BundleError> { for bundle in self.bundles.values() { try!(bundle.check(full)) } Ok(()) } }