use std::path::{Path, PathBuf}; use std::collections::HashMap; use std::fs::{self, File}; use std::io::{Read, Write, Seek, SeekFrom, BufWriter, BufReader}; use std::cmp::max; use std::fmt::{self, Debug, Write as FmtWrite}; use std::sync::{Arc, Mutex}; use serde::{self, Serialize, Deserialize}; use serde::bytes::ByteBuf; use rmp_serde; use errors::BundleError; use util::*; static HEADER_STRING: [u8; 7] = *b"zbundle"; static HEADER_VERSION: u8 = 1; // TODO: Test cases // TODO: Benchmarks #[derive(Hash, PartialEq, Eq, Clone, Default)] pub struct BundleId(pub Vec); impl Serialize for BundleId { fn serialize(&self, ser: S) -> Result { ser.serialize_bytes(&self.0) } } impl Deserialize for BundleId { fn deserialize(de: D) -> Result { let bytes = try!(ByteBuf::deserialize(de)); Ok(BundleId(bytes.into())) } } impl BundleId { #[inline] fn to_string(&self) -> String { let mut buf = String::with_capacity(self.0.len()*2); for b in &self.0 { write!(&mut buf, "{:02x}", b).unwrap() } buf } } impl fmt::Display for BundleId { #[inline] fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(fmt, "{}", self.to_string()) } } impl fmt::Debug for BundleId { #[inline] fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(fmt, "{}", self.to_string()) } } #[derive(Clone)] pub struct BundleHeader { pub id: BundleId, pub compression: Option, pub encryption: Option, pub checksum: Checksum, pub raw_size: usize, pub encoded_size: usize, pub chunk_count: usize, pub chunk_sizes: Vec } serde_impl!(BundleHeader(u64) { id: BundleId => 0, compression: Option => 1, encryption: Option => 2, checksum: Checksum => 3, raw_size: usize => 4, encoded_size: usize => 5, chunk_count: usize => 6, chunk_sizes: Vec => 7 }); impl Default for BundleHeader { fn default() -> Self { BundleHeader { id: BundleId(vec![]), compression: None, encryption: None, checksum: (ChecksumType::Blake2_256, ByteBuf::new()), raw_size: 0, encoded_size: 0, chunk_count: 0, chunk_sizes: vec![] } } } pub struct Bundle { pub id: BundleId, pub version: u8, pub path: PathBuf, crypto: Arc>, pub compression: Option, pub encryption: Option, pub raw_size: usize, pub encoded_size: usize, pub checksum: Checksum, pub content_start: usize, pub chunk_count: usize, pub chunk_sizes: Vec, pub chunk_positions: Vec } impl Bundle { fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc>, header: BundleHeader) -> Self { let mut chunk_positions = Vec::with_capacity(header.chunk_sizes.len()); let mut pos = 0; for len in &header.chunk_sizes { chunk_positions.push(pos); pos += *len; } Bundle { id: header.id, version: version, path: path, crypto: crypto, compression: header.compression, encryption: header.encryption, raw_size: header.raw_size, encoded_size: header.encoded_size, chunk_count: header.chunk_count, checksum: header.checksum, content_start: content_start, chunk_sizes: header.chunk_sizes, chunk_positions: chunk_positions } } pub fn load(path: PathBuf, crypto: Arc>) -> Result { let mut file = BufReader::new(try!(File::open(&path) .map_err(|e| BundleError::Read(e, path.clone(), "Failed to open bundle file")))); let mut header = [0u8; 8]; try!(file.read_exact(&mut header) .map_err(|e| BundleError::Read(e, path.clone(), "Failed to read bundle header"))); if header[..HEADER_STRING.len()] != HEADER_STRING { return Err(BundleError::Format(path.clone(), "Wrong header string")) } let version = header[HEADER_STRING.len()]; if version != HEADER_VERSION { return Err(BundleError::Format(path.clone(), "Unsupported bundle file version")) } let mut reader = rmp_serde::Deserializer::new(file); let header = try!(BundleHeader::deserialize(&mut reader) .map_err(|e| BundleError::Decode(e, path.clone()))); file = reader.into_inner(); let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize; Ok(Bundle::new(path, version, content_start, crypto, header)) } #[inline] fn load_encoded_contents(&self) -> Result, BundleError> { let mut file = BufReader::new(try!(File::open(&self.path) .map_err(|e| BundleError::Read(e, self.path.clone(), "Failed to open bundle file")))); try!(file.seek(SeekFrom::Start(self.content_start as u64)) .map_err(|e| BundleError::Read(e, self.path.clone(), "Failed to seek to data"))); let mut data = Vec::with_capacity(max(self.encoded_size, self.raw_size)+1024); try!(file.read_to_end(&mut data).map_err(|_| "Failed to read data")); Ok(data) } #[inline] fn decode_contents(&self, mut data: Vec) -> Result, BundleError> { if let Some(ref encryption) = self.encryption { data = try!(self.crypto.lock().unwrap().decrypt(encryption.clone(), &data)); } if let Some(ref compression) = self.compression { data = try!(compression.decompress(&data)); } Ok(data) } #[inline] pub fn load_contents(&self) -> Result, BundleError> { self.load_encoded_contents().and_then(|data| self.decode_contents(data)) } #[inline] pub fn get_chunk_position(&self, id: usize) -> Result<(usize, usize), BundleError> { if id >= self.chunk_count { return Err("Invalid chunk id".into()) } Ok((self.chunk_positions[id], self.chunk_sizes[id])) } pub fn check(&self, full: bool) -> Result<(), BundleError> { if self.chunk_count != self.chunk_sizes.len() { return Err(BundleError::Integrity(self.id.clone(), "Chunk list size does not match chunk count")) } if self.chunk_sizes.iter().sum::() != self.raw_size { return Err(BundleError::Integrity(self.id.clone(), "Individual chunk sizes do not add up to total size")) } if !full { let size = try!(fs::metadata(&self.path) .map_err(|e| BundleError::Read(e, self.path.clone(), "Failed to get size of file")) ).len(); if size as usize != self.encoded_size + self.content_start { return Err(BundleError::Integrity(self.id.clone(), "File size does not match size in header, truncated file")) } return Ok(()) } let encoded_contents = try!(self.load_encoded_contents()); if self.encoded_size != encoded_contents.len() { return Err(BundleError::Integrity(self.id.clone(), "Encoded data size does not match size in header, truncated bundle")) } let contents = try!(self.decode_contents(encoded_contents)); if self.raw_size != contents.len() { return Err(BundleError::Integrity(self.id.clone(), "Raw data size does not match size in header, truncated bundle")) } //TODO: verify checksum Ok(()) } } impl Debug for Bundle { fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(fmt, "Bundle(\n\tid: {}\n\tpath: {:?}\n\tchunks: {}\n\tsize: {}, encoded: {}\n\tcompression: {:?}\n)", self.id.to_string(), self.path, self.chunk_count, self.raw_size, self.encoded_size, self.compression) } } pub struct BundleWriter { data: Vec, compression: Option, compression_stream: Option, encryption: Option, crypto: Arc>, checksum: ChecksumCreator, raw_size: usize, chunk_count: usize, chunk_sizes: Vec } impl BundleWriter { fn new(compression: Option, encryption: Option, crypto: Arc>, checksum: ChecksumType) -> Result { let compression_stream = match compression { Some(ref compression) => Some(try!(compression.compress_stream())), None => None }; Ok(BundleWriter { data: vec![], compression: compression, compression_stream: compression_stream, encryption: encryption, crypto: crypto, checksum: ChecksumCreator::new(checksum), raw_size: 0, chunk_count: 0, chunk_sizes: vec![] }) } pub fn add(&mut self, chunk: &[u8]) -> Result { if let Some(ref mut stream) = self.compression_stream { try!(stream.process(chunk, &mut self.data)) } else { self.data.extend_from_slice(chunk) } self.checksum.update(chunk); self.raw_size += chunk.len(); self.chunk_count += 1; self.chunk_sizes.push(chunk.len()); Ok(self.chunk_count-1) } fn finish(mut self, db: &BundleDb) -> Result { if let Some(stream) = self.compression_stream { try!(stream.finish(&mut self.data)) } if let Some(ref encryption) = self.encryption { self.data = try!(self.crypto.lock().unwrap().encrypt(encryption.clone(), &self.data)); } let encoded_size = self.data.len(); let checksum = self.checksum.finish(); let id = BundleId(checksum.1.to_vec()); let (folder, file) = db.bundle_path(&id); let path = folder.join(file); try!(fs::create_dir_all(&folder) .map_err(|e| BundleError::Write(e, path.clone(), "Failed to create folder"))); let mut file = BufWriter::new(try!(File::create(&path) .map_err(|e| BundleError::Write(e, path.clone(), "Failed to create bundle file")))); try!(file.write_all(&HEADER_STRING) .map_err(|e| BundleError::Write(e, path.clone(), "Failed to write bundle header"))); try!(file.write_all(&[HEADER_VERSION]) .map_err(|e| BundleError::Write(e, path.clone(), "Failed to write bundle header"))); let header = BundleHeader { checksum: checksum, compression: self.compression, encryption: self.encryption, chunk_count: self.chunk_count, id: id.clone(), raw_size: self.raw_size, encoded_size: encoded_size, chunk_sizes: self.chunk_sizes }; { let mut writer = rmp_serde::Serializer::new(&mut file); try!(header.serialize(&mut writer) .map_err(|e| BundleError::Encode(e, path.clone()))); } let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize; try!(file.write_all(&self.data) .map_err(|e| BundleError::Write(e, path.clone(), "Failed to write bundle data"))); Ok(Bundle::new(path, HEADER_VERSION, content_start, self.crypto, header)) } #[inline] pub fn size(&self) -> usize { self.data.len() } } pub struct BundleDb { path: PathBuf, compression: Option, encryption: Option, crypto: Arc>, checksum: ChecksumType, bundles: HashMap, bundle_cache: LruCache> } impl BundleDb { fn new(path: PathBuf, compression: Option, encryption: Option, checksum: ChecksumType) -> Self { BundleDb { path: path, compression: compression, crypto: Arc::new(Mutex::new(Crypto::new())), encryption: encryption, checksum: checksum, bundles: HashMap::new(), bundle_cache: LruCache::new(5, 10) } } fn bundle_path(&self, bundle: &BundleId) -> (PathBuf, PathBuf) { let mut folder = self.path.clone(); let mut file = bundle.to_string() + ".bundle"; let mut count = self.bundles.len(); while count >= 1000 { if file.len() < 10 { break } folder = folder.join(&file[0..3]); file = file[3..].to_string(); count /= 1000; } (folder, file.into()) } fn load_bundle_list(&mut self) -> Result<(), BundleError> { self.bundles.clear(); let mut paths = Vec::new(); paths.push(self.path.clone()); while let Some(path) = paths.pop() { for entry in try!(fs::read_dir(path).map_err(BundleError::List)) { let entry = try!(entry.map_err(BundleError::List)); let path = entry.path(); if path.is_dir() { paths.push(path); } else { let bundle = try!(Bundle::load(path, self.crypto.clone())); self.bundles.insert(bundle.id.clone(), bundle); } } } Ok(()) } #[inline] pub fn open>(path: P, compression: Option, encryption: Option, checksum: ChecksumType) -> Result { let path = path.as_ref().to_owned(); let mut self_ = Self::new(path, compression, encryption, checksum); try!(self_.load_bundle_list()); Ok(self_) } #[inline] pub fn create>(path: P, compression: Option, encryption: Option, checksum: ChecksumType) -> Result { let path = path.as_ref().to_owned(); try!(fs::create_dir_all(&path) .map_err(|e| BundleError::Write(e, path.clone(), "Failed to create folder"))); Ok(Self::new(path, compression, encryption, checksum)) } #[inline] pub fn open_or_create>(path: P, compression: Option, encryption: Option, checksum: ChecksumType) -> Result { if path.as_ref().exists() { Self::open(path, compression, encryption, checksum) } else { Self::create(path, compression, encryption, checksum) } } #[inline] pub fn create_bundle(&self) -> Result { BundleWriter::new(self.compression.clone(), self.encryption.clone(), self.crypto.clone(), self.checksum) } pub fn get_chunk(&mut self, bundle_id: &BundleId, id: usize) -> Result, BundleError> { let bundle = try!(self.bundles.get(bundle_id).ok_or("Bundle not found")); let (pos, len) = try!(bundle.get_chunk_position(id)); let mut chunk = Vec::with_capacity(len); if let Some(data) = self.bundle_cache.get(bundle_id) { chunk.extend_from_slice(&data[pos..pos+len]); return Ok(chunk); } let data = try!(bundle.load_contents()); chunk.extend_from_slice(&data[pos..pos+len]); self.bundle_cache.put(bundle_id.clone(), data); Ok(chunk) } #[inline] pub fn add_bundle(&mut self, bundle: BundleWriter) -> Result<&Bundle, BundleError> { let bundle = try!(bundle.finish(&self)); let id = bundle.id.clone(); self.bundles.insert(id.clone(), bundle); Ok(self.get_bundle(&id).unwrap()) } #[inline] pub fn get_bundle(&self, bundle: &BundleId) -> Option<&Bundle> { self.bundles.get(bundle) } #[inline] pub fn list_bundles(&self) -> Vec<&Bundle> { self.bundles.values().collect() } #[inline] pub fn delete_bundle(&mut self, bundle: &BundleId) -> Result<(), BundleError> { if let Some(bundle) = self.bundles.remove(bundle) { fs::remove_file(&bundle.path).map_err(|e| BundleError::Remove(e, bundle.id.clone())) } else { Err("No such bundle".into()) } } #[inline] pub fn check(&self, full: bool) -> Result<(), BundleError> { for bundle in self.bundles.values() { try!(bundle.check(full)) } Ok(()) } }