From d6b8e00718da74e436011410bb55d19b4a4166f8 Mon Sep 17 00:00:00 2001 From: Dennis Schwerdel Date: Wed, 15 Mar 2017 08:27:27 +0100 Subject: [PATCH] Changes --- .gitignore | 2 +- Algotest.txt | 133 +++++++++++++++++++++++++++++++++++ Algotest1.txt | 68 ------------------ src/algotest.rs | 6 +- src/bundle.rs | 95 ++++++++++++------------- src/main.rs | 41 +++++++++-- src/repository/basic_io.rs | 10 +-- src/repository/bundle_map.rs | 31 +++++--- src/repository/info.rs | 34 +++++++++ src/repository/integrity.rs | 4 +- src/repository/mod.rs | 9 ++- src/util/compression.rs | 2 +- src/util/mod.rs | 25 +++++++ 13 files changed, 313 insertions(+), 147 deletions(-) create mode 100644 Algotest.txt delete mode 100644 Algotest1.txt create mode 100644 src/repository/info.rs diff --git a/.gitignore b/.gitignore index 5f5515b..4b1923a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,4 @@ target squash test.tar -test_data +test_* diff --git a/Algotest.txt b/Algotest.txt new file mode 100644 index 0000000..b453e1b --- /dev/null +++ b/Algotest.txt @@ -0,0 +1,133 @@ +Algorithm comparison on file test_data/ubuntu-16.04.1.tar + +Reading input file... done. 183869440 bytes + +Chunker algorithms + Chunk size: 4 KiB + AE: avg chunk size 4424.2 ± 1270.2 bytes, 3.4% saved, speed 713.7 MB/s + Rabin: avg chunk size 5091.2 ± 3985.7 bytes, 4.1% saved, speed 214.1 MB/s + FastCdc: avg chunk size 4881.7 ± 2044.4 bytes, 3.4% saved, speed 485.2 MB/s + Chunk size: 8 KiB + AE: avg chunk size 8924.8 ± 2215.3 bytes, 2.6% saved, speed 721.6 MB/s + Rabin: avg chunk size 10406.9 ± 7996.2 bytes, 3.0% saved, speed 215.6 MB/s + FastCdc: avg chunk size 9704.4 ± 3740.6 bytes, 2.9% saved, speed 481.8 MB/s + Chunk size: 16 KiB + AE: avg chunk size 17828.9 ± 4205.5 bytes, 2.0% saved, speed 719.3 MB/s + Rabin: avg chunk size 21402.6 ± 16330.5 bytes, 2.1% saved, speed 215.5 MB/s + FastCdc: avg chunk size 18938.0 ± 6924.2 bytes, 2.2% saved, speed 502.7 MB/s + Chunk size: 32 KiB + AE: avg chunk size 35530.3 ± 8760.8 bytes, 1.2% saved, speed 711.5 MB/s + Rabin: avg chunk size 42601.8 ± 32150.9 bytes, 1.2% saved, speed 213.5 MB/s + FastCdc: avg chunk size 41356.1 ± 20308.1 bytes, 1.2% saved, speed 443.1 MB/s + Chunk size: 64 KiB + AE: avg chunk size 70746.2 ± 16942.6 bytes, 0.9% saved, speed 715.8 MB/s + Rabin: avg chunk size 83729.3 ± 63880.7 bytes, 1.2% saved, speed 199.6 MB/s + FastCdc: avg chunk size 77811.9 ± 29836.2 bytes, 0.8% saved, speed 492.3 MB/s + +Hash algorithms + Blake2: 536.4 MB/s + Murmur3: 3771.8 MB/s + +Compression algorithms + Snappy: ratio: 63.9%, compress: 417.8 MB/s, decompress: 839.9 MB/s + ZStd/1: ratio: 48.3%, compress: 206.7 MB/s, decompress: 320.4 MB/s + ZStd/2: ratio: 45.7%, compress: 147.5 MB/s, decompress: 346.0 MB/s + ZStd/3: ratio: 43.4%, compress: 89.9 MB/s, decompress: 342.2 MB/s + ZStd/4: ratio: 41.9%, compress: 79.3 MB/s, decompress: 350.4 MB/s + ZStd/5: ratio: 42.3%, compress: 55.8 MB/s, decompress: 268.3 MB/s + ZStd/6: ratio: 41.5%, compress: 42.9 MB/s, decompress: 310.0 MB/s + ZStd/7: ratio: 39.7%, compress: 33.3 MB/s, decompress: 313.7 MB/s + ZStd/8: ratio: 39.4%, compress: 26.3 MB/s, decompress: 281.4 MB/s + ZStd/9: ratio: 39.0%, compress: 17.5 MB/s, decompress: 308.5 MB/s + Deflate/1: ratio: 48.4%, compress: 83.9 MB/s, decompress: 306.9 MB/s + Deflate/2: ratio: 48.0%, compress: 81.8 MB/s, decompress: 311.2 MB/s + Deflate/3: ratio: 47.7%, compress: 76.7 MB/s, decompress: 314.5 MB/s + Deflate/4: ratio: 47.6%, compress: 74.9 MB/s, decompress: 317.5 MB/s + Deflate/5: ratio: 46.9%, compress: 63.6 MB/s, decompress: 319.8 MB/s + Deflate/6: ratio: 46.8%, compress: 55.2 MB/s, decompress: 309.0 MB/s + Deflate/7: ratio: 46.7%, compress: 50.8 MB/s, decompress: 326.3 MB/s + Deflate/8: ratio: 45.8%, compress: 18.4 MB/s, decompress: 301.8 MB/s + Deflate/9: ratio: 45.4%, compress: 13.3 MB/s, decompress: 278.0 MB/s + Brotli/1: ratio: 45.7%, compress: 141.6 MB/s, decompress: 181.3 MB/s + Brotli/2: ratio: 43.6%, compress: 90.8 MB/s, decompress: 195.0 MB/s + Brotli/3: ratio: 43.5%, compress: 68.6 MB/s, decompress: 197.0 MB/s + Brotli/4: ratio: 41.9%, compress: 44.7 MB/s, decompress: 201.2 MB/s + Brotli/5: ratio: 39.5%, compress: 17.5 MB/s, decompress: 194.0 MB/s + Brotli/6: ratio: 38.4%, compress: 12.2 MB/s, decompress: 196.4 MB/s + Brotli/7: ratio: 37.2%, compress: 6.0 MB/s, decompress: 203.3 MB/s + Brotli/8: ratio: 37.0%, compress: 4.2 MB/s, decompress: 204.2 MB/s + Brotli/9: ratio: 36.9%, compress: 3.2 MB/s, decompress: 205.8 MB/s + Lzma2/1: ratio: 37.7%, compress: 8.1 MB/s, decompress: 38.0 MB/s + Lzma2/2: ratio: 36.3%, compress: 5.6 MB/s, decompress: 40.7 MB/s + Lzma2/3: ratio: 35.5%, compress: 3.5 MB/s, decompress: 42.2 MB/s + Lzma2/4: ratio: 33.9%, compress: 2.9 MB/s, decompress: 43.1 MB/s + Lzma2/5: ratio: 33.2%, compress: 2.4 MB/s, decompress: 43.4 MB/s + Lzma2/6: ratio: 33.1%, compress: 2.1 MB/s, decompress: 43.9 MB/s + + + +Algorithm comparison on file test_data/silesia.tar + +Reading input file... done. 211957760 bytes + +Chunker algorithms + Chunk size: 4 KiB + AE: avg chunk size 4779.0 ± 1348.1 bytes, 0.3% saved, speed 715.3 MB/s + Rabin: avg chunk size 5081.6 ± 3845.5 bytes, 0.7% saved, speed 213.1 MB/s + FastCdc: avg chunk size 4829.0 ± 1560.8 bytes, 0.2% saved, speed 488.8 MB/s + Chunk size: 8 KiB + AE: avg chunk size 9634.0 ± 2548.9 bytes, 0.0% saved, speed 697.2 MB/s + Rabin: avg chunk size 10137.6 ± 7591.8 bytes, 0.3% saved, speed 213.7 MB/s + FastCdc: avg chunk size 9609.1 ± 2957.7 bytes, 0.1% saved, speed 482.2 MB/s + Chunk size: 16 KiB + AE: avg chunk size 18849.1 ± 4654.1 bytes, 0.0% saved, speed 719.9 MB/s + Rabin: avg chunk size 20443.5 ± 15211.9 bytes, 0.1% saved, speed 214.8 MB/s + FastCdc: avg chunk size 19309.3 ± 6274.3 bytes, 0.0% saved, speed 501.9 MB/s + Chunk size: 32 KiB + AE: avg chunk size 36677.2 ± 7905.2 bytes, 0.0% saved, speed 726.4 MB/s + Rabin: avg chunk size 41814.5 ± 31117.0 bytes, 0.0% saved, speed 213.3 MB/s + FastCdc: avg chunk size 44463.6 ± 26128.5 bytes, 0.0% saved, speed 469.7 MB/s + Chunk size: 64 KiB + AE: avg chunk size 71923.2 ± 14350.5 bytes, 0.0% saved, speed 724.0 MB/s + Rabin: avg chunk size 81742.3 ± 60106.9 bytes, 0.0% saved, speed 213.5 MB/s + FastCdc: avg chunk size 79384.9 ± 31116.6 bytes, 0.0% saved, speed 490.2 MB/s + +Hash algorithms + Blake2: 521.9 MB/s + Murmur3: 3763.0 MB/s + +Compression algorithms + Snappy: ratio: 47.8%, compress: 407.1 MB/s, decompress: 760.6 MB/s + ZStd/1: ratio: 34.8%, compress: 189.0 MB/s, decompress: 314.6 MB/s + ZStd/2: ratio: 33.1%, compress: 149.1 MB/s, decompress: 327.1 MB/s + ZStd/3: ratio: 32.2%, compress: 122.4 MB/s, decompress: 326.3 MB/s + ZStd/4: ratio: 31.9%, compress: 99.9 MB/s, decompress: 318.4 MB/s + ZStd/5: ratio: 30.9%, compress: 66.8 MB/s, decompress: 317.1 MB/s + ZStd/6: ratio: 30.0%, compress: 52.4 MB/s, decompress: 324.8 MB/s + ZStd/7: ratio: 29.3%, compress: 39.2 MB/s, decompress: 333.7 MB/s + ZStd/8: ratio: 28.9%, compress: 29.3 MB/s, decompress: 338.9 MB/s + ZStd/9: ratio: 28.5%, compress: 23.0 MB/s, decompress: 349.5 MB/s + Deflate/1: ratio: 34.7%, compress: 106.5 MB/s, decompress: 418.2 MB/s + Deflate/2: ratio: 33.9%, compress: 96.6 MB/s, decompress: 425.4 MB/s + Deflate/3: ratio: 33.5%, compress: 90.6 MB/s, decompress: 442.4 MB/s + Deflate/4: ratio: 33.1%, compress: 83.2 MB/s, decompress: 438.6 MB/s + Deflate/5: ratio: 32.4%, compress: 69.6 MB/s, decompress: 452.2 MB/s + Deflate/6: ratio: 32.1%, compress: 57.5 MB/s, decompress: 455.8 MB/s + Deflate/7: ratio: 32.0%, compress: 51.3 MB/s, decompress: 457.0 MB/s + Deflate/8: ratio: 31.8%, compress: 14.8 MB/s, decompress: 409.7 MB/s + Deflate/9: ratio: 31.2%, compress: 10.7 MB/s, decompress: 406.9 MB/s + Brotli/1: ratio: 34.3%, compress: 137.2 MB/s, decompress: 214.6 MB/s + Brotli/2: ratio: 32.1%, compress: 88.9 MB/s, decompress: 233.6 MB/s + Brotli/3: ratio: 31.8%, compress: 68.7 MB/s, decompress: 242.2 MB/s + Brotli/4: ratio: 30.9%, compress: 45.5 MB/s, decompress: 254.2 MB/s + Brotli/5: ratio: 28.7%, compress: 22.0 MB/s, decompress: 259.2 MB/s + Brotli/6: ratio: 28.1%, compress: 15.8 MB/s, decompress: 267.8 MB/s + Brotli/7: ratio: 27.4%, compress: 9.1 MB/s, decompress: 274.4 MB/s + Brotli/8: ratio: 27.1%, compress: 6.4 MB/s, decompress: 245.7 MB/s + Brotli/9: ratio: 26.8%, compress: 4.7 MB/s, decompress: 276.0 MB/s + Lzma2/1: ratio: 27.6%, compress: 11.3 MB/s, decompress: 52.5 MB/s + Lzma2/2: ratio: 26.8%, compress: 7.6 MB/s, decompress: 54.6 MB/s + Lzma2/3: ratio: 26.3%, compress: 5.0 MB/s, decompress: 56.4 MB/s + Lzma2/4: ratio: 24.6%, compress: 3.2 MB/s, decompress: 56.8 MB/s + Lzma2/5: ratio: 23.6%, compress: 2.4 MB/s, decompress: 59.0 MB/s + Lzma2/6: ratio: 23.2%, compress: 2.0 MB/s, decompress: 59.3 MB/s diff --git a/Algotest1.txt b/Algotest1.txt deleted file mode 100644 index 336fba3..0000000 --- a/Algotest1.txt +++ /dev/null @@ -1,68 +0,0 @@ -~/shared - - -Algorithm comparison on file test.tar - -Reading input file... done. 2175416320 bytes - -Chunker algorithms - Chunk size: 4 KiB - AE: avg chunk size 2756.5 ± 543.6 bytes, 12.1% saved, speed 748.9 MB/s - Rabin: avg chunk size 4902.3 ± 3826.2 bytes, 11.7% saved, speed 336.7 MB/s - FastCdc: avg chunk size 4783.3 ± 1940.5 bytes, 12.1% saved, speed 544.1 MB/s - Chunk size: 8 KiB - AE: avg chunk size 5245.1 ± 890.8 bytes, 10.0% saved, speed 756.3 MB/s - Rabin: avg chunk size 9774.2 ± 7636.0 bytes, 10.3% saved, speed 344.9 MB/s - FastCdc: avg chunk size 9583.2 ± 3933.2 bytes, 10.7% saved, speed 541.6 MB/s - Chunk size: 16 KiB - AE: avg chunk size 10169.5 ± 1485.8 bytes, 7.4% saved, speed 781.5 MB/s - Rabin: avg chunk size 19641.7 ± 15292.5 bytes, 9.0% saved, speed 345.9 MB/s - FastCdc: avg chunk size 19262.9 ± 7697.4 bytes, 9.0% saved, speed 548.1 MB/s - Chunk size: 32 KiB - AE: avg chunk size 20004.6 ± 2705.6 bytes, 5.6% saved, speed 787.0 MB/s - Rabin: avg chunk size 38963.6 ± 30218.2 bytes, 7.6% saved, speed 345.7 MB/s - FastCdc: avg chunk size 39159.3 ± 16834.6 bytes, 7.7% saved, speed 547.1 MB/s - Chunk size: 64 KiB - AE: avg chunk size 39627.2 ± 5310.6 bytes, 3.8% saved, speed 788.2 MB/s - Rabin: avg chunk size 78339.7 ± 60963.7 bytes, 6.4% saved, speed 345.6 MB/s - FastCdc: avg chunk size 76981.4 ± 30784.6 bytes, 6.1% saved, speed 548.4 MB/s - -Hash algorithms - Blake2: 724.2 MB/s - Murmur3: 5358.3 MB/s - -Compression algorithms - Snappy: ratio: 83.6%, compress: 301.7 MB/s, decompress: 876.2 MB/s -fatal runtime error: out of memory - - - ZStd/1: ratio: 77.2%, compress: 493.9 MB/s, decompress: 0.0 MB/s - ZStd/2: ratio: 76.7%, compress: 420.6 MB/s, decompress: 0.0 MB/s - ZStd/3: ratio: 75.4%, compress: 314.6 MB/s, decompress: 0.0 MB/s - ZStd/4: ratio: 75.3%, compress: 273.0 MB/s, decompress: 0.0 MB/s - ZStd/5: ratio: 74.9%, compress: 131.4 MB/s, decompress: 0.0 MB/s - ZStd/6: ratio: 73.6%, compress: 121.4 MB/s, decompress: 0.0 MB/s - ZStd/7: ratio: 73.5%, compress: 88.7 MB/s, decompress: 0.0 MB/s - ZStd/8: ratio: 73.4%, compress: 76.8 MB/s, decompress: 0.0 MB/s - ZStd/9: ratio: 73.3%, compress: 51.8 MB/s, decompress: 0.0 MB/s - Deflate/1: ratio: 78.3%, compress: 95.7 MB/s, decompress: 0.0 MB/s - Deflate/2: ratio: 78.2%, compress: 94.7 MB/s, decompress: 0.0 MB/s - Deflate/3: ratio: 78.1%, compress: 92.5 MB/s, decompress: 0.0 MB/s - Deflate/4: ratio: 78.0%, compress: 87.9 MB/s, decompress: 0.0 MB/s - Deflate/5: ratio: 77.8%, compress: 86.5 MB/s, decompress: 0.0 MB/s - Deflate/6: ratio: 77.7%, compress: 83.8 MB/s, decompress: 0.0 MB/s - Deflate/7: ratio: 77.7%, compress: 73.4 MB/s, decompress: 0.0 MB/s - Deflate/8: ratio: 77.6%, compress: 31.6 MB/s, decompress: 0.0 MB/s - Deflate/9: ratio: 77.4%, compress: 25.8 MB/s, decompress: 0.0 MB/s - Brotli/1: ratio: 77.6%, compress: 433.1 MB/s, decompress: 0.0 MB/s - Brotli/2: ratio: 75.4%, compress: 242.2 MB/s, decompress: 0.0 MB/s - Brotli/3: ratio: 75.3%, compress: 195.5 MB/s, decompress: 0.0 MB/s - Brotli/4: ratio: 72.4%, compress: 81.6 MB/s, decompress: 0.0 MB/s - Brotli/5: ratio: 73.9%, compress: 62.4 MB/s, decompress: 0.0 MB/s - Brotli/6: ratio: 72.9%, compress: 46.6 MB/s, decompress: 0.0 MB/s - Brotli/7: ratio: 71.5%, compress: 23.4 MB/s, decompress: 0.0 MB/s - Brotli/8: ratio: 71.5%, compress: 20.7 MB/s, decompress: 0.0 MB/s - Brotli/9: ratio: 71.2%, compress: 11.2 MB/s, decompress: 0.0 MB/s - Lzma2/1: ratio: 69.8%, compress: 4.2 MB/s, decompress: 0.0 MB/s - - diff --git a/src/algotest.rs b/src/algotest.rs index dc67a13..1a67b45 100644 --- a/src/algotest.rs +++ b/src/algotest.rs @@ -71,14 +71,14 @@ fn compare_compression(name: &str, method: Compression, data: &[u8]) { let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0; let cspeed = data.len() as f64 / duration; let ratio = compressed.len() as f64 / data.len() as f64; - /*let start = time::Instant::now(); + let start = time::Instant::now(); let uncompressed = method.decompress(&compressed).unwrap(); if uncompressed != data { panic!("{} did not uncompress to the same value", name); } let elapsed = start.elapsed(); - let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0;*/ - let dspeed = 0.0;//data.len() as f64 / duration; + let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0; + let dspeed = data.len() as f64 / duration; println!("{}:\tratio: {:.1}%,\tcompress: {:.1} MB/s,\tdecompress: {:.1} MB/s", name, ratio * 100.0, cspeed / 1_000_000.0, dspeed / 1_000_000.0); } diff --git a/src/bundle.rs b/src/bundle.rs index 35742f0..60b3440 100644 --- a/src/bundle.rs +++ b/src/bundle.rs @@ -65,7 +65,7 @@ impl fmt::Debug for BundleId { #[derive(Clone)] -pub struct BundleHeader { +pub struct BundleInfo { pub id: BundleId, pub compression: Option, pub encryption: Option, @@ -75,7 +75,7 @@ pub struct BundleHeader { pub chunk_count: usize, pub chunk_sizes: Vec } -serde_impl!(BundleHeader(u64) { +serde_impl!(BundleInfo(u64) { id: BundleId => 0, compression: Option => 1, encryption: Option => 2, @@ -86,9 +86,9 @@ serde_impl!(BundleHeader(u64) { chunk_sizes: Vec => 7 }); -impl Default for BundleHeader { +impl Default for BundleInfo { fn default() -> Self { - BundleHeader { + BundleInfo { id: BundleId(vec![]), compression: None, encryption: None, @@ -103,46 +103,37 @@ impl Default for BundleHeader { pub struct Bundle { - pub id: BundleId, + pub info: BundleInfo, pub version: u8, pub path: PathBuf, crypto: Arc>, - pub compression: Option, - pub encryption: Option, - pub raw_size: usize, - pub encoded_size: usize, - pub checksum: Checksum, pub content_start: usize, - pub chunk_count: usize, - pub chunk_sizes: Vec, pub chunk_positions: Vec } impl Bundle { - fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc>, header: BundleHeader) -> Self { - let mut chunk_positions = Vec::with_capacity(header.chunk_sizes.len()); + fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc>, info: BundleInfo) -> Self { + let mut chunk_positions = Vec::with_capacity(info.chunk_sizes.len()); let mut pos = 0; - for len in &header.chunk_sizes { + for len in &info.chunk_sizes { chunk_positions.push(pos); pos += *len; } Bundle { - id: header.id, + info: info, version: version, path: path, crypto: crypto, - compression: header.compression, - encryption: header.encryption, - raw_size: header.raw_size, - encoded_size: header.encoded_size, - chunk_count: header.chunk_count, - checksum: header.checksum, content_start: content_start, - chunk_sizes: header.chunk_sizes, chunk_positions: chunk_positions } } + #[inline] + pub fn id(&self) -> BundleId { + self.info.id.clone() + } + pub fn load(path: PathBuf, crypto: Arc>) -> Result { let mut file = BufReader::new(try!(File::open(&path) .map_err(|e| BundleError::Read(e, path.clone(), "Failed to open bundle file")))); @@ -157,7 +148,7 @@ impl Bundle { return Err(BundleError::Format(path.clone(), "Unsupported bundle file version")) } let mut reader = rmp_serde::Deserializer::new(file); - let header = try!(BundleHeader::deserialize(&mut reader) + let header = try!(BundleInfo::deserialize(&mut reader) .map_err(|e| BundleError::Decode(e, path.clone()))); file = reader.into_inner(); let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize; @@ -170,17 +161,17 @@ impl Bundle { .map_err(|e| BundleError::Read(e, self.path.clone(), "Failed to open bundle file")))); try!(file.seek(SeekFrom::Start(self.content_start as u64)) .map_err(|e| BundleError::Read(e, self.path.clone(), "Failed to seek to data"))); - let mut data = Vec::with_capacity(max(self.encoded_size, self.raw_size)+1024); + let mut data = Vec::with_capacity(max(self.info.encoded_size, self.info.raw_size)+1024); try!(file.read_to_end(&mut data).map_err(|_| "Failed to read data")); Ok(data) } #[inline] fn decode_contents(&self, mut data: Vec) -> Result, BundleError> { - if let Some(ref encryption) = self.encryption { + if let Some(ref encryption) = self.info.encryption { data = try!(self.crypto.lock().unwrap().decrypt(encryption.clone(), &data)); } - if let Some(ref compression) = self.compression { + if let Some(ref compression) = self.info.compression { data = try!(compression.decompress(&data)); } Ok(data) @@ -193,39 +184,39 @@ impl Bundle { #[inline] pub fn get_chunk_position(&self, id: usize) -> Result<(usize, usize), BundleError> { - if id >= self.chunk_count { + if id >= self.info.chunk_count { return Err("Invalid chunk id".into()) } - Ok((self.chunk_positions[id], self.chunk_sizes[id])) + Ok((self.chunk_positions[id], self.info.chunk_sizes[id])) } pub fn check(&self, full: bool) -> Result<(), BundleError> { - if self.chunk_count != self.chunk_sizes.len() { - return Err(BundleError::Integrity(self.id.clone(), + if self.info.chunk_count != self.info.chunk_sizes.len() { + return Err(BundleError::Integrity(self.id(), "Chunk list size does not match chunk count")) } - if self.chunk_sizes.iter().sum::() != self.raw_size { - return Err(BundleError::Integrity(self.id.clone(), + if self.info.chunk_sizes.iter().sum::() != self.info.raw_size { + return Err(BundleError::Integrity(self.id(), "Individual chunk sizes do not add up to total size")) } if !full { let size = try!(fs::metadata(&self.path) .map_err(|e| BundleError::Read(e, self.path.clone(), "Failed to get size of file")) ).len(); - if size as usize != self.encoded_size + self.content_start { - return Err(BundleError::Integrity(self.id.clone(), + if size as usize != self.info.encoded_size + self.content_start { + return Err(BundleError::Integrity(self.id(), "File size does not match size in header, truncated file")) } return Ok(()) } let encoded_contents = try!(self.load_encoded_contents()); - if self.encoded_size != encoded_contents.len() { - return Err(BundleError::Integrity(self.id.clone(), + if self.info.encoded_size != encoded_contents.len() { + return Err(BundleError::Integrity(self.id(), "Encoded data size does not match size in header, truncated bundle")) } let contents = try!(self.decode_contents(encoded_contents)); - if self.raw_size != contents.len() { - return Err(BundleError::Integrity(self.id.clone(), + if self.info.raw_size != contents.len() { + return Err(BundleError::Integrity(self.id(), "Raw data size does not match size in header, truncated bundle")) } //TODO: verify checksum @@ -236,7 +227,8 @@ impl Bundle { impl Debug for Bundle { fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { write!(fmt, "Bundle(\n\tid: {}\n\tpath: {:?}\n\tchunks: {}\n\tsize: {}, encoded: {}\n\tcompression: {:?}\n)", - self.id.to_string(), self.path, self.chunk_count, self.raw_size, self.encoded_size, self.compression) + self.info.id.to_string(), self.path, self.info.chunk_count, self.info.raw_size, + self.info.encoded_size, self.info.compression) } } @@ -306,7 +298,7 @@ impl BundleWriter { .map_err(|e| BundleError::Write(e, path.clone(), "Failed to write bundle header"))); try!(file.write_all(&[HEADER_VERSION]) .map_err(|e| BundleError::Write(e, path.clone(), "Failed to write bundle header"))); - let header = BundleHeader { + let header = BundleInfo { checksum: checksum, compression: self.compression, encryption: self.encryption, @@ -331,6 +323,11 @@ impl BundleWriter { pub fn size(&self) -> usize { self.data.len() } + + #[inline] + pub fn raw_size(&self) -> usize { + self.raw_size + } } @@ -361,15 +358,15 @@ impl BundleDb { fn bundle_path(&self, bundle: &BundleId) -> (PathBuf, PathBuf) { let mut folder = self.path.clone(); - let mut file = bundle.to_string() + ".bundle"; + let mut file = bundle.to_string()[0..32].to_owned() + ".bundle"; let mut count = self.bundles.len(); - while count >= 1000 { + while count >= 100 { if file.len() < 10 { break } - folder = folder.join(&file[0..3]); - file = file[3..].to_string(); - count /= 1000; + folder = folder.join(&file[0..2]); + file = file[2..].to_string(); + count /= 100; } (folder, file.into()) } @@ -386,7 +383,7 @@ impl BundleDb { paths.push(path); } else { let bundle = try!(Bundle::load(path, self.crypto.clone())); - self.bundles.insert(bundle.id.clone(), bundle); + self.bundles.insert(bundle.id(), bundle); } } } @@ -440,7 +437,7 @@ impl BundleDb { #[inline] pub fn add_bundle(&mut self, bundle: BundleWriter) -> Result<&Bundle, BundleError> { let bundle = try!(bundle.finish(&self)); - let id = bundle.id.clone(); + let id = bundle.id(); self.bundles.insert(id.clone(), bundle); Ok(self.get_bundle(&id).unwrap()) } @@ -458,7 +455,7 @@ impl BundleDb { #[inline] pub fn delete_bundle(&mut self, bundle: &BundleId) -> Result<(), BundleError> { if let Some(bundle) = self.bundles.remove(bundle) { - fs::remove_file(&bundle.path).map_err(|e| BundleError::Remove(e, bundle.id.clone())) + fs::remove_file(&bundle.path).map_err(|e| BundleError::Remove(e, bundle.id())) } else { Err("No such bundle".into()) } diff --git a/src/main.rs b/src/main.rs index 9349e52..d61ecbd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -20,7 +20,7 @@ mod algotest; use chunker::ChunkerType; use repository::{Repository, Config, Mode}; -use util::{ChecksumType, Compression, HashMethod}; +use util::{ChecksumType, Compression, HashMethod, to_file_size}; use std::fs::File; use std::io::Read; @@ -32,8 +32,10 @@ use docopt::Docopt; static USAGE: &'static str = " Usage: zvault init - zvault algotest + zvault info + zvault bundles zvault check [--full] + zvault algotest zvault test Options: @@ -48,9 +50,11 @@ Options: #[derive(RustcDecodable, Debug)] struct Args { cmd_init: bool, + cmd_info: bool, cmd_algotest: bool, cmd_test: bool, cmd_check: bool, + cmd_bundles: bool, arg_repo: Option, arg_path: Option, flag_full: bool, @@ -63,7 +67,7 @@ struct Args { fn main() { let args: Args = Docopt::new(USAGE).and_then(|d| d.decode()).unwrap_or_else(|e| e.exit()); - println!("{:?}", args); + //println!("{:?}", args); if args.cmd_algotest { algotest::run(&args.arg_path.unwrap()); @@ -71,7 +75,7 @@ fn main() { } if args.cmd_init { - let chunker = ChunkerType::from(&args.flag_chunker, args.flag_chunk_size, 0).expect("No such chunk algorithm"); + let chunker = ChunkerType::from(&args.flag_chunker, args.flag_chunk_size*1024, 0).expect("No such chunk algorithm"); let compression = if args.flag_compression == "none" { None } else { @@ -94,6 +98,35 @@ fn main() { return } + if args.cmd_info { + let info = repo.info(); + println!("Bundles: {}", info.bundle_count); + println!("Total size: {}", to_file_size(info.encoded_data_size)); + println!("Uncompressed size: {}", to_file_size(info.raw_data_size)); + println!("Compression ratio: {:.1}", info.compression_ratio * 100.0); + println!("Chunk count: {}", info.chunk_count); + println!("Average chunk size: {}", to_file_size(info.avg_chunk_size as u64)); + return + } + + if args.cmd_bundles { + for bundle in repo.list_bundles() { + println!("Bundle {}", bundle.id); + println!(" - Chunks: {}", bundle.chunk_count); + println!(" - Size: {}", to_file_size(bundle.encoded_size as u64)); + println!(" - Data size: {}", to_file_size(bundle.raw_size as u64)); + let ratio = bundle.encoded_size as f32 / bundle.raw_size as f32; + let compression = if let Some(ref c) = bundle.compression { + c.to_string() + } else { + "none".to_string() + }; + println!(" - Compression: {}, ratio: {:.1}%", compression, ratio * 100.0); + println!(); + } + return + } + if args.cmd_test { print!("Integrity check before..."); repo.check(true).unwrap(); diff --git a/src/repository/basic_io.rs b/src/repository/basic_io.rs index 40ac38b..06ca321 100644 --- a/src/repository/basic_io.rs +++ b/src/repository/basic_io.rs @@ -2,7 +2,6 @@ use std::mem; use std::io::{Read, Write, Cursor}; use super::{Repository, Mode}; -use super::bundle_map::BundleInfo; use ::index::Location; use ::util::Hash; @@ -19,7 +18,7 @@ impl Repository { }; // Lookup bundle id from map let bundle_id = if let Some(bundle_info) = self.bundle_map.get(found.bundle) { - bundle_info.id.clone() + bundle_info.id() } else { return Err("Bundle id not found in map") }; @@ -50,23 +49,24 @@ impl Repository { debug_assert!(writer.is_some()); let chunk_id; let size; + let raw_size; { // Add chunk to bundle writer and determine the size of the bundle let writer_obj = writer.as_mut().unwrap(); chunk_id = try!(writer_obj.add(data).map_err(|_| "Failed to write chunk")); size = writer_obj.size(); + raw_size = writer_obj.raw_size(); } let bundle_id = match mode { Mode::Content => self.next_content_bundle, Mode::Meta => self.next_meta_bundle }; // Finish bundle if over maximum size - if size >= self.config.bundle_size { + if size >= self.config.bundle_size || raw_size >= 4 * self.config.bundle_size { let mut finished = None; mem::swap(writer, &mut finished); let bundle = try!(self.bundles.add_bundle(finished.unwrap()).map_err(|_| "Failed to write finished bundle")); - let bundle_info = BundleInfo{id: bundle.id.clone()}; - self.bundle_map.set(bundle_id, bundle_info); + self.bundle_map.set(bundle_id, bundle); if self.next_meta_bundle == bundle_id { self.next_meta_bundle = next_free_bundle_id } diff --git a/src/repository/bundle_map.rs b/src/repository/bundle_map.rs index afd103c..bbc6738 100644 --- a/src/repository/bundle_map.rs +++ b/src/repository/bundle_map.rs @@ -7,7 +7,7 @@ use rmp_serde; use serde::Deserialize; use serde::Serialize; -use ::bundle::BundleId; +use ::bundle::{Bundle, BundleId, BundleInfo}; static HEADER_STRING: [u8; 7] = *b"zbunmap"; @@ -15,15 +15,22 @@ static HEADER_VERSION: u8 = 1; #[derive(Default)] -pub struct BundleInfo { - pub id: BundleId +pub struct BundleData { + pub info: BundleInfo } -serde_impl!(BundleInfo(u64) { - id: BundleId => 0 +serde_impl!(BundleData(u64) { + info: BundleInfo => 0 }); +impl BundleData { + #[inline] + pub fn id(&self) -> BundleId { + self.info.id.clone() + } +} -pub struct BundleMap(HashMap); + +pub struct BundleMap(HashMap); impl BundleMap { pub fn create() -> Self { @@ -63,12 +70,18 @@ impl BundleMap { } #[inline] - pub fn get(&self, id: u32) -> Option<&BundleInfo> { + pub fn get(&self, id: u32) -> Option<&BundleData> { self.0.get(&id) } #[inline] - pub fn set(&mut self, id: u32, info: BundleInfo) { - self.0.insert(id, info); + pub fn set(&mut self, id: u32, bundle: &Bundle) { + let data = BundleData { info: bundle.info.clone() }; + self.0.insert(id, data); + } + + #[inline] + pub fn bundles(&self) -> Vec<&BundleData> { + self.0.values().collect() } } diff --git a/src/repository/info.rs b/src/repository/info.rs new file mode 100644 index 0000000..423ed7d --- /dev/null +++ b/src/repository/info.rs @@ -0,0 +1,34 @@ +use super::Repository; +use ::bundle::BundleInfo; + +pub struct RepositoryInfo { + pub bundle_count: usize, + pub encoded_data_size: u64, + pub raw_data_size: u64, + pub compression_ratio: f32, + pub chunk_count: usize, + pub avg_chunk_size: f32 +} + + +impl Repository { + #[inline] + pub fn list_bundles(&self) -> Vec<&BundleInfo> { + self.bundle_map.bundles().iter().map(|b| &b.info).collect() + } + + pub fn info(&self) -> RepositoryInfo { + let bundles = self.list_bundles(); + let encoded_data_size = bundles.iter().map(|b| b.encoded_size as u64).sum(); + let raw_data_size = bundles.iter().map(|b| b.raw_size as u64).sum(); + let chunk_count = bundles.iter().map(|b| b.chunk_count).sum(); + RepositoryInfo { + bundle_count: bundles.len(), + chunk_count: chunk_count, + encoded_data_size: encoded_data_size, + raw_data_size: raw_data_size, + compression_ratio: encoded_data_size as f32 / raw_data_size as f32, + avg_chunk_size: raw_data_size as f32 / chunk_count as f32 + } + } +} diff --git a/src/repository/integrity.rs b/src/repository/integrity.rs index 668c3b0..d68cb3c 100644 --- a/src/repository/integrity.rs +++ b/src/repository/integrity.rs @@ -13,7 +13,7 @@ impl Repository { }; // Lookup bundle id from map let bundle_id = if let Some(bundle_info) = self.bundle_map.get(found.bundle) { - bundle_info.id.clone() + bundle_info.id() } else { return Err("Bundle id not found in map") }; @@ -24,7 +24,7 @@ impl Repository { return Err("Bundle not found in bundledb") }; // Get chunk from bundle - if bundle.chunk_count > found.chunk as usize { + if bundle.info.chunk_count > found.chunk as usize { Ok(()) } else { Err("Bundle does not contain that chunk") diff --git a/src/repository/mod.rs b/src/repository/mod.rs index 8f50284..997783c 100644 --- a/src/repository/mod.rs +++ b/src/repository/mod.rs @@ -2,6 +2,7 @@ mod config; mod bundle_map; mod integrity; mod basic_io; +mod info; use std::mem; use std::cmp::max; @@ -13,7 +14,7 @@ use super::bundle::{BundleDb, BundleWriter}; use super::chunker::Chunker; pub use self::config::Config; -use self::bundle_map::{BundleMap, BundleInfo}; +use self::bundle_map::BundleMap; #[derive(Eq, Debug, PartialEq, Clone, Copy)] @@ -111,8 +112,7 @@ impl Repository { mem::swap(&mut self.content_bundle, &mut finished); { let bundle = try!(self.bundles.add_bundle(finished.unwrap()).map_err(|_| "Failed to write finished bundle")); - let bundle_info = BundleInfo{id: bundle.id.clone()}; - self.bundle_map.set(self.next_content_bundle, bundle_info); + self.bundle_map.set(self.next_content_bundle, bundle); } self.next_content_bundle = self.next_free_bundle_id() } @@ -121,8 +121,7 @@ impl Repository { mem::swap(&mut self.meta_bundle, &mut finished); { let bundle = try!(self.bundles.add_bundle(finished.unwrap()).map_err(|_| "Failed to write finished bundle")); - let bundle_info = BundleInfo{id: bundle.id.clone()}; - self.bundle_map.set(self.next_meta_bundle, bundle_info); + self.bundle_map.set(self.next_meta_bundle, bundle); } self.next_meta_bundle = self.next_free_bundle_id() } diff --git a/src/util/compression.rs b/src/util/compression.rs index b6fc18a..4ac5154 100644 --- a/src/util/compression.rs +++ b/src/util/compression.rs @@ -35,7 +35,7 @@ impl Compression { #[inline] pub fn from_string(name: &str) -> Result { - let (name, level) = if let Some(pos) = name.find("/") { + let (name, level) = if let Some(pos) = name.find('/') { let level = try!(u8::from_str(&name[pos+1..]).map_err(|_| "Level must be a number")); let name = &name[..pos]; (name, level) diff --git a/src/util/mod.rs b/src/util/mod.rs index c40881e..0014cda 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -9,3 +9,28 @@ pub use self::compression::*; pub use self::encryption::*; pub use self::hash::*; pub use self::lru_cache::*; + +pub fn to_file_size(size: u64) -> String { + let mut size = size as f32; + if size >= 512.0 { + size /= 1024.0; + } else { + return format!("{:.0} Bytes", size); + } + if size >= 512.0 { + size /= 1024.0; + } else { + return format!("{:.1} KiB", size); + } + if size >= 512.0 { + size /= 1024.0; + } else { + return format!("{:.1} MiB", size); + } + if size >= 512.0 { + size /= 1024.0; + } else { + return format!("{:.1} GiB", size); + } + format!("{:.1} TiB", size) +}