Commit d6b8e00718 by Dennis Schwerdel, 2017-03-15 08:27:27 +01:00 (parent 6aa35bcdb5)
13 changed files with 313 additions and 147 deletions

.gitignore

@@ -1,4 +1,4 @@
target
squash
test.tar
test_data
test_*

Algotest.txt (new file)

@@ -0,0 +1,133 @@
Algorithm comparison on file test_data/ubuntu-16.04.1.tar
Reading input file... done. 183869440 bytes
Chunker algorithms
Chunk size: 4 KiB
AE: avg chunk size 4424.2 ± 1270.2 bytes, 3.4% saved, speed 713.7 MB/s
Rabin: avg chunk size 5091.2 ± 3985.7 bytes, 4.1% saved, speed 214.1 MB/s
FastCdc: avg chunk size 4881.7 ± 2044.4 bytes, 3.4% saved, speed 485.2 MB/s
Chunk size: 8 KiB
AE: avg chunk size 8924.8 ± 2215.3 bytes, 2.6% saved, speed 721.6 MB/s
Rabin: avg chunk size 10406.9 ± 7996.2 bytes, 3.0% saved, speed 215.6 MB/s
FastCdc: avg chunk size 9704.4 ± 3740.6 bytes, 2.9% saved, speed 481.8 MB/s
Chunk size: 16 KiB
AE: avg chunk size 17828.9 ± 4205.5 bytes, 2.0% saved, speed 719.3 MB/s
Rabin: avg chunk size 21402.6 ± 16330.5 bytes, 2.1% saved, speed 215.5 MB/s
FastCdc: avg chunk size 18938.0 ± 6924.2 bytes, 2.2% saved, speed 502.7 MB/s
Chunk size: 32 KiB
AE: avg chunk size 35530.3 ± 8760.8 bytes, 1.2% saved, speed 711.5 MB/s
Rabin: avg chunk size 42601.8 ± 32150.9 bytes, 1.2% saved, speed 213.5 MB/s
FastCdc: avg chunk size 41356.1 ± 20308.1 bytes, 1.2% saved, speed 443.1 MB/s
Chunk size: 64 KiB
AE: avg chunk size 70746.2 ± 16942.6 bytes, 0.9% saved, speed 715.8 MB/s
Rabin: avg chunk size 83729.3 ± 63880.7 bytes, 1.2% saved, speed 199.6 MB/s
FastCdc: avg chunk size 77811.9 ± 29836.2 bytes, 0.8% saved, speed 492.3 MB/s
Hash algorithms
Blake2: 536.4 MB/s
Murmur3: 3771.8 MB/s
Compression algorithms
Snappy: ratio: 63.9%, compress: 417.8 MB/s, decompress: 839.9 MB/s
ZStd/1: ratio: 48.3%, compress: 206.7 MB/s, decompress: 320.4 MB/s
ZStd/2: ratio: 45.7%, compress: 147.5 MB/s, decompress: 346.0 MB/s
ZStd/3: ratio: 43.4%, compress: 89.9 MB/s, decompress: 342.2 MB/s
ZStd/4: ratio: 41.9%, compress: 79.3 MB/s, decompress: 350.4 MB/s
ZStd/5: ratio: 42.3%, compress: 55.8 MB/s, decompress: 268.3 MB/s
ZStd/6: ratio: 41.5%, compress: 42.9 MB/s, decompress: 310.0 MB/s
ZStd/7: ratio: 39.7%, compress: 33.3 MB/s, decompress: 313.7 MB/s
ZStd/8: ratio: 39.4%, compress: 26.3 MB/s, decompress: 281.4 MB/s
ZStd/9: ratio: 39.0%, compress: 17.5 MB/s, decompress: 308.5 MB/s
Deflate/1: ratio: 48.4%, compress: 83.9 MB/s, decompress: 306.9 MB/s
Deflate/2: ratio: 48.0%, compress: 81.8 MB/s, decompress: 311.2 MB/s
Deflate/3: ratio: 47.7%, compress: 76.7 MB/s, decompress: 314.5 MB/s
Deflate/4: ratio: 47.6%, compress: 74.9 MB/s, decompress: 317.5 MB/s
Deflate/5: ratio: 46.9%, compress: 63.6 MB/s, decompress: 319.8 MB/s
Deflate/6: ratio: 46.8%, compress: 55.2 MB/s, decompress: 309.0 MB/s
Deflate/7: ratio: 46.7%, compress: 50.8 MB/s, decompress: 326.3 MB/s
Deflate/8: ratio: 45.8%, compress: 18.4 MB/s, decompress: 301.8 MB/s
Deflate/9: ratio: 45.4%, compress: 13.3 MB/s, decompress: 278.0 MB/s
Brotli/1: ratio: 45.7%, compress: 141.6 MB/s, decompress: 181.3 MB/s
Brotli/2: ratio: 43.6%, compress: 90.8 MB/s, decompress: 195.0 MB/s
Brotli/3: ratio: 43.5%, compress: 68.6 MB/s, decompress: 197.0 MB/s
Brotli/4: ratio: 41.9%, compress: 44.7 MB/s, decompress: 201.2 MB/s
Brotli/5: ratio: 39.5%, compress: 17.5 MB/s, decompress: 194.0 MB/s
Brotli/6: ratio: 38.4%, compress: 12.2 MB/s, decompress: 196.4 MB/s
Brotli/7: ratio: 37.2%, compress: 6.0 MB/s, decompress: 203.3 MB/s
Brotli/8: ratio: 37.0%, compress: 4.2 MB/s, decompress: 204.2 MB/s
Brotli/9: ratio: 36.9%, compress: 3.2 MB/s, decompress: 205.8 MB/s
Lzma2/1: ratio: 37.7%, compress: 8.1 MB/s, decompress: 38.0 MB/s
Lzma2/2: ratio: 36.3%, compress: 5.6 MB/s, decompress: 40.7 MB/s
Lzma2/3: ratio: 35.5%, compress: 3.5 MB/s, decompress: 42.2 MB/s
Lzma2/4: ratio: 33.9%, compress: 2.9 MB/s, decompress: 43.1 MB/s
Lzma2/5: ratio: 33.2%, compress: 2.4 MB/s, decompress: 43.4 MB/s
Lzma2/6: ratio: 33.1%, compress: 2.1 MB/s, decompress: 43.9 MB/s
Algorithm comparison on file test_data/silesia.tar
Reading input file... done. 211957760 bytes
Chunker algorithms
Chunk size: 4 KiB
AE: avg chunk size 4779.0 ± 1348.1 bytes, 0.3% saved, speed 715.3 MB/s
Rabin: avg chunk size 5081.6 ± 3845.5 bytes, 0.7% saved, speed 213.1 MB/s
FastCdc: avg chunk size 4829.0 ± 1560.8 bytes, 0.2% saved, speed 488.8 MB/s
Chunk size: 8 KiB
AE: avg chunk size 9634.0 ± 2548.9 bytes, 0.0% saved, speed 697.2 MB/s
Rabin: avg chunk size 10137.6 ± 7591.8 bytes, 0.3% saved, speed 213.7 MB/s
FastCdc: avg chunk size 9609.1 ± 2957.7 bytes, 0.1% saved, speed 482.2 MB/s
Chunk size: 16 KiB
AE: avg chunk size 18849.1 ± 4654.1 bytes, 0.0% saved, speed 719.9 MB/s
Rabin: avg chunk size 20443.5 ± 15211.9 bytes, 0.1% saved, speed 214.8 MB/s
FastCdc: avg chunk size 19309.3 ± 6274.3 bytes, 0.0% saved, speed 501.9 MB/s
Chunk size: 32 KiB
AE: avg chunk size 36677.2 ± 7905.2 bytes, 0.0% saved, speed 726.4 MB/s
Rabin: avg chunk size 41814.5 ± 31117.0 bytes, 0.0% saved, speed 213.3 MB/s
FastCdc: avg chunk size 44463.6 ± 26128.5 bytes, 0.0% saved, speed 469.7 MB/s
Chunk size: 64 KiB
AE: avg chunk size 71923.2 ± 14350.5 bytes, 0.0% saved, speed 724.0 MB/s
Rabin: avg chunk size 81742.3 ± 60106.9 bytes, 0.0% saved, speed 213.5 MB/s
FastCdc: avg chunk size 79384.9 ± 31116.6 bytes, 0.0% saved, speed 490.2 MB/s
Hash algorithms
Blake2: 521.9 MB/s
Murmur3: 3763.0 MB/s
Compression algorithms
Snappy: ratio: 47.8%, compress: 407.1 MB/s, decompress: 760.6 MB/s
ZStd/1: ratio: 34.8%, compress: 189.0 MB/s, decompress: 314.6 MB/s
ZStd/2: ratio: 33.1%, compress: 149.1 MB/s, decompress: 327.1 MB/s
ZStd/3: ratio: 32.2%, compress: 122.4 MB/s, decompress: 326.3 MB/s
ZStd/4: ratio: 31.9%, compress: 99.9 MB/s, decompress: 318.4 MB/s
ZStd/5: ratio: 30.9%, compress: 66.8 MB/s, decompress: 317.1 MB/s
ZStd/6: ratio: 30.0%, compress: 52.4 MB/s, decompress: 324.8 MB/s
ZStd/7: ratio: 29.3%, compress: 39.2 MB/s, decompress: 333.7 MB/s
ZStd/8: ratio: 28.9%, compress: 29.3 MB/s, decompress: 338.9 MB/s
ZStd/9: ratio: 28.5%, compress: 23.0 MB/s, decompress: 349.5 MB/s
Deflate/1: ratio: 34.7%, compress: 106.5 MB/s, decompress: 418.2 MB/s
Deflate/2: ratio: 33.9%, compress: 96.6 MB/s, decompress: 425.4 MB/s
Deflate/3: ratio: 33.5%, compress: 90.6 MB/s, decompress: 442.4 MB/s
Deflate/4: ratio: 33.1%, compress: 83.2 MB/s, decompress: 438.6 MB/s
Deflate/5: ratio: 32.4%, compress: 69.6 MB/s, decompress: 452.2 MB/s
Deflate/6: ratio: 32.1%, compress: 57.5 MB/s, decompress: 455.8 MB/s
Deflate/7: ratio: 32.0%, compress: 51.3 MB/s, decompress: 457.0 MB/s
Deflate/8: ratio: 31.8%, compress: 14.8 MB/s, decompress: 409.7 MB/s
Deflate/9: ratio: 31.2%, compress: 10.7 MB/s, decompress: 406.9 MB/s
Brotli/1: ratio: 34.3%, compress: 137.2 MB/s, decompress: 214.6 MB/s
Brotli/2: ratio: 32.1%, compress: 88.9 MB/s, decompress: 233.6 MB/s
Brotli/3: ratio: 31.8%, compress: 68.7 MB/s, decompress: 242.2 MB/s
Brotli/4: ratio: 30.9%, compress: 45.5 MB/s, decompress: 254.2 MB/s
Brotli/5: ratio: 28.7%, compress: 22.0 MB/s, decompress: 259.2 MB/s
Brotli/6: ratio: 28.1%, compress: 15.8 MB/s, decompress: 267.8 MB/s
Brotli/7: ratio: 27.4%, compress: 9.1 MB/s, decompress: 274.4 MB/s
Brotli/8: ratio: 27.1%, compress: 6.4 MB/s, decompress: 245.7 MB/s
Brotli/9: ratio: 26.8%, compress: 4.7 MB/s, decompress: 276.0 MB/s
Lzma2/1: ratio: 27.6%, compress: 11.3 MB/s, decompress: 52.5 MB/s
Lzma2/2: ratio: 26.8%, compress: 7.6 MB/s, decompress: 54.6 MB/s
Lzma2/3: ratio: 26.3%, compress: 5.0 MB/s, decompress: 56.4 MB/s
Lzma2/4: ratio: 24.6%, compress: 3.2 MB/s, decompress: 56.8 MB/s
Lzma2/5: ratio: 23.6%, compress: 2.4 MB/s, decompress: 59.0 MB/s
Lzma2/6: ratio: 23.2%, compress: 2.0 MB/s, decompress: 59.3 MB/s
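These numbers can be sanity-checked directly: per the compare_compression code later in this commit, ratio is compressed size over original size and speed is bytes over wall-clock seconds. A small worked example using the ZStd/1 line from the ubuntu run above (illustrative arithmetic only):

// Worked check of one line above: ZStd/1 at 206.7 MB/s over the
// 183869440-byte input takes about 0.89 s per pass, and a 48.3% ratio
// means roughly 88.8 MB of compressed output.
fn main() {
    let input_bytes = 183_869_440_f64;
    let compress_speed = 206.7e6; // bytes per second, as reported above
    let ratio = 0.483; // compressed size / original size
    println!("compress time: {:.2} s", input_bytes / compress_speed);
    println!("output size:   {:.1} MB", ratio * input_bytes / 1e6);
}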

(deleted file: earlier algorithm comparison output)

@@ -1,68 +0,0 @@
~/shared
Algorithm comparison on file test.tar
Reading input file... done. 2175416320 bytes
Chunker algorithms
Chunk size: 4 KiB
AE: avg chunk size 2756.5 ± 543.6 bytes, 12.1% saved, speed 748.9 MB/s
Rabin: avg chunk size 4902.3 ± 3826.2 bytes, 11.7% saved, speed 336.7 MB/s
FastCdc: avg chunk size 4783.3 ± 1940.5 bytes, 12.1% saved, speed 544.1 MB/s
Chunk size: 8 KiB
AE: avg chunk size 5245.1 ± 890.8 bytes, 10.0% saved, speed 756.3 MB/s
Rabin: avg chunk size 9774.2 ± 7636.0 bytes, 10.3% saved, speed 344.9 MB/s
FastCdc: avg chunk size 9583.2 ± 3933.2 bytes, 10.7% saved, speed 541.6 MB/s
Chunk size: 16 KiB
AE: avg chunk size 10169.5 ± 1485.8 bytes, 7.4% saved, speed 781.5 MB/s
Rabin: avg chunk size 19641.7 ± 15292.5 bytes, 9.0% saved, speed 345.9 MB/s
FastCdc: avg chunk size 19262.9 ± 7697.4 bytes, 9.0% saved, speed 548.1 MB/s
Chunk size: 32 KiB
AE: avg chunk size 20004.6 ± 2705.6 bytes, 5.6% saved, speed 787.0 MB/s
Rabin: avg chunk size 38963.6 ± 30218.2 bytes, 7.6% saved, speed 345.7 MB/s
FastCdc: avg chunk size 39159.3 ± 16834.6 bytes, 7.7% saved, speed 547.1 MB/s
Chunk size: 64 KiB
AE: avg chunk size 39627.2 ± 5310.6 bytes, 3.8% saved, speed 788.2 MB/s
Rabin: avg chunk size 78339.7 ± 60963.7 bytes, 6.4% saved, speed 345.6 MB/s
FastCdc: avg chunk size 76981.4 ± 30784.6 bytes, 6.1% saved, speed 548.4 MB/s
Hash algorithms
Blake2: 724.2 MB/s
Murmur3: 5358.3 MB/s
Compression algorithms
Snappy: ratio: 83.6%, compress: 301.7 MB/s, decompress: 876.2 MB/s
fatal runtime error: out of memory
ZStd/1: ratio: 77.2%, compress: 493.9 MB/s, decompress: 0.0 MB/s
ZStd/2: ratio: 76.7%, compress: 420.6 MB/s, decompress: 0.0 MB/s
ZStd/3: ratio: 75.4%, compress: 314.6 MB/s, decompress: 0.0 MB/s
ZStd/4: ratio: 75.3%, compress: 273.0 MB/s, decompress: 0.0 MB/s
ZStd/5: ratio: 74.9%, compress: 131.4 MB/s, decompress: 0.0 MB/s
ZStd/6: ratio: 73.6%, compress: 121.4 MB/s, decompress: 0.0 MB/s
ZStd/7: ratio: 73.5%, compress: 88.7 MB/s, decompress: 0.0 MB/s
ZStd/8: ratio: 73.4%, compress: 76.8 MB/s, decompress: 0.0 MB/s
ZStd/9: ratio: 73.3%, compress: 51.8 MB/s, decompress: 0.0 MB/s
Deflate/1: ratio: 78.3%, compress: 95.7 MB/s, decompress: 0.0 MB/s
Deflate/2: ratio: 78.2%, compress: 94.7 MB/s, decompress: 0.0 MB/s
Deflate/3: ratio: 78.1%, compress: 92.5 MB/s, decompress: 0.0 MB/s
Deflate/4: ratio: 78.0%, compress: 87.9 MB/s, decompress: 0.0 MB/s
Deflate/5: ratio: 77.8%, compress: 86.5 MB/s, decompress: 0.0 MB/s
Deflate/6: ratio: 77.7%, compress: 83.8 MB/s, decompress: 0.0 MB/s
Deflate/7: ratio: 77.7%, compress: 73.4 MB/s, decompress: 0.0 MB/s
Deflate/8: ratio: 77.6%, compress: 31.6 MB/s, decompress: 0.0 MB/s
Deflate/9: ratio: 77.4%, compress: 25.8 MB/s, decompress: 0.0 MB/s
Brotli/1: ratio: 77.6%, compress: 433.1 MB/s, decompress: 0.0 MB/s
Brotli/2: ratio: 75.4%, compress: 242.2 MB/s, decompress: 0.0 MB/s
Brotli/3: ratio: 75.3%, compress: 195.5 MB/s, decompress: 0.0 MB/s
Brotli/4: ratio: 72.4%, compress: 81.6 MB/s, decompress: 0.0 MB/s
Brotli/5: ratio: 73.9%, compress: 62.4 MB/s, decompress: 0.0 MB/s
Brotli/6: ratio: 72.9%, compress: 46.6 MB/s, decompress: 0.0 MB/s
Brotli/7: ratio: 71.5%, compress: 23.4 MB/s, decompress: 0.0 MB/s
Brotli/8: ratio: 71.5%, compress: 20.7 MB/s, decompress: 0.0 MB/s
Brotli/9: ratio: 71.2%, compress: 11.2 MB/s, decompress: 0.0 MB/s
Lzma2/1: ratio: 69.8%, compress: 4.2 MB/s, decompress: 0.0 MB/s

src/algotest.rs

@@ -71,14 +71,14 @@ fn compare_compression(name: &str, method: Compression, data: &[u8]) {
let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0;
let cspeed = data.len() as f64 / duration;
let ratio = compressed.len() as f64 / data.len() as f64;
/*let start = time::Instant::now();
let start = time::Instant::now();
let uncompressed = method.decompress(&compressed).unwrap();
if uncompressed != data {
panic!("{} did not uncompress to the same value", name);
}
let elapsed = start.elapsed();
let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0;*/
let dspeed = 0.0;//data.len() as f64 / duration;
let duration = elapsed.as_secs() as f64 * 1.0 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0;
let dspeed = data.len() as f64 / duration;
println!("{}:\tratio: {:.1}%,\tcompress: {:.1} MB/s,\tdecompress: {:.1} MB/s",
name, ratio * 100.0, cspeed / 1_000_000.0, dspeed / 1_000_000.0);
}
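This hunk re-enables the decompression timing that had been commented out; the deleted results file above shows decompression reported as 0.0 MB/s for that reason. The measurement idiom is plain std::time; a minimal self-contained sketch of the same pattern (not the project's exact helper):

use std::time;

// Time a byte-producing closure and return its output plus throughput in
// bytes per second, mirroring the Instant/elapsed arithmetic used above.
fn measure<F: FnOnce() -> Vec<u8>>(data_len: usize, f: F) -> (Vec<u8>, f64) {
    let start = time::Instant::now();
    let out = f();
    let elapsed = start.elapsed();
    let secs = elapsed.as_secs() as f64 + elapsed.subsec_nanos() as f64 / 1_000_000_000.0;
    (out, data_len as f64 / secs)
}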

src/bundle.rs

@@ -65,7 +65,7 @@ impl fmt::Debug for BundleId
#[derive(Clone)]
pub struct BundleHeader {
pub struct BundleInfo {
pub id: BundleId,
pub compression: Option<Compression>,
pub encryption: Option<Encryption>,
@@ -75,7 +75,7 @@ pub struct BundleHeader {
pub chunk_count: usize,
pub chunk_sizes: Vec<usize>
}
serde_impl!(BundleHeader(u64) {
serde_impl!(BundleInfo(u64) {
id: BundleId => 0,
compression: Option<Compression> => 1,
encryption: Option<Encryption> => 2,
@@ -86,9 +86,9 @@ serde_impl!(BundleHeader(u64) {
chunk_sizes: Vec<usize> => 7
});
impl Default for BundleHeader {
impl Default for BundleInfo {
fn default() -> Self {
BundleHeader {
BundleInfo {
id: BundleId(vec![]),
compression: None,
encryption: None,
@@ -103,46 +103,37 @@ impl Default for BundleHeader {
pub struct Bundle {
pub id: BundleId,
pub info: BundleInfo,
pub version: u8,
pub path: PathBuf,
crypto: Arc<Mutex<Crypto>>,
pub compression: Option<Compression>,
pub encryption: Option<Encryption>,
pub raw_size: usize,
pub encoded_size: usize,
pub checksum: Checksum,
pub content_start: usize,
pub chunk_count: usize,
pub chunk_sizes: Vec<usize>,
pub chunk_positions: Vec<usize>
}
impl Bundle {
fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc<Mutex<Crypto>>, header: BundleHeader) -> Self {
let mut chunk_positions = Vec::with_capacity(header.chunk_sizes.len());
fn new(path: PathBuf, version: u8, content_start: usize, crypto: Arc<Mutex<Crypto>>, info: BundleInfo) -> Self {
let mut chunk_positions = Vec::with_capacity(info.chunk_sizes.len());
let mut pos = 0;
for len in &header.chunk_sizes {
for len in &info.chunk_sizes {
chunk_positions.push(pos);
pos += *len;
}
Bundle {
id: header.id,
info: info,
version: version,
path: path,
crypto: crypto,
compression: header.compression,
encryption: header.encryption,
raw_size: header.raw_size,
encoded_size: header.encoded_size,
chunk_count: header.chunk_count,
checksum: header.checksum,
content_start: content_start,
chunk_sizes: header.chunk_sizes,
chunk_positions: chunk_positions
}
}
#[inline]
pub fn id(&self) -> BundleId {
self.info.id.clone()
}
pub fn load(path: PathBuf, crypto: Arc<Mutex<Crypto>>) -> Result<Self, BundleError> {
let mut file = BufReader::new(try!(File::open(&path)
.map_err(|e| BundleError::Read(e, path.clone(), "Failed to open bundle file"))));
@@ -157,7 +148,7 @@ impl Bundle {
return Err(BundleError::Format(path.clone(), "Unsupported bundle file version"))
}
let mut reader = rmp_serde::Deserializer::new(file);
let header = try!(BundleHeader::deserialize(&mut reader)
let header = try!(BundleInfo::deserialize(&mut reader)
.map_err(|e| BundleError::Decode(e, path.clone())));
file = reader.into_inner();
let content_start = file.seek(SeekFrom::Current(0)).unwrap() as usize;
@@ -170,17 +161,17 @@ impl Bundle {
.map_err(|e| BundleError::Read(e, self.path.clone(), "Failed to open bundle file"))));
try!(file.seek(SeekFrom::Start(self.content_start as u64))
.map_err(|e| BundleError::Read(e, self.path.clone(), "Failed to seek to data")));
let mut data = Vec::with_capacity(max(self.encoded_size, self.raw_size)+1024);
let mut data = Vec::with_capacity(max(self.info.encoded_size, self.info.raw_size)+1024);
try!(file.read_to_end(&mut data).map_err(|_| "Failed to read data"));
Ok(data)
}
#[inline]
fn decode_contents(&self, mut data: Vec<u8>) -> Result<Vec<u8>, BundleError> {
if let Some(ref encryption) = self.encryption {
if let Some(ref encryption) = self.info.encryption {
data = try!(self.crypto.lock().unwrap().decrypt(encryption.clone(), &data));
}
if let Some(ref compression) = self.compression {
if let Some(ref compression) = self.info.compression {
data = try!(compression.decompress(&data));
}
Ok(data)
@@ -193,39 +184,39 @@ impl Bundle {
#[inline]
pub fn get_chunk_position(&self, id: usize) -> Result<(usize, usize), BundleError> {
if id >= self.chunk_count {
if id >= self.info.chunk_count {
return Err("Invalid chunk id".into())
}
Ok((self.chunk_positions[id], self.chunk_sizes[id]))
Ok((self.chunk_positions[id], self.info.chunk_sizes[id]))
}
pub fn check(&self, full: bool) -> Result<(), BundleError> {
if self.chunk_count != self.chunk_sizes.len() {
return Err(BundleError::Integrity(self.id.clone(),
if self.info.chunk_count != self.info.chunk_sizes.len() {
return Err(BundleError::Integrity(self.id(),
"Chunk list size does not match chunk count"))
}
if self.chunk_sizes.iter().sum::<usize>() != self.raw_size {
return Err(BundleError::Integrity(self.id.clone(),
if self.info.chunk_sizes.iter().sum::<usize>() != self.info.raw_size {
return Err(BundleError::Integrity(self.id(),
"Individual chunk sizes do not add up to total size"))
}
if !full {
let size = try!(fs::metadata(&self.path)
.map_err(|e| BundleError::Read(e, self.path.clone(), "Failed to get size of file"))
).len();
if size as usize != self.encoded_size + self.content_start {
return Err(BundleError::Integrity(self.id.clone(),
if size as usize != self.info.encoded_size + self.content_start {
return Err(BundleError::Integrity(self.id(),
"File size does not match size in header, truncated file"))
}
return Ok(())
}
let encoded_contents = try!(self.load_encoded_contents());
if self.encoded_size != encoded_contents.len() {
return Err(BundleError::Integrity(self.id.clone(),
if self.info.encoded_size != encoded_contents.len() {
return Err(BundleError::Integrity(self.id(),
"Encoded data size does not match size in header, truncated bundle"))
}
let contents = try!(self.decode_contents(encoded_contents));
if self.raw_size != contents.len() {
return Err(BundleError::Integrity(self.id.clone(),
if self.info.raw_size != contents.len() {
return Err(BundleError::Integrity(self.id(),
"Raw data size does not match size in header, truncated bundle"))
}
//TODO: verify checksum
@@ -236,7 +227,8 @@ impl Bundle {
impl Debug for Bundle {
fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> {
write!(fmt, "Bundle(\n\tid: {}\n\tpath: {:?}\n\tchunks: {}\n\tsize: {}, encoded: {}\n\tcompression: {:?}\n)",
self.id.to_string(), self.path, self.chunk_count, self.raw_size, self.encoded_size, self.compression)
self.info.id.to_string(), self.path, self.info.chunk_count, self.info.raw_size,
self.info.encoded_size, self.info.compression)
}
}
@@ -306,7 +298,7 @@ impl BundleWriter {
.map_err(|e| BundleError::Write(e, path.clone(), "Failed to write bundle header")));
try!(file.write_all(&[HEADER_VERSION])
.map_err(|e| BundleError::Write(e, path.clone(), "Failed to write bundle header")));
let header = BundleHeader {
let header = BundleInfo {
checksum: checksum,
compression: self.compression,
encryption: self.encryption,
@@ -331,6 +323,11 @@ impl BundleWriter {
pub fn size(&self) -> usize {
self.data.len()
}
#[inline]
pub fn raw_size(&self) -> usize {
self.raw_size
}
}
@@ -361,15 +358,15 @@ impl BundleDb {
fn bundle_path(&self, bundle: &BundleId) -> (PathBuf, PathBuf) {
let mut folder = self.path.clone();
let mut file = bundle.to_string() + ".bundle";
let mut file = bundle.to_string()[0..32].to_owned() + ".bundle";
let mut count = self.bundles.len();
while count >= 1000 {
while count >= 100 {
if file.len() < 10 {
break
}
folder = folder.join(&file[0..3]);
file = file[3..].to_string();
count /= 1000;
folder = folder.join(&file[0..2]);
file = file[2..].to_string();
count /= 100;
}
(folder, file.into())
}
@@ -386,7 +383,7 @@ impl BundleDb {
paths.push(path);
} else {
let bundle = try!(Bundle::load(path, self.crypto.clone()));
self.bundles.insert(bundle.id.clone(), bundle);
self.bundles.insert(bundle.id(), bundle);
}
}
}
@@ -440,7 +437,7 @@ impl BundleDb {
#[inline]
pub fn add_bundle(&mut self, bundle: BundleWriter) -> Result<&Bundle, BundleError> {
let bundle = try!(bundle.finish(&self));
let id = bundle.id.clone();
let id = bundle.id();
self.bundles.insert(id.clone(), bundle);
Ok(self.get_bundle(&id).unwrap())
}
@@ -458,7 +455,7 @@ impl BundleDb {
#[inline]
pub fn delete_bundle(&mut self, bundle: &BundleId) -> Result<(), BundleError> {
if let Some(bundle) = self.bundles.remove(bundle) {
fs::remove_file(&bundle.path).map_err(|e| BundleError::Remove(e, bundle.id.clone()))
fs::remove_file(&bundle.path).map_err(|e| BundleError::Remove(e, bundle.id()))
} else {
Err("No such bundle".into())
}
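A detail worth noting from the bundle_path hunk above: bundle files are now sharded into nested two-character directories whenever the bundle count reaches 100 at a level (previously three characters per 1000), and ids are truncated to 32 characters. A standalone sketch of that scheme, assuming a hex id string of at least 32 characters:

use std::path::{Path, PathBuf};

// Mirrors the sharding in bundle_path above: peel two-character prefixes
// off the truncated id into subdirectories until the per-level bundle
// count drops below 100.
fn sharded_path(base: &Path, id: &str, mut count: usize) -> (PathBuf, PathBuf) {
    let mut folder = base.to_path_buf();
    let mut file = id[0..32].to_owned() + ".bundle";
    while count >= 100 {
        if file.len() < 10 {
            break;
        }
        folder = folder.join(&file[0..2]);
        file = file[2..].to_string();
        count /= 100;
    }
    (folder, file.into())
}

With 10,000 bundles this nests two levels deep, bounding each directory at 256 entries for hex ids.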

src/main.rs

@@ -20,7 +20,7 @@ mod algotest;
use chunker::ChunkerType;
use repository::{Repository, Config, Mode};
use util::{ChecksumType, Compression, HashMethod};
use util::{ChecksumType, Compression, HashMethod, to_file_size};
use std::fs::File;
use std::io::Read;
@@ -32,8 +32,10 @@ use docopt::Docopt;
static USAGE: &'static str = "
Usage:
zvault init <repo>
zvault algotest <path>
zvault info <repo>
zvault bundles <repo>
zvault check [--full] <repo>
zvault algotest <path>
zvault test <repo> <path>
Options:
@@ -48,9 +50,11 @@ Options:
#[derive(RustcDecodable, Debug)]
struct Args {
cmd_init: bool,
cmd_info: bool,
cmd_algotest: bool,
cmd_test: bool,
cmd_check: bool,
cmd_bundles: bool,
arg_repo: Option<String>,
arg_path: Option<String>,
flag_full: bool,
@@ -63,7 +67,7 @@ struct Args {
fn main() {
let args: Args = Docopt::new(USAGE).and_then(|d| d.decode()).unwrap_or_else(|e| e.exit());
println!("{:?}", args);
//println!("{:?}", args);
if args.cmd_algotest {
algotest::run(&args.arg_path.unwrap());
@@ -71,7 +75,7 @@ fn main() {
}
if args.cmd_init {
let chunker = ChunkerType::from(&args.flag_chunker, args.flag_chunk_size, 0).expect("No such chunk algorithm");
let chunker = ChunkerType::from(&args.flag_chunker, args.flag_chunk_size*1024, 0).expect("No such chunk algorithm");
let compression = if args.flag_compression == "none" {
None
} else {
@@ -94,6 +98,35 @@ fn main() {
return
}
if args.cmd_info {
let info = repo.info();
println!("Bundles: {}", info.bundle_count);
println!("Total size: {}", to_file_size(info.encoded_data_size));
println!("Uncompressed size: {}", to_file_size(info.raw_data_size));
println!("Compression ratio: {:.1}", info.compression_ratio * 100.0);
println!("Chunk count: {}", info.chunk_count);
println!("Average chunk size: {}", to_file_size(info.avg_chunk_size as u64));
return
}
if args.cmd_bundles {
for bundle in repo.list_bundles() {
println!("Bundle {}", bundle.id);
println!(" - Chunks: {}", bundle.chunk_count);
println!(" - Size: {}", to_file_size(bundle.encoded_size as u64));
println!(" - Data size: {}", to_file_size(bundle.raw_size as u64));
let ratio = bundle.encoded_size as f32 / bundle.raw_size as f32;
let compression = if let Some(ref c) = bundle.compression {
c.to_string()
} else {
"none".to_string()
};
println!(" - Compression: {}, ratio: {:.1}%", compression, ratio * 100.0);
println!();
}
return
}
if args.cmd_test {
print!("Integrity check before...");
repo.check(true).unwrap();

src/repository/basic_io.rs

@@ -2,7 +2,6 @@ use std::mem;
use std::io::{Read, Write, Cursor};
use super::{Repository, Mode};
use super::bundle_map::BundleInfo;
use ::index::Location;
use ::util::Hash;
@@ -19,7 +18,7 @@ impl Repository {
};
// Lookup bundle id from map
let bundle_id = if let Some(bundle_info) = self.bundle_map.get(found.bundle) {
bundle_info.id.clone()
bundle_info.id()
} else {
return Err("Bundle id not found in map")
};
@@ -50,23 +49,24 @@ impl Repository {
debug_assert!(writer.is_some());
let chunk_id;
let size;
let raw_size;
{
// Add chunk to bundle writer and determine the size of the bundle
let writer_obj = writer.as_mut().unwrap();
chunk_id = try!(writer_obj.add(data).map_err(|_| "Failed to write chunk"));
size = writer_obj.size();
raw_size = writer_obj.raw_size();
}
let bundle_id = match mode {
Mode::Content => self.next_content_bundle,
Mode::Meta => self.next_meta_bundle
};
// Finish bundle if over maximum size
if size >= self.config.bundle_size {
if size >= self.config.bundle_size || raw_size >= 4 * self.config.bundle_size {
let mut finished = None;
mem::swap(writer, &mut finished);
let bundle = try!(self.bundles.add_bundle(finished.unwrap()).map_err(|_| "Failed to write finished bundle"));
let bundle_info = BundleInfo{id: bundle.id.clone()};
self.bundle_map.set(bundle_id, bundle_info);
self.bundle_map.set(bundle_id, bundle);
if self.next_meta_bundle == bundle_id {
self.next_meta_bundle = next_free_bundle_id
}
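The added raw_size condition is presumably a memory guard: size only reflects the encoded bytes buffered so far, so with highly compressible input a bundle could consume far more raw data than bundle_size before the encoded check fires. The check in isolation, with illustrative numbers:

// Finish the bundle when the encoded data reaches the configured size, or
// when the raw input reaches four times that size. Illustration: with an
// assumed bundle_size of 25 MiB and a 10% encoded ratio, the old check
// alone would buffer ~250 MiB of raw data; the raw cap stops at 100 MiB.
fn should_finish(encoded_size: usize, raw_size: usize, bundle_size: usize) -> bool {
    encoded_size >= bundle_size || raw_size >= 4 * bundle_size
}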

src/repository/bundle_map.rs

@@ -7,7 +7,7 @@ use rmp_serde;
use serde::Deserialize;
use serde::Serialize;
use ::bundle::BundleId;
use ::bundle::{Bundle, BundleId, BundleInfo};
static HEADER_STRING: [u8; 7] = *b"zbunmap";
@@ -15,15 +15,22 @@ static HEADER_VERSION: u8 = 1;
#[derive(Default)]
pub struct BundleInfo {
pub id: BundleId
pub struct BundleData {
pub info: BundleInfo
}
serde_impl!(BundleInfo(u64) {
id: BundleId => 0
serde_impl!(BundleData(u64) {
info: BundleInfo => 0
});
impl BundleData {
#[inline]
pub fn id(&self) -> BundleId {
self.info.id.clone()
}
}
pub struct BundleMap(HashMap<u32, BundleInfo>);
pub struct BundleMap(HashMap<u32, BundleData>);
impl BundleMap {
pub fn create() -> Self {
@@ -63,12 +70,18 @@ impl BundleMap {
}
#[inline]
pub fn get(&self, id: u32) -> Option<&BundleInfo> {
pub fn get(&self, id: u32) -> Option<&BundleData> {
self.0.get(&id)
}
#[inline]
pub fn set(&mut self, id: u32, info: BundleInfo) {
self.0.insert(id, info);
pub fn set(&mut self, id: u32, bundle: &Bundle) {
let data = BundleData { info: bundle.info.clone() };
self.0.insert(id, data);
}
#[inline]
pub fn bundles(&self) -> Vec<&BundleData> {
self.0.values().collect()
}
}

src/repository/info.rs (new file)

@@ -0,0 +1,34 @@
use super::Repository;
use ::bundle::BundleInfo;
pub struct RepositoryInfo {
pub bundle_count: usize,
pub encoded_data_size: u64,
pub raw_data_size: u64,
pub compression_ratio: f32,
pub chunk_count: usize,
pub avg_chunk_size: f32
}
impl Repository {
#[inline]
pub fn list_bundles(&self) -> Vec<&BundleInfo> {
self.bundle_map.bundles().iter().map(|b| &b.info).collect()
}
pub fn info(&self) -> RepositoryInfo {
let bundles = self.list_bundles();
let encoded_data_size = bundles.iter().map(|b| b.encoded_size as u64).sum();
let raw_data_size = bundles.iter().map(|b| b.raw_size as u64).sum();
let chunk_count = bundles.iter().map(|b| b.chunk_count).sum();
RepositoryInfo {
bundle_count: bundles.len(),
chunk_count: chunk_count,
encoded_data_size: encoded_data_size,
raw_data_size: raw_data_size,
compression_ratio: encoded_data_size as f32 / raw_data_size as f32,
avg_chunk_size: raw_data_size as f32 / chunk_count as f32
}
}
}
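A worked example of the derived fields, using made-up repository totals:

// Hypothetical totals: 400 MiB encoded from 1 GiB raw across 250,000
// chunks gives compression_ratio = 400/1024 ≈ 0.39 (main.rs prints this
// multiplied by 100) and an average chunk of ~4295 bytes.
fn main() {
    let encoded_data_size = 400u64 * 1024 * 1024;
    let raw_data_size = 1024u64 * 1024 * 1024;
    let chunk_count = 250_000usize;
    println!("ratio: {:.3}", encoded_data_size as f32 / raw_data_size as f32);
    println!("avg chunk: {:.0} bytes", raw_data_size as f32 / chunk_count as f32);
}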

src/repository/integrity.rs

@@ -13,7 +13,7 @@ impl Repository {
};
// Lookup bundle id from map
let bundle_id = if let Some(bundle_info) = self.bundle_map.get(found.bundle) {
bundle_info.id.clone()
bundle_info.id()
} else {
return Err("Bundle id not found in map")
};
@@ -24,7 +24,7 @@ impl Repository {
return Err("Bundle not found in bundledb")
};
// Get chunk from bundle
if bundle.chunk_count > found.chunk as usize {
if bundle.info.chunk_count > found.chunk as usize {
Ok(())
} else {
Err("Bundle does not contain that chunk")

src/repository/mod.rs

@@ -2,6 +2,7 @@ mod config;
mod bundle_map;
mod integrity;
mod basic_io;
mod info;
use std::mem;
use std::cmp::max;
@@ -13,7 +14,7 @@ use super::bundle::{BundleDb, BundleWriter};
use super::chunker::Chunker;
pub use self::config::Config;
use self::bundle_map::{BundleMap, BundleInfo};
use self::bundle_map::BundleMap;
#[derive(Eq, Debug, PartialEq, Clone, Copy)]
@@ -111,8 +112,7 @@ impl Repository {
mem::swap(&mut self.content_bundle, &mut finished);
{
let bundle = try!(self.bundles.add_bundle(finished.unwrap()).map_err(|_| "Failed to write finished bundle"));
let bundle_info = BundleInfo{id: bundle.id.clone()};
self.bundle_map.set(self.next_content_bundle, bundle_info);
self.bundle_map.set(self.next_content_bundle, bundle);
}
self.next_content_bundle = self.next_free_bundle_id()
}
@@ -121,8 +121,7 @@ impl Repository {
mem::swap(&mut self.meta_bundle, &mut finished);
{
let bundle = try!(self.bundles.add_bundle(finished.unwrap()).map_err(|_| "Failed to write finished bundle"));
let bundle_info = BundleInfo{id: bundle.id.clone()};
self.bundle_map.set(self.next_meta_bundle, bundle_info);
self.bundle_map.set(self.next_meta_bundle, bundle);
}
self.next_meta_bundle = self.next_free_bundle_id()
}

src/util/compression.rs

@@ -35,7 +35,7 @@ impl Compression {
#[inline]
pub fn from_string(name: &str) -> Result<Self, &'static str> {
let (name, level) = if let Some(pos) = name.find("/") {
let (name, level) = if let Some(pos) = name.find('/') {
let level = try!(u8::from_str(&name[pos+1..]).map_err(|_| "Level must be a number"));
let name = &name[..pos];
(name, level)

src/util/mod.rs

@@ -9,3 +9,28 @@ pub use self::compression::*;
pub use self::encryption::*;
pub use self::hash::*;
pub use self::lru_cache::*;
pub fn to_file_size(size: u64) -> String {
let mut size = size as f32;
if size >= 512.0 {
size /= 1024.0;
} else {
return format!("{:.0} Bytes", size);
}
if size >= 512.0 {
size /= 1024.0;
} else {
return format!("{:.1} KiB", size);
}
if size >= 512.0 {
size /= 1024.0;
} else {
return format!("{:.1} MiB", size);
}
if size >= 512.0 {
size /= 1024.0;
} else {
return format!("{:.1} GiB", size);
}
format!("{:.1} TiB", size)
}
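Sample outputs, assuming the to_file_size above is in scope; because of the 512 threshold, sizes from 512 units upward report in the next-larger unit:

// Expected results given the 512-per-unit cutoffs above:
//   to_file_size(100)         -> "100 Bytes"
//   to_file_size(600)         -> "0.6 KiB"   (600 >= 512, so 600/1024)
//   to_file_size(1536 * 1024) -> "1.5 MiB"
fn main() {
    for &size in &[100u64, 600, 1536 * 1024] {
        println!("{}", to_file_size(size));
    }
}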