From 5fe41127fc3c4b18d9ed271a3d7f9a4bc78773e8 Mon Sep 17 00:00:00 2001 From: Dennis Schwerdel Date: Thu, 3 Aug 2017 07:34:16 +0200 Subject: [PATCH] More tests, forcing cut-point-skipping, using black_box --- chunking/benches/all.rs | 36 ++++++++++++++++++++++++++++-------- chunking/src/fastcdc.rs | 10 ++-------- chunking/tests/all.rs | 19 ++++++++++++++----- src/chunker.rs | 2 +- 4 files changed, 45 insertions(+), 22 deletions(-) diff --git a/chunking/benches/all.rs b/chunking/benches/all.rs index ebe8ec3..79b2fb9 100644 --- a/chunking/benches/all.rs +++ b/chunking/benches/all.rs @@ -26,10 +26,22 @@ fn random_data(seed: u64, size: usize) -> Vec { } -struct DevNull; +struct CutPositions(Vec, u64); -impl Write for DevNull { +impl CutPositions { + pub fn new() -> Self { + CutPositions(vec![], 0) + } + + pub fn positions(&self) -> &[u64] { + &self.0 + } +} + +impl Write for CutPositions { fn write(&mut self, data: &[u8]) -> Result { + self.1 += data.len() as u64; + self.0.push(self.1); Ok(data.len()) } @@ -53,7 +65,9 @@ fn test_fixed_8192(b: &mut Bencher) { b.iter(|| { let mut chunker = FixedChunker::new(8*1024); let mut cursor = Cursor::new(&data); - while chunker.chunk(&mut cursor, &mut DevNull).unwrap() == ChunkerStatus::Continue {} + let mut sink = CutPositions::new(); + while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {}; + test::black_box(sink.positions().len()) }) } @@ -72,7 +86,9 @@ fn test_ae_8192(b: &mut Bencher) { b.iter(|| { let mut chunker = AeChunker::new(8*1024); let mut cursor = Cursor::new(&data); - while chunker.chunk(&mut cursor, &mut DevNull).unwrap() == ChunkerStatus::Continue {} + let mut sink = CutPositions::new(); + while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {}; + test::black_box(sink.positions().len()) }) } @@ -91,7 +107,9 @@ fn test_rabin_8192(b: &mut Bencher) { b.iter(|| { let mut chunker = RabinChunker::new(8*1024, 0); let mut cursor = Cursor::new(&data); - while chunker.chunk(&mut cursor, &mut DevNull).unwrap() == ChunkerStatus::Continue {} + let mut sink = CutPositions::new(); + while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {}; + test::black_box(sink.positions().len()) }) } @@ -99,7 +117,7 @@ fn test_rabin_8192(b: &mut Bencher) { #[bench] fn test_fastcdc_init(b: &mut Bencher) { b.iter(|| { - FastCdcChunker::new(8*1024, 0, true); + FastCdcChunker::new(8*1024, 0); }) } @@ -108,8 +126,10 @@ fn test_fastcdc_8192(b: &mut Bencher) { let data = random_data(0, 1024*1024); b.bytes = data.len() as u64; b.iter(|| { - let mut chunker = FastCdcChunker::new(8*1024, 0, true); + let mut chunker = FastCdcChunker::new(8*1024, 0); let mut cursor = Cursor::new(&data); - while chunker.chunk(&mut cursor, &mut DevNull).unwrap() == ChunkerStatus::Continue {} + let mut sink = CutPositions::new(); + while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {}; + test::black_box(sink.positions().len()) }) } diff --git a/chunking/src/fastcdc.rs b/chunking/src/fastcdc.rs index 39e430a..5788889 100644 --- a/chunking/src/fastcdc.rs +++ b/chunking/src/fastcdc.rs @@ -54,11 +54,10 @@ pub struct FastCdcChunker { avg_size: usize, mask_long: u64, mask_short: u64, - cut_point_skip: bool, } impl FastCdcChunker { - pub fn new(avg_size: usize, seed: u64, cut_point_skip: bool) -> Self { + pub fn new(avg_size: usize, seed: u64) -> Self { let (mask_short, mask_long) = get_masks(avg_size, 2, seed); FastCdcChunker { buffer: [0; 4096], @@ -69,7 +68,6 @@ impl FastCdcChunker { avg_size: avg_size, mask_long: mask_long, mask_short: mask_short, - cut_point_skip: cut_point_skip, } } } @@ -101,11 +99,7 @@ impl Chunker for FastCdcChunker { let min_size_p = cmp::min(max, cmp::max(self.min_size as isize - pos as isize, 0) as usize); let avg_size_p = cmp::min(max, cmp::max(self.avg_size as isize - pos as isize, 0) as usize); let max_size_p = cmp::min(max, cmp::max(self.max_size as isize - pos as isize, 0) as usize); - if !self.cut_point_skip && self.min_size > pos { - for i in 0..min_size_p { - hash = (hash << 1).wrapping_add(self.gear[self.buffer[i] as usize]); - } - } + // Skipping first min_size bytes. This is ok as same data still results in same hash. if self.avg_size > pos { for i in min_size_p..avg_size_p { hash = (hash << 1).wrapping_add(self.gear[self.buffer[i] as usize]); diff --git a/chunking/tests/all.rs b/chunking/tests/all.rs index 53e5196..0d8a603 100644 --- a/chunking/tests/all.rs +++ b/chunking/tests/all.rs @@ -37,7 +37,7 @@ fn test_chunking(chunker: &mut Chunker, data: &[u8], chunk_lens: Option<&[usize] pos += chunk.len(); } if let Some(chunk_lens) = chunk_lens { - //assert_eq!(chunk_lens.len(), chunks.len()); + assert_eq!(chunk_lens.len(), chunks.len()); for (i, chunk) in chunks.iter().enumerate() { assert_eq!(chunk.len(), chunk_lens[i]); } @@ -49,6 +49,9 @@ fn test_chunking(chunker: &mut Chunker, data: &[u8], chunk_lens: Option<&[usize] #[test] fn test_fixed() { + test_chunking(&mut FixedChunker::new(8192), &random_data(0, 128*1024), + Some(&[8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, + 8192, 8192, 8192, 8192, 8192, 8192, 0])); let data = random_data(0, 10*1024*1024); for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { let mut chunker = FixedChunker::new(1024*n); @@ -60,6 +63,9 @@ fn test_fixed() { #[test] fn test_ae() { + test_chunking(&mut AeChunker::new(8192), &random_data(0, 128*1024), + Some(&[7979, 8046, 7979, 8192, 8192, 8192, 7965, 8158, 8404, 8241, + 8011, 8302, 8120, 8335, 8192, 8192, 572])); let data = random_data(0, 10*1024*1024); for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { let mut chunker = AeChunker::new(1024*n); @@ -71,6 +77,9 @@ fn test_ae() { #[test] fn test_rabin() { + test_chunking(&mut RabinChunker::new(8192, 0), &random_data(0, 128*1024), + Some(&[8604, 4190, 32769, 3680, 26732, 3152, 9947, 6487, 25439, 3944, + 6128])); let data = random_data(0, 10*1024*1024); for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { let mut chunker = RabinChunker::new(1024*n, 0); @@ -82,14 +91,14 @@ fn test_rabin() { #[test] fn test_fastcdc() { + test_chunking(&mut FastCdcChunker::new(8192, 0), &random_data(0, 128*1024), + Some(&[8712, 8018, 2847, 9157, 8997, 8581, 8867, 5422, 5412, 9478, + 11553, 9206, 4606, 8529, 3821, 11342, 6524])); let data = random_data(0, 10*1024*1024); for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { - let mut chunker = FastCdcChunker::new(1024*n, 0, true); + let mut chunker = FastCdcChunker::new(1024*n, 0); let len = test_chunking(&mut chunker, &data, None); assert!(len >= data.len()/n/1024/4); assert!(len <= data.len()/n/1024*4); } - test_chunking(&mut FastCdcChunker::new(8192, 0, true), &random_data(0, 128*1024), - Some(&[8712, 8018, 2847, 9157, 8997, 8581, 8867, 5422, 5412, 9478, - 11553, 9206, 4606, 8529, 3821, 11342, 6524])); } diff --git a/src/chunker.rs b/src/chunker.rs index eebcbaa..337c311 100644 --- a/src/chunker.rs +++ b/src/chunker.rs @@ -48,7 +48,7 @@ impl ChunkerType { match *self { ChunkerType::Ae(size) => Box::new(AeChunker::new(size)), ChunkerType::Rabin((size, seed)) => Box::new(RabinChunker::new(size, seed)), - ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed, true)), + ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed)), ChunkerType::Fixed(size) => Box::new(FixedChunker::new(size)), } }