From b4e6b34bbece98dd49b4c12bfe00695fb3221f2a Mon Sep 17 00:00:00 2001 From: Dennis Schwerdel Date: Wed, 2 Aug 2017 23:36:01 +0200 Subject: [PATCH] Configurable cut-point-skipping in fastcdc --- chunking/benches/all.rs | 4 ++-- chunking/src/fastcdc.rs | 6 ++++-- chunking/tests/all.rs | 4 ++-- src/chunker.rs | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/chunking/benches/all.rs b/chunking/benches/all.rs index 75cf50f..ebe8ec3 100644 --- a/chunking/benches/all.rs +++ b/chunking/benches/all.rs @@ -99,7 +99,7 @@ fn test_rabin_8192(b: &mut Bencher) { #[bench] fn test_fastcdc_init(b: &mut Bencher) { b.iter(|| { - FastCdcChunker::new(8*1024, 0); + FastCdcChunker::new(8*1024, 0, true); }) } @@ -108,7 +108,7 @@ fn test_fastcdc_8192(b: &mut Bencher) { let data = random_data(0, 1024*1024); b.bytes = data.len() as u64; b.iter(|| { - let mut chunker = FastCdcChunker::new(8*1024, 0); + let mut chunker = FastCdcChunker::new(8*1024, 0, true); let mut cursor = Cursor::new(&data); while chunker.chunk(&mut cursor, &mut DevNull).unwrap() == ChunkerStatus::Continue {} }) diff --git a/chunking/src/fastcdc.rs b/chunking/src/fastcdc.rs index 974e8f9..39e430a 100644 --- a/chunking/src/fastcdc.rs +++ b/chunking/src/fastcdc.rs @@ -54,10 +54,11 @@ pub struct FastCdcChunker { avg_size: usize, mask_long: u64, mask_short: u64, + cut_point_skip: bool, } impl FastCdcChunker { - pub fn new(avg_size: usize, seed: u64) -> Self { + pub fn new(avg_size: usize, seed: u64, cut_point_skip: bool) -> Self { let (mask_short, mask_long) = get_masks(avg_size, 2, seed); FastCdcChunker { buffer: [0; 4096], @@ -68,6 +69,7 @@ impl FastCdcChunker { avg_size: avg_size, mask_long: mask_long, mask_short: mask_short, + cut_point_skip: cut_point_skip, } } } @@ -99,7 +101,7 @@ impl Chunker for FastCdcChunker { let min_size_p = cmp::min(max, cmp::max(self.min_size as isize - pos as isize, 0) as usize); let avg_size_p = cmp::min(max, cmp::max(self.avg_size as isize - pos as isize, 0) as usize); let max_size_p = cmp::min(max, cmp::max(self.max_size as isize - pos as isize, 0) as usize); - if self.min_size > pos { + if !self.cut_point_skip && self.min_size > pos { for i in 0..min_size_p { hash = (hash << 1).wrapping_add(self.gear[self.buffer[i] as usize]); } diff --git a/chunking/tests/all.rs b/chunking/tests/all.rs index 80a7fe2..53e5196 100644 --- a/chunking/tests/all.rs +++ b/chunking/tests/all.rs @@ -84,12 +84,12 @@ fn test_rabin() { fn test_fastcdc() { let data = random_data(0, 10*1024*1024); for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { - let mut chunker = FastCdcChunker::new(1024*n, 0); + let mut chunker = FastCdcChunker::new(1024*n, 0, true); let len = test_chunking(&mut chunker, &data, None); assert!(len >= data.len()/n/1024/4); assert!(len <= data.len()/n/1024*4); } - test_chunking(&mut FastCdcChunker::new(8192, 0), &random_data(0, 128*1024), + test_chunking(&mut FastCdcChunker::new(8192, 0, true), &random_data(0, 128*1024), Some(&[8712, 8018, 2847, 9157, 8997, 8581, 8867, 5422, 5412, 9478, 11553, 9206, 4606, 8529, 3821, 11342, 6524])); } diff --git a/src/chunker.rs b/src/chunker.rs index 337c311..eebcbaa 100644 --- a/src/chunker.rs +++ b/src/chunker.rs @@ -48,7 +48,7 @@ impl ChunkerType { match *self { ChunkerType::Ae(size) => Box::new(AeChunker::new(size)), ChunkerType::Rabin((size, seed)) => Box::new(RabinChunker::new(size, seed)), - ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed)), + ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed, true)), ChunkerType::Fixed(size) => Box::new(FixedChunker::new(size)), } }