More tests, forcing cut-point-skipping, using black_box

This commit is contained in:
Dennis Schwerdel 2017-08-03 07:34:16 +02:00
parent b4e6b34bbe
commit 5fe41127fc
4 changed files with 45 additions and 22 deletions

View File

@ -26,10 +26,22 @@ fn random_data(seed: u64, size: usize) -> Vec<u8> {
} }
struct DevNull; struct CutPositions(Vec<u64>, u64);
impl Write for DevNull { impl CutPositions {
pub fn new() -> Self {
CutPositions(vec![], 0)
}
pub fn positions(&self) -> &[u64] {
&self.0
}
}
impl Write for CutPositions {
fn write(&mut self, data: &[u8]) -> Result<usize, io::Error> { fn write(&mut self, data: &[u8]) -> Result<usize, io::Error> {
self.1 += data.len() as u64;
self.0.push(self.1);
Ok(data.len()) Ok(data.len())
} }
@ -53,7 +65,9 @@ fn test_fixed_8192(b: &mut Bencher) {
b.iter(|| { b.iter(|| {
let mut chunker = FixedChunker::new(8*1024); let mut chunker = FixedChunker::new(8*1024);
let mut cursor = Cursor::new(&data); let mut cursor = Cursor::new(&data);
while chunker.chunk(&mut cursor, &mut DevNull).unwrap() == ChunkerStatus::Continue {} let mut sink = CutPositions::new();
while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {};
test::black_box(sink.positions().len())
}) })
} }
@ -72,7 +86,9 @@ fn test_ae_8192(b: &mut Bencher) {
b.iter(|| { b.iter(|| {
let mut chunker = AeChunker::new(8*1024); let mut chunker = AeChunker::new(8*1024);
let mut cursor = Cursor::new(&data); let mut cursor = Cursor::new(&data);
while chunker.chunk(&mut cursor, &mut DevNull).unwrap() == ChunkerStatus::Continue {} let mut sink = CutPositions::new();
while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {};
test::black_box(sink.positions().len())
}) })
} }
@ -91,7 +107,9 @@ fn test_rabin_8192(b: &mut Bencher) {
b.iter(|| { b.iter(|| {
let mut chunker = RabinChunker::new(8*1024, 0); let mut chunker = RabinChunker::new(8*1024, 0);
let mut cursor = Cursor::new(&data); let mut cursor = Cursor::new(&data);
while chunker.chunk(&mut cursor, &mut DevNull).unwrap() == ChunkerStatus::Continue {} let mut sink = CutPositions::new();
while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {};
test::black_box(sink.positions().len())
}) })
} }
@ -99,7 +117,7 @@ fn test_rabin_8192(b: &mut Bencher) {
#[bench] #[bench]
fn test_fastcdc_init(b: &mut Bencher) { fn test_fastcdc_init(b: &mut Bencher) {
b.iter(|| { b.iter(|| {
FastCdcChunker::new(8*1024, 0, true); FastCdcChunker::new(8*1024, 0);
}) })
} }
@ -108,8 +126,10 @@ fn test_fastcdc_8192(b: &mut Bencher) {
let data = random_data(0, 1024*1024); let data = random_data(0, 1024*1024);
b.bytes = data.len() as u64; b.bytes = data.len() as u64;
b.iter(|| { b.iter(|| {
let mut chunker = FastCdcChunker::new(8*1024, 0, true); let mut chunker = FastCdcChunker::new(8*1024, 0);
let mut cursor = Cursor::new(&data); let mut cursor = Cursor::new(&data);
while chunker.chunk(&mut cursor, &mut DevNull).unwrap() == ChunkerStatus::Continue {} let mut sink = CutPositions::new();
while chunker.chunk(&mut cursor, &mut sink).unwrap() == ChunkerStatus::Continue {};
test::black_box(sink.positions().len())
}) })
} }

View File

@ -54,11 +54,10 @@ pub struct FastCdcChunker {
avg_size: usize, avg_size: usize,
mask_long: u64, mask_long: u64,
mask_short: u64, mask_short: u64,
cut_point_skip: bool,
} }
impl FastCdcChunker { impl FastCdcChunker {
pub fn new(avg_size: usize, seed: u64, cut_point_skip: bool) -> Self { pub fn new(avg_size: usize, seed: u64) -> Self {
let (mask_short, mask_long) = get_masks(avg_size, 2, seed); let (mask_short, mask_long) = get_masks(avg_size, 2, seed);
FastCdcChunker { FastCdcChunker {
buffer: [0; 4096], buffer: [0; 4096],
@ -69,7 +68,6 @@ impl FastCdcChunker {
avg_size: avg_size, avg_size: avg_size,
mask_long: mask_long, mask_long: mask_long,
mask_short: mask_short, mask_short: mask_short,
cut_point_skip: cut_point_skip,
} }
} }
} }
@ -101,11 +99,7 @@ impl Chunker for FastCdcChunker {
let min_size_p = cmp::min(max, cmp::max(self.min_size as isize - pos as isize, 0) as usize); let min_size_p = cmp::min(max, cmp::max(self.min_size as isize - pos as isize, 0) as usize);
let avg_size_p = cmp::min(max, cmp::max(self.avg_size as isize - pos as isize, 0) as usize); let avg_size_p = cmp::min(max, cmp::max(self.avg_size as isize - pos as isize, 0) as usize);
let max_size_p = cmp::min(max, cmp::max(self.max_size as isize - pos as isize, 0) as usize); let max_size_p = cmp::min(max, cmp::max(self.max_size as isize - pos as isize, 0) as usize);
if !self.cut_point_skip && self.min_size > pos { // Skip hashing the first min_size bytes. This is safe because identical data still produces the identical hash, so cut points stay deterministic.
for i in 0..min_size_p {
hash = (hash << 1).wrapping_add(self.gear[self.buffer[i] as usize]);
}
}
if self.avg_size > pos { if self.avg_size > pos {
for i in min_size_p..avg_size_p { for i in min_size_p..avg_size_p {
hash = (hash << 1).wrapping_add(self.gear[self.buffer[i] as usize]); hash = (hash << 1).wrapping_add(self.gear[self.buffer[i] as usize]);

View File

@ -37,7 +37,7 @@ fn test_chunking(chunker: &mut Chunker, data: &[u8], chunk_lens: Option<&[usize]
pos += chunk.len(); pos += chunk.len();
} }
if let Some(chunk_lens) = chunk_lens { if let Some(chunk_lens) = chunk_lens {
//assert_eq!(chunk_lens.len(), chunks.len()); assert_eq!(chunk_lens.len(), chunks.len());
for (i, chunk) in chunks.iter().enumerate() { for (i, chunk) in chunks.iter().enumerate() {
assert_eq!(chunk.len(), chunk_lens[i]); assert_eq!(chunk.len(), chunk_lens[i]);
} }
@ -49,6 +49,9 @@ fn test_chunking(chunker: &mut Chunker, data: &[u8], chunk_lens: Option<&[usize]
#[test] #[test]
fn test_fixed() { fn test_fixed() {
test_chunking(&mut FixedChunker::new(8192), &random_data(0, 128*1024),
Some(&[8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192,
8192, 8192, 8192, 8192, 8192, 8192, 0]));
let data = random_data(0, 10*1024*1024); let data = random_data(0, 10*1024*1024);
for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] {
let mut chunker = FixedChunker::new(1024*n); let mut chunker = FixedChunker::new(1024*n);
@ -60,6 +63,9 @@ fn test_fixed() {
#[test] #[test]
fn test_ae() { fn test_ae() {
test_chunking(&mut AeChunker::new(8192), &random_data(0, 128*1024),
Some(&[7979, 8046, 7979, 8192, 8192, 8192, 7965, 8158, 8404, 8241,
8011, 8302, 8120, 8335, 8192, 8192, 572]));
let data = random_data(0, 10*1024*1024); let data = random_data(0, 10*1024*1024);
for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] {
let mut chunker = AeChunker::new(1024*n); let mut chunker = AeChunker::new(1024*n);
@ -71,6 +77,9 @@ fn test_ae() {
#[test] #[test]
fn test_rabin() { fn test_rabin() {
test_chunking(&mut RabinChunker::new(8192, 0), &random_data(0, 128*1024),
Some(&[8604, 4190, 32769, 3680, 26732, 3152, 9947, 6487, 25439, 3944,
6128]));
let data = random_data(0, 10*1024*1024); let data = random_data(0, 10*1024*1024);
for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] {
let mut chunker = RabinChunker::new(1024*n, 0); let mut chunker = RabinChunker::new(1024*n, 0);
@ -82,14 +91,14 @@ fn test_rabin() {
#[test] #[test]
fn test_fastcdc() { fn test_fastcdc() {
test_chunking(&mut FastCdcChunker::new(8192, 0), &random_data(0, 128*1024),
Some(&[8712, 8018, 2847, 9157, 8997, 8581, 8867, 5422, 5412, 9478,
11553, 9206, 4606, 8529, 3821, 11342, 6524]));
let data = random_data(0, 10*1024*1024); let data = random_data(0, 10*1024*1024);
for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] { for n in &[1usize,2,4,8,16,32,64,128,256,512,1024] {
let mut chunker = FastCdcChunker::new(1024*n, 0, true); let mut chunker = FastCdcChunker::new(1024*n, 0);
let len = test_chunking(&mut chunker, &data, None); let len = test_chunking(&mut chunker, &data, None);
assert!(len >= data.len()/n/1024/4); assert!(len >= data.len()/n/1024/4);
assert!(len <= data.len()/n/1024*4); assert!(len <= data.len()/n/1024*4);
} }
test_chunking(&mut FastCdcChunker::new(8192, 0, true), &random_data(0, 128*1024),
Some(&[8712, 8018, 2847, 9157, 8997, 8581, 8867, 5422, 5412, 9478,
11553, 9206, 4606, 8529, 3821, 11342, 6524]));
} }

View File

@ -48,7 +48,7 @@ impl ChunkerType {
match *self { match *self {
ChunkerType::Ae(size) => Box::new(AeChunker::new(size)), ChunkerType::Ae(size) => Box::new(AeChunker::new(size)),
ChunkerType::Rabin((size, seed)) => Box::new(RabinChunker::new(size, seed)), ChunkerType::Rabin((size, seed)) => Box::new(RabinChunker::new(size, seed)),
ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed, true)), ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed)),
ChunkerType::Fixed(size) => Box::new(FixedChunker::new(size)), ChunkerType::Fixed(size) => Box::new(FixedChunker::new(size)),
} }
} }