mirror of https://github.com/dswd/zvault
Also including the first min_size bytes in hash (oops), performance improvements
This commit is contained in:
parent
54e2329228
commit
837df8bbd3
|
@ -1,6 +1,7 @@
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
use std::ptr;
|
use std::ptr;
|
||||||
|
use std::cmp;
|
||||||
|
|
||||||
// FastCDC
|
// FastCDC
|
||||||
// Paper: "FastCDC: a Fast and Efficient Content-Defined Chunking Approach for Data Deduplication"
|
// Paper: "FastCDC: a Fast and Efficient Content-Defined Chunking Approach for Data Deduplication"
|
||||||
|
@ -92,26 +93,44 @@ impl Chunker for FastCdcChunker {
|
||||||
if max == 0 {
|
if max == 0 {
|
||||||
return Ok(ChunkerStatus::Finished)
|
return Ok(ChunkerStatus::Finished)
|
||||||
}
|
}
|
||||||
for i in 0..max {
|
let min_size_p = cmp::min(max, cmp::max(min_size as isize - pos as isize, 0) as usize);
|
||||||
if pos >= min_size {
|
let avg_size_p = cmp::min(max, cmp::max(avg_size as isize - pos as isize, 0) as usize);
|
||||||
// Hash update
|
let max_size_p = cmp::min(max, cmp::max(max_size as isize - pos as isize, 0) as usize);
|
||||||
|
if min_size > pos {
|
||||||
|
for i in 0..min_size_p {
|
||||||
hash = (hash << 1).wrapping_add(gear[buffer[i] as usize]);
|
hash = (hash << 1).wrapping_add(gear[buffer[i] as usize]);
|
||||||
// 3 options for break point
|
}
|
||||||
// 1) mask_short matches and chunk is smaller than average
|
}
|
||||||
// 2) mask_long matches and chunk is longer or equal to average
|
if avg_size > pos {
|
||||||
// 3) chunk reached max_size
|
for i in min_size_p..avg_size_p {
|
||||||
if pos < avg_size && hash & mask_short == 0
|
hash = (hash << 1).wrapping_add(gear[buffer[i] as usize]);
|
||||||
|| pos >= avg_size && hash & mask_long == 0
|
if hash & mask_short == 0 {
|
||||||
|| pos >= max_size {
|
|
||||||
// Write all bytes from this chunk out to sink and store rest for next chunk
|
|
||||||
try!(w.write_all(&buffer[..i+1]).map_err(ChunkerError::Write));
|
try!(w.write_all(&buffer[..i+1]).map_err(ChunkerError::Write));
|
||||||
unsafe { ptr::copy(buffer[i+1..].as_ptr(), buffer.as_mut_ptr(), max-i-1) };
|
unsafe { ptr::copy(buffer[i+1..].as_ptr(), buffer.as_mut_ptr(), max-i-1) };
|
||||||
self.buffered = max-i-1;
|
self.buffered = max-i-1;
|
||||||
return Ok(ChunkerStatus::Continue);
|
return Ok(ChunkerStatus::Continue);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pos += 1;
|
|
||||||
}
|
}
|
||||||
|
if max_size > pos {
|
||||||
|
for i in avg_size_p..max_size_p {
|
||||||
|
hash = (hash << 1).wrapping_add(gear[buffer[i] as usize]);
|
||||||
|
if hash & mask_long == 0 {
|
||||||
|
try!(w.write_all(&buffer[..i+1]).map_err(ChunkerError::Write));
|
||||||
|
unsafe { ptr::copy(buffer[i+1..].as_ptr(), buffer.as_mut_ptr(), max-i-1) };
|
||||||
|
self.buffered = max-i-1;
|
||||||
|
return Ok(ChunkerStatus::Continue);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if max + pos >= max_size {
|
||||||
|
let i = max_size_p;
|
||||||
|
try!(w.write_all(&buffer[..i]).map_err(ChunkerError::Write));
|
||||||
|
unsafe { ptr::copy(buffer[i..].as_ptr(), buffer.as_mut_ptr(), max-i) };
|
||||||
|
self.buffered = max-i;
|
||||||
|
return Ok(ChunkerStatus::Continue);
|
||||||
|
}
|
||||||
|
pos += max;
|
||||||
try!(w.write_all(&buffer[..max]).map_err(ChunkerError::Write));
|
try!(w.write_all(&buffer[..max]).map_err(ChunkerError::Write));
|
||||||
self.buffered = 0;
|
self.buffered = 0;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue