2017-03-10 11:43:32 +00:00
|
|
|
use super::*;
|
|
|
|
|
|
|
|
use std::ptr;
|
|
|
|
|
|
|
|
// AE Chunker
|
|
|
|
// Paper: "AE: An Asymmetric Extremum Content Defined Chunking Algorithm for Fast and Bandwidth-Efficient Data Deduplication"
|
|
|
|
|
|
|
|
|
|
|
|
/// State of the AE (asymmetric extremum) chunker.
///
/// The chunker keeps a fixed-size scratch buffer so that bytes read past a
/// chunk boundary can be carried over to the next `chunk` call.
pub struct AeChunker {
    /// Scratch buffer (0x1000 bytes) that input is read into before it is scanned.
    buffer: [u8; 0x1000],
    /// Number of bytes at the start of `buffer` left over from the previous chunk.
    buffered: usize,
    /// Distance behind the current maximum byte at which a chunk is cut.
    window_size: usize,
}
|
|
|
|
|
|
|
|
impl AeChunker {
|
|
|
|
pub fn new(avg_size: usize) -> AeChunker {
|
|
|
|
// Experiments show that this claim from the paper is wrong and results in smaller chunks
|
|
|
|
//let window_size = (avg_size as f64 / (consts::E - 1.0)) as usize;
|
|
|
|
let window_size = avg_size - 256;
|
|
|
|
AeChunker{
|
2018-02-24 22:28:18 +00:00
|
|
|
buffer: [0; 0x1000],
|
2017-03-10 11:43:32 +00:00
|
|
|
buffered: 0,
|
2018-03-03 16:25:05 +00:00
|
|
|
window_size,
|
2017-03-10 11:43:32 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-19 15:35:25 +00:00
|
|
|
impl Chunker for AeChunker {
|
2017-03-10 11:43:32 +00:00
|
|
|
#[allow(unknown_lints,explicit_counter_loop)]
|
2018-02-19 21:30:59 +00:00
|
|
|
fn chunk(&mut self, r: &mut Read, w: &mut Write) -> Result<ChunkerStatus, ChunkerError> {
|
2017-03-10 11:43:32 +00:00
|
|
|
let mut max;
|
|
|
|
let mut pos = 0;
|
|
|
|
let mut max_pos = 0;
|
|
|
|
let mut max_val = 0;
|
|
|
|
loop {
|
|
|
|
// Fill the buffer, there might be some bytes still in there from last chunk
|
|
|
|
max = try!(r.read(&mut self.buffer[self.buffered..]).map_err(ChunkerError::Read)) + self.buffered;
|
|
|
|
// If nothing to do, finish
|
|
|
|
if max == 0 {
|
|
|
|
return Ok(ChunkerStatus::Finished)
|
|
|
|
}
|
|
|
|
for i in 0..max {
|
|
|
|
let val = self.buffer[i];
|
|
|
|
if val <= max_val {
|
|
|
|
if pos == max_pos + self.window_size {
|
|
|
|
// Write all bytes from this chunk out to sink and store rest for next chunk
|
|
|
|
try!(w.write_all(&self.buffer[..i+1]).map_err(ChunkerError::Write));
|
|
|
|
unsafe { ptr::copy(self.buffer[i+1..].as_ptr(), self.buffer.as_mut_ptr(), max-i-1) };
|
|
|
|
self.buffered = max-i-1;
|
|
|
|
return Ok(ChunkerStatus::Continue);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
max_val = val;
|
|
|
|
max_pos = pos;
|
|
|
|
}
|
|
|
|
pos += 1;
|
|
|
|
}
|
|
|
|
try!(w.write_all(&self.buffer[..max]).map_err(ChunkerError::Write));
|
|
|
|
self.buffered = 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|