mirror of https://github.com/dswd/zvault
Moved chunker to separate crate (re #17)
parent
229c4f7e28
commit
87f7cc0feb
@ -0,0 +1,31 @@
|
||||
[root]
|
||||
name = "chunking"
|
||||
version = "0.1.0"
|
||||
dependencies = [
|
||||
"quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 0.9.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_utils 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quick-error"
|
||||
version = "1.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde"
|
||||
version = "0.9.14"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "serde_utils"
|
||||
version = "0.5.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
dependencies = [
|
||||
"serde 0.9.14 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[metadata]
|
||||
"checksum quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0aad603e8d7fb67da22dbdf1f4b826ce8829e406124109e73cf1b2454b93a71c"
|
||||
"checksum serde 0.9.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a4c9a40d556f8431394def53446db659f796dc87a53ef67b7541f21057fbdd91"
|
||||
"checksum serde_utils 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b34a52969c7fc0254e214b82518c9a95dc88c84fc84cd847add314996a031be6"
|
@ -0,0 +1,7 @@
|
||||
[package]
|
||||
name = "chunking"
|
||||
version = "0.1.0"
|
||||
authors = ["Dennis Schwerdel <schwerdel@googlemail.com>"]
|
||||
|
||||
[dependencies]
|
||||
quick-error = "1.1"
|
@ -0,0 +1,51 @@
|
||||
#[macro_use] extern crate quick_error;
|
||||
|
||||
use std::io::{self, Write, Read};
|
||||
|
||||
mod ae;
|
||||
mod rabin;
|
||||
mod fastcdc;
|
||||
|
||||
pub use self::ae::AeChunker;
|
||||
pub use self::rabin::RabinChunker;
|
||||
pub use self::fastcdc::FastCdcChunker;
|
||||
|
||||
// https://moinakg.wordpress.com/2013/06/22/high-performance-content-defined-chunking/
|
||||
|
||||
// Paper: "A Comprehensive Study of the Past, Present, and Future of Data Deduplication"
|
||||
// Paper-URL: http://wxia.hustbackup.cn/IEEE-Survey-final.pdf
|
||||
|
||||
// https://borgbackup.readthedocs.io/en/stable/internals.html#chunks
|
||||
// https://github.com/bup/bup/blob/master/lib/bup/bupsplit.c
|
||||
|
||||
quick_error!{
    /// Errors reported by the chunkers of this crate.
    #[derive(Debug)]
    pub enum ChunkerError {
        /// Carries the I/O error that occurred while reading the input.
        Read(err: io::Error) {
            cause(err)
            description("Failed to read input")
            display("Chunker error: failed to read input\n\tcaused by: {}", err)
        }
        /// Carries the I/O error that occurred while writing the output.
        Write(err: io::Error) {
            cause(err)
            description("Failed to write to output")
            display("Chunker error: failed to write to output\n\tcaused by: {}", err)
        }
        /// Free-form error described by a static message.
        ///
        /// The `from()` clause lets a `&'static str` be converted into this
        /// variant.
        Custom(reason: &'static str) {
            from()
            description("Custom error")
            display("Chunker error: {}", reason)
        }
    }
}
|
||||
|
||||
|
||||
/// Status returned by a successful `Chunker::chunk` call.
///
/// NOTE(review): the exact meaning of each variant is defined by the chunker
/// implementations, which are not visible here. Presumably `Continue` means
/// more input may follow and `Finished` means the input is exhausted —
/// confirm against the implementations.
#[derive(Debug, Eq, PartialEq)]
pub enum ChunkerStatus {
    Continue,
    Finished,
}
|
||||
|
||||
pub trait Chunker {
|
||||
fn chunk(&mut self, r: &mut Read, w: &mut Write) -> Result<ChunkerStatus, ChunkerError>;
|
||||
}
|
@ -0,0 +1,77 @@
|
||||
pub use chunking::*;
|
||||
|
||||
use std::str::FromStr;
|
||||
|
||||
|
||||
/// Selects a chunking algorithm together with its parameters.
///
/// In every variant the `usize` is the average chunk size; the second tuple
/// element, where present, is the seed.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ChunkerType {
    /// AE chunker: average chunk size only.
    Ae(usize),
    /// Rabin chunker: `(average chunk size, 32-bit seed)`.
    Rabin((usize, u32)),
    /// FastCDC chunker: `(average chunk size, 64-bit seed)`.
    FastCdc((usize, u64)),
}
|
||||
// Serialization support for `ChunkerType`, generated by the `serde_impl!` macro.
// NOTE(review): the numbers on the right are presumably the `u64` tags that
// identify each variant in serialized data; if so, they must stay stable for
// already-stored data to remain readable — confirm against the macro's docs.
serde_impl!(ChunkerType(u64) {
    Ae(usize) => 1,
    Rabin((usize, u32)) => 2,
    FastCdc((usize, u64)) => 3
});
|
||||
|
||||
|
||||
impl ChunkerType {
|
||||
pub fn from(name: &str, avg_size: usize, seed: u64) -> Result<Self, &'static str> {
|
||||
match name {
|
||||
"ae" => Ok(ChunkerType::Ae(avg_size)),
|
||||
"rabin" => Ok(ChunkerType::Rabin((avg_size, seed as u32))),
|
||||
"fastcdc" => Ok(ChunkerType::FastCdc((avg_size, seed))),
|
||||
_ => Err("Unsupported chunker type")
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_string(name: &str) -> Result<Self, &'static str> {
|
||||
let (name, size) = if let Some(pos) = name.find('/') {
|
||||
let size = try!(usize::from_str(&name[pos+1..]).map_err(|_| "Chunk size must be a number"));
|
||||
let name = &name[..pos];
|
||||
(name, size)
|
||||
} else {
|
||||
(name, 8)
|
||||
};
|
||||
Self::from(name, size * 1024, 0)
|
||||
}
|
||||
|
||||
|
||||
#[inline]
|
||||
pub fn create(&self) -> Box<Chunker> {
|
||||
match *self {
|
||||
ChunkerType::Ae(size) => Box::new(AeChunker::new(size)),
|
||||
ChunkerType::Rabin((size, seed)) => Box::new(RabinChunker::new(size, seed)),
|
||||
ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &'static str {
|
||||
match *self {
|
||||
ChunkerType::Ae(_size) => "ae",
|
||||
ChunkerType::Rabin((_size, _seed)) => "rabin",
|
||||
ChunkerType::FastCdc((_size, _seed)) => "fastcdc"
|
||||
}
|
||||
}
|
||||
|
||||
pub fn avg_size(&self) -> usize {
|
||||
match *self {
|
||||
ChunkerType::Ae(size) => size,
|
||||
ChunkerType::Rabin((size, _seed)) => size,
|
||||
ChunkerType::FastCdc((size, _seed)) => size
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_string(&self) -> String {
|
||||
format!("{}/{}", self.name(), self.avg_size()/1024)
|
||||
}
|
||||
|
||||
pub fn seed(&self) -> u64 {
|
||||
match *self {
|
||||
ChunkerType::Ae(_size) => 0,
|
||||
ChunkerType::Rabin((_size, seed)) => seed as u64,
|
||||
ChunkerType::FastCdc((_size, seed)) => seed
|
||||
}
|
||||
}
|
||||
}
|
@ -1,151 +0,0 @@
|
||||
use std::io::{self, Write, Read};
|
||||
use std::str::FromStr;
|
||||
|
||||
mod ae;
|
||||
mod rabin;
|
||||
mod fastcdc;
|
||||
|
||||
pub use self::ae::AeChunker;
|
||||
pub use self::rabin::RabinChunker;
|
||||
pub use self::fastcdc::FastCdcChunker;
|
||||
|
||||
// https://moinakg.wordpress.com/2013/06/22/high-performance-content-defined-chunking/
|
||||
|
||||
// Paper: "A Comprehensive Study of the Past, Present, and Future of Data Deduplication"
|
||||
// Paper-URL: http://wxia.hustbackup.cn/IEEE-Survey-final.pdf
|
||||
|
||||
// https://borgbackup.readthedocs.io/en/stable/internals.html#chunks
|
||||
// https://github.com/bup/bup/blob/master/lib/bup/bupsplit.c
|
||||
|
||||
quick_error!{
    /// Errors reported by the chunkers.
    #[derive(Debug)]
    pub enum ChunkerError {
        /// Carries the I/O error that occurred while reading the input.
        Read(err: io::Error) {
            cause(err)
            description("Failed to read input")
            display("Chunker error: failed to read input\n\tcaused by: {}", err)
        }
        /// Carries the I/O error that occurred while writing the output.
        Write(err: io::Error) {
            cause(err)
            description("Failed to write to output")
            display("Chunker error: failed to write to output\n\tcaused by: {}", err)
        }
        /// Free-form error described by a static message.
        ///
        /// The `from()` clause lets a `&'static str` be converted into this
        /// variant.
        Custom(reason: &'static str) {
            from()
            description("Custom error")
            display("Chunker error: {}", reason)
        }
    }
}
|
||||
|
||||
|
||||
/// Status returned by a successful `IChunker::chunk` call.
///
/// NOTE(review): the exact meaning of each variant is defined by the chunker
/// implementations, which are not visible here. Presumably `Continue` means
/// more input may follow and `Finished` means the input is exhausted —
/// confirm against the implementations.
#[derive(Debug, Eq, PartialEq)]
pub enum ChunkerStatus {
    Continue,
    Finished,
}
|
||||
|
||||
pub trait IChunker: Sized {
|
||||
fn chunk<R: Read, W: Write>(&mut self, r: &mut R, w: &mut W) -> Result<ChunkerStatus, ChunkerError>;
|
||||
fn get_type(&self) -> ChunkerType;
|
||||
}
|
||||
|
||||
pub enum Chunker {
|
||||
Ae(Box<AeChunker>),
|
||||
Rabin(Box<RabinChunker>),
|
||||
FastCdc(Box<FastCdcChunker>)
|
||||
}
|
||||
|
||||
|
||||
impl IChunker for Chunker {
|
||||
fn get_type(&self) -> ChunkerType {
|
||||
match *self {
|
||||
Chunker::Ae(ref c) => c.get_type(),
|
||||
Chunker::Rabin(ref c) => c.get_type(),
|
||||
Chunker::FastCdc(ref c) => c.get_type()
|
||||
}
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn chunk<R: Read, W: Write>(&mut self, r: &mut R, w: &mut W) -> Result<ChunkerStatus, ChunkerError> {
|
||||
match *self {
|
||||
Chunker::Ae(ref mut c) => c.chunk(r, w),
|
||||
Chunker::Rabin(ref mut c) => c.chunk(r, w),
|
||||
Chunker::FastCdc(ref mut c) => c.chunk(r, w)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Selects a chunking algorithm together with its parameters.
///
/// In every variant the `usize` is the average chunk size; the second tuple
/// element, where present, is the seed.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ChunkerType {
    /// AE chunker: average chunk size only.
    Ae(usize),
    /// Rabin chunker: `(average chunk size, 32-bit seed)`.
    Rabin((usize, u32)),
    /// FastCDC chunker: `(average chunk size, 64-bit seed)`.
    FastCdc((usize, u64)),
}
|
||||
// Serialization support for `ChunkerType`, generated by the `serde_impl!` macro.
// NOTE(review): the numbers on the right are presumably the `u64` tags that
// identify each variant in serialized data; if so, they must stay stable for
// already-stored data to remain readable — confirm against the macro's docs.
serde_impl!(ChunkerType(u64) {
    Ae(usize) => 1,
    Rabin((usize, u32)) => 2,
    FastCdc((usize, u64)) => 3
});
|
||||
|
||||
|
||||
impl ChunkerType {
|
||||
pub fn from(name: &str, avg_size: usize, seed: u64) -> Result<Self, &'static str> {
|
||||
match name {
|
||||
"ae" => Ok(ChunkerType::Ae(avg_size)),
|
||||
"rabin" => Ok(ChunkerType::Rabin((avg_size, seed as u32))),
|
||||
"fastcdc" => Ok(ChunkerType::FastCdc((avg_size, seed))),
|
||||
_ => Err("Unsupported chunker type")
|
||||
}
|
||||
}
|
||||
|
||||
pub fn from_string(name: &str) -> Result<Self, &'static str> {
|
||||
let (name, size) = if let Some(pos) = name.find('/') {
|
||||
let size = try!(usize::from_str(&name[pos+1..]).map_err(|_| "Chunk size must be a number"));
|
||||
let name = &name[..pos];
|
||||
(name, size)
|
||||
} else {
|
||||
(name, 8)
|
||||
};
|
||||
Self::from(name, size * 1024, 0)
|
||||
}
|
||||
|
||||
|
||||
#[inline]
|
||||
pub fn create(&self) -> Chunker {
|
||||
match *self {
|
||||
ChunkerType::Ae(size) => Chunker::Ae(Box::new(AeChunker::new(size))),
|
||||
ChunkerType::Rabin((size, seed)) => Chunker::Rabin(Box::new(RabinChunker::new(size, seed))),
|
||||
ChunkerType::FastCdc((size, seed)) => Chunker::FastCdc(Box::new(FastCdcChunker::new(size, seed)))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn name(&self) -> &'static str {
|
||||
match *self {
|
||||
ChunkerType::Ae(_size) => "ae",
|
||||
ChunkerType::Rabin((_size, _seed)) => "rabin",
|
||||
ChunkerType::FastCdc((_size, _seed)) => "fastcdc"
|
||||
}
|
||||
}
|
||||
|
||||
pub fn avg_size(&self) -> usize {
|
||||
match *self {
|
||||
ChunkerType::Ae(size) => size,
|
||||
ChunkerType::Rabin((size, _seed)) => size,
|
||||
ChunkerType::FastCdc((size, _seed)) => size
|
||||
}
|
||||
}
|
||||
|
||||
pub fn to_string(&self) -> String {
|
||||
format!("{}/{}", self.name(), self.avg_size()/1024)
|
||||
}
|
||||
|
||||
pub fn seed(&self) -> u64 {
|
||||
match *self {
|
||||
ChunkerType::Ae(_size) => 0,
|
||||
ChunkerType::Rabin((_size, seed)) => seed as u64,
|
||||
ChunkerType::FastCdc((_size, seed)) => seed
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue