Browse Source

Moved chunker to separate crate (re #17)

pull/10/head
Dennis Schwerdel 5 years ago
parent
commit
87f7cc0feb
  1. 8
      Cargo.lock
  2. 1
      Cargo.toml
  3. 31
      chunking/Cargo.lock
  4. 7
      chunking/Cargo.toml
  5. 11
      chunking/src/ae.rs
  6. 12
      chunking/src/fastcdc.rs
  7. 51
      chunking/src/lib.rs
  8. 11
      chunking/src/rabin.rs
  9. 77
      src/chunker.rs
  10. 151
      src/chunker/mod.rs
  11. 2
      src/cli/algotest.rs
  12. 4
      src/main.rs
  13. 2
      src/prelude.rs
  14. 2
      src/repository/mod.rs

8
Cargo.lock generated

@ -6,6 +6,7 @@ dependencies = [
"blake2-rfc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)",
"byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)",
"chrono 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"chunking 0.1.0",
"clap 2.23.2 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)",
"filetime 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
@ -88,6 +89,13 @@ dependencies = [
"time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "chunking"
version = "0.1.0"
dependencies = [
"quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "clap"
version = "2.23.2"

1
Cargo.toml

@ -33,6 +33,7 @@ users = "0.5"
time = "*"
libc = "*"
index = {path="index"}
chunking = {path="chunking"}
[build-dependencies]
pkg-config = "0.3"

31
chunking/Cargo.lock generated

@ -0,0 +1,31 @@
[root]
name = "chunking"
version = "0.1.0"
dependencies = [
"quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 0.9.14 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_utils 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "quick-error"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde"
version = "0.9.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "serde_utils"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"serde 0.9.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0aad603e8d7fb67da22dbdf1f4b826ce8829e406124109e73cf1b2454b93a71c"
"checksum serde 0.9.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a4c9a40d556f8431394def53446db659f796dc87a53ef67b7541f21057fbdd91"
"checksum serde_utils 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b34a52969c7fc0254e214b82518c9a95dc88c84fc84cd847add314996a031be6"

7
chunking/Cargo.toml

@ -0,0 +1,7 @@
[package]
name = "chunking"
version = "0.1.0"
authors = ["Dennis Schwerdel <schwerdel@googlemail.com>"]
[dependencies]
quick-error = "1.1"

11
src/chunker/ae.rs → chunking/src/ae.rs

@ -10,7 +10,6 @@ use std::ptr;
pub struct AeChunker {
buffer: [u8; 4096],
buffered: usize,
avg_size: usize,
window_size: usize
}
@ -23,19 +22,13 @@ impl AeChunker {
buffer: [0; 4096],
buffered: 0,
window_size: window_size,
avg_size: avg_size
}
}
}
impl IChunker for AeChunker {
#[inline]
fn get_type(&self) -> ChunkerType {
ChunkerType::Ae(self.avg_size)
}
impl Chunker for AeChunker {
#[allow(unknown_lints,explicit_counter_loop)]
fn chunk<R: Read, W: Write>(&mut self, r: &mut R, mut w: &mut W) -> Result<ChunkerStatus, ChunkerError> {
fn chunk(&mut self, r: &mut Read, mut w: &mut Write) -> Result<ChunkerStatus, ChunkerError> {
let mut max;
let mut pos = 0;
let mut max_pos = 0;

12
src/chunker/fastcdc.rs → chunking/src/fastcdc.rs

@ -53,7 +53,6 @@ pub struct FastCdcChunker {
avg_size: usize,
mask_long: u64,
mask_short: u64,
seed: u64
}
@ -69,20 +68,13 @@ impl FastCdcChunker {
avg_size: avg_size,
mask_long: mask_long,
mask_short: mask_short,
seed: seed
}
}
}
impl IChunker for FastCdcChunker {
#[inline]
fn get_type(&self) -> ChunkerType {
ChunkerType::FastCdc((self.avg_size, self.seed))
}
impl Chunker for FastCdcChunker {
#[allow(unknown_lints,explicit_counter_loop,needless_range_loop)]
fn chunk<R: Read, W: Write>(&mut self, r: &mut R, mut w: &mut W) -> Result<ChunkerStatus, ChunkerError> {
fn chunk(&mut self, r: &mut Read, mut w: &mut Write) -> Result<ChunkerStatus, ChunkerError> {
let mut max;
let mut hash = 0u64;
let mut pos = 0;

51
chunking/src/lib.rs

@ -0,0 +1,51 @@
#[macro_use] extern crate quick_error;
use std::io::{self, Write, Read};
mod ae;
mod rabin;
mod fastcdc;
pub use self::ae::AeChunker;
pub use self::rabin::RabinChunker;
pub use self::fastcdc::FastCdcChunker;
// https://moinakg.wordpress.com/2013/06/22/high-performance-content-defined-chunking/
// Paper: "A Comprehensive Study of the Past, Present, and Future of Data Deduplication"
// Paper-URL: http://wxia.hustbackup.cn/IEEE-Survey-final.pdf
// https://borgbackup.readthedocs.io/en/stable/internals.html#chunks
// https://github.com/bup/bup/blob/master/lib/bup/bupsplit.c
// Error type returned by chunkers, declared through the `quick_error!` macro.
quick_error!{
#[derive(Debug)]
pub enum ChunkerError {
// Carries an underlying `io::Error`; its messages describe a failure to read the input.
Read(err: io::Error) {
cause(err)
description("Failed to read input")
display("Chunker error: failed to read input\n\tcaused by: {}", err)
}
// Carries an underlying `io::Error`; its messages describe a failure to write the output.
Write(err: io::Error) {
cause(err)
description("Failed to write to output")
display("Chunker error: failed to write to output\n\tcaused by: {}", err)
}
// Free-form error carrying a static message; `from()` asks quick_error for a
// conversion from `&'static str` into this variant.
Custom(reason: &'static str) {
from()
description("Custom error")
display("Chunker error: {}", reason)
}
}
}
/// Status value returned by a successful `chunk` call.
///
/// NOTE(review): the chunker implementations are not visible here; presumably
/// `Continue` means more input remains after the chunk just written and
/// `Finished` means the input is exhausted — confirm against the implementations.
#[derive(Debug, Eq, PartialEq)]
pub enum ChunkerStatus {
Continue,
Finished
}
/// Common interface of the chunking algorithms exported by this crate.
///
/// The reader and writer are taken as trait objects (`&mut Read`, `&mut Write`)
/// rather than as generic parameters, so the trait has no generic methods and
/// can itself be used as a trait object.
pub trait Chunker {
/// Runs one chunking step using `r` as input and `w` as output.
///
/// Returns a `ChunkerStatus` on success or a `ChunkerError` on failure.
/// Presumably one call emits exactly one chunk to `w` — confirm against the implementations.
fn chunk(&mut self, r: &mut Read, w: &mut Write) -> Result<ChunkerStatus, ChunkerError>;
}

11
src/chunker/rabin.rs → chunking/src/rabin.rs

@ -43,7 +43,6 @@ pub struct RabinChunker {
max_size: usize,
window_size: usize,
chunk_mask: u32,
avg_size: usize
}
@ -62,19 +61,13 @@ impl RabinChunker {
max_size: avg_size*4,
window_size: window_size,
chunk_mask: chunk_mask,
avg_size: avg_size
}
}
}
impl IChunker for RabinChunker {
#[inline]
fn get_type(&self) -> ChunkerType {
ChunkerType::Rabin((self.avg_size, self.seed))
}
impl Chunker for RabinChunker {
#[allow(unknown_lints,explicit_counter_loop)]
fn chunk<R: Read, W: Write>(&mut self, r: &mut R, mut w: &mut W) -> Result<ChunkerStatus, ChunkerError> {
fn chunk(&mut self, r: &mut Read, mut w: &mut Write) -> Result<ChunkerStatus, ChunkerError> {
let mut max;
let mut hash = 0u32;
let mut pos = 0;

77
src/chunker.rs

@ -0,0 +1,77 @@
pub use chunking::*;
use std::str::FromStr;
/// Selects a chunking algorithm together with its parameters.
///
/// Every variant carries the average chunk size (the value returned by
/// `avg_size()`); `Rabin` and `FastCdc` additionally carry a seed.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ChunkerType {
/// AE chunker with the given average chunk size.
Ae(usize),
/// Rabin chunker: `(average chunk size, 32-bit seed)`.
Rabin((usize, u32)),
/// FastCDC chunker: `(average chunk size, 64-bit seed)`.
FastCdc((usize, u64))
}
// Serialization support for `ChunkerType`, assigning the numeric ids 1, 2 and 3
// to the variants.
// NOTE(review): `serde_impl!` is defined outside this file (presumably in
// serde_utils); the ids are most likely part of the stored format, so do not
// renumber them without confirming.
serde_impl!(ChunkerType(u64) {
Ae(usize) => 1,
Rabin((usize, u32)) => 2,
FastCdc((usize, u64)) => 3
});
impl ChunkerType {
    /// Builds a chunker configuration from an algorithm name and its parameters.
    ///
    /// `name` must be one of `"ae"`, `"rabin"` or `"fastcdc"`; any other value
    /// yields `Err("Unsupported chunker type")`. `avg_size` is stored as given.
    /// `seed` is ignored for `"ae"` and truncated to its lower 32 bits for
    /// `"rabin"`, because that variant only stores a `u32` seed.
    pub fn from(name: &str, avg_size: usize, seed: u64) -> Result<Self, &'static str> {
        match name {
            "ae" => Ok(ChunkerType::Ae(avg_size)),
            // Deliberately lossy: only the lower 32 bits of the seed are kept.
            "rabin" => Ok(ChunkerType::Rabin((avg_size, seed as u32))),
            "fastcdc" => Ok(ChunkerType::FastCdc((avg_size, seed))),
            _ => Err("Unsupported chunker type")
        }
    }

    /// Parses a specification of the form `name` or `name/size`.
    ///
    /// `size` is the average chunk size in KiB and defaults to 8 when the
    /// `/size` part is missing. The seed is always set to 0.
    ///
    /// # Errors
    ///
    /// * `"Chunk size must be a number"` if the part after `/` is not a valid `usize`.
    /// * `"Chunk size is too large"` if the size in bytes does not fit into a `usize`.
    /// * `"Unsupported chunker type"` if the name is not recognised.
    pub fn from_string(name: &str) -> Result<Self, &'static str> {
        let (name, size) = if let Some(pos) = name.find('/') {
            let size = try!(usize::from_str(&name[pos+1..]).map_err(|_| "Chunk size must be a number"));
            (&name[..pos], size)
        } else {
            (name, 8)
        };
        // The size comes from user input: reject values whose byte count would
        // overflow instead of panicking (debug) or silently wrapping (release).
        let avg_size = try!(size.checked_mul(1024).ok_or("Chunk size is too large"));
        Self::from(name, avg_size, 0)
    }

    /// Instantiates the chunker described by this configuration as a boxed trait object.
    #[inline]
    pub fn create(&self) -> Box<Chunker> {
        match *self {
            ChunkerType::Ae(size) => Box::new(AeChunker::new(size)),
            ChunkerType::Rabin((size, seed)) => Box::new(RabinChunker::new(size, seed)),
            ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed))
        }
    }

    /// Returns the algorithm name as accepted by `from` and `from_string`.
    pub fn name(&self) -> &'static str {
        match *self {
            ChunkerType::Ae(_size) => "ae",
            ChunkerType::Rabin((_size, _seed)) => "rabin",
            ChunkerType::FastCdc((_size, _seed)) => "fastcdc"
        }
    }

    /// Returns the configured average chunk size in bytes.
    pub fn avg_size(&self) -> usize {
        match *self {
            ChunkerType::Ae(size) => size,
            ChunkerType::Rabin((size, _seed)) => size,
            ChunkerType::FastCdc((size, _seed)) => size
        }
    }

    /// Formats the configuration as `name/size`, with the size in whole KiB.
    ///
    /// The seed is not included, and sizes that are not a multiple of 1024 are
    /// rounded down by the integer division.
    pub fn to_string(&self) -> String {
        format!("{}/{}", self.name(), self.avg_size()/1024)
    }

    /// Returns the seed widened to `u64`; `Ae` has no seed and reports 0.
    pub fn seed(&self) -> u64 {
        match *self {
            ChunkerType::Ae(_size) => 0,
            ChunkerType::Rabin((_size, seed)) => seed as u64,
            ChunkerType::FastCdc((_size, seed)) => seed
        }
    }
}

151
src/chunker/mod.rs

@ -1,151 +0,0 @@
use std::io::{self, Write, Read};
use std::str::FromStr;
mod ae;
mod rabin;
mod fastcdc;
pub use self::ae::AeChunker;
pub use self::rabin::RabinChunker;
pub use self::fastcdc::FastCdcChunker;
// https://moinakg.wordpress.com/2013/06/22/high-performance-content-defined-chunking/
// Paper: "A Comprehensive Study of the Past, Present, and Future of Data Deduplication"
// Paper-URL: http://wxia.hustbackup.cn/IEEE-Survey-final.pdf
// https://borgbackup.readthedocs.io/en/stable/internals.html#chunks
// https://github.com/bup/bup/blob/master/lib/bup/bupsplit.c
// Error type returned by chunkers, declared through the `quick_error!` macro.
quick_error!{
#[derive(Debug)]
pub enum ChunkerError {
// Carries an underlying `io::Error`; its messages describe a failure to read the input.
Read(err: io::Error) {
cause(err)
description("Failed to read input")
display("Chunker error: failed to read input\n\tcaused by: {}", err)
}
// Carries an underlying `io::Error`; its messages describe a failure to write the output.
Write(err: io::Error) {
cause(err)
description("Failed to write to output")
display("Chunker error: failed to write to output\n\tcaused by: {}", err)
}
// Free-form error carrying a static message; `from()` asks quick_error for a
// conversion from `&'static str` into this variant.
Custom(reason: &'static str) {
from()
description("Custom error")
display("Chunker error: {}", reason)
}
}
}
/// Status value returned by a successful `chunk` call.
///
/// NOTE(review): the chunker implementations are not visible here; presumably
/// `Continue` means more input remains after the chunk just written and
/// `Finished` means the input is exhausted — confirm against the implementations.
#[derive(Debug, Eq, PartialEq)]
pub enum ChunkerStatus {
Continue,
Finished
}
/// Interface implemented by every chunker.
///
/// The `Sized` bound and the generic `chunk` method mean this trait cannot be
/// used as a trait object; calls are dispatched statically.
pub trait IChunker: Sized {
/// Runs one chunking step using `r` as input and `w` as output.
///
/// Returns a `ChunkerStatus` on success or a `ChunkerError` on failure.
/// Presumably one call emits exactly one chunk to `w` — confirm against the implementations.
fn chunk<R: Read, W: Write>(&mut self, r: &mut R, w: &mut W) -> Result<ChunkerStatus, ChunkerError>;
/// Returns the `ChunkerType` describing this chunker.
fn get_type(&self) -> ChunkerType;
}
/// A chunker of any supported algorithm, with the concrete chunker held on the heap.
pub enum Chunker {
/// Boxed `AeChunker`.
Ae(Box<AeChunker>),
/// Boxed `RabinChunker`.
Rabin(Box<RabinChunker>),
/// Boxed `FastCdcChunker`.
FastCdc(Box<FastCdcChunker>)
}
impl IChunker for Chunker {
    /// Reports the configuration of whichever concrete chunker is wrapped.
    fn get_type(&self) -> ChunkerType {
        match self {
            &Chunker::Ae(ref inner) => inner.get_type(),
            &Chunker::Rabin(ref inner) => inner.get_type(),
            &Chunker::FastCdc(ref inner) => inner.get_type(),
        }
    }

    /// Forwards the chunking step to the wrapped chunker and returns its result unchanged.
    #[inline]
    fn chunk<R: Read, W: Write>(&mut self, r: &mut R, w: &mut W) -> Result<ChunkerStatus, ChunkerError> {
        match self {
            &mut Chunker::Ae(ref mut inner) => inner.chunk(r, w),
            &mut Chunker::Rabin(ref mut inner) => inner.chunk(r, w),
            &mut Chunker::FastCdc(ref mut inner) => inner.chunk(r, w),
        }
    }
}
/// Selects a chunking algorithm together with its parameters.
///
/// Every variant carries the average chunk size (the value returned by
/// `avg_size()`); `Rabin` and `FastCdc` additionally carry a seed.
#[derive(Debug, Clone, Copy, Eq, PartialEq)]
pub enum ChunkerType {
/// AE chunker with the given average chunk size.
Ae(usize),
/// Rabin chunker: `(average chunk size, 32-bit seed)`.
Rabin((usize, u32)),
/// FastCDC chunker: `(average chunk size, 64-bit seed)`.
FastCdc((usize, u64))
}
// Serialization support for `ChunkerType`, assigning the numeric ids 1, 2 and 3
// to the variants.
// NOTE(review): `serde_impl!` is defined outside this file (presumably in
// serde_utils); the ids are most likely part of the stored format, so do not
// renumber them without confirming.
serde_impl!(ChunkerType(u64) {
Ae(usize) => 1,
Rabin((usize, u32)) => 2,
FastCdc((usize, u64)) => 3
});
impl ChunkerType {
    /// Builds a chunker configuration from an algorithm name and its parameters.
    ///
    /// `name` must be one of `"ae"`, `"rabin"` or `"fastcdc"`; any other value
    /// yields `Err("Unsupported chunker type")`. `avg_size` is stored as given.
    /// `seed` is ignored for `"ae"` and truncated to its lower 32 bits for
    /// `"rabin"`, because that variant only stores a `u32` seed.
    pub fn from(name: &str, avg_size: usize, seed: u64) -> Result<Self, &'static str> {
        match name {
            "ae" => Ok(ChunkerType::Ae(avg_size)),
            // Deliberately lossy: only the lower 32 bits of the seed are kept.
            "rabin" => Ok(ChunkerType::Rabin((avg_size, seed as u32))),
            "fastcdc" => Ok(ChunkerType::FastCdc((avg_size, seed))),
            _ => Err("Unsupported chunker type")
        }
    }

    /// Parses a specification of the form `name` or `name/size`.
    ///
    /// `size` is the average chunk size in KiB and defaults to 8 when the
    /// `/size` part is missing. The seed is always set to 0.
    ///
    /// # Errors
    ///
    /// * `"Chunk size must be a number"` if the part after `/` is not a valid `usize`.
    /// * `"Chunk size is too large"` if the size in bytes does not fit into a `usize`.
    /// * `"Unsupported chunker type"` if the name is not recognised.
    pub fn from_string(name: &str) -> Result<Self, &'static str> {
        let (name, size) = if let Some(pos) = name.find('/') {
            let size = try!(usize::from_str(&name[pos+1..]).map_err(|_| "Chunk size must be a number"));
            (&name[..pos], size)
        } else {
            (name, 8)
        };
        // The size comes from user input: reject values whose byte count would
        // overflow instead of panicking (debug) or silently wrapping (release).
        let avg_size = try!(size.checked_mul(1024).ok_or("Chunk size is too large"));
        Self::from(name, avg_size, 0)
    }

    /// Instantiates the chunker described by this configuration, wrapped in the
    /// matching `Chunker` variant.
    #[inline]
    pub fn create(&self) -> Chunker {
        match *self {
            ChunkerType::Ae(size) => Chunker::Ae(Box::new(AeChunker::new(size))),
            ChunkerType::Rabin((size, seed)) => Chunker::Rabin(Box::new(RabinChunker::new(size, seed))),
            ChunkerType::FastCdc((size, seed)) => Chunker::FastCdc(Box::new(FastCdcChunker::new(size, seed)))
        }
    }

    /// Returns the algorithm name as accepted by `from` and `from_string`.
    pub fn name(&self) -> &'static str {
        match *self {
            ChunkerType::Ae(_size) => "ae",
            ChunkerType::Rabin((_size, _seed)) => "rabin",
            ChunkerType::FastCdc((_size, _seed)) => "fastcdc"
        }
    }

    /// Returns the configured average chunk size in bytes.
    pub fn avg_size(&self) -> usize {
        match *self {
            ChunkerType::Ae(size) => size,
            ChunkerType::Rabin((size, _seed)) => size,
            ChunkerType::FastCdc((size, _seed)) => size
        }
    }

    /// Formats the configuration as `name/size`, with the size in whole KiB.
    ///
    /// The seed is not included, and sizes that are not a multiple of 1024 are
    /// rounded down by the integer division.
    pub fn to_string(&self) -> String {
        format!("{}/{}", self.name(), self.avg_size()/1024)
    }

    /// Returns the seed widened to `u64`; `Ae` has no seed and reports 0.
    pub fn seed(&self) -> u64 {
        match *self {
            ChunkerType::Ae(_size) => 0,
            ChunkerType::Rabin((_size, seed)) => seed as u64,
            ChunkerType::FastCdc((_size, seed)) => seed
        }
    }
}

2
src/cli/algotest.rs

@ -32,7 +32,7 @@ impl Write for ChunkSink {
}
}
fn chunk(data: &[u8], mut chunker: Chunker, sink: &mut ChunkSink) {
fn chunk(data: &[u8], mut chunker: Box<Chunker>, sink: &mut ChunkSink) {
let mut cursor = Cursor::new(data);
while chunker.chunk(&mut cursor, sink).unwrap() == ChunkerStatus::Continue {
sink.end_chunk();

4
src/main.rs

@ -28,15 +28,15 @@ extern crate users;
extern crate libc;
extern crate tar;
extern crate index;
extern crate chunking;
pub mod util;
mod bundledb;
//pub mod index;
mod chunker;
mod repository;
mod cli;
mod prelude;
mod mount;
mod chunker;
use std::process::exit;

2
src/prelude.rs

@ -1,6 +1,6 @@
pub use ::util::*;
pub use ::bundledb::{BundleReader, BundleMode, BundleWriter, BundleInfo, BundleId, BundleDbError, BundleDb, BundleWriterError, StoredBundle};
pub use ::chunker::{ChunkerType, Chunker, ChunkerStatus, IChunker, ChunkerError};
pub use ::chunker::{ChunkerType, Chunker, ChunkerStatus, ChunkerError};
pub use ::repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType, IntegrityError, BackupFileError, BackupError, BackupOptions, BundleAnalysis, FileData, DiffType, InodeError, RepositoryLayout, Location};
pub use ::index::{Index, IndexError};
pub use ::mount::FuseFilesystem;

2
src/repository/mod.rs

@ -77,7 +77,7 @@ pub struct Repository {
bundles: BundleDb,
data_bundle: Option<BundleWriter>,
meta_bundle: Option<BundleWriter>,
chunker: Chunker,
chunker: Box<Chunker>,
remote_locks: LockFolder,
local_locks: LockFolder,
lock: LockHandle,

Loading…
Cancel
Save