From 87f7cc0feb2713bc5e183b22822d122549c86e07 Mon Sep 17 00:00:00 2001 From: Dennis Schwerdel Date: Wed, 19 Apr 2017 17:35:25 +0200 Subject: [PATCH] Moved chunker to separate crate (re #17) --- Cargo.lock | 8 ++ Cargo.toml | 1 + chunking/Cargo.lock | 31 +++++ chunking/Cargo.toml | 7 ++ {src/chunker => chunking/src}/ae.rs | 11 +- {src/chunker => chunking/src}/fastcdc.rs | 12 +- chunking/src/lib.rs | 51 ++++++++ {src/chunker => chunking/src}/rabin.rs | 11 +- src/chunker.rs | 77 ++++++++++++ src/chunker/mod.rs | 151 ----------------------- src/cli/algotest.rs | 2 +- src/main.rs | 4 +- src/prelude.rs | 2 +- src/repository/mod.rs | 2 +- 14 files changed, 186 insertions(+), 184 deletions(-) create mode 100644 chunking/Cargo.lock create mode 100644 chunking/Cargo.toml rename {src/chunker => chunking/src}/ae.rs (87%) rename {src/chunker => chunking/src}/fastcdc.rs (93%) create mode 100644 chunking/src/lib.rs rename {src/chunker => chunking/src}/rabin.rs (92%) create mode 100644 src/chunker.rs delete mode 100644 src/chunker/mod.rs diff --git a/Cargo.lock b/Cargo.lock index cc6d17b..5c95479 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6,6 +6,7 @@ dependencies = [ "blake2-rfc 0.2.17 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "chrono 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "chunking 0.1.0", "clap 2.23.2 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", "filetime 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", @@ -88,6 +89,13 @@ dependencies = [ "time 0.1.36 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "chunking" +version = "0.1.0" +dependencies = [ + "quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "clap" version = "2.23.2" diff --git a/Cargo.toml b/Cargo.toml index d4f2303..424289a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -33,6 +33,7 @@ users = "0.5" time = "*" libc = "*" index = {path="index"} +chunking = {path="chunking"} [build-dependencies] pkg-config = "0.3" diff --git a/chunking/Cargo.lock b/chunking/Cargo.lock new file mode 100644 index 0000000..d4da0da --- /dev/null +++ b/chunking/Cargo.lock @@ -0,0 +1,31 @@ +[root] +name = "chunking" +version = "0.1.0" +dependencies = [ + "quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "serde 0.9.14 (registry+https://github.com/rust-lang/crates.io-index)", + "serde_utils 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "quick-error" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde" +version = "0.9.14" +source = "registry+https://github.com/rust-lang/crates.io-index" + +[[package]] +name = "serde_utils" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "serde 0.9.14 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[metadata] +"checksum quick-error 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "0aad603e8d7fb67da22dbdf1f4b826ce8829e406124109e73cf1b2454b93a71c" +"checksum serde 0.9.14 (registry+https://github.com/rust-lang/crates.io-index)" = "a4c9a40d556f8431394def53446db659f796dc87a53ef67b7541f21057fbdd91" +"checksum serde_utils 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b34a52969c7fc0254e214b82518c9a95dc88c84fc84cd847add314996a031be6" diff --git a/chunking/Cargo.toml b/chunking/Cargo.toml new file mode 100644 index 0000000..66028de --- /dev/null +++ b/chunking/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "chunking" +version = "0.1.0" +authors = ["Dennis Schwerdel "] + +[dependencies] +quick-error = "1.1" diff --git a/src/chunker/ae.rs b/chunking/src/ae.rs similarity index 87% rename from src/chunker/ae.rs rename to chunking/src/ae.rs index c837556..a2438db 100644 --- a/src/chunker/ae.rs +++ b/chunking/src/ae.rs @@ -10,7 +10,6 @@ use std::ptr; pub struct AeChunker { buffer: [u8; 4096], buffered: usize, - avg_size: usize, window_size: usize } @@ -23,19 +22,13 @@ impl AeChunker { buffer: [0; 4096], buffered: 0, window_size: window_size, - avg_size: avg_size } } } -impl IChunker for AeChunker { - #[inline] - fn get_type(&self) -> ChunkerType { - ChunkerType::Ae(self.avg_size) - } - +impl Chunker for AeChunker { #[allow(unknown_lints,explicit_counter_loop)] - fn chunk(&mut self, r: &mut R, mut w: &mut W) -> Result { + fn chunk(&mut self, r: &mut Read, mut w: &mut Write) -> Result { let mut max; let mut pos = 0; let mut max_pos = 0; diff --git a/src/chunker/fastcdc.rs b/chunking/src/fastcdc.rs similarity index 93% rename from src/chunker/fastcdc.rs rename to chunking/src/fastcdc.rs index 4055afc..0a95729 100644 --- a/src/chunker/fastcdc.rs +++ b/chunking/src/fastcdc.rs @@ -53,7 +53,6 @@ pub struct FastCdcChunker { avg_size: usize, mask_long: u64, mask_short: u64, - seed: u64 } @@ -69,20 +68,13 @@ impl FastCdcChunker { avg_size: avg_size, mask_long: mask_long, mask_short: mask_short, - seed: seed } } } -impl IChunker for FastCdcChunker { - #[inline] - fn get_type(&self) -> ChunkerType { - ChunkerType::FastCdc((self.avg_size, self.seed)) - } - - +impl Chunker for FastCdcChunker { #[allow(unknown_lints,explicit_counter_loop,needless_range_loop)] - fn chunk(&mut self, r: &mut R, mut w: &mut W) -> Result { + fn chunk(&mut self, r: &mut Read, mut w: &mut Write) -> Result { let mut max; let mut hash = 0u64; let mut pos = 0; diff --git a/chunking/src/lib.rs b/chunking/src/lib.rs new file mode 100644 index 0000000..4c1413f --- /dev/null +++ b/chunking/src/lib.rs @@ -0,0 +1,51 @@ +#[macro_use] extern crate quick_error; + +use std::io::{self, Write, Read}; + +mod ae; +mod rabin; +mod fastcdc; + +pub use self::ae::AeChunker; +pub use self::rabin::RabinChunker; +pub use self::fastcdc::FastCdcChunker; + +// https://moinakg.wordpress.com/2013/06/22/high-performance-content-defined-chunking/ + +// Paper: "A Comprehensive Study of the Past, Present, and Future of Data Deduplication" +// Paper-URL: http://wxia.hustbackup.cn/IEEE-Survey-final.pdf + +// https://borgbackup.readthedocs.io/en/stable/internals.html#chunks +// https://github.com/bup/bup/blob/master/lib/bup/bupsplit.c + +quick_error!{ + #[derive(Debug)] + pub enum ChunkerError { + Read(err: io::Error) { + cause(err) + description("Failed to read input") + display("Chunker error: failed to read input\n\tcaused by: {}", err) + } + Write(err: io::Error) { + cause(err) + description("Failed to write to output") + display("Chunker error: failed to write to output\n\tcaused by: {}", err) + } + Custom(reason: &'static str) { + from() + description("Custom error") + display("Chunker error: {}", reason) + } + } +} + + +#[derive(Debug, Eq, PartialEq)] +pub enum ChunkerStatus { + Continue, + Finished +} + +pub trait Chunker { + fn chunk(&mut self, r: &mut Read, w: &mut Write) -> Result; +} diff --git a/src/chunker/rabin.rs b/chunking/src/rabin.rs similarity index 92% rename from src/chunker/rabin.rs rename to chunking/src/rabin.rs index d3fa269..36e54a0 100644 --- a/src/chunker/rabin.rs +++ b/chunking/src/rabin.rs @@ -43,7 +43,6 @@ pub struct RabinChunker { max_size: usize, window_size: usize, chunk_mask: u32, - avg_size: usize } @@ -62,19 +61,13 @@ impl RabinChunker { max_size: avg_size*4, window_size: window_size, chunk_mask: chunk_mask, - avg_size: avg_size } } } -impl IChunker for RabinChunker { - #[inline] - fn get_type(&self) -> ChunkerType { - ChunkerType::Rabin((self.avg_size, self.seed)) - } - +impl Chunker for RabinChunker { #[allow(unknown_lints,explicit_counter_loop)] - fn chunk(&mut self, r: &mut R, mut w: &mut W) -> Result { + fn chunk(&mut self, r: &mut Read, mut w: &mut Write) -> Result { let mut max; let mut hash = 0u32; let mut pos = 0; diff --git a/src/chunker.rs b/src/chunker.rs new file mode 100644 index 0000000..ac3d819 --- /dev/null +++ b/src/chunker.rs @@ -0,0 +1,77 @@ +pub use chunking::*; + +use std::str::FromStr; + + +#[derive(Debug, Clone, Copy, Eq, PartialEq)] +pub enum ChunkerType { + Ae(usize), + Rabin((usize, u32)), + FastCdc((usize, u64)) +} +serde_impl!(ChunkerType(u64) { + Ae(usize) => 1, + Rabin((usize, u32)) => 2, + FastCdc((usize, u64)) => 3 +}); + + +impl ChunkerType { + pub fn from(name: &str, avg_size: usize, seed: u64) -> Result { + match name { + "ae" => Ok(ChunkerType::Ae(avg_size)), + "rabin" => Ok(ChunkerType::Rabin((avg_size, seed as u32))), + "fastcdc" => Ok(ChunkerType::FastCdc((avg_size, seed))), + _ => Err("Unsupported chunker type") + } + } + + pub fn from_string(name: &str) -> Result { + let (name, size) = if let Some(pos) = name.find('/') { + let size = try!(usize::from_str(&name[pos+1..]).map_err(|_| "Chunk size must be a number")); + let name = &name[..pos]; + (name, size) + } else { + (name, 8) + }; + Self::from(name, size * 1024, 0) + } + + + #[inline] + pub fn create(&self) -> Box { + match *self { + ChunkerType::Ae(size) => Box::new(AeChunker::new(size)), + ChunkerType::Rabin((size, seed)) => Box::new(RabinChunker::new(size, seed)), + ChunkerType::FastCdc((size, seed)) => Box::new(FastCdcChunker::new(size, seed)) + } + } + + pub fn name(&self) -> &'static str { + match *self { + ChunkerType::Ae(_size) => "ae", + ChunkerType::Rabin((_size, _seed)) => "rabin", + ChunkerType::FastCdc((_size, _seed)) => "fastcdc" + } + } + + pub fn avg_size(&self) -> usize { + match *self { + ChunkerType::Ae(size) => size, + ChunkerType::Rabin((size, _seed)) => size, + ChunkerType::FastCdc((size, _seed)) => size + } + } + + pub fn to_string(&self) -> String { + format!("{}/{}", self.name(), self.avg_size()/1024) + } + + pub fn seed(&self) -> u64 { + match *self { + ChunkerType::Ae(_size) => 0, + ChunkerType::Rabin((_size, seed)) => seed as u64, + ChunkerType::FastCdc((_size, seed)) => seed + } + } +} diff --git a/src/chunker/mod.rs b/src/chunker/mod.rs deleted file mode 100644 index d63bd59..0000000 --- a/src/chunker/mod.rs +++ /dev/null @@ -1,151 +0,0 @@ -use std::io::{self, Write, Read}; -use std::str::FromStr; - -mod ae; -mod rabin; -mod fastcdc; - -pub use self::ae::AeChunker; -pub use self::rabin::RabinChunker; -pub use self::fastcdc::FastCdcChunker; - -// https://moinakg.wordpress.com/2013/06/22/high-performance-content-defined-chunking/ - -// Paper: "A Comprehensive Study of the Past, Present, and Future of Data Deduplication" -// Paper-URL: http://wxia.hustbackup.cn/IEEE-Survey-final.pdf - -// https://borgbackup.readthedocs.io/en/stable/internals.html#chunks -// https://github.com/bup/bup/blob/master/lib/bup/bupsplit.c - -quick_error!{ - #[derive(Debug)] - pub enum ChunkerError { - Read(err: io::Error) { - cause(err) - description("Failed to read input") - display("Chunker error: failed to read input\n\tcaused by: {}", err) - } - Write(err: io::Error) { - cause(err) - description("Failed to write to output") - display("Chunker error: failed to write to output\n\tcaused by: {}", err) - } - Custom(reason: &'static str) { - from() - description("Custom error") - display("Chunker error: {}", reason) - } - } -} - - -#[derive(Debug, Eq, PartialEq)] -pub enum ChunkerStatus { - Continue, - Finished -} - -pub trait IChunker: Sized { - fn chunk(&mut self, r: &mut R, w: &mut W) -> Result; - fn get_type(&self) -> ChunkerType; -} - -pub enum Chunker { - Ae(Box), - Rabin(Box), - FastCdc(Box) -} - - -impl IChunker for Chunker { - fn get_type(&self) -> ChunkerType { - match *self { - Chunker::Ae(ref c) => c.get_type(), - Chunker::Rabin(ref c) => c.get_type(), - Chunker::FastCdc(ref c) => c.get_type() - } - } - - #[inline] - fn chunk(&mut self, r: &mut R, w: &mut W) -> Result { - match *self { - Chunker::Ae(ref mut c) => c.chunk(r, w), - Chunker::Rabin(ref mut c) => c.chunk(r, w), - Chunker::FastCdc(ref mut c) => c.chunk(r, w) - } - } -} - - -#[derive(Debug, Clone, Copy, Eq, PartialEq)] -pub enum ChunkerType { - Ae(usize), - Rabin((usize, u32)), - FastCdc((usize, u64)) -} -serde_impl!(ChunkerType(u64) { - Ae(usize) => 1, - Rabin((usize, u32)) => 2, - FastCdc((usize, u64)) => 3 -}); - - -impl ChunkerType { - pub fn from(name: &str, avg_size: usize, seed: u64) -> Result { - match name { - "ae" => Ok(ChunkerType::Ae(avg_size)), - "rabin" => Ok(ChunkerType::Rabin((avg_size, seed as u32))), - "fastcdc" => Ok(ChunkerType::FastCdc((avg_size, seed))), - _ => Err("Unsupported chunker type") - } - } - - pub fn from_string(name: &str) -> Result { - let (name, size) = if let Some(pos) = name.find('/') { - let size = try!(usize::from_str(&name[pos+1..]).map_err(|_| "Chunk size must be a number")); - let name = &name[..pos]; - (name, size) - } else { - (name, 8) - }; - Self::from(name, size * 1024, 0) - } - - - #[inline] - pub fn create(&self) -> Chunker { - match *self { - ChunkerType::Ae(size) => Chunker::Ae(Box::new(AeChunker::new(size))), - ChunkerType::Rabin((size, seed)) => Chunker::Rabin(Box::new(RabinChunker::new(size, seed))), - ChunkerType::FastCdc((size, seed)) => Chunker::FastCdc(Box::new(FastCdcChunker::new(size, seed))) - } - } - - pub fn name(&self) -> &'static str { - match *self { - ChunkerType::Ae(_size) => "ae", - ChunkerType::Rabin((_size, _seed)) => "rabin", - ChunkerType::FastCdc((_size, _seed)) => "fastcdc" - } - } - - pub fn avg_size(&self) -> usize { - match *self { - ChunkerType::Ae(size) => size, - ChunkerType::Rabin((size, _seed)) => size, - ChunkerType::FastCdc((size, _seed)) => size - } - } - - pub fn to_string(&self) -> String { - format!("{}/{}", self.name(), self.avg_size()/1024) - } - - pub fn seed(&self) -> u64 { - match *self { - ChunkerType::Ae(_size) => 0, - ChunkerType::Rabin((_size, seed)) => seed as u64, - ChunkerType::FastCdc((_size, seed)) => seed - } - } -} diff --git a/src/cli/algotest.rs b/src/cli/algotest.rs index 0a13119..06c2274 100644 --- a/src/cli/algotest.rs +++ b/src/cli/algotest.rs @@ -32,7 +32,7 @@ impl Write for ChunkSink { } } -fn chunk(data: &[u8], mut chunker: Chunker, sink: &mut ChunkSink) { +fn chunk(data: &[u8], mut chunker: Box, sink: &mut ChunkSink) { let mut cursor = Cursor::new(data); while chunker.chunk(&mut cursor, sink).unwrap() == ChunkerStatus::Continue { sink.end_chunk(); diff --git a/src/main.rs b/src/main.rs index d7214bd..9bf58e3 100644 --- a/src/main.rs +++ b/src/main.rs @@ -28,15 +28,15 @@ extern crate users; extern crate libc; extern crate tar; extern crate index; +extern crate chunking; pub mod util; mod bundledb; -//pub mod index; -mod chunker; mod repository; mod cli; mod prelude; mod mount; +mod chunker; use std::process::exit; diff --git a/src/prelude.rs b/src/prelude.rs index 8a6f33b..0637ac6 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -1,6 +1,6 @@ pub use ::util::*; pub use ::bundledb::{BundleReader, BundleMode, BundleWriter, BundleInfo, BundleId, BundleDbError, BundleDb, BundleWriterError, StoredBundle}; -pub use ::chunker::{ChunkerType, Chunker, ChunkerStatus, IChunker, ChunkerError}; +pub use ::chunker::{ChunkerType, Chunker, ChunkerStatus, ChunkerError}; pub use ::repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType, IntegrityError, BackupFileError, BackupError, BackupOptions, BundleAnalysis, FileData, DiffType, InodeError, RepositoryLayout, Location}; pub use ::index::{Index, IndexError}; pub use ::mount::FuseFilesystem; diff --git a/src/repository/mod.rs b/src/repository/mod.rs index e41dca7..899afc0 100644 --- a/src/repository/mod.rs +++ b/src/repository/mod.rs @@ -77,7 +77,7 @@ pub struct Repository { bundles: BundleDb, data_bundle: Option, meta_bundle: Option, - chunker: Chunker, + chunker: Box, remote_locks: LockFolder, local_locks: LockFolder, lock: LockHandle,