mirror of https://github.com/dswd/zvault
Index stats
This commit is contained in:
parent
3b7bb52620
commit
224bf1d25c
|
@ -100,8 +100,8 @@ regarded as not set at all.
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
|
|
||||||
- `~/.zvault` references the repository in `~/.zvault` and is identical with
|
- `~/.zvault/repos/default` references the repository in
|
||||||
`::`.
|
`~/.zvault/repos/default` and is identical with `::`.
|
||||||
- `::backup1` references the backup `backup1` in the default repository
|
- `::backup1` references the backup `backup1` in the default repository
|
||||||
- `::backup1::/` references the root folder of the backup `backup1` in the
|
- `::backup1::/` references the root folder of the backup `backup1` in the
|
||||||
default repository
|
default repository
|
||||||
|
@ -189,7 +189,7 @@ The chunker algorithm and chunk size are configured together in the format
|
||||||
`algorithm/size` where algorithm is one of `rabin`, `ae` and `fastcdc` and size
|
`algorithm/size` where algorithm is one of `rabin`, `ae` and `fastcdc` and size
|
||||||
is the size in KiB e.g. `16`. So the recommended configuration is `fastcdc/16`.
|
is the size in KiB e.g. `16`. So the recommended configuration is `fastcdc/16`.
|
||||||
|
|
||||||
Please not that since the chunker algorithm and chunk size affect the chunks
|
Please note that since the chunker algorithm and chunk size affect the chunks
|
||||||
created from the input data, any change to those values will make existing
|
created from the input data, any change to those values will make existing
|
||||||
chunks inaccessible for deduplication purposes. The old data is still readable
|
chunks inaccessible for deduplication purposes. The old data is still readable
|
||||||
but new backups will have to store all data again.
|
but new backups will have to store all data again.
|
||||||
|
@ -198,7 +198,7 @@ but new backups will have to store all data again.
|
||||||
### Compression
|
### Compression
|
||||||
ZVault offers different compression algorithms that can be used to compress the
|
ZVault offers different compression algorithms that can be used to compress the
|
||||||
stored data after deduplication. The compression ratio that can be achieved
|
stored data after deduplication. The compression ratio that can be achieved
|
||||||
mostly depends on the input data (test data can be compressed well and media
|
mostly depends on the input data (text data can be compressed well and media
|
||||||
data like music and videos are already compressed and can not be compressed
|
data like music and videos are already compressed and can not be compressed
|
||||||
significantly).
|
significantly).
|
||||||
|
|
||||||
|
|
|
@ -859,6 +859,7 @@ pub fn run() -> Result<(), ErrorCode> {
|
||||||
print_backup(&backup);
|
print_backup(&backup);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
println!("{:?}", repo.statistics());
|
||||||
print_repoinfo(&repo.info());
|
print_repoinfo(&repo.info());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
24
src/index.rs
24
src/index.rs
|
@ -8,6 +8,7 @@ use std::os::unix::io::AsRawFd;
|
||||||
|
|
||||||
use mmap::{MemoryMap, MapOption, MapError};
|
use mmap::{MemoryMap, MapOption, MapError};
|
||||||
|
|
||||||
|
use ::prelude::*;
|
||||||
|
|
||||||
pub const MAX_USAGE: f64 = 0.9;
|
pub const MAX_USAGE: f64 = 0.9;
|
||||||
pub const MIN_USAGE: f64 = 0.35;
|
pub const MIN_USAGE: f64 = 0.35;
|
||||||
|
@ -373,6 +374,11 @@ impl<K: Key, V: Value> Index<K, V> {
|
||||||
self.header.capacity = self.capacity as u64;
|
self.header.capacity = self.capacity as u64;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn get_displacement(&self, entry: &Entry<K, V>, pos: usize) -> usize {
|
||||||
|
(pos + self.capacity - (entry.get_key().hash() as usize & self.mask)) & self.mask
|
||||||
|
}
|
||||||
|
|
||||||
/// Finds the position for this key
|
/// Finds the position for this key
|
||||||
/// If the key is in the table, it will be the position of the key,
|
/// If the key is in the table, it will be the position of the key,
|
||||||
/// otherwise it will be the position where this key should be inserted
|
/// otherwise it will be the position where this key should be inserted
|
||||||
|
@ -387,7 +393,7 @@ impl<K: Key, V: Value> Index<K, V> {
|
||||||
if entry.get_key() == key {
|
if entry.get_key() == key {
|
||||||
return LocateResult::Found(pos);
|
return LocateResult::Found(pos);
|
||||||
}
|
}
|
||||||
let odist = (pos + self.capacity - (entry.get_key().hash() as usize & self.mask)) & self.mask;
|
let odist = self.get_displacement(entry, pos);
|
||||||
if dist > odist {
|
if dist > odist {
|
||||||
return LocateResult::Steal(pos);
|
return LocateResult::Steal(pos);
|
||||||
}
|
}
|
||||||
|
@ -579,4 +585,20 @@ impl<K: Key, V: Value> Index<K, V> {
|
||||||
}
|
}
|
||||||
self.entries = 0;
|
self.entries = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn statistics(&self) -> IndexStatistics {
|
||||||
|
IndexStatistics {
|
||||||
|
displacement: ValueStats::from_iter(|| self.data.iter().enumerate().filter(
|
||||||
|
|&(_, entry)| entry.is_used()).map(
|
||||||
|
|(index, entry)| self.get_displacement(entry, index) as f32))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct IndexStatistics {
|
||||||
|
pub displacement: ValueStats
|
||||||
|
}
|
|
@ -4,8 +4,9 @@ pub use bundledb::{BundleReader, BundleMode, BundleWriter, BundleInfo, BundleId,
|
||||||
pub use chunker::{ChunkerType, Chunker, ChunkerStatus, ChunkerError};
|
pub use chunker::{ChunkerType, Chunker, ChunkerStatus, ChunkerError};
|
||||||
pub use repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType,
|
pub use repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType,
|
||||||
IntegrityError, BackupFileError, BackupError, BackupOptions, BundleAnalysis,
|
IntegrityError, BackupFileError, BackupError, BackupOptions, BundleAnalysis,
|
||||||
FileData, DiffType, InodeError, RepositoryLayout, Location};
|
FileData, DiffType, InodeError, RepositoryLayout, Location,
|
||||||
pub use index::{Index, IndexError};
|
RepositoryStatistics};
|
||||||
|
pub use index::{Index, IndexError, IndexStatistics};
|
||||||
pub use mount::FuseFilesystem;
|
pub use mount::FuseFilesystem;
|
||||||
pub use translation::CowStr;
|
pub use translation::CowStr;
|
||||||
|
|
||||||
|
|
|
@ -39,6 +39,12 @@ pub struct RepositoryInfo {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
pub struct RepositoryStatistics {
|
||||||
|
pub index: IndexStatistics
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
impl Repository {
|
impl Repository {
|
||||||
fn mark_used(
|
fn mark_used(
|
||||||
&self,
|
&self,
|
||||||
|
@ -147,4 +153,11 @@ impl Repository {
|
||||||
index_entries: self.index.len()
|
index_entries: self.index.len()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[allow(dead_code)]
|
||||||
|
pub fn statistics(&self) -> RepositoryStatistics {
|
||||||
|
RepositoryStatistics {
|
||||||
|
index: self.index.statistics()
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,7 +27,7 @@ pub use self::metadata::{Inode, FileType, FileData, InodeError};
|
||||||
pub use self::backup::{BackupError, BackupOptions, DiffType};
|
pub use self::backup::{BackupError, BackupOptions, DiffType};
|
||||||
pub use self::backup_file::{Backup, BackupFileError};
|
pub use self::backup_file::{Backup, BackupFileError};
|
||||||
pub use self::integrity::IntegrityError;
|
pub use self::integrity::IntegrityError;
|
||||||
pub use self::info::{RepositoryInfo, BundleAnalysis};
|
pub use self::info::{RepositoryInfo, BundleAnalysis, RepositoryStatistics};
|
||||||
pub use self::layout::RepositoryLayout;
|
pub use self::layout::RepositoryLayout;
|
||||||
use self::bundle_map::BundleMap;
|
use self::bundle_map::BundleMap;
|
||||||
|
|
||||||
|
|
|
@ -9,6 +9,7 @@ mod cli;
|
||||||
mod hostname;
|
mod hostname;
|
||||||
mod fs;
|
mod fs;
|
||||||
mod lock;
|
mod lock;
|
||||||
|
mod statistics;
|
||||||
pub mod msgpack;
|
pub mod msgpack;
|
||||||
|
|
||||||
pub use self::fs::*;
|
pub use self::fs::*;
|
||||||
|
@ -22,3 +23,4 @@ pub use self::hex::*;
|
||||||
pub use self::cli::*;
|
pub use self::cli::*;
|
||||||
pub use self::hostname::*;
|
pub use self::hostname::*;
|
||||||
pub use self::lock::*;
|
pub use self::lock::*;
|
||||||
|
pub use self::statistics::*;
|
|
@ -0,0 +1,57 @@
|
||||||
|
|
||||||
|
|
||||||
|
/// Summary statistics over a sequence of `f32` samples.
///
/// Besides the usual aggregates, the samples are sorted into five buckets
/// according to their distance from the mean, measured in standard
/// deviations.
#[derive(Debug, Default)]
pub struct ValueStats {
    /// Smallest sample (0.0 if there were no samples).
    pub min: f32,
    /// Largest sample (0.0 if there were no samples).
    pub max: f32,
    /// Arithmetic mean of the samples (0.0 if there were no samples).
    pub avg: f32,
    /// Sample standard deviation (Bessel-corrected, i.e. divided by `count - 1`);
    /// 0.0 when fewer than two samples were seen.
    pub stddev: f32,
    /// Number of samples.
    pub count: usize,
    /// Samples below `avg - 2 * stddev`.
    pub count_xs: usize,
    /// Samples in `[avg - 2 * stddev, avg - stddev)`.
    pub count_s: usize,
    /// Samples in `[avg - stddev, avg + stddev)`.
    pub count_m: usize,
    /// Samples in `[avg + stddev, avg + 2 * stddev)`.
    pub count_l: usize,
    /// Samples at or above `avg + 2 * stddev`.
    pub count_xl: usize,
}

impl ValueStats {
    /// Computes statistics over the samples produced by `iter`.
    ///
    /// `iter` is a factory that must yield the same sequence every time it is
    /// called: the samples are traversed up to three times (aggregates,
    /// squared deviations, bucket counts) without being buffered.
    ///
    /// Special cases:
    /// * no samples: all fields are zero;
    /// * one sample: `stddev` is 0.0 and the sample is counted in `count_m`.
    pub fn from_iter<T: Iterator<Item=f32>, F: Fn() -> T>(iter: F) -> ValueStats {
        let mut stats = ValueStats::default();
        // Start from the identity elements of min/max so that any sample,
        // including negative ones, replaces them.
        stats.min = ::std::f32::INFINITY;
        stats.max = ::std::f32::NEG_INFINITY;
        let mut sum = 0.0f64;
        for val in iter() {
            if stats.min > val {
                stats.min = val;
            }
            if stats.max < val {
                stats.max = val;
            }
            sum += f64::from(val);
            stats.count += 1;
        }
        if stats.count == 0 {
            // Avoid reporting infinities and a NaN mean for an empty input.
            return ValueStats::default();
        }
        // Keep the mean in f64 for the deviation pass to limit rounding error.
        let mean = sum / stats.count as f64;
        stats.avg = mean as f32;
        if stats.count < 2 {
            // A standard deviation is undefined for a single sample.
            stats.count_m = stats.count;
            return stats;
        }
        let mut squared_dev = 0.0f64;
        for val in iter() {
            let dev = f64::from(val) - mean;
            squared_dev += dev * dev;
        }
        // Sample standard deviation: the square root is taken of the
        // variance, i.e. after dividing by (count - 1).
        stats.stddev = (squared_dev / (stats.count as f64 - 1.0)).sqrt() as f32;
        for val in iter() {
            if val < stats.avg - 2.0 * stats.stddev {
                stats.count_xs += 1;
            } else if val < stats.avg - stats.stddev {
                stats.count_s += 1;
            } else if val < stats.avg + stats.stddev {
                stats.count_m += 1;
            } else if val < stats.avg + 2.0 * stats.stddev {
                stats.count_l += 1;
            } else {
                stats.count_xl += 1;
            }
        }
        stats
    }
}
|
Loading…
Reference in New Issue