mirror of https://github.com/dswd/zvault
Index stats
This commit is contained in:
parent
3b7bb52620
commit
224bf1d25c
|
@ -100,8 +100,8 @@ regarded as not set at all.
|
|||
|
||||
Examples:
|
||||
|
||||
- `~/.zvault` references the repository in `~/.zvault` and is identical with
|
||||
`::`.
|
||||
- `~/.zvault/repos/default` references the repository in
|
||||
`~/.zvault/repos/default` and is identical with `::`.
|
||||
- `::backup1` references the backup `backup1` in the default repository
|
||||
- `::backup1::/` references the root folder of the backup `backup1` in the
|
||||
default repository
|
||||
|
@ -189,7 +189,7 @@ The chunker algorithm and chunk size are configured together in the format
|
|||
`algorithm/size` where algorithm is one of `rabin`, `ae` and `fastcdc` and size
|
||||
is the size in KiB e.g. `16`. So the recommended configuration is `fastcdc/16`.
|
||||
|
||||
Please not that since the chunker algorithm and chunk size affect the chunks
|
||||
Please note that since the chunker algorithm and chunk size affect the chunks
|
||||
created from the input data, any change to those values will make existing
|
||||
chunks inaccessible for deduplication purposes. The old data is still readable
|
||||
but new backups will have to store all data again.
|
||||
|
@ -198,7 +198,7 @@ but new backups will have to store all data again.
|
|||
### Compression
|
||||
ZVault offers different compression algorithms that can be used to compress the
|
||||
stored data after deduplication. The compression ratio that can be achieved
|
||||
mostly depends on the input data (test data can be compressed well and media
|
||||
mostly depends on the input data (text data can be compressed well and media
|
||||
data like music and videos are already compressed and can not be compressed
|
||||
significantly).
|
||||
|
||||
|
|
|
@ -859,6 +859,7 @@ pub fn run() -> Result<(), ErrorCode> {
|
|||
print_backup(&backup);
|
||||
}
|
||||
} else {
|
||||
println!("{:?}", repo.statistics());
|
||||
print_repoinfo(&repo.info());
|
||||
}
|
||||
}
|
||||
|
|
24
src/index.rs
24
src/index.rs
|
@ -8,6 +8,7 @@ use std::os::unix::io::AsRawFd;
|
|||
|
||||
use mmap::{MemoryMap, MapOption, MapError};
|
||||
|
||||
use ::prelude::*;
|
||||
|
||||
pub const MAX_USAGE: f64 = 0.9;
|
||||
pub const MIN_USAGE: f64 = 0.35;
|
||||
|
@ -373,6 +374,11 @@ impl<K: Key, V: Value> Index<K, V> {
|
|||
self.header.capacity = self.capacity as u64;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_displacement(&self, entry: &Entry<K, V>, pos: usize) -> usize {
|
||||
(pos + self.capacity - (entry.get_key().hash() as usize & self.mask)) & self.mask
|
||||
}
|
||||
|
||||
/// Finds the position for this key
|
||||
/// If the key is in the table, it will be the position of the key,
|
||||
/// otherwise it will be the position where this key should be inserted
|
||||
|
@ -387,7 +393,7 @@ impl<K: Key, V: Value> Index<K, V> {
|
|||
if entry.get_key() == key {
|
||||
return LocateResult::Found(pos);
|
||||
}
|
||||
let odist = (pos + self.capacity - (entry.get_key().hash() as usize & self.mask)) & self.mask;
|
||||
let odist = self.get_displacement(entry, pos);
|
||||
if dist > odist {
|
||||
return LocateResult::Steal(pos);
|
||||
}
|
||||
|
@ -579,4 +585,20 @@ impl<K: Key, V: Value> Index<K, V> {
|
|||
}
|
||||
self.entries = 0;
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn statistics(&self) -> IndexStatistics {
|
||||
IndexStatistics {
|
||||
displacement: ValueStats::from_iter(|| self.data.iter().enumerate().filter(
|
||||
|&(_, entry)| entry.is_used()).map(
|
||||
|(index, entry)| self.get_displacement(entry, index) as f32))
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct IndexStatistics {
|
||||
pub displacement: ValueStats
|
||||
}
|
|
@ -4,8 +4,9 @@ pub use bundledb::{BundleReader, BundleMode, BundleWriter, BundleInfo, BundleId,
|
|||
pub use chunker::{ChunkerType, Chunker, ChunkerStatus, ChunkerError};
|
||||
pub use repository::{Repository, Backup, Config, RepositoryError, RepositoryInfo, Inode, FileType,
|
||||
IntegrityError, BackupFileError, BackupError, BackupOptions, BundleAnalysis,
|
||||
FileData, DiffType, InodeError, RepositoryLayout, Location};
|
||||
pub use index::{Index, IndexError};
|
||||
FileData, DiffType, InodeError, RepositoryLayout, Location,
|
||||
RepositoryStatistics};
|
||||
pub use index::{Index, IndexError, IndexStatistics};
|
||||
pub use mount::FuseFilesystem;
|
||||
pub use translation::CowStr;
|
||||
|
||||
|
|
|
@ -39,6 +39,12 @@ pub struct RepositoryInfo {
|
|||
}
|
||||
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct RepositoryStatistics {
|
||||
pub index: IndexStatistics
|
||||
}
|
||||
|
||||
|
||||
impl Repository {
|
||||
fn mark_used(
|
||||
&self,
|
||||
|
@ -147,4 +153,11 @@ impl Repository {
|
|||
index_entries: self.index.len()
|
||||
}
|
||||
}
|
||||
|
||||
#[allow(dead_code)]
|
||||
pub fn statistics(&self) -> RepositoryStatistics {
|
||||
RepositoryStatistics {
|
||||
index: self.index.statistics()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -27,7 +27,7 @@ pub use self::metadata::{Inode, FileType, FileData, InodeError};
|
|||
pub use self::backup::{BackupError, BackupOptions, DiffType};
|
||||
pub use self::backup_file::{Backup, BackupFileError};
|
||||
pub use self::integrity::IntegrityError;
|
||||
pub use self::info::{RepositoryInfo, BundleAnalysis};
|
||||
pub use self::info::{RepositoryInfo, BundleAnalysis, RepositoryStatistics};
|
||||
pub use self::layout::RepositoryLayout;
|
||||
use self::bundle_map::BundleMap;
|
||||
|
||||
|
|
|
@ -9,6 +9,7 @@ mod cli;
|
|||
mod hostname;
|
||||
mod fs;
|
||||
mod lock;
|
||||
mod statistics;
|
||||
pub mod msgpack;
|
||||
|
||||
pub use self::fs::*;
|
||||
|
@ -22,3 +23,4 @@ pub use self::hex::*;
|
|||
pub use self::cli::*;
|
||||
pub use self::hostname::*;
|
||||
pub use self::lock::*;
|
||||
pub use self::statistics::*;
|
|
@ -0,0 +1,57 @@
|
|||
|
||||
|
||||
/// Summary statistics over a sequence of `f32` samples.
///
/// Besides the usual extrema, mean and standard deviation, the samples are
/// sorted into five buckets by their distance from the mean, measured in
/// standard deviations.
#[derive(Debug, Default)]
pub struct ValueStats {
    /// Smallest sample (`0.0` when there are no samples).
    pub min: f32,
    /// Largest sample (`0.0` when there are no samples).
    pub max: f32,
    /// Arithmetic mean of the samples (`0.0` when there are no samples).
    pub avg: f32,
    /// Sample standard deviation (Bessel-corrected, i.e. divided by
    /// `count - 1`); `0.0` when there are fewer than two samples.
    pub stddev: f32,
    /// Number of samples.
    pub count: usize,
    /// Samples below `avg - 2 * stddev`.
    pub count_xs: usize,
    /// Samples in `[avg - 2 * stddev, avg - stddev)`.
    pub count_s: usize,
    /// Samples in `[avg - stddev, avg + stddev]`.
    pub count_m: usize,
    /// Samples in `(avg + stddev, avg + 2 * stddev]`.
    pub count_l: usize,
    /// Samples above `avg + 2 * stddev`.
    pub count_xl: usize,
}

impl ValueStats {
    /// Computes the statistics of the samples produced by `iter`.
    ///
    /// `iter` is a factory rather than a single iterator because the samples
    /// are traversed up to three times (extrema/mean, variance, buckets);
    /// every call must yield the same sequence.
    ///
    /// Special cases:
    /// * no samples: all fields are zero,
    /// * one sample: `min == max == avg`, `stddev == 0` and the sample is
    ///   counted in `count_m`.
    pub fn from_iter<T: Iterator<Item=f32>, F: Fn() -> T>(iter: F) -> ValueStats {
        let mut stats = ValueStats::default();

        // First pass: extrema, sum and count. Both extrema start at the
        // opposite infinity so that negative-only inputs are handled too.
        let mut min = ::std::f32::INFINITY;
        let mut max = ::std::f32::NEG_INFINITY;
        let mut sum = 0.0f64;
        for val in iter() {
            if val < min {
                min = val;
            }
            if val > max {
                max = val;
            }
            sum += f64::from(val);
            stats.count += 1;
        }
        if stats.count == 0 {
            // Avoid reporting `inf` / `NaN` for an empty input.
            return stats;
        }
        stats.min = min;
        stats.max = max;
        let mean = sum / stats.count as f64;
        stats.avg = mean as f32;
        if stats.count < 2 {
            // The sample standard deviation is undefined for a single value.
            stats.count_m = stats.count;
            return stats;
        }

        // Second pass: sum of squared deviations, accumulated in f64.
        let mut squares = 0.0f64;
        for val in iter() {
            let delta = f64::from(val) - mean;
            squares += delta * delta;
        }
        // The square root is taken of the variance, not of the raw sum.
        stats.stddev = (squares / (stats.count as f64 - 1.0)).sqrt() as f32;

        // Third pass: bucket by distance from the mean. The upper bounds are
        // inclusive so that the buckets are symmetric and zero-variance input
        // ends up in the middle bucket, like the single-sample case above.
        let far_below = stats.avg - 2.0 * stats.stddev;
        let below = stats.avg - stats.stddev;
        let above = stats.avg + stats.stddev;
        let far_above = stats.avg + 2.0 * stats.stddev;
        for val in iter() {
            if val < far_below {
                stats.count_xs += 1;
            } else if val < below {
                stats.count_s += 1;
            } else if val <= above {
                stats.count_m += 1;
            } else if val <= far_above {
                stats.count_l += 1;
            } else {
                stats.count_xl += 1;
            }
        }
        stats
    }
}
|
Loading…
Reference in New Issue