From 59315b0cb7c290d9bd3bbb1145a4967cd713d55a Mon Sep 17 00:00:00 2001 From: Robert Habermeier Date: Fri, 17 Feb 2017 15:16:28 +0100 Subject: [PATCH] stats utility in its own crate --- util/src/lib.rs | 1 - util/stats/Cargo.toml | 7 ++ util/{src/stats.rs => stats/src/lib.rs} | 90 ++++++++++++++++++++----- 3 files changed, 79 insertions(+), 19 deletions(-) create mode 100644 util/stats/Cargo.toml rename util/{src/stats.rs => stats/src/lib.rs} (53%) diff --git a/util/src/lib.rs b/util/src/lib.rs index 720b80869..b67154f7b 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -142,7 +142,6 @@ pub mod semantic_version; pub mod log; pub mod path; pub mod snappy; -pub mod stats; pub mod cache; mod timer; diff --git a/util/stats/Cargo.toml b/util/stats/Cargo.toml new file mode 100644 index 000000000..99e81c9e7 --- /dev/null +++ b/util/stats/Cargo.toml @@ -0,0 +1,7 @@ +[package] +name = "stats" +version = "0.1.0" +authors = ["Parity Technologies "] + +[dependencies] +log = "0.3" diff --git a/util/src/stats.rs b/util/stats/src/lib.rs similarity index 53% rename from util/src/stats.rs rename to util/stats/src/lib.rs index c4c08ddc8..ccfca525b 100644 --- a/util/src/stats.rs +++ b/util/stats/src/lib.rs @@ -14,22 +14,77 @@ // You should have received a copy of the GNU General Public License // along with Parity. If not, see . -//! Statistical functions. +//! Statistical functions and helpers. -use bigint::prelude::*; +use std::iter::FromIterator; +use std::ops::{Add, Sub, Div}; + +#[macro_use] +extern crate log; + +/// Sorted corpus of data. +#[derive(Debug, Clone, PartialEq)] +pub struct Corpus(Vec); + +impl From> for Corpus { + fn from(mut data: Vec) -> Self { + data.sort(); + Corpus(data) + } +} + +impl FromIterator for Corpus { + fn from_iter>(iterable: I) -> Self { + iterable.into_iter().collect::>().into() + } +} + +impl Corpus { + /// Get the median element, if it exists. + pub fn median(&self) -> Option<&T> { + self.0.get(self.0.len() / 2) + } + + /// Whether the corpus is empty. + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Number of elements in the corpus. + pub fn len(&self) -> usize { + self.0.len() + } + + /// Split the corpus at a given point. + pub fn split_at(self, idx: usize) -> (Self, Self) { + let (left, right) = self.0.split_at(idx); + (Corpus(left), Corpus(right)) + } +} + +impl Corpus + where T: Add + Sub + Div + From +{ + /// Create a histogram of this corpus if it at least spans the buckets. Bounds are left closed. + pub fn histogram(&self, bucket_number: usize) -> Option> { + Histogram::create(&self.0, bucket_number) + } +} /// Discretised histogram. #[derive(Debug, PartialEq)] -pub struct Histogram { +pub struct Histogram { /// Bounds of each bucket. - pub bucket_bounds: Vec, + pub bucket_bounds: Vec, /// Count within each bucket. - pub counts: Vec + pub counts: Vec, } -impl Histogram { - /// Histogram of a sorted corpus if it at least spans the buckets. Bounds are left closed. - pub fn new(corpus: &[U256], bucket_number: usize) -> Option { +impl Histogram + where T: Add + Sub + Div + From +{ + // Histogram of a sorted corpus if it at least spans the buckets. Bounds are left closed. + fn create(corpus: &[T], bucket_number: usize) -> Option> { if corpus.len() < 1 { return None; } let corpus_end = corpus.last().expect("there is at least 1 element; qed").clone(); let corpus_start = corpus.first().expect("there is at least 1 element; qed").clone(); @@ -63,42 +118,41 @@ impl Histogram { #[cfg(test)] mod tests { - use bigint::prelude::U256; use super::Histogram; #[test] fn check_histogram() { - let hist = Histogram::new(slice_into![643,689,1408,2000,2296,2512,4250,4320,4842,4958,5804,6065,6098,6354,7002,7145,7845,8589,8593,8895], 5).unwrap(); - let correct_bounds: Vec = vec_into![643, 2294, 3945, 5596, 7247, 8898]; + let hist = Histogram::create(&[643,689,1408,2000,2296,2512,4250,4320,4842,4958,5804,6065,6098,6354,7002,7145,7845,8589,8593,8895], 5).unwrap(); + let correct_bounds: Vec = vec![643, 2294, 3945, 5596, 7247, 8898]; assert_eq!(Histogram { bucket_bounds: correct_bounds, counts: vec![4,2,4,6,4] }, hist); } #[test] fn smaller_data_range_than_bucket_range() { assert_eq!( - Histogram::new(slice_into![1, 2, 2], 3), - Some(Histogram { bucket_bounds: vec_into![1, 2, 3, 4], counts: vec![1, 2, 0] }) + Histogram::create(&[1, 2, 2], 3), + Some(Histogram { bucket_bounds: vec![1, 2, 3, 4], counts: vec![1, 2, 0] }) ); } #[test] fn data_range_is_not_multiple_of_bucket_range() { assert_eq!( - Histogram::new(slice_into![1, 2, 5], 2), - Some(Histogram { bucket_bounds: vec_into![1, 4, 7], counts: vec![2, 1] }) + Histogram::create(&[1, 2, 5], 2), + Some(Histogram { bucket_bounds: vec![1, 4, 7], counts: vec![2, 1] }) ); } #[test] fn data_range_is_multiple_of_bucket_range() { assert_eq!( - Histogram::new(slice_into![1, 2, 6], 2), - Some(Histogram { bucket_bounds: vec_into![1, 4, 7], counts: vec![2, 1] }) + Histogram::create(&[1, 2, 6], 2), + Some(Histogram { bucket_bounds: vec![1, 4, 7], counts: vec![2, 1] }) ); } #[test] fn none_when_too_few_data() { - assert!(Histogram::new(slice_into![], 1).is_none()); + assert!(Histogram::::create(&[], 1).is_none()); } }