2016-12-11 19:30:54 +01:00
|
|
|
// Copyright 2015, 2016 Parity Technologies (UK) Ltd.
|
2016-10-31 12:57:48 +01:00
|
|
|
// This file is part of Parity.
|
|
|
|
|
|
|
|
// Parity is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
|
|
|
|
// Parity is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License for more details.
|
|
|
|
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
|
|
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
//! Statistical functions.
|
|
|
|
|
2016-12-23 16:53:06 +01:00
|
|
|
use bigint::prelude::*;
|
2016-10-31 12:57:48 +01:00
|
|
|
|
|
|
|
/// Discretised histogram.
|
|
|
|
#[derive(Debug, PartialEq)]
|
|
|
|
pub struct Histogram {
|
|
|
|
/// Bounds of each bucket.
|
|
|
|
pub bucket_bounds: Vec<U256>,
|
|
|
|
/// Count within each bucket.
|
|
|
|
pub counts: Vec<u64>
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Histogram {
|
2016-11-03 21:15:27 +01:00
|
|
|
/// Histogram of a sorted corpus if it at least spans the buckets. Bounds are left closed.
|
2016-10-31 12:57:48 +01:00
|
|
|
pub fn new(corpus: &[U256], bucket_number: usize) -> Option<Histogram> {
|
2016-11-03 21:15:27 +01:00
|
|
|
if corpus.len() < 1 { return None; }
|
|
|
|
let corpus_end = corpus.last().expect("there is at least 1 element; qed").clone();
|
|
|
|
let corpus_start = corpus.first().expect("there is at least 1 element; qed").clone();
|
|
|
|
// Bucket needs to be at least 1 wide.
|
|
|
|
let bucket_size = {
|
|
|
|
// Round up to get the entire corpus included.
|
|
|
|
let raw_bucket_size = (corpus_end - corpus_start + bucket_number.into()) / bucket_number.into();
|
|
|
|
if raw_bucket_size == 0.into() { 1.into() } else { raw_bucket_size }
|
|
|
|
};
|
2016-10-31 12:57:48 +01:00
|
|
|
let mut bucket_end = corpus_start + bucket_size;
|
|
|
|
|
|
|
|
let mut bucket_bounds = vec![corpus_start; bucket_number + 1];
|
|
|
|
let mut counts = vec![0; bucket_number];
|
|
|
|
let mut corpus_i = 0;
|
|
|
|
// Go through the corpus adding to buckets.
|
|
|
|
for bucket in 0..bucket_number {
|
2016-11-03 21:15:27 +01:00
|
|
|
while corpus.get(corpus_i).map_or(false, |v| v < &bucket_end) {
|
|
|
|
// Initialized to size bucket_number above; iterates up to bucket_number; qed
|
2016-10-31 12:57:48 +01:00
|
|
|
counts[bucket] += 1;
|
|
|
|
corpus_i += 1;
|
|
|
|
}
|
2016-11-03 21:15:27 +01:00
|
|
|
// Initialized to size bucket_number + 1 above; iterates up to bucket_number; subscript is in range; qed
|
2016-10-31 12:57:48 +01:00
|
|
|
bucket_bounds[bucket + 1] = bucket_end;
|
|
|
|
bucket_end = bucket_end + bucket_size;
|
|
|
|
}
|
|
|
|
Some(Histogram { bucket_bounds: bucket_bounds, counts: counts })
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use bigint::prelude::U256;
    use super::Histogram;

    /// Twenty values spread over five buckets of width 1651.
    #[test]
    fn check_histogram() {
        let bounds: Vec<U256> = vec_into![643, 2294, 3945, 5596, 7247, 8898];
        let expected = Histogram { bucket_bounds: bounds, counts: vec![4, 2, 4, 6, 4] };

        let actual = Histogram::new(
            slice_into![643,689,1408,2000,2296,2512,4250,4320,4842,4958,5804,6065,6098,6354,7002,7145,7845,8589,8593,8895],
            5,
        ).unwrap();

        assert_eq!(expected, actual);
    }

    /// Fewer distinct values than buckets: the width is still 1 and the trailing bucket is empty.
    #[test]
    fn smaller_data_range_than_bucket_range() {
        let expected = Some(Histogram { bucket_bounds: vec_into![1, 2, 3, 4], counts: vec![1, 2, 0] });
        let actual = Histogram::new(slice_into![1, 2, 2], 3);
        assert_eq!(actual, expected);
    }

    /// The bucket width is rounded up when the data range does not divide evenly.
    #[test]
    fn data_range_is_not_multiple_of_bucket_range() {
        let expected = Some(Histogram { bucket_bounds: vec_into![1, 4, 7], counts: vec![2, 1] });
        let actual = Histogram::new(slice_into![1, 2, 5], 2);
        assert_eq!(actual, expected);
    }

    /// The largest value is still counted when the data range divides evenly.
    #[test]
    fn data_range_is_multiple_of_bucket_range() {
        let expected = Some(Histogram { bucket_bounds: vec_into![1, 4, 7], counts: vec![2, 1] });
        let actual = Histogram::new(slice_into![1, 2, 6], 2);
        assert_eq!(actual, expected);
    }

    /// An empty corpus yields no histogram.
    #[test]
    fn none_when_too_few_data() {
        let result = Histogram::new(slice_into![], 1);
        assert!(result.is_none());
    }
}
|