bloom refactor (#7475)

* ethereum-types refactor in progress

* ethereum-types refactor in progress

* ethereum-types refactor in progress

* ethereum-types refactor in progress

* ethereum-types refactor finished

* cleanup bloom mess

* simplify usage of Bloom in few places

* removed obsolete util/src/lib.rs

* removed commented out code

* ethereum-types 0.1.4

* updated ethereum-types and tiny-keccak
This commit is contained in:
Marek Kotewicz
2018-01-14 22:43:28 +01:00
committed by GitHub
parent e7f36665bb
commit 668d910c44
65 changed files with 2023 additions and 519 deletions

View File

@@ -0,0 +1,158 @@
use std::collections::{HashMap, HashSet};
use std::ops::Range;
use number::Number;
use position::{Position, Manager as PositionManager};
use bloom::Bloom;
use filter::Filter;
use config::Config;
use database::BloomDatabase;
/// Prepares all bloom database operations.
///
/// The chain only reads from the database: `insert` and `replace` return the
/// positions and blooms that have to be written, they do not store anything.
pub struct BloomChain<'a> {
    positioner: PositionManager,
    db: &'a BloomDatabase,
}

impl<'a> BloomChain<'a> {
    /// Creates new bloom chain.
    pub fn new(config: Config, db: &'a BloomDatabase) -> Self {
        let positioner = PositionManager::new(config.elements_per_index, config.levels);

        BloomChain {
            positioner: positioner,
            db: db,
        }
    }

    /// Internal function which does bloom search recursively.
    ///
    /// Looks up the bloom covering `offset` on `level`. Returns `None` when
    /// nothing is stored there or when the stored bloom does not contain
    /// `bloom`. Otherwise returns the matching numbers found at level 0 below
    /// this position (restricted to `range`, `range.end` included), which may
    /// be an empty list.
    fn blocks(&self, range: &Range<Number>, bloom: &Bloom, level: usize, offset: usize) -> Option<Vec<usize>> {
        let index = self.positioner.position(offset, level);

        let level_bloom = match self.db.bloom_at(&index) {
            Some(level_bloom) => level_bloom,
            None => return None,
        };

        // return None if current level doesn't contain given bloom
        if !level_bloom.contains_bloom(bloom) {
            return None;
        }

        // if we are on the lowest level, the offset itself is the match
        if level == 0 {
            return Some(vec![offset]);
        }

        // continue processing && go down (level >= 1 here, so `level - 1` cannot underflow)
        let level_size = self.positioner.level_size(level - 1);
        let from_position = self.positioner.position(range.start, level - 1);
        let to_position = self.positioner.position(range.end, level - 1);
        let res: Vec<usize> = self.positioner.lower_level_positions(&index).into_iter()
            // choose only blooms in range
            .filter(|li| li.index >= from_position.index && li.index <= to_position.index)
            // map them to offsets
            .map(|li| li.index * level_size)
            // get all blocks that may contain our bloom
            // filter existing ones
            .filter_map(|off| self.blocks(range, bloom, level - 1, off))
            // flatten nested structures
            .flat_map(|v| v)
            .collect();
        Some(res)
    }

    /// Inserts the bloom at all filter levels.
    ///
    /// For every level the returned map holds the position covering `number`
    /// together with the bloom to store there: the bloom currently in the
    /// database accrued with `bloom`, or a copy of `bloom` if the database has
    /// nothing at that position.
    pub fn insert(&self, number: Number, bloom: Bloom) -> HashMap<Position, Bloom> {
        let mut result: HashMap<Position, Bloom> = HashMap::new();

        for level in 0..self.positioner.levels() {
            let position = self.positioner.position(number, level);
            let new_bloom = match self.db.bloom_at(&position) {
                Some(mut old_bloom) => {
                    old_bloom.accrue_bloom(&bloom);
                    old_bloom
                },
                None => bloom.clone(),
            };

            result.insert(position, new_bloom);
        }

        result
    }

    /// Resets data in range.
    /// Inserts new data.
    /// Inserted data may exceed reset range.
    ///
    /// `blooms[i]` becomes the level 0 bloom of `range.start + i`; the numbers
    /// after the inserted blooms up to and including `range.end` get a default
    /// bloom. Every higher-level position covering one of the inserted blooms
    /// is rebuilt from its lower-level positions, preferring blooms produced
    /// by this call over blooms read from the database.
    pub fn replace(&self, range: &Range<Number>, blooms: Vec<Bloom>) -> HashMap<Position, Bloom> {
        let mut result: HashMap<Position, Bloom> = HashMap::new();

        // insert all new blooms at level 0
        for (i, bloom) in blooms.iter().enumerate() {
            result.insert(self.positioner.position(range.start + i, 0), bloom.clone());
        }

        // reset the rest of blooms
        for reset_number in range.start + blooms.len()..(range.end + 1) {
            result.insert(self.positioner.position(reset_number, 0), Bloom::default());
        }

        for level in 1..self.positioner.levels() {
            for i in 0..blooms.len() {
                let index = self.positioner.position(range.start + i, level);

                // Neighbouring numbers share the same higher-level position.
                // Entries of this level are only created inside this loop and
                // all lower levels are already final, so rebuilding the
                // position again would repeat the same database reads and
                // produce the same bloom.
                if result.contains_key(&index) {
                    continue;
                }

                let new_bloom = {
                    // use new blooms before db blooms where necessary
                    let bloom_at = | index | { result.get(&index).cloned().or_else(|| self.db.bloom_at(&index)) };

                    self.positioner.lower_level_positions(&index)
                        .into_iter()
                        // get blooms
                        // filter existing ones
                        .filter_map(bloom_at)
                        // BitOr all of them
                        .fold(Bloom::default(), |mut acc, bloom| {
                            acc.accrue_bloom(&bloom);
                            acc
                        })
                };

                result.insert(index, new_bloom);
            }
        }

        result
    }

    /// Returns all numbers with given bloom.
    ///
    /// The search covers `range.start` up to and including `range.end`.
    pub fn with_bloom(&self, range: &Range<Number>, bloom: &Bloom) -> Vec<Number> {
        let mut result = vec![];
        // lets start from highest level
        let max_level = self.positioner.max_level();
        let level_size = self.positioner.level_size(max_level);
        let from_position = self.positioner.position(range.start, max_level);
        let to_position = self.positioner.position(range.end, max_level);

        for index in from_position.index..to_position.index + 1 {
            // offset will be used to calculate where we are right now
            let offset = level_size * index;

            // go doooown!
            if let Some(blocks) = self.blocks(range, bloom, max_level, offset) {
                result.extend(blocks);
            }
        }

        result
    }

    /// Filter the chain returning all numbers matching the filter.
    ///
    /// Every bloom possibility of the filter is searched separately; the
    /// matches are deduplicated and returned in ascending order.
    pub fn filter(&self, filter: &Filter) -> Vec<Number> {
        let range = filter.range();
        let mut blocks = filter.bloom_possibilities()
            .into_iter()
            .flat_map(|ref bloom| self.with_bloom(&range, bloom))
            .collect::<HashSet<Number>>()
            .into_iter()
            .collect::<Vec<Number>>();
        blocks.sort();
        blocks
    }
}

View File

@@ -0,0 +1,17 @@
/// `BloomChain` configuration.
#[derive(Debug, PartialEq, Clone, Copy)]
pub struct Config {
    /// Number of levels.
    pub levels: usize,
    /// Number of elements in a single index.
    pub elements_per_index: usize,
}

impl Default for Config {
    /// Three levels with sixteen elements per index.
    fn default() -> Self {
        let (levels, elements_per_index) = (3, 16);
        Config { levels, elements_per_index }
    }
}

View File

@@ -0,0 +1,7 @@
use position::Position;
use bloom::Bloom;
/// Readonly `Bloom` database.
pub trait BloomDatabase {
/// Returns the bloom at `position`, or `None` when the database has no
/// bloom there.
fn bloom_at(&self, position: &Position) -> Option<Bloom>;
}

View File

@@ -0,0 +1,11 @@
use std::ops::Range;
use bloom::Bloom;
use number::Number;
/// Should be used to filter blocks from `BloomChain`.
pub trait Filter {
/// All bloom possibilities that we are searching for.
///
/// `BloomChain::filter` searches for each returned bloom separately and
/// merges the matching numbers.
fn bloom_possibilities(&self) -> Vec<Bloom>;
/// Range of search.
///
/// `BloomChain` also examines the number at `end` when searching.
fn range(&self) -> Range<Number>;
}

View File

@@ -0,0 +1,31 @@
use bloom::Bloom;
use config::Config;
use database::BloomDatabase;
use position::Position;
use group::position::Manager as PositionManager;
use super::BloomGroupDatabase;
/// Bridge between `BloomDatabase` and `BloomGroupDatabase`.
///
/// Answers single-bloom lookups by reading the group that holds the bloom.
pub struct GroupDatabaseBridge<'a> {
    positioner: PositionManager,
    db: &'a BloomGroupDatabase,
}

impl<'a> GroupDatabaseBridge<'a> {
    /// Creates a bridge over `db` with groups of `config.elements_per_index` blooms.
    pub fn new(config: Config, db: &'a BloomGroupDatabase) -> Self {
        GroupDatabaseBridge {
            positioner: PositionManager::new(config.elements_per_index),
            db,
        }
    }
}

impl<'a> BloomDatabase for GroupDatabaseBridge<'a> {
    /// Fetches the group containing `position` and picks the requested bloom
    /// out of it. Returns `None` when the group is missing or is shorter than
    /// the requested slot.
    fn bloom_at(&self, position: &Position) -> Option<Bloom> {
        let in_group = self.positioner.position(position);
        match self.db.blooms_at(&in_group.group) {
            Some(group) => group.blooms.into_iter().nth(in_group.number),
            None => None,
        }
    }
}

View File

@@ -0,0 +1,70 @@
use std::collections::HashMap;
use std::ops::Range;
use bloom::Bloom;
use chain::BloomChain;
use config::Config;
use number::Number;
use filter::Filter;
use position::Position as BloomPosition;
use super::{GroupDatabaseBridge, BloomGroupDatabase, BloomGroup, GroupPosition};
use super::position::Manager as PositionManager;
/// Performs all bloom database operations using `BloomGroup`s.
pub struct BloomGroupChain<'a> {
config: Config,
db: &'a BloomGroupDatabase,
bridge: GroupDatabaseBridge<'a>,
}
impl<'a> BloomGroupChain<'a> {
pub fn new(config: Config, db: &'a BloomGroupDatabase) -> Self {
let bridge = GroupDatabaseBridge::new(config, db);
BloomGroupChain {
config: config,
db: db,
bridge: bridge,
}
}
fn group_blooms(&self, blooms: HashMap<BloomPosition, Bloom>) -> HashMap<GroupPosition, BloomGroup> {
let positioner = PositionManager::new(self.config.elements_per_index);
blooms.into_iter()
.fold(HashMap::new(), | mut acc, (position, bloom) | {
{
let position = positioner.position(&position);
let group = acc
.entry(position.group.clone())
.or_insert_with(|| self.db
.blooms_at(&position.group)
.unwrap_or_else(|| BloomGroup::new(self.config.elements_per_index))
);
assert_eq!(self.config.elements_per_index, group.blooms.len());
group.blooms[position.number] = bloom;
}
acc
})
}
pub fn insert(&self, number: Number, bloom: Bloom) -> HashMap<GroupPosition, BloomGroup> {
let bloom_chain = BloomChain::new(self.config, &self.bridge);
let modified_blooms = bloom_chain.insert(number, bloom);
self.group_blooms(modified_blooms)
}
pub fn replace(&self, range: &Range<Number>, blooms: Vec<Bloom>) -> HashMap<GroupPosition, BloomGroup> {
let bloom_chain = BloomChain::new(self.config, &self.bridge);
let modified_blooms = bloom_chain.replace(range, blooms);
self.group_blooms(modified_blooms)
}
pub fn with_bloom(&self, range: &Range<Number>, bloom: &Bloom) -> Vec<Number> {
let bloom_chain = BloomChain::new(self.config, &self.bridge);
bloom_chain.with_bloom(range, bloom)
}
pub fn filter(&self, filter: &Filter) -> Vec<Number> {
let bloom_chain = BloomChain::new(self.config, &self.bridge);
bloom_chain.filter(filter)
}
}

View File

@@ -0,0 +1,6 @@
use group::{GroupPosition, BloomGroup};
/// Readonly `BloomGroup` database.
pub trait BloomGroupDatabase {
/// Returns the bloom group at `position`, or `None` when the database has
/// no group there.
fn blooms_at(&self, position: &GroupPosition) -> Option<BloomGroup>;
}

View File

@@ -0,0 +1,17 @@
use bloom::Bloom;
/// Group of blooms that are in the same index.
#[derive(Clone)]
pub struct BloomGroup {
    /// Blooms belonging to this group.
    pub blooms: Vec<Bloom>,
}

impl BloomGroup {
    /// Creates a group of `size` default blooms.
    pub fn new(size: usize) -> Self {
        let mut blooms = Vec::new();
        for _ in 0..size {
            blooms.push(Bloom::default());
        }

        BloomGroup { blooms }
    }
}

View File

@@ -0,0 +1,16 @@
//! Bloom grouping.
//!
//! Optimization gathering together blooms that are in the same index and are likely to be retrieved together.
mod bridge;
mod chain;
mod database;
mod group;
mod position;
pub use self::bridge::GroupDatabaseBridge;
pub use self::chain::BloomGroupChain;
pub use self::database::BloomGroupDatabase;
pub use self::group::BloomGroup;
pub use self::position::GroupPosition;

View File

@@ -0,0 +1,28 @@
use super::{Position, GroupPosition};
use position::Position as BloomPosition;
/// Translates bloom positions into group positions.
pub struct Manager {
    index_size: usize
}

impl Manager {
    /// Creates a manager for groups of `index_size` blooms.
    pub fn new(index_size: usize) -> Self {
        Manager { index_size }
    }

    /// Returns the position of the group that contains `pos`.
    pub fn group_position(&self, pos: &BloomPosition) -> GroupPosition {
        let index = pos.index / self.index_size;
        GroupPosition { level: pos.level, index }
    }

    /// Returns the group of `pos` together with the slot of `pos` inside that group.
    pub fn position(&self, pos: &BloomPosition) -> Position {
        let group = self.group_position(pos);
        let number = pos.index % self.index_size;
        Position { group, number }
    }
}

View File

@@ -0,0 +1,5 @@
mod position;
mod manager;
pub use self::position::{Position, GroupPosition};
pub use self::manager::Manager;

View File

@@ -0,0 +1,17 @@
/// Uniquely identifies bloom group position.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct GroupPosition {
/// Bloom level.
pub level: usize,
/// Index of the group.
///
/// `Manager::group_position` computes it as the bloom index divided by the
/// group size.
pub index: usize,
}
/// Uniquely identifies bloom position including the position in the group.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
pub struct Position {
/// Group position.
pub group: GroupPosition,
/// Number in group.
///
/// `Manager::position` computes it as the bloom index modulo the group size.
pub number: usize,
}

View File

@@ -0,0 +1,17 @@
extern crate ethbloom as bloom;
mod chain;
mod config;
mod database;
pub mod group;
mod number;
mod position;
mod filter;
pub use bloom::{Bloom, BloomRef, Input};
pub use chain::BloomChain;
pub use config::Config;
pub use database::BloomDatabase;
pub use number::Number;
pub use position::Position;
pub use filter::Filter;

View File

@@ -0,0 +1,2 @@
/// Represents block number.
///
/// Search ranges passed to `BloomChain` and the matches it returns are
/// expressed in this type.
pub type Number = usize;

View File

@@ -0,0 +1,142 @@
//! Simplifies working with bloom indexes.
use super::Position;
/// Simplifies working with bloom indexes.
pub struct Manager {
    index_size: usize,
    level_sizes: Vec<usize>,
}

impl Manager {
    /// Creates new indexer.
    ///
    /// Level `0` has size `1`; every following level is `index_size` times
    /// larger than the one below it.
    ///
    /// # Panics
    ///
    /// Panics when `levels` is `0`.
    pub fn new(index_size: usize, levels: usize) -> Self {
        if levels == 0 {
            panic!("Manager requires at least 1 level.");
        }

        let mut level_sizes = vec![1];
        let mut size = 1;
        for _ in 1..levels {
            size = size * index_size;
            level_sizes.push(size);
        }

        Manager { index_size, level_sizes }
    }

    /// Unsafely get level size.
    ///
    /// Panics when `level` is not one of the configured levels.
    pub fn level_size(&self, level: usize) -> usize {
        self.level_sizes[level]
    }

    /// Converts block number and level to `Position`.
    pub fn position(&self, block_number: usize, level: usize) -> Position {
        let index = block_number / self.level_size(level);
        Position { level, index }
    }

    /// Return bloom which are dependencies for given index.
    ///
    /// Bloom indexes are ordered from lowest to highest.
    pub fn lower_level_positions(&self, index: &Position) -> Vec<Position> {
        match index.level {
            // this is the lowest level
            0 => vec![],
            level => {
                let first = self.index_size * index.index;
                (0..self.index_size)
                    .map(|i| Position { level: level - 1, index: first + i })
                    .collect()
            }
        }
    }

    /// Return number of levels.
    pub fn levels(&self) -> usize {
        self.level_sizes.len()
    }

    /// Returns max indexer level.
    pub fn max_level(&self) -> usize {
        self.levels() - 1
    }
}
#[cfg(test)]
mod tests {
use position::Position;
use super::*;
// Each level is `index_size` (16) times larger than the level below it.
#[test]
fn test_level_size() {
let indexer = Manager::new(16, 3);
assert_eq!(indexer.level_size(0), 1);
assert_eq!(indexer.level_size(1), 16);
assert_eq!(indexer.level_size(2), 256);
}
// The index is the block number divided by the level size; the cases below
// sit on both sides of the level 1 (16) and level 2 (256) boundaries.
#[test]
fn test_position() {
let indexer = Manager::new(16, 3);
let bi0 = indexer.position(0, 0);
assert_eq!(bi0.level, 0);
assert_eq!(bi0.index, 0);
let bi1 = indexer.position(1, 0);
assert_eq!(bi1.level, 0);
assert_eq!(bi1.index, 1);
let bi2 = indexer.position(2, 0);
assert_eq!(bi2.level, 0);
assert_eq!(bi2.index, 2);
let bi3 = indexer.position(3, 1);
assert_eq!(bi3.level, 1);
assert_eq!(bi3.index, 0);
let bi4 = indexer.position(15, 1);
assert_eq!(bi4.level, 1);
assert_eq!(bi4.index, 0);
let bi5 = indexer.position(16, 1);
assert_eq!(bi5.level, 1);
assert_eq!(bi5.index, 1);
let bi6 = indexer.position(255, 2);
assert_eq!(bi6.level, 2);
assert_eq!(bi6.index, 0);
let bi7 = indexer.position(256, 2);
assert_eq!(bi7.level, 2);
assert_eq!(bi7.index, 1);
}
// Position 1 on level 2 depends on level 1 positions 16 through 31.
#[test]
fn test_lower_level_positions() {
let indexer = Manager::new(16, 3);
let bi = indexer.position(256, 2);
assert_eq!(bi.level, 2);
assert_eq!(bi.index, 1);
let mut ebis = vec![];
for i in 16..32 {
ebis.push(Position { level: 1, index: i});
}
let bis = indexer.lower_level_positions(&bi);
assert_eq!(ebis, bis);
}
}

View File

@@ -0,0 +1,5 @@
pub mod position;
pub mod manager;
pub use self::position::Position;
pub use self::manager::Manager;

View File

@@ -0,0 +1,8 @@
/// Uniquely identifies bloom position.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct Position {
/// Bloom level.
pub level: usize,
/// Index of the bloom.
///
/// `Manager::position` computes it as the block number divided by the size
/// of `level`.
pub index: usize,
}