bloom filters

This commit is contained in:
debris 2015-11-28 21:42:50 +01:00
parent b405c061a1
commit da60046e3d
3 changed files with 60 additions and 90 deletions

View File

@ -1,16 +1,45 @@
//! basic implementation of multilevel bloom filter //! Multilevel blockchain bloom filter.
use std::collections::{HashMap}; use std::collections::{HashMap};
use hash::*; use hash::*;
use filter::*;
use sha3::*; use sha3::*;
use num::pow; use num::pow;
/// in memory cache for blooms /// Represents bloom index in cache
///
/// On cache level 0, every block bloom is represented by different index.
/// On higher cache levels, multiple block blooms are represented by one
/// index. Their `BloomIndex` can be created from block number and given level.
#[derive(Eq, PartialEq, Hash, Clone, Debug)]
pub struct BloomIndex {
    /// Cache level this index belongs to.
    pub level: u8,
    /// Position of the bloom on that level.
    pub index: usize,
}

impl BloomIndex {
    /// Builds a `BloomIndex` from a cache level and a position on that level.
    pub fn new(level: u8, index: usize) -> Self {
        Self { level, index }
    }
}
/// Types implementing this trait provide read access to a database of bloom filters.
pub trait FilterDataSource {
/// Returns a reference to the bloom stored at the given `BloomIndex`,
/// or `None` if no bloom exists at that index.
fn bloom_at_index(&self, index: &BloomIndex) -> Option<&H2048>;
}
/// In memory cache for blooms.
///
/// Stores all blooms in HashMap, which indexes them by `BloomIndex`.
pub struct MemoryCache { pub struct MemoryCache {
blooms: HashMap<BloomIndex, H2048>, blooms: HashMap<BloomIndex, H2048>,
} }
impl MemoryCache { impl MemoryCache {
/// Default constructor for MemoryCache
pub fn new() -> MemoryCache { pub fn new() -> MemoryCache {
MemoryCache { blooms: HashMap::new() } MemoryCache { blooms: HashMap::new() }
} }
@ -29,7 +58,7 @@ impl FilterDataSource for MemoryCache {
} }
} }
/// Should be used to find blocks in FilterDataSource /// Should be used for search operations on blockchain.
pub struct ChainFilter<'a, D> pub struct ChainFilter<'a, D>
where D: FilterDataSource + 'a where D: FilterDataSource + 'a
{ {
@ -40,7 +69,9 @@ pub struct ChainFilter<'a, D>
impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource
{ {
/// creates new filter instance /// Creates new filter instance.
///
/// Borrows `FilterDataSource` for reading.
pub fn new(data_source: &'a D, index_size: usize, levels: u8) -> Self { pub fn new(data_source: &'a D, index_size: usize, levels: u8) -> Self {
if levels == 0 { if levels == 0 {
panic!("ChainFilter requires and least 1 level"); panic!("ChainFilter requires and least 1 level");
@ -98,9 +129,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource
self.level_sizes.len() as u8 - 1 self.level_sizes.len() as u8 - 1
} }
/// internal function which actually does bloom search /// internal function which does bloom search recursively
/// TODO: optimize it, maybe non-recursive version?
/// TODO: clean up?
fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Option<Vec<usize>> { fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Option<Vec<usize>> {
let index = self.bloom_index(offset, level); let index = self.bloom_index(offset, level);
@ -136,14 +165,9 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource
.collect(); .collect();
Some(res) Some(res)
} }
}
impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource /// Adds new bloom to all filter levels
{ pub fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap<BloomIndex, H2048> {
/// add new bloom to all levels
///
/// BitOr new bloom with all levels of filter
fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap<BloomIndex, H2048> {
let mut result: HashMap<BloomIndex, H2048> = HashMap::new(); let mut result: HashMap<BloomIndex, H2048> = HashMap::new();
for level in 0..self.levels() { for level in 0..self.levels() {
@ -159,10 +183,8 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource
result result
} }
/// add new blooms starting from block number /// Adds new blooms starting from block number.
/// pub fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap<BloomIndex, H2048> {
/// BitOr new blooms with all levels of filter
fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap<BloomIndex, H2048> {
let mut result: HashMap<BloomIndex, H2048> = HashMap::new(); let mut result: HashMap<BloomIndex, H2048> = HashMap::new();
for level in 0..self.levels() { for level in 0..self.levels() {
@ -192,8 +214,8 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource
result result
} }
/// reset bloom at level 0 and forces rebuild on higher levels /// Resets bloom at level 0 and forces rebuild on higher levels.
fn reset_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap<BloomIndex, H2048> { pub fn reset_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap<BloomIndex, H2048> {
let mut result: HashMap<BloomIndex, H2048> = HashMap::new(); let mut result: HashMap<BloomIndex, H2048> = HashMap::new();
let mut reset_index = self.bloom_index(block_number, 0); let mut reset_index = self.bloom_index(block_number, 0);
@ -204,14 +226,14 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource
// get all bloom indexes that were used to construct this bloom // get all bloom indexes that were used to construct this bloom
let lower_indexes = self.lower_level_bloom_indexes(&index); let lower_indexes = self.lower_level_bloom_indexes(&index);
let new_bloom = lower_indexes.into_iter() let new_bloom = lower_indexes.into_iter()
// skip reseted one // skip reseted one
.filter(|li| li != &reset_index) .filter(|li| li != &reset_index)
// get blooms for these indexes // get blooms for these indexes
.map(|li| self.data_source.bloom_at_index(&li)) .map(|li| self.data_source.bloom_at_index(&li))
// filter existing ones // filter existing ones
.filter_map(|b| b) .filter_map(|b| b)
// BitOr all of them // BitOr all of them
.fold(H2048::new(), |acc, bloom| &acc | bloom); .fold(H2048::new(), |acc, bloom| &acc | bloom);
reset_index = index.clone(); reset_index = index.clone();
result.insert(index, &new_bloom | bloom); result.insert(index, &new_bloom | bloom);
@ -220,27 +242,27 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource
result result
} }
/// sets lowest level bloom to 0 and forces rebuild on higher levels /// Sets lowest level bloom to 0 and forces rebuild on higher levels.
fn clear_bloom(&self, block_number: usize) -> HashMap<BloomIndex, H2048> { pub fn clear_bloom(&self, block_number: usize) -> HashMap<BloomIndex, H2048> {
self.reset_bloom(&H2048::new(), block_number) self.reset_bloom(&H2048::new(), block_number)
} }
/// returns numbers of blocks that may contain Address /// Returns numbers of blocks that may contain Address.
fn blocks_with_address(&self, address: &Address, from_block: usize, to_block: usize) -> Vec<usize> { pub fn blocks_with_address(&self, address: &Address, from_block: usize, to_block: usize) -> Vec<usize> {
let mut bloom = H2048::new(); let mut bloom = H2048::new();
bloom.shift_bloom(&address.sha3()); bloom.shift_bloom(&address.sha3());
self.blocks_with_bloom(&bloom, from_block, to_block) self.blocks_with_bloom(&bloom, from_block, to_block)
} }
/// returns numbers of blocks that may contain Topic /// Returns numbers of blocks that may contain Topic.
fn blocks_with_topics(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec<usize> { pub fn blocks_with_topics(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec<usize> {
let mut bloom = H2048::new(); let mut bloom = H2048::new();
bloom.shift_bloom(&topic.sha3()); bloom.shift_bloom(&topic.sha3());
self.blocks_with_bloom(&bloom, from_block, to_block) self.blocks_with_bloom(&bloom, from_block, to_block)
} }
/// returns numbers of blocks that may log bloom /// Returns numbers of blocks that may log bloom.
fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec<usize> { pub fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec<usize> {
let mut result = vec![]; let mut result = vec![];
// lets start from highest level // lets start from highest level
let max_level = self.max_level(); let max_level = self.max_level();
@ -266,7 +288,6 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use hash::*; use hash::*;
use filter::*;
use chainfilter::*; use chainfilter::*;
use sha3::*; use sha3::*;
use std::str::FromStr; use std::str::FromStr;

View File

@ -1,51 +0,0 @@
//! multilevel bloom filter interface
use hash::*;
use std::collections::HashMap;
/// Identifies a single bloom inside the multilevel cache.
///
/// On bloom level 0, every position stands for a different bloom.
/// On higher levels several positions share one bloom, so a position
/// has to be converted to a `BloomIndex` to find the bloom covering it.
#[derive(Eq, PartialEq, Hash, Clone, Debug)]
pub struct BloomIndex {
    /// Bloom level this index belongs to.
    pub level: u8,
    /// Position of the bloom on that level.
    pub index: usize,
}

impl BloomIndex {
    /// Builds a `BloomIndex` from a level and a position on that level.
    pub fn new(level: u8, index: usize) -> Self {
        Self { level, index }
    }
}
/// Read access to stored blooms, addressed by `BloomIndex`.
pub trait FilterDataSource {
/// Returns a reference to the bloom stored at the given `BloomIndex`,
/// or `None` if no bloom exists at that index.
fn bloom_at_index(&self, index: &BloomIndex) -> Option<&H2048>;
}
/// Interface of a multilevel bloom filter.
///
/// Every modifying method takes `&self` and returns a map from `BloomIndex` to `H2048`.
/// NOTE(review): presumably the returned map holds the updated blooms that the
/// caller is expected to store — confirm against the implementation.
pub trait Filter: Sized {
/// Adds a new bloom, belonging to block `block_number`, to all levels.
fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap<BloomIndex, H2048>;
/// Adds new blooms to all levels, starting from block `block_number`.
fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap<BloomIndex, H2048>;
/// Resets the bloom at level 0 for `block_number` and forces a rebuild on higher levels.
fn reset_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap<BloomIndex, H2048>;
/// Sets the lowest level bloom for `block_number` to 0 and forces a rebuild on higher levels.
fn clear_bloom(&self, block_number: usize) -> HashMap<BloomIndex, H2048>;
/// Returns numbers of blocks between `from_block` and `to_block` that may contain `address`.
/// NOTE(review): whether the range bounds are inclusive is not visible here — confirm.
fn blocks_with_address(&self, address: &Address, from_block: usize, to_block: usize) -> Vec<usize>;
/// Returns numbers of blocks between `from_block` and `to_block` that may contain `topic`.
fn blocks_with_topics(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec<usize>;
/// Returns numbers of blocks between `from_block` and `to_block` that may match the given log bloom.
fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec<usize>;
}

View File

@ -24,7 +24,7 @@ pub mod sha3;
pub mod hashdb; pub mod hashdb;
pub mod memorydb; pub mod memorydb;
pub mod math; pub mod math;
pub mod filter; //pub mod filter;
pub mod chainfilter; pub mod chainfilter;
//pub mod network; //pub mod network;