bloom filters

This commit is contained in:
debris 2015-11-28 19:11:04 +01:00
parent e769406b92
commit 3e28c2da31
2 changed files with 111 additions and 9 deletions

View File

@ -1,5 +1,5 @@
//! basic implementation of multilevel bloom filter //! basic implementation of multilevel bloom filter
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap};
use hash::*; use hash::*;
use filter::*; use filter::*;
use sha3::*; use sha3::*;
@ -14,6 +14,9 @@ impl MemoryCache {
MemoryCache { blooms: HashMap::new() } MemoryCache { blooms: HashMap::new() }
} }
/// inserts all blooms into cache
///
/// blooms already cached under the same index are overwritten, because
/// `HashMap::extend` replaces the value stored for an existing key
pub fn insert_blooms(&mut self, blooms: HashMap<BloomIndex, H2048>) { pub fn insert_blooms(&mut self, blooms: HashMap<BloomIndex, H2048>) {
self.blooms.extend(blooms); self.blooms.extend(blooms);
} }
@ -67,8 +70,11 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource
} }
/// return bloom which are dependencies for given index /// return bloom which are dependencies for given index
fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> HashSet<BloomIndex> { ///
let mut indexes: HashSet<BloomIndex> = HashSet::with_capacity(self.index_size); /// bloom indexes are ordered from lowest to highest
fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> Vec<BloomIndex> {
//let mut indexes: HashSet<BloomIndex> = HashSet::with_capacity(self.index_size);
let mut indexes: Vec<BloomIndex> = vec![];
// this is the lower level // this is the lower level
if index.level == 0 { if index.level == 0 {
@ -79,7 +85,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource
let offset = self.index_size * index.index; let offset = self.index_size * index.index;
for i in 0..self.index_size { for i in 0..self.index_size {
indexes.insert(BloomIndex { indexes.push(BloomIndex {
level: new_level, level: new_level,
index: offset + i, index: offset + i,
}); });
@ -87,6 +93,51 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource
indexes indexes
} }
/// Index of the highest filter level.
///
/// Levels are numbered from 0 (the lowest one), so the top level is `levels - 1`.
fn max_level(&self) -> u8 {
    let level_count = self.levels;
    level_count - 1
}
/// Recursive worker that performs the actual bloom search.
///
/// Looks up the bloom stored for (`level`, `offset`) in the data source and:
///
/// * returns an empty vector when no bloom is stored at that index;
/// * on level 0, returns `offset` itself if it is below `to_block`
///   (`to_block` is exclusive);
/// * on higher levels, returns an empty vector unless the stored bloom
///   contains `bloom`; otherwise it recurses into every lower-level bloom
///   whose index lies between the lower-level indexes of `from_block` and
///   `to_block` (both ends inclusive) and concatenates the results.
///
/// NOTE(review): the level-0 bloom is only checked for presence, it is never
/// tested against `bloom` — confirm this is intended.
///
/// TODO: optimize it, maybe non-recursive version?
/// TODO2: clean up?
fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Vec<usize> {
    let index = self.bloom_index(offset, level);

    // nothing stored for this index: nothing to report
    let level_bloom = match self.data_source.bloom_at_index(&index) {
        Some(stored) => stored,
        None => return vec![],
    };

    if level == 0 {
        // to_block exclusive
        return if offset < to_block { vec![offset] } else { vec![] };
    }

    // the whole subtree can be skipped when its summary bloom does not match
    if !level_bloom.contains(bloom) {
        return vec![];
    }

    let lower_level = level - 1;
    let level_size = self.level_size(lower_level);
    let first = self.bloom_index(from_block, lower_level).index;
    let last = self.bloom_index(to_block, lower_level).index;

    let mut found = vec![];
    for lower in self.lower_level_bloom_indexes(&index) {
        // choose only blooms in range
        if lower.index < first || lower.index > last {
            continue;
        }
        // map the bloom index back to an offset and search below it
        let lower_offset = lower.index * level_size;
        found.extend(self.blocks(bloom, from_block, to_block, lower_level, lower_offset));
    }
    found
}
} }
impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource
@ -191,16 +242,33 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource
/// returns numbers of blocks that may log bloom /// returns numbers of blocks that may log bloom
fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec<usize> { fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec<usize> {
// NOTE(review): the next row is a fused side-by-side diff row; `panic!();` looks like
// the removed stub and only the `let` after it belongs to the new body — confirm
panic!(); let mut result = vec![];
// start the search at the highest level (see `max_level`)
let max_level = self.max_level();
let level_size = self.level_size(max_level);
let from_index = self.bloom_index(from_block, max_level);
let to_index = self.bloom_index(to_block, max_level);
// visit every top-level bloom index from `from_index` to `to_index`, both inclusive (hence the `+ 1`)
for index in from_index.index..to_index.index + 1 {
// offset passed to `blocks` for this top-level bloom: its index scaled by the level size
let offset = level_size * index;
// descend recursively and collect the candidate block numbers found below this bloom
result.extend(self.blocks(bloom, from_block, to_block, max_level, offset));
}
result
} }
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use std::collections::{HashMap, HashSet}; use std::collections::{HashMap};
use hash::*; use hash::*;
use filter::*; use filter::*;
use chainfilter::*; use chainfilter::*;
use sha3::*;
use std::str::FromStr;
#[test] #[test]
fn test_level_size() { fn test_level_size() {
@ -258,12 +326,41 @@ mod tests {
assert_eq!(bi.level, 2); assert_eq!(bi.level, 2);
assert_eq!(bi.index, 1); assert_eq!(bi.index, 1);
let mut ebis = HashSet::with_capacity(16); let mut ebis = vec![];
for i in 16..32 { for i in 16..32 {
ebis.insert(BloomIndex::new(1, i)); ebis.push(BloomIndex::new(1, i));
} }
let bis = filter.lower_level_bloom_indexes(&bi); let bis = filter.lower_level_bloom_indexes(&bi);
assert_eq!(ebis, bis); assert_eq!(ebis, bis);
} }
/// Adds the bloom of a single topic at one block and checks that a topic
/// search over a range containing that block reports exactly that block.
#[test]
fn test_basic_search() {
    let index_size = 16;
    let bloom_levels = 3;
    let block_number = 23;

    let mut cache = MemoryCache::new();
    let topic = H256::from_str("8d936b1bd3fc635710969ccfba471fb17d598d9d1971b538dd712e1e4b4f4dba").unwrap();

    // the filter only borrows the cache, so keep it in its own scope
    // to be able to mutate the cache afterwards
    let modified_blooms = {
        let filter = ChainFilter::new(&cache, index_size, bloom_levels);
        let mut bloom = H2048::new();
        bloom.shift_bloom(&topic.sha3());
        filter.add_bloom(&bloom, block_number)
    };

    // the number of modified blooms should always equal the number of levels
    assert_eq!(modified_blooms.len(), bloom_levels as usize);
    cache.insert_blooms(modified_blooms);

    {
        let filter = ChainFilter::new(&cache, index_size, bloom_levels);
        let blocks = filter.blocks_with_topics(&topic, 0, 100);
        // block 23 is the only block whose bloom was added, so it is expected
        // to be the only hit in [0, 100)
        // NOTE(review): expectation derived by reading the search code — confirm on first run
        assert_eq!(blocks, vec![block_number]);
    }
}
} }

View File

@ -19,6 +19,7 @@ pub trait FixedHash: Sized + BytesConvertable {
fn shift_bloom<'a, T>(&'a mut self, b: &T) -> &'a mut Self where T: FixedHash; fn shift_bloom<'a, T>(&'a mut self, b: &T) -> &'a mut Self where T: FixedHash;
fn bloom_part<T>(&self, m: usize) -> T where T: FixedHash; fn bloom_part<T>(&self, m: usize) -> T where T: FixedHash;
fn contains_bloom<T>(&self, b: &T) -> bool where T: FixedHash; fn contains_bloom<T>(&self, b: &T) -> bool where T: FixedHash;
/// returns true when `b & self` is equal to `b`
/// (presumably a bitwise subset test: every bit set in `b` is also set in `self` — confirm against the `BitAnd` impl)
fn contains<'a>(&'a self, b: &'a Self) -> bool;
} }
macro_rules! impl_hash { macro_rules! impl_hash {
@ -108,7 +109,11 @@ macro_rules! impl_hash {
/// takes the bloom part of `b` (sized with `$size`) and tests it against `self`
fn contains_bloom<T>(&self, b: &T) -> bool where T: FixedHash { fn contains_bloom<T>(&self, b: &T) -> bool where T: FixedHash {
let bp: Self = b.bloom_part($size); let bp: Self = b.bloom_part($size);
// NOTE(review): fused side-by-side diff row — the inline comparison looks like the old
// column and the `contains` call like its replacement; confirm
(&bp & self) == bp self.contains(&bp)
}
/// returns true if combining `b` with `self` through `&` yields `b` again
/// (presumably `&` is bitwise AND here, which makes this a "all bits of `b` are set in `self`" test — confirm)
fn contains<'a>(&'a self, b: &'a Self) -> bool {
// compare by reference so neither operand is moved
&(b & self) == b
} }
} }