From 3e28c2da313ad0d4ebcbcded052b796c05b89e7e Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 19:11:04 +0100 Subject: [PATCH] bloom filters --- src/chainfilter.rs | 113 +++++++++++++++++++++++++++++++++++++++++---- src/hash.rs | 7 ++- 2 files changed, 111 insertions(+), 9 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 2b7754d3b..902ee3a11 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -1,5 +1,5 @@ //! basic implementation of multilevel bloom filter -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap}; use hash::*; use filter::*; use sha3::*; @@ -14,6 +14,9 @@ impl MemoryCache { MemoryCache { blooms: HashMap::new() } } + /// inserts all blooms into cache + /// + /// TODO: verify whether extend updates old items pub fn insert_blooms(&mut self, blooms: HashMap) { self.blooms.extend(blooms); } @@ -67,8 +70,11 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource } /// return bloom which are dependencies for given index - fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> HashSet { - let mut indexes: HashSet = HashSet::with_capacity(self.index_size); + /// + /// bloom indexes are ordered from lowest to highest + fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> Vec { + //let mut indexes: HashSet = HashSet::with_capacity(self.index_size); + let mut indexes: Vec = vec![]; // this is the lower level if index.level == 0 { @@ -79,7 +85,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource let offset = self.index_size * index.index; for i in 0..self.index_size { - indexes.insert(BloomIndex { + indexes.push(BloomIndex { level: new_level, index: offset + i, }); @@ -87,6 +93,51 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource indexes } + + /// returns max filter level + fn max_level(&self) -> u8 { + self.levels - 1 + } + + /// internal function which actually does bloom search + /// TODO: optimize it, maybe non-recursive version? + /// TODO2: clean up? 
+ fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Vec { + let mut result = vec![]; + let index = self.bloom_index(offset, level); + + match self.data_source.bloom_at_index(&index) { + None => (), + Some(level_bloom) => match level { + 0 => { + // to_block exclusive + if offset < to_block { + result.push(offset); + } + }, + _ => match level_bloom.contains(bloom) { + false => (), + true => { + let level_size = self.level_size(level - 1); + let from_index = self.bloom_index(from_block, level - 1); + let to_index = self.bloom_index(to_block, level - 1); + let res: Vec = self.lower_level_bloom_indexes(&index).into_iter() + // choose only blooms in range + .filter(|li| li.index >= from_index.index && li.index <= to_index.index) + // map them to offsets + .map(|li| li.index * level_size) + // get all blocks that may contain our bloom + .map(|off| self.blocks(bloom, from_block, to_block, level - 1, off)) + // flatten nested structure + .flat_map(|v| v) + .collect(); + return res + } + } + } + } + result + } } impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource @@ -191,16 +242,33 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource /// returns numbers of blocks that may log bloom fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec { - panic!(); + let mut result = vec![]; + // let's start from the highest level + let max_level = self.max_level(); + let level_size = self.level_size(max_level); + let from_index = self.bloom_index(from_block, max_level); + let to_index = self.bloom_index(to_block, max_level); + + for index in from_index.index..to_index.index + 1 { + // offset will be used to calculate where we are right now + let offset = level_size * index; + + // go doooown! 
+ result.extend(self.blocks(bloom, from_block, to_block, max_level, offset)); + } + + result } } #[cfg(test)] mod tests { - use std::collections::{HashMap, HashSet}; + use std::collections::{HashMap}; use hash::*; use filter::*; use chainfilter::*; + use sha3::*; + use std::str::FromStr; #[test] fn test_level_size() { @@ -258,12 +326,41 @@ mod tests { assert_eq!(bi.level, 2); assert_eq!(bi.index, 1); - let mut ebis = HashSet::with_capacity(16); + let mut ebis = vec![]; for i in 16..32 { - ebis.insert(BloomIndex::new(1, i)); + ebis.push(BloomIndex::new(1, i)); } let bis = filter.lower_level_bloom_indexes(&bi); assert_eq!(ebis, bis); } + + #[test] + fn test_basic_search() { + let index_size = 16; + let bloom_levels = 3; + + let mut cache = MemoryCache::new(); + let topic = H256::from_str("8d936b1bd3fc635710969ccfba471fb17d598d9d1971b538dd712e1e4b4f4dba").unwrap(); + + let modified_blooms = { + let filter = ChainFilter::new(&cache, index_size, bloom_levels); + let block_number = 23; + let mut bloom = H2048::new(); + bloom.shift_bloom(&topic.sha3()); + filter.add_bloom(&bloom, block_number) + }; + + // number of modified blooms should always be equal to the number of levels + assert_eq!(modified_blooms.len(), bloom_levels as usize); + cache.insert_blooms(modified_blooms); + + { + let filter = ChainFilter::new(&cache, index_size, bloom_levels); + let blocks = filter.blocks_with_topics(&topic, 0, 100); + println!("{:?}", blocks); + assert!(false); + } + + } } diff --git a/src/hash.rs b/src/hash.rs index a0493f2bb..c79234300 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -19,6 +19,7 @@ pub trait FixedHash: Sized + BytesConvertable { fn shift_bloom<'a, T>(&'a mut self, b: &T) -> &'a mut Self where T: FixedHash; fn bloom_part(&self, m: usize) -> T where T: FixedHash; fn contains_bloom(&self, b: &T) -> bool where T: FixedHash; + fn contains<'a>(&'a self, b: &'a Self) -> bool; } macro_rules! 
impl_hash { fn contains_bloom(&self, b: &T) -> bool where T: FixedHash { let bp: Self = b.bloom_part($size); - (&bp & self) == bp + self.contains(&bp) + } + + fn contains<'a>(&'a self, b: &'a Self) -> bool { + &(b & self) == b } }