From c55956c7b4d5a5eb3791bdbf41cac5f884709ac7 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 13:08:30 +0100 Subject: [PATCH 01/22] Filter trait --- src/filter.rs | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ src/lib.rs | 1 + 2 files changed, 50 insertions(+) create mode 100644 src/filter.rs diff --git a/src/filter.rs b/src/filter.rs new file mode 100644 index 000000000..92e953c04 --- /dev/null +++ b/src/filter.rs @@ -0,0 +1,49 @@ +//! multilevel bloom filter interface +use hash::*; +use std::collections::HashMap; + +/// Represents bloom index in cache +/// +/// On bloom level 0, all positions represent different blooms. +/// On higher levels multiple positions represent one bloom +/// and should be transformed to `BlockIndex` to get index of this bloom +#[derive(Eq, PartialEq, Hash)] +pub struct BloomIndex { + level: u8, + level_index: usize, + index: usize, +} + +pub trait FilterDataSource { + /// returns reference to log at given position if it exists + fn bloom_at_index(&self, index: &BloomIndex) -> Option<&H2048>; +} + +pub trait Filter: Sized { + /// creates new filter instance + fn new(data_source: &T, index_size: usize, levels: u8) -> Self where T: FilterDataSource; + + /// converts block number and level to `BloomIndex` + fn bloom_index(&self, block_number: usize, level: u8) -> BloomIndex; + + /// add new bloom to all levels + fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap; + + /// add new blooms starting from block number + fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap; + + /// reset bloom at level 0 and forces rebuild on higher levels + fn reset_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap; + + /// sets lowest level bloom to 0 and forces rebuild on higher levels + fn clear_bloom(&self, block_number: usize) -> HashMap; + + /// returns numbers of blocks that may contain Address + fn blocks_with_address(&self, address: &Address) -> Vec; + + /// returns numbers of blocks that may contain Topic + fn blocks_with_topics(&self, topic: &H256) -> Vec; + + /// returns numbers of blocks that may log bloom + fn blocks_with_bloom(&self, bloom: &H2048) -> Vec; +} diff --git a/src/lib.rs b/src/lib.rs index acb03afb5..7ebf91f51 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -21,6 +21,7 @@ pub mod sha3; pub mod hashdb; pub mod memorydb; pub mod math; +pub mod filter; //pub mod network; From f0f70f801e89252ee637fca705fb4f2276ef3b16 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 17:08:38 +0100 Subject: [PATCH 02/22] chainfilter almost done --- Cargo.toml | 1 + src/chainfilter.rs | 265 +++++++++++++++++++++++++++++++++++++++++++++ src/filter.rs | 28 ++--- src/lib.rs | 4 + src/macros.rs | 11 ++ src/math.rs | 1 - 6 files changed, 296 insertions(+), 14 deletions(-) create mode 100644 src/chainfilter.rs create mode 100644 src/macros.rs diff --git a/Cargo.toml b/Cargo.toml index 19c64113c..c4358b8c3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,3 +15,4 @@ mio = "0.*" rand = "0.*" tiny-keccak = "0.3" rocksdb = "0.2.1" +num = "0.1" diff --git a/src/chainfilter.rs b/src/chainfilter.rs new file mode 100644 index 000000000..d5f0873ac --- /dev/null +++ b/src/chainfilter.rs @@ -0,0 +1,265 @@ +//! basic implementation of multilevel bloom filter +use std::collections::{HashMap, HashSet}; +use hash::*; +use filter::*; +use sha3::*; +use num::pow; + +pub struct MemoryCache { + blooms: HashMap, +} + +impl MemoryCache { + pub fn new() -> MemoryCache { + MemoryCache { blooms: HashMap::new() } + } + + pub fn insert_blooms(&mut self, blooms: HashMap) { + self.blooms.extend(blooms); + } +} + +impl FilterDataSource for MemoryCache { + fn bloom_at_index(&self, index: &BloomIndex) -> Option<&H2048> { + self.blooms.get(index) + } +} + +pub struct ChainFilter<'a, D> + where D: FilterDataSource + 'a +{ + data_source: &'a D, + index_size: usize, + levels: u8, + level_sizes: HashMap, +} + +impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource +{ + /// creates new filter instance + pub fn new(data_source: &'a D, index_size: usize, levels: u8) -> Self { + let mut filter = ChainFilter { + data_source: data_source, + index_size: index_size, + levels: levels, + level_sizes: HashMap::new(), + }; + + // cache level sizes, so we do not have to calculate them all the time + for i in 0..levels { + filter.level_sizes.insert(i, pow(index_size, i as usize)); + } + + filter + } + + /// unsafely get level size + fn level_size(&self, level: u8) -> usize { + *self.level_sizes.get(&level).unwrap() + } + + /// converts block number and level to `BloomIndex` + fn bloom_index(&self, block_number: usize, level: u8) -> BloomIndex { + BloomIndex { + level: level, + index: block_number / self.level_size(level), + } + } + + /// return bloom which are dependencies for given index + fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> HashSet { + let mut indexes: HashSet = HashSet::with_capacity(self.index_size); + + // this is the lower level + if index.level == 0 { + return indexes; + } + + let new_level = index.level - 1; + let offset = self.index_size * index.index; + + for i in 0..self.index_size { + indexes.insert(BloomIndex { + level: new_level, + index: offset + i + }); + } + + indexes + } +} + +impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource +{ + /// add new bloom to all levels + /// + /// BitOr new bloom with all levels of filter + fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap { + let mut result: HashMap = HashMap::new(); + + for level in 0..self.levels { + let bloom_index = self.bloom_index(block_number, level); + let new_bloom = match self.data_source.bloom_at_index(&bloom_index) { + Some(old_bloom) => old_bloom | bloom, + None => bloom.clone() + }; + + result.insert(bloom_index, new_bloom); + } + + result + } + + /// add new blooms starting from block number + /// + /// BitOr new blooms with all levels of filter + fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap { + let mut result: HashMap = HashMap::new(); + + for level in 0..self.levels { + for i in 0..blooms.len() { + let bloom_index = self.bloom_index(block_number + i, level); + let is_new_bloom = match result.get_mut(&bloom_index) { + + // it was already modified + Some(to_shift) => { + *to_shift = &blooms[i] | to_shift; + false + }, + None => true + }; + + // it hasn't been modified yet + if is_new_bloom { + let new_bloom = match self.data_source.bloom_at_index(&bloom_index) { + Some(old_bloom) => old_bloom | &blooms[i], + None => blooms[i].clone() + }; + result.insert(bloom_index, new_bloom); + } + } + } + + result + } + + /// reset bloom at level 0 and forces rebuild on higher levels + fn reset_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap { + let mut result: HashMap = HashMap::new(); + + let mut reset_index = self.bloom_index(block_number, 0); + result.insert(reset_index.clone(), bloom.clone()); + + for level in 1..self.levels { + let index = self.bloom_index(block_number, level); + let lower_indexes = self.lower_level_bloom_indexes(&index); + let new_bloom = lower_indexes.into_iter() + .filter(|li| li != &reset_index) + .map(|li| self.data_source.bloom_at_index(&li)) + .filter_map(|b| b) + .fold(H2048::new(), | acc, bloom | { &acc | bloom }); + + reset_index = index.clone(); + result.insert(index, &new_bloom | bloom); + } + + result + } + + /// sets lowest level bloom to 0 and forces rebuild on higher levels + fn clear_bloom(&self, block_number: usize) -> HashMap { + self.reset_bloom(&H2048::new(), block_number) + } + + /// returns numbers of blocks that may contain Address + fn blocks_with_address(&self, address: &Address, from_block: usize ,to_block: usize) -> Vec { + let mut bloom = H2048::new(); + bloom.shift_bloom(&address.sha3()); + self.blocks_with_bloom(&bloom, from_block, to_block) + } + + /// returns numbers of blocks that may contain Topic + fn blocks_with_topics(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec { + let mut bloom = H2048::new(); + bloom.shift_bloom(&topic.sha3()); + self.blocks_with_bloom(&bloom, from_block, to_block) + } + + /// returns numbers of blocks that may log bloom + fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec { + panic!(); + } +} + +#[cfg(test)] +mod tests { + use std::collections::{HashMap, HashSet}; + use hash::*; + use filter::*; + use chainfilter::*; + + #[test] + fn test_level_size() { + let cache = MemoryCache::new(); + let filter = ChainFilter::new(&cache, 16, 3); + assert_eq!(filter.level_size(0), 1); + assert_eq!(filter.level_size(1), 16); + assert_eq!(filter.level_size(2), 256); + } + + #[test] + fn test_bloom_index() { + let cache = MemoryCache::new(); + let filter = ChainFilter::new(&cache, 16, 3); + + let bi0 = filter.bloom_index(0, 0); + assert_eq!(bi0.level, 0); + assert_eq!(bi0.index, 0); + + let bi1 = filter.bloom_index(1, 0); + assert_eq!(bi1.level, 0); + assert_eq!(bi1.index, 1); + + let bi2 = filter.bloom_index(2, 0); + assert_eq!(bi2.level, 0); + assert_eq!(bi2.index, 2); + + let bi3 = filter.bloom_index(3, 1); + assert_eq!(bi3.level, 1); + assert_eq!(bi3.index, 0); + + let bi4 = filter.bloom_index(15, 1); + assert_eq!(bi4.level, 1); + assert_eq!(bi4.index, 0); + + let bi5 = filter.bloom_index(16, 1); + assert_eq!(bi5.level, 1); + assert_eq!(bi5.index, 1); + + let bi6 = filter.bloom_index(255, 2); + assert_eq!(bi6.level, 2); + assert_eq!(bi6.index, 0); + + let bi7 = filter.bloom_index(256, 2); + assert_eq!(bi7.level, 2); + assert_eq!(bi7.index, 1); + } + + #[test] + fn test_lower_level_bloom_indexes() { + let cache = MemoryCache::new(); + let filter = ChainFilter::new(&cache, 16, 3); + + let bi = filter.bloom_index(256, 2); + assert_eq!(bi.level, 2); + assert_eq!(bi.index, 1); + + let mut ebis = HashSet::with_capacity(16); + for i in 16..32 { + ebis.insert(BloomIndex::new(1, i)); + } + + let bis = filter.lower_level_bloom_indexes(&bi); + assert_eq!(ebis, bis); + } +} diff --git a/src/filter.rs b/src/filter.rs index 92e953c04..044c59563 100644 --- a/src/filter.rs +++ b/src/filter.rs @@ -7,11 +7,19 @@ use std::collections::HashMap; /// On bloom level 0, all positions represent different blooms. /// On higher levels multiple positions represent one bloom /// and should be transformed to `BlockIndex` to get index of this bloom -#[derive(Eq, PartialEq, Hash)] +#[derive(Eq, PartialEq, Hash, Clone, Debug)] pub struct BloomIndex { - level: u8, - level_index: usize, - index: usize, + pub level: u8, + pub index: usize, +} + +impl BloomIndex { + pub fn new(level: u8, index: usize) -> BloomIndex { + BloomIndex { + level: level, + index: index, + } + } } pub trait FilterDataSource { @@ -20,12 +28,6 @@ pub trait FilterDataSource { } pub trait Filter: Sized { - /// creates new filter instance - fn new(data_source: &T, index_size: usize, levels: u8) -> Self where T: FilterDataSource; - - /// converts block number and level to `BloomIndex` - fn bloom_index(&self, block_number: usize, level: u8) -> BloomIndex; - /// add new bloom to all levels fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap; @@ -39,11 +41,11 @@ pub trait Filter: Sized { fn clear_bloom(&self, block_number: usize) -> HashMap; /// returns numbers of blocks that may contain Address - fn blocks_with_address(&self, address: &Address) -> Vec; + fn blocks_with_address(&self, address: &Address, from_block: usize, to_block: usize) -> Vec; /// returns numbers of blocks that may contain Topic - fn blocks_with_topics(&self, topic: &H256) -> Vec; + fn blocks_with_topics(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec; /// returns numbers of blocks that may log bloom - fn blocks_with_bloom(&self, bloom: &H2048) -> Vec; + fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec; } diff --git a/src/lib.rs b/src/lib.rs index 7ebf91f51..e7802b114 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -7,8 +7,11 @@ extern crate mio; extern crate rand; extern crate rocksdb; extern crate tiny_keccak; +extern crate num; #[macro_use] extern crate log; +#[macro_use] +pub mod macros; pub mod error; pub mod hash; @@ -22,6 +25,7 @@ pub mod hashdb; pub mod memorydb; pub mod math; pub mod filter; +pub mod chainfilter; //pub mod network; diff --git a/src/macros.rs b/src/macros.rs new file mode 100644 index 000000000..69286a340 --- /dev/null +++ b/src/macros.rs @@ -0,0 +1,11 @@ +macro_rules! map( + { $($key:expr => $value:expr),+ } => { + { + let mut m = ::std::collections::HashMap::new(); + $( + m.insert($key, $value); + )+ + m + } + }; +); diff --git a/src/math.rs b/src/math.rs index ba039f3d0..c85c4653e 100644 --- a/src/math.rs +++ b/src/math.rs @@ -7,4 +7,3 @@ pub fn log2(x: usize) -> u32 { let n = x.leading_zeros(); ::std::mem::size_of::() as u32 * 8 - n } - From e769406b92d5b6ac0d4673edcccd759508044ded Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 17:10:14 +0100 Subject: [PATCH 03/22] rustfmt --- src/chainfilter.rs | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index d5f0873ac..2b7754d3b 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -81,7 +81,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource for i in 0..self.index_size { indexes.insert(BloomIndex { level: new_level, - index: offset + i + index: offset + i, }); } @@ -96,12 +96,12 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource /// BitOr new bloom with all levels of filter fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap { let mut result: HashMap = HashMap::new(); - + for level in 0..self.levels { - let bloom_index = self.bloom_index(block_number, level); + let bloom_index = self.bloom_index(block_number, level); let new_bloom = match self.data_source.bloom_at_index(&bloom_index) { Some(old_bloom) => old_bloom | bloom, - None => bloom.clone() + None => bloom.clone(), }; result.insert(bloom_index, new_bloom); @@ -122,18 +122,18 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource let is_new_bloom = match result.get_mut(&bloom_index) { // it was already modified - Some(to_shift) => { + Some(to_shift) => { *to_shift = &blooms[i] | to_shift; false - }, - None => true + } + None => true, }; // it hasn't been modified yet if is_new_bloom { let new_bloom = match self.data_source.bloom_at_index(&bloom_index) { Some(old_bloom) => old_bloom | &blooms[i], - None => blooms[i].clone() + None => blooms[i].clone(), }; result.insert(bloom_index, new_bloom); } @@ -149,16 +149,16 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource let mut reset_index = self.bloom_index(block_number, 0); result.insert(reset_index.clone(), bloom.clone()); - + for level in 1..self.levels { let index = self.bloom_index(block_number, level); let lower_indexes = self.lower_level_bloom_indexes(&index); let new_bloom = lower_indexes.into_iter() - .filter(|li| li != &reset_index) - .map(|li| self.data_source.bloom_at_index(&li)) - .filter_map(|b| b) - .fold(H2048::new(), | acc, bloom | { &acc | bloom }); - + .filter(|li| li != &reset_index) + .map(|li| self.data_source.bloom_at_index(&li)) + .filter_map(|b| b) + .fold(H2048::new(), |acc, bloom| &acc | bloom); + reset_index = index.clone(); result.insert(index, &new_bloom | bloom); } @@ -172,7 +172,11 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource } /// returns numbers of blocks that may contain Address - fn blocks_with_address(&self, address: &Address, from_block: usize ,to_block: usize) -> Vec { + fn blocks_with_address(&self, + address: &Address, + from_block: usize, + to_block: usize) + -> Vec { let mut bloom = H2048::new(); bloom.shift_bloom(&address.sha3()); self.blocks_with_bloom(&bloom, from_block, to_block) @@ -254,7 +258,7 @@ mod tests { assert_eq!(bi.level, 2); assert_eq!(bi.index, 1); - let mut ebis = HashSet::with_capacity(16); + let mut ebis = HashSet::with_capacity(16); for i in 16..32 { ebis.insert(BloomIndex::new(1, i)); } From 3e28c2da313ad0d4ebcbcded052b796c05b89e7e Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 19:11:04 +0100 Subject: [PATCH 04/22] bloom filters --- src/chainfilter.rs | 113 +++++++++++++++++++++++++++++++++++++++++---- src/hash.rs | 7 ++- 2 files changed, 111 insertions(+), 9 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 2b7754d3b..902ee3a11 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -1,5 +1,5 @@ //! basic implementation of multilevel bloom filter -use std::collections::{HashMap, HashSet}; +use std::collections::{HashMap}; use hash::*; use filter::*; use sha3::*; @@ -14,6 +14,9 @@ impl MemoryCache { MemoryCache { blooms: HashMap::new() } } + /// inserts all blooms into cache + /// + /// TODO: verify if extend update old items pub fn insert_blooms(&mut self, blooms: HashMap) { self.blooms.extend(blooms); } @@ -67,8 +70,11 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource } /// return bloom which are dependencies for given index - fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> HashSet { - let mut indexes: HashSet = HashSet::with_capacity(self.index_size); + /// + /// bloom indexes are ordered from lowest to highest + fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> Vec { + //let mut indexes: HashSet = HashSet::with_capacity(self.index_size); + let mut indexes: Vec = vec![]; // this is the lower level if index.level == 0 { @@ -79,7 +85,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource let offset = self.index_size * index.index; for i in 0..self.index_size { - indexes.insert(BloomIndex { + indexes.push(BloomIndex { level: new_level, index: offset + i, }); @@ -87,6 +93,51 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource indexes } + + /// returns max filter level + fn max_level(&self) -> u8 { + self.levels - 1 + } + + /// internal function which actually does bloom search + /// TODO: optimize it, maybe non-recursive version? + /// TODO2: clean up? + fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Vec { + let mut result = vec![]; + let index = self.bloom_index(offset, level); + + match self.data_source.bloom_at_index(&index) { + None => (), + Some(level_bloom) => match level { + 0 => { + // to_block exclusive + if offset < to_block { + result.push(offset); + } + }, + _ => match level_bloom.contains(bloom) { + false => (), + true => { + let level_size = self.level_size(level - 1); + let from_index = self.bloom_index(from_block, level - 1); + let to_index = self.bloom_index(to_block, level - 1); + let res: Vec = self.lower_level_bloom_indexes(&index).into_iter() + // chose only blooms in range + .filter(|li| li.index >= from_index.index && li.index <= to_index.index) + // map them to offsets + .map(|li| li.index * level_size) + // get all blocks that may contain our bloom + .map(|off| self.blocks(bloom, from_block, to_block, level - 1, off)) + // flatten nested structure + .flat_map(|v| v) + .collect(); + return res + } + } + } + } + result + } } impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource @@ -191,16 +242,33 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource /// returns numbers of blocks that may log bloom fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec { - panic!(); + let mut result = vec![]; + // lets start from highest level + let max_level = self.max_level(); + let level_size = self.level_size(max_level); + let from_index = self.bloom_index(from_block, max_level); + let to_index = self.bloom_index(to_block, max_level); + + for index in from_index.index..to_index.index + 1 { + // offset will be used to calculate where we are right now + let offset = level_size * index; + + // go doooown! + result.extend(self.blocks(bloom, from_block, to_block, max_level, offset)); + } + + result } } #[cfg(test)] mod tests { - use std::collections::{HashMap, HashSet}; + use std::collections::{HashMap}; use hash::*; use filter::*; use chainfilter::*; + use sha3::*; + use std::str::FromStr; #[test] fn test_level_size() { @@ -258,12 +326,41 @@ mod tests { assert_eq!(bi.level, 2); assert_eq!(bi.index, 1); - let mut ebis = HashSet::with_capacity(16); + let mut ebis = vec![]; for i in 16..32 { - ebis.insert(BloomIndex::new(1, i)); + ebis.push(BloomIndex::new(1, i)); } let bis = filter.lower_level_bloom_indexes(&bi); assert_eq!(ebis, bis); } + + #[test] + fn test_basic_search() { + let index_size = 16; + let bloom_levels = 3; + + let mut cache = MemoryCache::new(); + let topic = H256::from_str("8d936b1bd3fc635710969ccfba471fb17d598d9d1971b538dd712e1e4b4f4dba").unwrap(); + + let modified_blooms = { + let filter = ChainFilter::new(&cache, index_size, bloom_levels); + let block_number = 23; + let mut bloom = H2048::new(); + bloom.shift_bloom(&topic.sha3()); + filter.add_bloom(&bloom, block_number) + }; + + // number of modified blooms should always be equal number of levels + assert_eq!(modified_blooms.len(), bloom_levels as usize); + cache.insert_blooms(modified_blooms); + + { + let filter = ChainFilter::new(&cache, index_size, bloom_levels); + let blocks = filter.blocks_with_topics(&topic, 0, 100); + println!("{:?}", blocks); + assert!(false); + } + + } } diff --git a/src/hash.rs b/src/hash.rs index a0493f2bb..c79234300 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -19,6 +19,7 @@ pub trait FixedHash: Sized + BytesConvertable { fn shift_bloom<'a, T>(&'a mut self, b: &T) -> &'a mut Self where T: FixedHash; fn bloom_part(&self, m: usize) -> T where T: FixedHash; fn contains_bloom(&self, b: &T) -> bool where T: FixedHash; + fn contains<'a>(&'a self, b: &'a Self) -> bool; } macro_rules! impl_hash { @@ -108,7 +109,11 @@ macro_rules! impl_hash { fn contains_bloom(&self, b: &T) -> bool where T: FixedHash { let bp: Self = b.bloom_part($size); - (&bp & self) == bp + self.contains(&bp) + } + + fn contains<'a>(&'a self, b: &'a Self) -> bool { + &(b & self) == b } } From faf174c245679ae797de8172c63b02d38022fd98 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 19:48:41 +0100 Subject: [PATCH 05/22] just a few comments --- src/chainfilter.rs | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 902ee3a11..0ab56e8ea 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -5,6 +5,7 @@ use filter::*; use sha3::*; use num::pow; +/// in memory cache for blooms pub struct MemoryCache { blooms: HashMap, } @@ -16,7 +17,7 @@ impl MemoryCache { /// inserts all blooms into cache /// - /// TODO: verify if extend update old items + /// if bloom at given index already exists, overwrites it pub fn insert_blooms(&mut self, blooms: HashMap) { self.blooms.extend(blooms); } @@ -28,6 +29,7 @@ impl FilterDataSource for MemoryCache { } } +/// Should be used to find blocks in FilterDataSource pub struct ChainFilter<'a, D> where D: FilterDataSource + 'a { @@ -101,7 +103,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource /// internal function which actually does bloom search /// TODO: optimize it, maybe non-recursive version? - /// TODO2: clean up? + /// TODO: clean up? fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Vec { let mut result = vec![]; let index = self.bloom_index(offset, level); @@ -263,7 +265,6 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource #[cfg(test)] mod tests { - use std::collections::{HashMap}; use hash::*; use filter::*; use chainfilter::*; @@ -336,7 +337,7 @@ mod tests { } #[test] - fn test_basic_search() { + fn test_topic_basic_search() { let index_size = 16; let bloom_levels = 3; @@ -358,9 +359,27 @@ mod tests { { let filter = ChainFilter::new(&cache, index_size, bloom_levels); let blocks = filter.blocks_with_topics(&topic, 0, 100); - println!("{:?}", blocks); - assert!(false); + assert_eq!(blocks.len(), 1); + assert_eq!(blocks[0], 23); } + { + let filter = ChainFilter::new(&cache, index_size, bloom_levels); + let blocks = filter.blocks_with_topics(&topic, 0, 23); + assert_eq!(blocks.len(), 0); + } + + { + let filter = ChainFilter::new(&cache, index_size, bloom_levels); + let blocks = filter.blocks_with_topics(&topic, 23, 24); + assert_eq!(blocks.len(), 1); + assert_eq!(blocks[0], 23); + } + + { + let filter = ChainFilter::new(&cache, index_size, bloom_levels); + let blocks = filter.blocks_with_topics(&topic, 24, 100); + assert_eq!(blocks.len(), 0); + } } } From addd1e5ffd7426d8231d59e322de27306ed800a0 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 20:00:14 +0100 Subject: [PATCH 06/22] bloom filter cleanup --- src/chainfilter.rs | 55 ++++++++++++++++++++++------------------------ 1 file changed, 26 insertions(+), 29 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 0ab56e8ea..494037149 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -105,40 +105,37 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource /// TODO: optimize it, maybe non-recursive version? /// TODO: clean up? fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Vec { - let mut result = vec![]; let index = self.bloom_index(offset, level); - match self.data_source.bloom_at_index(&index) { - None => (), + let contains = match self.data_source.bloom_at_index(&index) { + None => false, Some(level_bloom) => match level { - 0 => { - // to_block exclusive - if offset < to_block { - result.push(offset); - } - }, - _ => match level_bloom.contains(bloom) { - false => (), - true => { - let level_size = self.level_size(level - 1); - let from_index = self.bloom_index(from_block, level - 1); - let to_index = self.bloom_index(to_block, level - 1); - let res: Vec = self.lower_level_bloom_indexes(&index).into_iter() - // chose only blooms in range - .filter(|li| li.index >= from_index.index && li.index <= to_index.index) - // map them to offsets - .map(|li| li.index * level_size) - // get all blocks that may contain our bloom - .map(|off| self.blocks(bloom, from_block, to_block, level - 1, off)) - // flatten nested structure - .flat_map(|v| v) - .collect(); - return res - } - } + // if we are on the lowest level + // take the value, exclude to_block + 0 if offset < to_block => return vec![offset], + 0 => false, + _ => level_bloom.contains(bloom) } + }; + + if contains { + let level_size = self.level_size(level - 1); + let from_index = self.bloom_index(from_block, level - 1); + let to_index = self.bloom_index(to_block, level - 1); + let res: Vec = self.lower_level_bloom_indexes(&index).into_iter() + // chose only blooms in range + .filter(|li| li.index >= from_index.index && li.index <= to_index.index) + // map them to offsets + .map(|li| li.index * level_size) + // get all blocks that may contain our bloom + .map(|off| self.blocks(bloom, from_block, to_block, level - 1, off)) + // flatten nested structure + .flat_map(|v| v) + .collect(); + return res } - result + + return vec![]; } } From 1a30d918537f8861987beb3f9705e30e2c736a32 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 20:06:28 +0100 Subject: [PATCH 07/22] removed unused line --- src/chainfilter.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 494037149..05060023b 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -75,7 +75,6 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource /// /// bloom indexes are ordered from lowest to highest fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> Vec { - //let mut indexes: HashSet = HashSet::with_capacity(self.index_size); let mut indexes: Vec = vec![]; // this is the lower level From d1223b3b1555ca6701e9fa1342c18f02ca0b93ca Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 20:32:46 +0100 Subject: [PATCH 08/22] bloom filter uses vector instead of hashmap, added few comments --- src/chainfilter.rs | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 05060023b..9bab944c1 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -35,24 +35,26 @@ pub struct ChainFilter<'a, D> { data_source: &'a D, index_size: usize, - levels: u8, - level_sizes: HashMap, + level_sizes: Vec, } impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource { /// creates new filter instance pub fn new(data_source: &'a D, index_size: usize, levels: u8) -> Self { + if levels == 0 { + panic!("ChainFilter requires and least 1 level"); + } + let mut filter = ChainFilter { data_source: data_source, index_size: index_size, - levels: levels, - level_sizes: HashMap::new(), + level_sizes: vec![] }; // cache level sizes, so we do not have to calculate them all the time for i in 0..levels { - filter.level_sizes.insert(i, pow(index_size, i as usize)); + filter.level_sizes.push(pow(index_size, i as usize)); } filter @@ -60,7 +62,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource /// unsafely get level size fn level_size(&self, level: u8) -> usize { - *self.level_sizes.get(&level).unwrap() + self.level_sizes[level as usize] } /// converts block number and level to `BloomIndex` @@ -95,9 +97,14 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource indexes } + /// return number of levels + fn levels(&self) -> u8 { + self.level_sizes.len() as u8 + } + /// returns max filter level fn max_level(&self) -> u8 { - self.levels - 1 + self.level_sizes.len() as u8 - 1 } /// internal function which actually does bloom search @@ -146,7 +153,7 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap { let mut result: HashMap = HashMap::new(); - for level in 0..self.levels { + for level in 0..self.levels() { let bloom_index = self.bloom_index(block_number, level); let new_bloom = match self.data_source.bloom_at_index(&bloom_index) { Some(old_bloom) => old_bloom | bloom, @@ -165,7 +172,7 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap { let mut result: HashMap = HashMap::new(); - for level in 0..self.levels { + for level in 0..self.levels() { for i in 0..blooms.len() { let bloom_index = self.bloom_index(block_number + i, level); let is_new_bloom = match result.get_mut(&bloom_index) { @@ -199,13 +206,18 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource let mut reset_index = self.bloom_index(block_number, 0); result.insert(reset_index.clone(), bloom.clone()); - for level in 1..self.levels { + for level in 1..self.levels() { let index = self.bloom_index(block_number, level); + // get all bloom indexes that were used to construct this bloom let lower_indexes = self.lower_level_bloom_indexes(&index); let new_bloom = lower_indexes.into_iter() + // skip reseted one .filter(|li| li != &reset_index) + // get blooms for these indexes .map(|li| self.data_source.bloom_at_index(&li)) + // filter existing ones .filter_map(|b| b) + // BitOr all of them .fold(H2048::new(), |acc, bloom| &acc | bloom); reset_index = index.clone(); From 9b02a8bd5f06cd5032009fc23bd3480dd31c66eb Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 20:44:38 +0100 Subject: [PATCH 09/22] cleanedup chain filter --- src/chainfilter.rs | 55 +++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 25 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 9bab944c1..6f917e060 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -110,38 +110,40 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource /// internal function which actually does bloom search /// TODO: optimize it, maybe non-recursive version? /// TODO: clean up? - fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Vec { + fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Option> { let index = self.bloom_index(offset, level); - let contains = match self.data_source.bloom_at_index(&index) { - None => false, + match self.data_source.bloom_at_index(&index) { + None => return None, Some(level_bloom) => match level { // if we are on the lowest level // take the value, exclude to_block - 0 if offset < to_block => return vec![offset], - 0 => false, - _ => level_bloom.contains(bloom) + 0 if offset < to_block => return Some(vec![offset]), + // return None if it is is equal to to_block + 0 => return None, + // return None if current level doesnt contain given bloom + _ if !level_bloom.contains(bloom) => return None, + // continue processing && go down + _ => () } }; - if contains { - let level_size = self.level_size(level - 1); - let from_index = self.bloom_index(from_block, level - 1); - let to_index = self.bloom_index(to_block, level - 1); - let res: Vec = self.lower_level_bloom_indexes(&index).into_iter() - // chose only blooms in range - .filter(|li| li.index >= from_index.index && li.index <= to_index.index) - // map them to offsets - .map(|li| li.index * level_size) - // get all blocks that may contain our bloom - .map(|off| self.blocks(bloom, from_block, to_block, level - 1, off)) - // flatten nested structure - .flat_map(|v| v) - .collect(); - return res - } - - return vec![]; + let level_size = self.level_size(level - 1); + let from_index = self.bloom_index(from_block, level - 1); + let to_index = self.bloom_index(to_block, level - 1); + let res: Vec = self.lower_level_bloom_indexes(&index).into_iter() + // chose only blooms in range + .filter(|li| li.index >= from_index.index && li.index <= to_index.index) + // map them to offsets + .map(|li| li.index * level_size) + // get all blocks that may contain our bloom + .map(|off| self.blocks(bloom, from_block, to_block, level - 1, off)) + // filter existing ones + .filter_map(|x| x) + // flatten nested structure + .flat_map(|v| v) + .collect(); + Some(res) } } @@ -264,7 +266,10 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource let offset = level_size * index; // go doooown! - result.extend(self.blocks(bloom, from_block, to_block, max_level, offset)); + match self.blocks(bloom, from_block, to_block, max_level, offset) { + Some(blocks) => result.extend(blocks), + None => () + }; } result From a6240c0d30346f5d99d61d67910ce4e2d504ba56 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 21:03:31 +0100 Subject: [PATCH 10/22] fixed indention issues --- src/chainfilter.rs | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 6f917e060..01c2b8d82 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -214,13 +214,13 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource let lower_indexes = self.lower_level_bloom_indexes(&index); let new_bloom = lower_indexes.into_iter() // skip reseted one - .filter(|li| li != &reset_index) + .filter(|li| li != &reset_index) // get blooms for these indexes - .map(|li| self.data_source.bloom_at_index(&li)) + .map(|li| self.data_source.bloom_at_index(&li)) // filter existing ones - .filter_map(|b| b) + .filter_map(|b| b) // BitOr all of them - .fold(H2048::new(), |acc, bloom| &acc | bloom); + .fold(H2048::new(), |acc, bloom| &acc | bloom); reset_index = index.clone(); result.insert(index, &new_bloom | bloom); @@ -235,11 +235,7 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource } /// returns numbers of blocks that may contain Address - fn blocks_with_address(&self, - address: &Address, - from_block: usize, - to_block: usize) - -> Vec { + fn blocks_with_address(&self, address: &Address, from_block: usize, to_block: usize) -> Vec { let mut bloom = H2048::new(); bloom.shift_bloom(&address.sha3()); self.blocks_with_bloom(&bloom, from_block, to_block) From b405c061a1bf5ca26c22ce710684f5f9bb575d58 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 21:11:10 +0100 Subject: [PATCH 11/22] lower_level_bloom_indexes use collect instead of for loop --- src/chainfilter.rs | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 01c2b8d82..3ca72e4bc 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -77,24 +77,15 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource /// /// bloom indexes are ordered from lowest to highest fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> Vec { - let mut indexes: Vec = vec![]; - - // this is the lower level + // this is the lowest level if index.level == 0 { - return indexes; + return vec![]; } let new_level = index.level - 1; let offset = self.index_size * index.index; - for i in 0..self.index_size { - indexes.push(BloomIndex { - level: new_level, - index: offset + i, - }); - } - - indexes + (0..self.index_size).map(|i| BloomIndex::new(new_level, offset + i)).collect() } /// return number of levels @@ -140,7 +131,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource .map(|off| self.blocks(bloom, from_block, to_block, level - 1, off)) // filter existing ones .filter_map(|x| x) - // flatten nested structure + // flatten nested structures .flat_map(|v| v) .collect(); Some(res) From da60046e3d862fa4019b12473d540f5ff40001bf Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 21:42:50 +0100 Subject: [PATCH 12/22] bloom filters --- src/chainfilter.rs | 97 ++++++++++++++++++++++++++++------------------ src/filter.rs | 51 ------------------------ src/lib.rs | 2 +- 3 files changed, 60 insertions(+), 90 deletions(-) delete mode 100644 src/filter.rs diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 3ca72e4bc..bfe2a5179 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -1,16 +1,45 @@ -//! basic implementation of multilevel bloom filter +//! Multilevel blockchain bloom filter. use std::collections::{HashMap}; use hash::*; -use filter::*; use sha3::*; use num::pow; -/// in memory cache for blooms +/// Represents bloom index in cache +/// +/// On cache level 0, every block bloom is represented by different index. +/// On higher cache levels, multiple block blooms are represented by one +/// index. Their `BloomIndex` can be created from block number and given level. +#[derive(Eq, PartialEq, Hash, Clone, Debug)] +pub struct BloomIndex { + pub level: u8, + pub index: usize, +} + +impl BloomIndex { + /// Default constructor for `BloomIndex` + pub fn new(level: u8, index: usize) -> BloomIndex { + BloomIndex { + level: level, + index: index, + } + } +} + +/// Types implementing this trait should provide read access for bloom filters database. +pub trait FilterDataSource { + /// returns reference to log at given position if it exists + fn bloom_at_index(&self, index: &BloomIndex) -> Option<&H2048>; +} + +/// In memory cache for blooms. +/// +/// Stores all blooms in HashMap, which indexes them by `BloomIndex`. pub struct MemoryCache { blooms: HashMap, } impl MemoryCache { + /// Default constructor for MemoryCache pub fn new() -> MemoryCache { MemoryCache { blooms: HashMap::new() } } @@ -29,7 +58,7 @@ impl FilterDataSource for MemoryCache { } } -/// Should be used to find blocks in FilterDataSource +/// Should be used for search operations on blockchain. pub struct ChainFilter<'a, D> where D: FilterDataSource + 'a { @@ -40,7 +69,9 @@ pub struct ChainFilter<'a, D> impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource { - /// creates new filter instance + /// Creates new filter instance. + /// + /// Borrows `FilterDataSource` for reading. pub fn new(data_source: &'a D, index_size: usize, levels: u8) -> Self { if levels == 0 { panic!("ChainFilter requires and least 1 level"); @@ -98,9 +129,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource self.level_sizes.len() as u8 - 1 } - /// internal function which actually does bloom search - /// TODO: optimize it, maybe non-recursive version? - /// TODO: clean up? + /// internal function which does bloom search recursively fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Option> { let index = self.bloom_index(offset, level); @@ -136,14 +165,9 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource .collect(); Some(res) } -} -impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource -{ - /// add new bloom to all levels - /// - /// BitOr new bloom with all levels of filter - fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap { + /// Adds new bloom to all filter levels + pub fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap { let mut result: HashMap = HashMap::new(); for level in 0..self.levels() { @@ -159,10 +183,8 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource result } - /// add new blooms starting from block number - /// - /// BitOr new blooms with all levels of filter - fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap { + /// Adds new blooms starting from block number. + pub fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap { let mut result: HashMap = HashMap::new(); for level in 0..self.levels() { @@ -192,8 +214,8 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource result } - /// reset bloom at level 0 and forces rebuild on higher levels - fn reset_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap { + /// Resets bloom at level 0 and forces rebuild on higher levels. + pub fn reset_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap { let mut result: HashMap = HashMap::new(); let mut reset_index = self.bloom_index(block_number, 0); @@ -204,14 +226,14 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource // get all bloom indexes that were used to construct this bloom let lower_indexes = self.lower_level_bloom_indexes(&index); let new_bloom = lower_indexes.into_iter() - // skip reseted one - .filter(|li| li != &reset_index) - // get blooms for these indexes - .map(|li| self.data_source.bloom_at_index(&li)) - // filter existing ones - .filter_map(|b| b) - // BitOr all of them - .fold(H2048::new(), |acc, bloom| &acc | bloom); + // skip reseted one + .filter(|li| li != &reset_index) + // get blooms for these indexes + .map(|li| self.data_source.bloom_at_index(&li)) + // filter existing ones + .filter_map(|b| b) + // BitOr all of them + .fold(H2048::new(), |acc, bloom| &acc | bloom); reset_index = index.clone(); result.insert(index, &new_bloom | bloom); @@ -220,27 +242,27 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource result } - /// sets lowest level bloom to 0 and forces rebuild on higher levels - fn clear_bloom(&self, block_number: usize) -> HashMap { + /// Sets lowest level bloom to 0 and forces rebuild on higher levels. + pub fn clear_bloom(&self, block_number: usize) -> HashMap { self.reset_bloom(&H2048::new(), block_number) } - /// returns numbers of blocks that may contain Address - fn blocks_with_address(&self, address: &Address, from_block: usize, to_block: usize) -> Vec { + /// Returns numbers of blocks that may contain Address. + pub fn blocks_with_address(&self, address: &Address, from_block: usize, to_block: usize) -> Vec { let mut bloom = H2048::new(); bloom.shift_bloom(&address.sha3()); self.blocks_with_bloom(&bloom, from_block, to_block) } - /// returns numbers of blocks that may contain Topic - fn blocks_with_topics(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec { + /// Returns numbers of blocks that may contain Topic. + pub fn blocks_with_topics(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec { let mut bloom = H2048::new(); bloom.shift_bloom(&topic.sha3()); self.blocks_with_bloom(&bloom, from_block, to_block) } - /// returns numbers of blocks that may log bloom - fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec { + /// Returns numbers of blocks that may log bloom. + pub fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec { let mut result = vec![]; // lets start from highest level let max_level = self.max_level(); @@ -266,7 +288,6 @@ impl<'a, D> Filter for ChainFilter<'a, D> where D: FilterDataSource #[cfg(test)] mod tests { use hash::*; - use filter::*; use chainfilter::*; use sha3::*; use std::str::FromStr; diff --git a/src/filter.rs b/src/filter.rs deleted file mode 100644 index 044c59563..000000000 --- a/src/filter.rs +++ /dev/null @@ -1,51 +0,0 @@ -//! multilevel bloom filter interface -use hash::*; -use std::collections::HashMap; - -/// Represents bloom index in cache -/// -/// On bloom level 0, all positions represent different blooms. -/// On higher levels multiple positions represent one bloom -/// and should be transformed to `BlockIndex` to get index of this bloom -#[derive(Eq, PartialEq, Hash, Clone, Debug)] -pub struct BloomIndex { - pub level: u8, - pub index: usize, -} - -impl BloomIndex { - pub fn new(level: u8, index: usize) -> BloomIndex { - BloomIndex { - level: level, - index: index, - } - } -} - -pub trait FilterDataSource { - /// returns reference to log at given position if it exists - fn bloom_at_index(&self, index: &BloomIndex) -> Option<&H2048>; -} - -pub trait Filter: Sized { - /// add new bloom to all levels - fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap; - - /// add new blooms starting from block number - fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap; - - /// reset bloom at level 0 and forces rebuild on higher levels - fn reset_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap; - - /// sets lowest level bloom to 0 and forces rebuild on higher levels - fn clear_bloom(&self, block_number: usize) -> HashMap; - - /// returns numbers of blocks that may contain Address - fn blocks_with_address(&self, address: &Address, from_block: usize, to_block: usize) -> Vec; - - /// returns numbers of blocks that may contain Topic - fn blocks_with_topics(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec; - - /// returns numbers of blocks that may log bloom - fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec; -} diff --git a/src/lib.rs b/src/lib.rs index e7802b114..7823c7711 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,7 +24,7 @@ pub mod sha3; pub mod hashdb; pub mod memorydb; pub mod math; -pub mod filter; +//pub mod filter; pub mod chainfilter; //pub mod network; From 52ccd7b094942ca7d5e23723c59db5a0faa7b0e4 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 21:54:41 +0100 Subject: [PATCH 13/22] fixed typo --- src/chainfilter.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index bfe2a5179..3f66173a7 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -74,7 +74,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource /// Borrows `FilterDataSource` for reading. pub fn new(data_source: &'a D, index_size: usize, levels: u8) -> Self { if levels == 0 { - panic!("ChainFilter requires and least 1 level"); + panic!("ChainFilter requires at least 1 level"); } let mut filter = ChainFilter { From d6d51b456bbd651519530a72375560e71adbaa8a Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 22:10:58 +0100 Subject: [PATCH 14/22] UnsafeRlp --- src/rlp.rs | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/src/rlp.rs b/src/rlp.rs index 4b6df2c6e..d8d8c2e25 100644 --- a/src/rlp.rs +++ b/src/rlp.rs @@ -151,6 +151,54 @@ impl From for DecoderError { } } +/// Unsafe wrapper for rlp decoder. +/// +/// It assumes that you know what you are doing. Doesn't bother +/// you with error handling. +pub struct UnsafeRlp<'a> { + rlp: Rlp<'a> +} + +impl<'a> From> for UnsafeRlp<'a> { + fn from(rlp: Rlp<'a>) -> UnsafeRlp<'a> { + UnsafeRlp { rlp: rlp } + } +} + +impl<'a> From> for Rlp<'a> { + fn from(unsafe_rlp: UnsafeRlp<'a>) -> Rlp<'a> { + unsafe_rlp.rlp + } +} + +impl<'a> UnsafeRlp<'a> { + /// returns new instance of `UnsafeRlp` + pub fn new(bytes: &'a [u8]) -> UnsafeRlp<'a> { + UnsafeRlp { + rlp: Rlp::new(bytes) + } + } + + pub fn at(&self, index: usize) -> UnsafeRlp<'a> { + From::from(self.rlp.at(index).unwrap()) + } + + /// returns true if rlp is a list + pub fn is_list(&self) -> bool { + self.rlp.is_list() + } + + /// returns true if rlp is a value + pub fn is_value(&self) -> bool { + self.rlp.is_value() + } + + /// returns rlp iterator + pub fn iter(&'a self) -> RlpIterator<'a> { + self.rlp.into_iter() + } +} + impl<'a> Rlp<'a> { /// returns new instance of `Rlp` pub fn new(bytes: &'a [u8]) -> Rlp<'a> { From b0c38cb6ec3206e6a6d46bb1796749ed9f768262 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 22:16:08 +0100 Subject: [PATCH 15/22] UntrastedRlp --- src/rlp.rs | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/rlp.rs b/src/rlp.rs index d8d8c2e25..48f5299ee 100644 --- a/src/rlp.rs +++ b/src/rlp.rs @@ -155,31 +155,31 @@ impl From for DecoderError { /// /// It assumes that you know what you are doing. Doesn't bother /// you with error handling. -pub struct UnsafeRlp<'a> { +pub struct UntrustedRlp<'a> { rlp: Rlp<'a> } -impl<'a> From> for UnsafeRlp<'a> { - fn from(rlp: Rlp<'a>) -> UnsafeRlp<'a> { - UnsafeRlp { rlp: rlp } +impl<'a> From> for UntrustedRlp<'a> { + fn from(rlp: Rlp<'a>) -> UntrustedRlp<'a> { + UntrustedRlp { rlp: rlp } } } -impl<'a> From> for Rlp<'a> { - fn from(unsafe_rlp: UnsafeRlp<'a>) -> Rlp<'a> { +impl<'a> From> for Rlp<'a> { + fn from(unsafe_rlp: UntrustedRlp<'a>) -> Rlp<'a> { unsafe_rlp.rlp } } -impl<'a> UnsafeRlp<'a> { - /// returns new instance of `UnsafeRlp` - pub fn new(bytes: &'a [u8]) -> UnsafeRlp<'a> { - UnsafeRlp { +impl<'a> UntrustedRlp<'a> { + /// returns new instance of `UntrustedRlp` + pub fn new(bytes: &'a [u8]) -> UntrustedRlp<'a> { + UntrustedRlp { rlp: Rlp::new(bytes) } } - pub fn at(&self, index: usize) -> UnsafeRlp<'a> { + pub fn at(&self, index: usize) -> UntrustedRlp<'a> { From::from(self.rlp.at(index).unwrap()) } From 8c0e063f6beb95c0ab225b0df45759353acfaf1e Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 22:43:16 +0100 Subject: [PATCH 16/22] docs for chainfilter --- src/chainfilter.rs | 49 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 44 insertions(+), 5 deletions(-) diff --git a/src/chainfilter.rs b/src/chainfilter.rs index 3f66173a7..538d1883f 100644 --- a/src/chainfilter.rs +++ b/src/chainfilter.rs @@ -1,4 +1,43 @@ //! Multilevel blockchain bloom filter. +//! +//! ``` +//! extern crate ethcore_util as util; +//! use std::str::FromStr; +//! use util::chainfilter::*; +//! use util::sha3::*; +//! use util::hash::*; +//! +//! fn main() { +//! let (index_size, bloom_levels) = (16, 3); +//! let mut cache = MemoryCache::new(); +//! +//! let address = Address::from_str("ef2d6d194084c2de36e0dabfce45d046b37d1106").unwrap(); +//! +//! // borrow cache for reading inside the scope +//! let modified_blooms = { +//! let filter = ChainFilter::new(&cache, index_size, bloom_levels); +//! let block_number = 39; +//! let mut bloom = H2048::new(); +//! bloom.shift_bloom(&address.sha3()); +//! filter.add_bloom(&bloom, block_number) +//! }; +//! +//! // number of updated blooms is equal number of levels +//! assert_eq!(modified_blooms.len(), bloom_levels as usize); +//! +//! // lets inserts modified blooms into the cache +//! cache.insert_blooms(modified_blooms); +//! +//! // borrow cache for another reading operations +//! { +//! let filter = ChainFilter::new(&cache, index_size, bloom_levels); +//! let blocks = filter.blocks_with_address(&address, 10, 40); +//! assert_eq!(blocks.len(), 1); +//! assert_eq!(blocks[0], 39); +//! } +//! } +//! ``` +//! use std::collections::{HashMap}; use hash::*; use sha3::*; @@ -255,7 +294,7 @@ impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource } /// Returns numbers of blocks that may contain Topic. - pub fn blocks_with_topics(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec { + pub fn blocks_with_topic(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec { let mut bloom = H2048::new(); bloom.shift_bloom(&topic.sha3()); self.blocks_with_bloom(&bloom, from_block, to_block) @@ -379,27 +418,27 @@ mod tests { { let filter = ChainFilter::new(&cache, index_size, bloom_levels); - let blocks = filter.blocks_with_topics(&topic, 0, 100); + let blocks = filter.blocks_with_topic(&topic, 0, 100); assert_eq!(blocks.len(), 1); assert_eq!(blocks[0], 23); } { let filter = ChainFilter::new(&cache, index_size, bloom_levels); - let blocks = filter.blocks_with_topics(&topic, 0, 23); + let blocks = filter.blocks_with_topic(&topic, 0, 23); assert_eq!(blocks.len(), 0); } { let filter = ChainFilter::new(&cache, index_size, bloom_levels); - let blocks = filter.blocks_with_topics(&topic, 23, 24); + let blocks = filter.blocks_with_topic(&topic, 23, 24); assert_eq!(blocks.len(), 1); assert_eq!(blocks[0], 23); } { let filter = ChainFilter::new(&cache, index_size, bloom_levels); - let blocks = filter.blocks_with_topics(&topic, 24, 100); + let blocks = filter.blocks_with_topic(&topic, 24, 100); assert_eq!(blocks.len(), 0); } } From 3f5ea22a63c3c3d7c8ba2e461f28977c3d745aa1 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 23:13:05 +0100 Subject: [PATCH 17/22] first attempt to run travis --- .travis.yml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 .travis.yml diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..e7ca3f25b --- /dev/null +++ b/.travis.yml @@ -0,0 +1,7 @@ +language: rust +after_success: | + sudo apt-get install libcurl4-openssl-dev libelf-dev libdw-dev && + wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz && + tar xzf master.tar.gz && mkdir kcov-master/build && cd kcov-master/build && cmake .. && make && + sudo make install && cd ../.. && + kcov --coveralls-id=$TRAVIS_JOB_ID --exclude-pattern=/.cargo target/kcov target/debug/ethcore_util-* From de87de3920fece6dbb6eabaa79cc7616f6a1ec58 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 23:21:14 +0100 Subject: [PATCH 18/22] added rocksdb to .travis.yml --- .travis.yml | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index e7ca3f25b..1069b1e3f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,7 +1,13 @@ language: rust -after_success: | - sudo apt-get install libcurl4-openssl-dev libelf-dev libdw-dev && - wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz && - tar xzf master.tar.gz && mkdir kcov-master/build && cd kcov-master/build && cmake .. && make && - sudo make install && cd ../.. && - kcov --coveralls-id=$TRAVIS_JOB_ID --exclude-pattern=/.cargo target/kcov target/debug/ethcore_util-* + +before_script: + - wget https://github.com/facebook/rocksdb/archive/rocksdb-3.13.tar.gz + - tar xvf rocksdb-3.8.tar.gz && cd rocksdb-rocksdb-3.8 && make shared_lib + - make install + - cd .. + +after_success: + - sudo apt-get install libcurl4-openssl-dev libelf-dev libdw-dev + - wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz + - tar xzf master.tar.gz && mkdir kcov-master/build && cd kcov-master/build && cmake .. && make && sudo make install && cd ../.. && + - kcov --coveralls-id=$TRAVIS_JOB_ID --exclude-pattern=/.cargo target/kcov target/debug/ethcore_util-* From 7671877ed6efa0783b4d56d80949e17ba3fe99fe Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 23:23:11 +0100 Subject: [PATCH 19/22] fixed typo in travis.yml --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 1069b1e3f..0b33900a9 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ language: rust before_script: - wget https://github.com/facebook/rocksdb/archive/rocksdb-3.13.tar.gz - - tar xvf rocksdb-3.8.tar.gz && cd rocksdb-rocksdb-3.8 && make shared_lib + - tar xvf rocksdb-3.13.tar.gz && cd rocksdb-rocksdb-3.13 && make shared_lib - make install - cd .. From 8eb9dfb387eb523577e8e95a9e7dc19ffacab2b4 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 23:30:23 +0100 Subject: [PATCH 20/22] travis installs g++4.8 --- .travis.yml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.travis.yml b/.travis.yml index 0b33900a9..2059c3273 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,11 @@ language: rust before_script: + # g++4.8 for C++11 which is required by rocksdb + - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test + - sudo apt-get install -qq --yes --force-yes g++-4.8 + - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 50 + - wget https://github.com/facebook/rocksdb/archive/rocksdb-3.13.tar.gz - tar xvf rocksdb-3.13.tar.gz && cd rocksdb-rocksdb-3.13 && make shared_lib - make install From 054a11a5edc010737bd24198944a1eab6cd40d25 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 23:33:01 +0100 Subject: [PATCH 21/22] apt-get update for travis --- .travis.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.travis.yml b/.travis.yml index 2059c3273..fd0116a17 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,7 @@ language: rust before_script: # g++4.8 for C++11 which is required by rocksdb - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test + - sudo apt-get update -y -qq - sudo apt-get install -qq --yes --force-yes g++-4.8 - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 50 From 7402559630bead5bedea7a0c8e929079cbd96065 Mon Sep 17 00:00:00 2001 From: debris Date: Sat, 28 Nov 2015 23:46:36 +0100 Subject: [PATCH 22/22] missing sudo --- .travis.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index fd0116a17..03320f067 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,12 @@ language: rust +rust: + - 1.4.0 + +os: + - linux + #- osx + before_script: # g++4.8 for C++11 which is required by rocksdb - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test @@ -9,7 +16,7 @@ before_script: - wget https://github.com/facebook/rocksdb/archive/rocksdb-3.13.tar.gz - tar xvf rocksdb-3.13.tar.gz && cd rocksdb-rocksdb-3.13 && make shared_lib - - make install + - sudo make install - cd .. after_success: