diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 000000000..03320f067
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,26 @@
+language: rust
+
+rust:
+  - 1.4.0
+
+os:
+  - linux
+  #- osx
+
+before_script:
+  # g++4.8 for C++11 which is required by rocksdb
+  - sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
+  - sudo apt-get update -y -qq
+  - sudo apt-get install -qq --yes --force-yes g++-4.8
+  - sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-4.8 50
+
+  - wget https://github.com/facebook/rocksdb/archive/rocksdb-3.13.tar.gz
+  - tar xvf rocksdb-3.13.tar.gz && cd rocksdb-rocksdb-3.13 && make shared_lib
+  - sudo make install
+  - cd ..
+
+after_success:
+  - sudo apt-get install -y libcurl4-openssl-dev libelf-dev libdw-dev
+  - wget https://github.com/SimonKagstrom/kcov/archive/master.tar.gz
+  - tar xzf master.tar.gz && mkdir kcov-master/build && cd kcov-master/build && cmake .. && make && sudo make install && cd ../..
+  - kcov --coveralls-id=$TRAVIS_JOB_ID --exclude-pattern=/.cargo target/kcov target/debug/ethcore_util-*
diff --git a/Cargo.toml b/Cargo.toml
index 19c64113c..c4358b8c3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -15,3 +15,4 @@ mio = "0.*"
 rand = "0.*"
 tiny-keccak = "0.3"
 rocksdb = "0.2.1"
+num = "0.1"
diff --git a/src/chainfilter.rs b/src/chainfilter.rs
new file mode 100644
index 000000000..538d1883f
--- /dev/null
+++ b/src/chainfilter.rs
@@ -0,0 +1,445 @@
+//! Multilevel blockchain bloom filter.
+//!
+//! ```
+//! extern crate ethcore_util as util;
+//! use std::str::FromStr;
+//! use util::chainfilter::*;
+//! use util::sha3::*;
+//! use util::hash::*;
+//!
+//! fn main() {
+//!     let (index_size, bloom_levels) = (16, 3);
+//!     let mut cache = MemoryCache::new();
+//!
+//!     let address = Address::from_str("ef2d6d194084c2de36e0dabfce45d046b37d1106").unwrap();
+//!
+//!     // borrow the cache for reading inside this scope only
+//!     let modified_blooms = {
+//!         let filter = ChainFilter::new(&cache, index_size, bloom_levels);
+//!         let block_number = 39;
+//!         let mut bloom = H2048::new();
+//!         bloom.shift_bloom(&address.sha3());
+//!         filter.add_bloom(&bloom, block_number)
+//!     };
+//!
+//!     // the number of updated blooms equals the number of levels
+//!     assert_eq!(modified_blooms.len(), bloom_levels as usize);
+//!
+//!     // insert the modified blooms into the cache
+//!     cache.insert_blooms(modified_blooms);
+//!
+//!     // borrow the cache again for further read operations
+//!     {
+//!         let filter = ChainFilter::new(&cache, index_size, bloom_levels);
+//!         let blocks = filter.blocks_with_address(&address, 10, 40);
+//!         assert_eq!(blocks.len(), 1);
+//!         assert_eq!(blocks[0], 39);
+//!     }
+//! }
+//! ```
+//!
+use std::collections::{HashMap};
+use hash::*;
+use sha3::*;
+use num::pow;
+
+/// Represents the position of a bloom in the cache.
+///
+/// On cache level 0, every block bloom is represented by a different index.
+/// On higher cache levels, multiple block blooms are represented by one
+/// index. A `BloomIndex` can be derived from a block number and a level.
+#[derive(Eq, PartialEq, Hash, Clone, Debug)]
+pub struct BloomIndex {
+    pub level: u8,
+    pub index: usize,
+}
+
+impl BloomIndex {
+    /// Creates a `BloomIndex` from the given level and index.
+    pub fn new(level: u8, index: usize) -> BloomIndex {
+        BloomIndex {
+            level: level,
+            index: index,
+        }
+    }
+}
+
+/// Types implementing this trait should provide read access for bloom filters database.
+pub trait FilterDataSource {
+    /// returns a reference to the bloom stored at the given index, if it exists
+    fn bloom_at_index(&self, index: &BloomIndex) -> Option<&H2048>;
+}
+
+/// In memory cache for blooms.
+///
+/// Stores all blooms in HashMap, which indexes them by `BloomIndex`.
+pub struct MemoryCache {
+    blooms: HashMap<BloomIndex, H2048>,
+}
+
+impl MemoryCache {
+    /// Creates an empty `MemoryCache`.
+    pub fn new() -> MemoryCache {
+        MemoryCache { blooms: HashMap::new() }
+    }
+
+    /// inserts all blooms into cache
+    ///
+    /// if a bloom at a given index already exists, it is overwritten
+    pub fn insert_blooms(&mut self, blooms: HashMap<BloomIndex, H2048>) {
+        self.blooms.extend(blooms);
+    }
+}
+
+impl FilterDataSource for MemoryCache {
+    fn bloom_at_index(&self, index: &BloomIndex) -> Option<&H2048> {
+        self.blooms.get(index)
+    }
+}
+
+/// Should be used for search operations on blockchain.
+pub struct ChainFilter<'a, D>
+    where D: FilterDataSource + 'a
+{
+    data_source: &'a D,
+    index_size: usize,
+    level_sizes: Vec<usize>,
+}
+
+impl<'a, D> ChainFilter<'a, D> where D: FilterDataSource
+{
+    /// Creates new filter instance; panics if `levels` is 0.
+    ///
+    /// Borrows `FilterDataSource` for reading.
+    pub fn new(data_source: &'a D, index_size: usize, levels: u8) -> Self {
+        if levels == 0 {
+            panic!("ChainFilter requires at least 1 level");
+        }
+
+        let mut filter = ChainFilter {
+            data_source: data_source,
+            index_size: index_size,
+            level_sizes: vec![]
+        };
+
+        // cache level sizes (index_size^level), so we do not have to calculate them all the time
+        for i in 0..levels {
+            filter.level_sizes.push(pow(index_size, i as usize));
+        }
+
+        filter
+    }
+
+    /// returns the cached size of the given level; panics if the level is out of range
+    fn level_size(&self, level: u8) -> usize {
+        self.level_sizes[level as usize]
+    }
+
+    /// converts block number and level to `BloomIndex`
+    fn bloom_index(&self, block_number: usize, level: u8) -> BloomIndex {
+        BloomIndex {
+            level: level,
+            index: block_number / self.level_size(level),
+        }
+    }
+
+    /// returns indexes of the blooms one level below which the bloom at given index is built from
+    ///
+    /// bloom indexes are ordered from lowest to highest; level 0 has no dependencies
+    fn lower_level_bloom_indexes(&self, index: &BloomIndex) -> Vec<BloomIndex> {
+        // this is the lowest level
+        if index.level == 0 {
+            return vec![];
+        }
+
+        let new_level = index.level - 1;
+        let offset = self.index_size * index.index;
+
+        (0..self.index_size).map(|i| BloomIndex::new(new_level, offset + i)).collect()
+    }
+
+    /// returns number of levels
+    fn levels(&self) -> u8 {
+        self.level_sizes.len() as u8
+    }
+
+    /// returns max filter level (`new` guarantees at least one level, so this cannot underflow)
+    fn max_level(&self) -> u8 {
+        self.level_sizes.len() as u8 - 1
+    }
+
+    /// internal function which does bloom search recursively, starting at `offset` on `level`
+    fn blocks(&self, bloom: &H2048, from_block: usize, to_block: usize, level: u8, offset: usize) -> Option<Vec<usize>> {
+        let index = self.bloom_index(offset, level);
+
+        match self.data_source.bloom_at_index(&index) {
+            None => return None,
+            Some(level_bloom) => match level {
+                // if we are on the lowest level, report the block only when it is
+                // below to_block (exclusive bound) and its bloom contains the searched one
+                0 if offset < to_block && level_bloom.contains(bloom) => return Some(vec![offset]),
+                // otherwise (block is not below to_block or its bloom does not match) report nothing
+                0 => return None,
+                // return None if current level doesn't contain given bloom
+                _ if !level_bloom.contains(bloom) => return None,
+                // continue processing && go down
+                _ => ()
+            }
+        };
+
+        let level_size = self.level_size(level - 1);
+        let from_index = self.bloom_index(from_block, level - 1);
+        let to_index = self.bloom_index(to_block, level - 1);
+        let res: Vec<usize> = self.lower_level_bloom_indexes(&index).into_iter()
+            // choose only blooms in range
+            .filter(|li| li.index >= from_index.index && li.index <= to_index.index)
+            // map them to offsets
+            .map(|li| li.index * level_size)
+            // get all blocks that may contain our bloom
+            .map(|off| self.blocks(bloom, from_block, to_block, level - 1, off))
+            // filter existing ones
+            .filter_map(|x| x)
+            // flatten nested structures
+            .flat_map(|v| v)
+            .collect();
+        Some(res)
+    }
+
+    /// Adds new bloom to all filter levels and returns the modified blooms (data source is not changed)
+    pub fn add_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap<BloomIndex, H2048> {
+        let mut result: HashMap<BloomIndex, H2048> = HashMap::new();
+
+        for level in 0..self.levels() {
+            let bloom_index = self.bloom_index(block_number, level);
+            let new_bloom = match self.data_source.bloom_at_index(&bloom_index) {
+                Some(old_bloom) => old_bloom | bloom,
+                None => bloom.clone(),
+            };
+
+            result.insert(bloom_index, new_bloom);
+        }
+
+        result
+    }
+
+    /// Adds new blooms for consecutive blocks starting from block number; returns the modified blooms.
+    pub fn add_blooms(&self, blooms: &[H2048], block_number: usize) -> HashMap<BloomIndex, H2048> {
+        let mut result: HashMap<BloomIndex, H2048> = HashMap::new();
+
+        for level in 0..self.levels() {
+            for i in 0..blooms.len() {
+                let bloom_index = self.bloom_index(block_number + i, level);
+                let is_new_bloom = match result.get_mut(&bloom_index) {
+
+                    // it was already modified
+                    Some(to_shift) => {
+                        *to_shift = &blooms[i] | to_shift;
+                        false
+                    }
+                    None => true,
+                };
+
+                // it hasn't been modified yet
+                if is_new_bloom {
+                    let new_bloom = match self.data_source.bloom_at_index(&bloom_index) {
+                        Some(old_bloom) => old_bloom | &blooms[i],
+                        None => blooms[i].clone(),
+                    };
+                    result.insert(bloom_index, new_bloom);
+                }
+            }
+        }
+
+        result
+    }
+
+    /// Resets bloom at level 0 and forces rebuild on higher levels.
+    pub fn reset_bloom(&self, bloom: &H2048, block_number: usize) -> HashMap<BloomIndex, H2048> {
+        let mut result: HashMap<BloomIndex, H2048> = HashMap::new();
+
+        let mut reset_index = self.bloom_index(block_number, 0);
+        result.insert(reset_index.clone(), bloom.clone());
+
+        for level in 1..self.levels() {
+            let index = self.bloom_index(block_number, level);
+            // get all bloom indexes that were used to construct this bloom
+            let lower_indexes = self.lower_level_bloom_indexes(&index);
+            let new_bloom = lower_indexes.into_iter()
+                // skip the one rebuilt on the level below, its stored value is stale
+                .filter(|li| li != &reset_index)
+                // get blooms for these indexes
+                .map(|li| self.data_source.bloom_at_index(&li))
+                // filter existing ones
+                .filter_map(|b| b)
+                // BitOr all of them, seeded with the bloom just rebuilt one level below
+                .fold(result[&reset_index].clone(), |acc, bloom| &acc | bloom);
+
+            reset_index = index.clone();
+            result.insert(index, new_bloom);
+        }
+
+        result
+    }
+
+    /// Sets lowest level bloom to 0 and forces rebuild on higher levels.
+    pub fn clear_bloom(&self, block_number: usize) -> HashMap<BloomIndex, H2048> {
+        self.reset_bloom(&H2048::new(), block_number)
+    }
+
+    /// Returns numbers of blocks in `from_block..to_block` that may contain Address.
+    pub fn blocks_with_address(&self, address: &Address, from_block: usize, to_block: usize) -> Vec<usize> {
+        let mut bloom = H2048::new();
+        bloom.shift_bloom(&address.sha3());
+        self.blocks_with_bloom(&bloom, from_block, to_block)
+    }
+
+    /// Returns numbers of blocks in `from_block..to_block` that may contain Topic.
+    pub fn blocks_with_topic(&self, topic: &H256, from_block: usize, to_block: usize) -> Vec<usize> {
+        let mut bloom = H2048::new();
+        bloom.shift_bloom(&topic.sha3());
+        self.blocks_with_bloom(&bloom, from_block, to_block)
+    }
+
+    /// Returns numbers of blocks in `from_block..to_block` whose bloom may contain the given bloom.
+    pub fn blocks_with_bloom(&self, bloom: &H2048, from_block: usize, to_block: usize) -> Vec<usize> {
+        let mut result = vec![];
+        // start from the highest level
+        let max_level = self.max_level();
+        let level_size = self.level_size(max_level);
+        let from_index = self.bloom_index(from_block, max_level);
+        let to_index = self.bloom_index(to_block, max_level);
+
+        for index in from_index.index..to_index.index + 1 {
+            // offset is the first block number covered by this top-level index
+            let offset = level_size * index;
+
+            // descend recursively through the lower levels
+            match self.blocks(bloom, from_block, to_block, max_level, offset) {
+                Some(blocks) => result.extend(blocks),
+                None => ()
+            };
+        }
+
+        result
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use hash::*;
+    use chainfilter::*;
+    use sha3::*;
+    use std::str::FromStr;
+
+    #[test]
+    fn test_level_size() {
+        let cache = MemoryCache::new();
+        let filter = ChainFilter::new(&cache, 16, 3);
+        assert_eq!(filter.level_size(0), 1);
+        assert_eq!(filter.level_size(1), 16);
+        assert_eq!(filter.level_size(2), 256);
+    }
+
+    #[test]
+    fn test_bloom_index() {
+        let cache = MemoryCache::new();
+        let filter = ChainFilter::new(&cache, 16, 3);
+
+        let bi0 = filter.bloom_index(0, 0);
+        assert_eq!(bi0.level, 0);
+        assert_eq!(bi0.index, 0);
+
+        let bi1 = filter.bloom_index(1, 0);
+        assert_eq!(bi1.level, 0);
+        assert_eq!(bi1.index, 1);
+
+        let bi2 = filter.bloom_index(2, 0);
+        assert_eq!(bi2.level, 0);
+        assert_eq!(bi2.index, 2);
+
+        let bi3 = filter.bloom_index(3, 1);
+        assert_eq!(bi3.level, 1);
+        assert_eq!(bi3.index, 0);
+
+        let bi4 = filter.bloom_index(15, 1);
+        assert_eq!(bi4.level, 1);
+        assert_eq!(bi4.index, 0);
+
+        let bi5 = filter.bloom_index(16, 1);
+        assert_eq!(bi5.level, 1);
+        assert_eq!(bi5.index, 1);
+
+        let bi6 = filter.bloom_index(255, 2);
+        assert_eq!(bi6.level, 2);
+        assert_eq!(bi6.index, 0);
+
+        let bi7 = filter.bloom_index(256, 2);
+        assert_eq!(bi7.level, 2);
+        assert_eq!(bi7.index, 1);
+    }
+
+    #[test]
+    fn test_lower_level_bloom_indexes() {
+        let cache = MemoryCache::new();
+        let filter = ChainFilter::new(&cache, 16, 3);
+
+        let bi = filter.bloom_index(256, 2);
+        assert_eq!(bi.level, 2);
+        assert_eq!(bi.index, 1);
+
+        let mut ebis = vec![];
+        for i in 16..32 {
+            ebis.push(BloomIndex::new(1, i));
+        }
+
+        let bis = filter.lower_level_bloom_indexes(&bi);
+        assert_eq!(ebis, bis);
+    }
+
+    #[test]
+    fn test_topic_basic_search() {
+        let index_size = 16;
+        let bloom_levels = 3;
+
+        let mut cache = MemoryCache::new();
+        let topic = H256::from_str("8d936b1bd3fc635710969ccfba471fb17d598d9d1971b538dd712e1e4b4f4dba").unwrap();
+
+        let modified_blooms = {
+            let filter = ChainFilter::new(&cache, index_size, bloom_levels);
+            let block_number = 23;
+            let mut bloom = H2048::new();
+            bloom.shift_bloom(&topic.sha3());
+            filter.add_bloom(&bloom, block_number)
+        };
+
+        // the number of modified blooms should always equal the number of levels
+        assert_eq!(modified_blooms.len(), bloom_levels as usize);
+        cache.insert_blooms(modified_blooms);
+
+        {
+            let filter = ChainFilter::new(&cache, index_size, bloom_levels);
+            let blocks = filter.blocks_with_topic(&topic, 0, 100);
+            assert_eq!(blocks.len(), 1);
+            assert_eq!(blocks[0], 23);
+        }
+
+        {
+            let filter = ChainFilter::new(&cache, index_size, bloom_levels);
+            let blocks = filter.blocks_with_topic(&topic, 0, 23);
+            assert_eq!(blocks.len(), 0);
+        }
+
+        {
+            let filter = ChainFilter::new(&cache, index_size, bloom_levels);
+            let blocks = filter.blocks_with_topic(&topic, 23, 24);
+            assert_eq!(blocks.len(), 1);
+            assert_eq!(blocks[0], 23);
+        }
+
+        {
+            let filter = ChainFilter::new(&cache, index_size, bloom_levels);
+            let blocks = filter.blocks_with_topic(&topic, 24, 100);
+            assert_eq!(blocks.len(), 0);
+        }
+    }
+}
diff --git a/src/hash.rs b/src/hash.rs
index c03650be3..08166559a 100644
--- a/src/hash.rs
+++ b/src/hash.rs
@@ -19,6 +19,7 @@ pub trait FixedHash: Sized + BytesConvertable {
     fn shift_bloom<'a, T>(&'a mut self, b: &T) -> &'a mut Self where T: FixedHash;
     fn bloom_part<T>(&self, m: usize) -> T where T: FixedHash;
     fn contains_bloom<T>(&self, b: &T) -> bool where T: FixedHash;
+    fn contains<'a>(&'a self, b: &'a Self) -> bool;
 }
 
 macro_rules! impl_hash {
@@ -108,7 +109,11 @@ macro_rules! impl_hash {
 
             fn contains_bloom<T>(&self, b: &T) -> bool where T: FixedHash {
                 let bp: Self = b.bloom_part($size);
-                (&bp & self) == bp
+                self.contains(&bp)
+            }
+
+            fn contains<'a>(&'a self, b: &'a Self) -> bool {
+                &(b & self) == b
             }
         }
 
diff --git a/src/lib.rs b/src/lib.rs
index e918c7ca9..aec47b96f 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -7,8 +7,11 @@ extern crate mio;
 extern crate rand;
 extern crate rocksdb;
 extern crate tiny_keccak;
+extern crate num;
 #[macro_use]
 extern crate log;
+#[macro_use]
+pub mod macros;
 
 pub mod error;
 pub mod hash;
@@ -22,6 +25,8 @@ pub mod hashdb;
 pub mod memorydb;
 pub mod overlaydb;
 pub mod math;
+//pub mod filter;
+pub mod chainfilter;
 
 //pub mod network;
 
diff --git a/src/macros.rs b/src/macros.rs
new file mode 100644
index 000000000..69286a340
--- /dev/null
+++ b/src/macros.rs
@@ -0,0 +1,11 @@
+macro_rules! map(
+    { $($key:expr => $value:expr),+ } => {
+        {
+            let mut m = ::std::collections::HashMap::new();
+            $(
+                m.insert($key, $value);
+            )+
+            m
+        }
+    };
+);
diff --git a/src/math.rs b/src/math.rs
index ba039f3d0..c85c4653e 100644
--- a/src/math.rs
+++ b/src/math.rs
@@ -7,4 +7,3 @@ pub fn log2(x: usize) -> u32 {
     let n = x.leading_zeros();
     ::std::mem::size_of::<usize>() as u32 * 8 - n
 }
-
diff --git a/src/rlp.rs b/src/rlp.rs
index 53860579b..66da7a91f 100644
--- a/src/rlp.rs
+++ b/src/rlp.rs
@@ -151,6 +151,54 @@ impl From for DecoderError {
     }
 }
 
+/// Unsafe wrapper for rlp decoder.
+///
+/// It assumes that you know what you are doing. Doesn't bother
+/// you with error handling.
+pub struct UntrustedRlp<'a> {
+    rlp: Rlp<'a>
+}
+
+impl<'a> From<Rlp<'a>> for UntrustedRlp<'a> {
+    fn from(rlp: Rlp<'a>) -> UntrustedRlp<'a> {
+        UntrustedRlp { rlp: rlp }
+    }
+}
+
+impl<'a> From<UntrustedRlp<'a>> for Rlp<'a> {
+    fn from(unsafe_rlp: UntrustedRlp<'a>) -> Rlp<'a> {
+        unsafe_rlp.rlp
+    }
+}
+
+impl<'a> UntrustedRlp<'a> {
+    /// returns new instance of `UntrustedRlp` wrapping an `Rlp` over the given bytes
+    pub fn new(bytes: &'a [u8]) -> UntrustedRlp<'a> {
+        UntrustedRlp {
+            rlp: Rlp::new(bytes)
+        }
+    }
+
+    pub fn at(&self, index: usize) -> UntrustedRlp<'a> {
+        From::from(self.rlp.at(index).unwrap()) // deliberately panics if no item can be read at `index`
+    }
+
+    /// returns true if rlp is a list
+    pub fn is_list(&self) -> bool {
+        self.rlp.is_list()
+    }
+
+    /// returns true if rlp is a value
+    pub fn is_value(&self) -> bool {
+        self.rlp.is_value()
+    }
+
+    /// returns an iterator obtained from the wrapped `Rlp`
+    pub fn iter(&'a self) -> RlpIterator<'a> {
+        self.rlp.into_iter()
+    }
+}
+
 impl<'a> Rlp<'a> {
     /// returns new instance of `Rlp`
     pub fn new(bytes: &'a [u8]) -> Rlp<'a> {