new blooms database (#8712)

* new blooms database

* fixed conflict in Cargo.lock

* removed bloomchain

* cleanup in progress

* all tests passing in trace db with new blooms-db

* added trace_blooms to BlockChainDB interface, fixed db flushing

* BlockChainDB no longer exposes RwLock in the interface

* automatically flush blooms-db after every insert

* blooms-db uses io::BufReader to read files, wrap blooms-db into Mutex, cause fs::File is just a shared file handle

* fix json_tests

* blooms-db can filter multiple possibilities at the same time

* removed enum trace/db.rs CacheId

* lint fixes

* fixed tests

* kvdb-rocksdb uses fs-swap crate

* update Cargo.lock

* use fs::rename

* fixed failing test on linux

* fix tests

* use fs_swap

* fixed failing test on linux

* cleanup after swap

* fix tests

* fixed osx permissions

* simplify parity database opening functions

* added migration to blooms-db

* address @niklasad1 grumbles

* fix license and authors field of blooms-db Cargo.toml

* restore blooms-db after snapshot
This commit is contained in:
Marek Kotewicz
2018-06-20 15:13:07 +02:00
committed by Afri Schoedon
parent cf5ae81ced
commit 458afcd230
81 changed files with 1222 additions and 2887 deletions

View File

@@ -15,7 +15,6 @@
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
//! Traces config.
use bloomchain::Config as BloomConfig;
/// Traces config.
#[derive(Debug, PartialEq, Clone)]
@@ -23,8 +22,6 @@ pub struct Config {
/// Indicates if tracing should be enabled or not.
/// If it's None, it will be automatically configured.
pub enabled: bool,
/// Traces blooms configuration.
pub blooms: BloomConfig,
/// Preferef cache-size.
pub pref_cache_size: usize,
/// Max cache-size.
@@ -35,10 +32,6 @@ impl Default for Config {
fn default() -> Self {
Config {
enabled: false,
blooms: BloomConfig {
levels: 3,
elements_per_index: 16,
},
pref_cache_size: 15 * 1024 * 1024,
max_cache_size: 20 * 1024 * 1024,
}

View File

@@ -15,19 +15,16 @@
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
//! Trace database.
use std::ops::Deref;
use std::collections::{HashMap, VecDeque};
use std::sync::Arc;
use bloomchain::{Number, Config as BloomConfig};
use bloomchain::group::{BloomGroupDatabase, BloomGroupChain, GroupPosition, BloomGroup};
use blockchain::{BlockChainDB};
use heapsize::HeapSizeOf;
use ethereum_types::{H256, H264};
use kvdb::{KeyValueDB, DBTransaction};
use kvdb::{DBTransaction};
use parking_lot::RwLock;
use header::BlockNumber;
use trace::{LocalizedTrace, Config, Filter, Database as TraceDatabase, ImportRequest, DatabaseExtras};
use db::{self, Key, Writable, Readable, CacheUpdatePolicy};
use blooms;
use super::flat::{FlatTrace, FlatBlockTraces, FlatTransactionTraces};
use cache_manager::CacheManager;
@@ -37,8 +34,6 @@ const TRACE_DB_VER: &'static [u8] = b"1.0";
enum TraceDBIndex {
/// Block traces index.
BlockTraces = 0,
/// Trace bloom group index.
BloomGroups = 1,
}
impl Key<FlatBlockTraces> for H256 {
@@ -52,55 +47,6 @@ impl Key<FlatBlockTraces> for H256 {
}
}
/// Wrapper around `blooms::GroupPosition` so it could be
/// uniquely identified in the database.
#[derive(Debug, PartialEq, Eq, Hash, Clone)]
struct TraceGroupPosition(blooms::GroupPosition);
impl From<GroupPosition> for TraceGroupPosition {
fn from(position: GroupPosition) -> Self {
TraceGroupPosition(From::from(position))
}
}
impl HeapSizeOf for TraceGroupPosition {
fn heap_size_of_children(&self) -> usize {
0
}
}
/// Helper data structure created cause [u8; 6] does not implement Deref to &[u8].
pub struct TraceGroupKey([u8; 6]);
impl Deref for TraceGroupKey {
type Target = [u8];
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl Key<blooms::BloomGroup> for TraceGroupPosition {
type Target = TraceGroupKey;
fn key(&self) -> Self::Target {
let mut result = [0u8; 6];
result[0] = TraceDBIndex::BloomGroups as u8;
result[1] = self.0.level;
result[2] = self.0.index as u8;
result[3] = (self.0.index >> 8) as u8;
result[4] = (self.0.index >> 16) as u8;
result[5] = (self.0.index >> 24) as u8;
TraceGroupKey(result)
}
}
#[derive(Debug, Hash, Eq, PartialEq)]
enum CacheId {
Trace(H256),
Bloom(TraceGroupPosition),
}
/// Database to store transaction execution trace.
///
/// Whenever a transaction is executed by EVM it's execution trace is stored
@@ -108,60 +54,45 @@ enum CacheId {
/// touched, which have been created during the execution of transaction, and
/// which calls failed.
pub struct TraceDB<T> where T: DatabaseExtras {
// cache
/// cache
traces: RwLock<HashMap<H256, FlatBlockTraces>>,
blooms: RwLock<HashMap<TraceGroupPosition, blooms::BloomGroup>>,
cache_manager: RwLock<CacheManager<CacheId>>,
// db
tracesdb: Arc<KeyValueDB>,
// config,
bloom_config: BloomConfig,
// tracing enabled
/// hashes of cached traces
cache_manager: RwLock<CacheManager<H256>>,
/// db
db: Arc<BlockChainDB>,
/// tracing enabled
enabled: bool,
// extras
/// extras
extras: Arc<T>,
}
impl<T> BloomGroupDatabase for TraceDB<T> where T: DatabaseExtras {
fn blooms_at(&self, position: &GroupPosition) -> Option<BloomGroup> {
let position = TraceGroupPosition::from(position.clone());
let result = self.tracesdb.read_with_cache(db::COL_TRACE, &self.blooms, &position).map(Into::into);
self.note_used(CacheId::Bloom(position));
result
}
}
impl<T> TraceDB<T> where T: DatabaseExtras {
/// Creates new instance of `TraceDB`.
pub fn new(config: Config, tracesdb: Arc<KeyValueDB>, extras: Arc<T>) -> Self {
pub fn new(config: Config, db: Arc<BlockChainDB>, extras: Arc<T>) -> Self {
let mut batch = DBTransaction::new();
let genesis = extras.block_hash(0)
.expect("Genesis block is always inserted upon extras db creation qed");
batch.write(db::COL_TRACE, &genesis, &FlatBlockTraces::default());
batch.put(db::COL_TRACE, b"version", TRACE_DB_VER);
tracesdb.write(batch).expect("failed to update version");
db.key_value().write(batch).expect("failed to update version");
TraceDB {
traces: RwLock::new(HashMap::new()),
blooms: RwLock::new(HashMap::new()),
cache_manager: RwLock::new(CacheManager::new(config.pref_cache_size, config.max_cache_size, 10 * 1024)),
tracesdb: tracesdb,
bloom_config: config.blooms,
db,
enabled: config.enabled,
extras: extras,
}
}
fn cache_size(&self) -> usize {
let traces = self.traces.read().heap_size_of_children();
let blooms = self.blooms.read().heap_size_of_children();
traces + blooms
self.traces.read().heap_size_of_children()
}
/// Let the cache system know that a cacheable item has been used.
fn note_used(&self, id: CacheId) {
fn note_trace_used(&self, trace_id: H256) {
let mut cache_manager = self.cache_manager.write();
cache_manager.note_used(id);
cache_manager.note_used(trace_id);
}
/// Ticks our cache system and throws out any old data.
@@ -169,27 +100,22 @@ impl<T> TraceDB<T> where T: DatabaseExtras {
let current_size = self.cache_size();
let mut traces = self.traces.write();
let mut blooms = self.blooms.write();
let mut cache_manager = self.cache_manager.write();
cache_manager.collect_garbage(current_size, | ids | {
for id in &ids {
match *id {
CacheId::Trace(ref h) => { traces.remove(h); },
CacheId::Bloom(ref h) => { blooms.remove(h); },
}
traces.remove(id);
}
traces.shrink_to_fit();
blooms.shrink_to_fit();
traces.heap_size_of_children() + blooms.heap_size_of_children()
traces.heap_size_of_children()
});
}
/// Returns traces for block with hash.
fn traces(&self, block_hash: &H256) -> Option<FlatBlockTraces> {
let result = self.tracesdb.read_with_cache(db::COL_TRACE, &self.traces, block_hash);
self.note_used(CacheId::Trace(block_hash.clone()));
let result = self.db.key_value().read_with_cache(db::COL_TRACE, &self.traces, block_hash);
self.note_trace_used(*block_hash);
result
}
@@ -271,10 +197,8 @@ impl<T> TraceDatabase for TraceDB<T> where T: DatabaseExtras {
// now let's rebuild the blooms
if !request.enacted.is_empty() {
let range_start = request.block_number as Number + 1 - request.enacted.len();
let range_end = range_start + request.retracted;
let replaced_range = range_start..range_end;
let enacted_blooms = request.enacted
let range_start = request.block_number + 1 - request.enacted.len() as u64;
let enacted_blooms: Vec<_> = request.enacted
.iter()
// all traces are expected to be found here. That's why `expect` has been used
// instead of `filter_map`. If some traces haven't been found, it meens that
@@ -286,19 +210,9 @@ impl<T> TraceDatabase for TraceDB<T> where T: DatabaseExtras {
})
.collect();
let chain = BloomGroupChain::new(self.bloom_config, self);
let trace_blooms = chain.replace(&replaced_range, enacted_blooms);
let blooms_to_insert = trace_blooms.into_iter()
.map(|p| (From::from(p.0), From::from(p.1)))
.collect::<HashMap<TraceGroupPosition, blooms::BloomGroup>>();
let blooms_keys: Vec<_> = blooms_to_insert.keys().cloned().collect();
let mut blooms = self.blooms.write();
batch.extend_with_cache(db::COL_TRACE, &mut *blooms, blooms_to_insert, CacheUpdatePolicy::Remove);
// note_used must be called after locking blooms to avoid cache/traces deadlock on garbage collection
for key in blooms_keys {
self.note_used(CacheId::Bloom(key));
}
self.db.trace_blooms()
.insert_blooms(range_start, enacted_blooms.iter())
.expect("Low level database error. Some issue with disk?");
}
// insert new block traces into the cache and the database
@@ -308,7 +222,7 @@ impl<T> TraceDatabase for TraceDB<T> where T: DatabaseExtras {
// cause this value might be queried by hash later
batch.write_with_cache(db::COL_TRACE, &mut *traces, request.block_hash, request.traces, CacheUpdatePolicy::Overwrite);
// note_used must be called after locking traces to avoid cache/traces deadlock on garbage collection
self.note_used(CacheId::Trace(request.block_hash.clone()));
self.note_trace_used(request.block_hash);
}
}
@@ -396,8 +310,11 @@ impl<T> TraceDatabase for TraceDB<T> where T: DatabaseExtras {
}
fn filter(&self, filter: &Filter) -> Vec<LocalizedTrace> {
let chain = BloomGroupChain::new(self.bloom_config, self);
let numbers = chain.filter(filter);
let possibilities = filter.bloom_possibilities();
let numbers = self.db.trace_blooms()
.filter(filter.range.start as u64, filter.range.end as u64, &possibilities)
.expect("Low level database error. Some issue with disk?");
numbers.into_iter()
.flat_map(|n| {
let number = n as BlockNumber;
@@ -416,14 +333,14 @@ mod tests {
use std::collections::HashMap;
use std::sync::Arc;
use ethereum_types::{H256, U256, Address};
use kvdb::{DBTransaction, KeyValueDB};
use kvdb_memorydb;
use kvdb::{DBTransaction};
use header::BlockNumber;
use trace::{Config, TraceDB, Database as TraceDatabase, DatabaseExtras, ImportRequest};
use trace::{Filter, LocalizedTrace, AddressesFilter, TraceError};
use trace::trace::{Call, Action, Res};
use trace::flat::{FlatTrace, FlatBlockTraces, FlatTransactionTraces};
use evm::CallType;
use test_helpers::new_db;
struct NoopExtras;
@@ -467,10 +384,6 @@ mod tests {
}
}
fn new_db() -> Arc<KeyValueDB> {
Arc::new(kvdb_memorydb::create(::db::NUM_COLUMNS.unwrap_or(0)))
}
#[test]
fn test_reopening_db_with_tracing_off() {
let db = new_db();
@@ -585,7 +498,7 @@ mod tests {
let request = create_noncanon_import_request(0, block_0.clone());
let mut batch = DBTransaction::new();
tracedb.import(&mut batch, request);
db.write(batch).unwrap();
db.key_value().write(batch).unwrap();
assert!(tracedb.traces(&block_0).is_some(), "Traces should be available even if block is non-canon.");
}
@@ -614,7 +527,7 @@ mod tests {
let request = create_simple_import_request(1, block_1.clone());
let mut batch = DBTransaction::new();
tracedb.import(&mut batch, request);
db.write(batch).unwrap();
db.key_value().write(batch).unwrap();
let filter = Filter {
range: (1..1),
@@ -630,7 +543,7 @@ mod tests {
let request = create_simple_import_request(2, block_2.clone());
let mut batch = DBTransaction::new();
tracedb.import(&mut batch, request);
db.write(batch).unwrap();
db.key_value().write(batch).unwrap();
let filter = Filter {
range: (1..2),
@@ -692,7 +605,7 @@ mod tests {
let request = create_simple_import_request(1, block_0.clone());
let mut batch = DBTransaction::new();
tracedb.import(&mut batch, request);
db.write(batch).unwrap();
db.key_value().write(batch).unwrap();
}
{

View File

@@ -17,7 +17,6 @@
//! Trace filters type definitions
use std::ops::Range;
use bloomchain::{Filter as BloomFilter, Number};
use ethereum_types::{Address, Bloom, BloomInput};
use trace::flat::FlatTrace;
use super::trace::{Action, Res};
@@ -88,19 +87,9 @@ pub struct Filter {
pub to_address: AddressesFilter,
}
impl BloomFilter for Filter {
fn bloom_possibilities(&self) -> Vec<Bloom> {
self.bloom_possibilities()
}
fn range(&self) -> Range<Number> {
self.range.clone()
}
}
impl Filter {
/// Returns combinations of each address.
fn bloom_possibilities(&self) -> Vec<Bloom> {
pub fn bloom_possibilities(&self) -> Vec<Bloom> {
self.to_address.with_blooms(self.from_address.blooms())
}