Accounts bloom (#2357)

* proper bloom

* incremental bloom updates

* crate update

* return of the column

* fix n^2 byteorder write

* add notes to funs

* working bloom commits

* Optimizations

* bloom diag

* migration basic

* migration ongoing

* migration finalizing

* mingration api workarounds

* fix test_client setups

* snapshot bloom update

* review fixes

* just forward keys in the migration

* migration extra tracing

* fix migration path

* remove close pray

* review issues
This commit is contained in:
Nikolay Volf 2016-09-29 13:27:41 +04:00 committed by Arkadiy Paronyan
parent 2d623f14db
commit 4d115987cb
15 changed files with 294 additions and 19 deletions

16
Cargo.lock generated
View File

@ -112,6 +112,20 @@ name = "bloomchain"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bloomfilter"
version = "0.0.10"
source = "git+https://github.com/ethcore/rust-bloom-filter#e66a3f20443bc78810877390ad4da00ebdf74d78"
dependencies = [
"bit-vec 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.3.14 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "byteorder"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "bytes"
version = "0.3.0"
@ -244,6 +258,8 @@ version = "1.3.0"
dependencies = [
"bit-set 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bloomchain 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"bloomfilter 0.0.10 (git+https://github.com/ethcore/rust-bloom-filter)",
"byteorder 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)",
"clippy 0.0.80 (registry+https://github.com/rust-lang/crates.io-index)",
"crossbeam 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
"env_logger 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",

View File

@ -36,6 +36,8 @@ ethstore = { path = "../ethstore" }
ethcore-ipc-nano = { path = "../ipc/nano" }
rand = "0.3"
lru-cache = "0.0.7"
bloomfilter = { git = "https://github.com/ethcore/rust-bloom-filter" }
byteorder = "0.5"
[dependencies.hyper]
git = "https://github.com/ethcore/hyper"

View File

@ -154,8 +154,10 @@ pub const DB_COL_BODIES: Option<u32> = Some(2);
pub const DB_COL_EXTRA: Option<u32> = Some(3);
/// Column for Traces
pub const DB_COL_TRACE: Option<u32> = Some(4);
/// Column for Traces
pub const DB_COL_ACCOUNT_BLOOM: Option<u32> = Some(5);
/// Number of columns in DB
pub const DB_NO_OF_COLUMNS: Option<u32> = Some(5);
pub const DB_NO_OF_COLUMNS: Option<u32> = Some(6);
/// Append a path element to the given path and return the string.
pub fn append_path<P>(path: P, item: &str) -> String where P: AsRef<Path> {

View File

@ -23,7 +23,8 @@ use transaction::{Transaction, LocalizedTransaction, SignedTransaction, Action};
use blockchain::TreeRoute;
use client::{
BlockChainClient, MiningBlockChainClient, BlockChainInfo, BlockStatus, BlockID,
TransactionID, UncleID, TraceId, TraceFilter, LastHashes, CallAnalytics, BlockImportError
TransactionID, UncleID, TraceId, TraceFilter, LastHashes, CallAnalytics, BlockImportError,
DB_NO_OF_COLUMNS, DB_COL_STATE,
};
use header::{Header as BlockHeader, BlockNumber};
use filter::Filter;
@ -250,8 +251,8 @@ impl TestBlockChainClient {
pub fn get_temp_state_db() -> GuardedTempResult<StateDB> {
let temp = RandomTempPath::new();
let db = Database::open_default(temp.as_str()).unwrap();
let journal_db = journaldb::new(Arc::new(db), journaldb::Algorithm::EarlyMerge, None);
let db = Database::open(&DatabaseConfig::with_columns(DB_NO_OF_COLUMNS), temp.as_str()).unwrap();
let journal_db = journaldb::new(Arc::new(db), journaldb::Algorithm::EarlyMerge, DB_COL_STATE);
let state_db = StateDB::new(journal_db);
GuardedTempResult {
_temp: temp,

View File

@ -101,6 +101,8 @@ extern crate ethcore_devtools as devtools;
extern crate rand;
extern crate bit_set;
extern crate lru_cache;
extern crate bloomfilter;
extern crate byteorder;
#[cfg(feature = "jit" )]
extern crate evmjit;

View File

@ -0,0 +1,104 @@
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
//! Bloom upgrade
use client::{DB_COL_EXTRA, DB_COL_HEADERS, DB_NO_OF_COLUMNS, DB_COL_STATE, DB_COL_ACCOUNT_BLOOM};
use state_db::{ACCOUNT_BLOOM_SPACE, DEFAULT_ACCOUNT_PRESET, StateDB, ACCOUNT_BLOOM_HASHCOUNT_KEY};
use util::trie::TrieDB;
use views::HeaderView;
use bloomfilter::Bloom;
use util::migration::Error;
use util::journaldb;
use util::{H256, FixedHash, BytesConvertable};
use util::{Database, DatabaseConfig, DBTransaction, CompactionProfile};
use std::path::Path;
fn check_bloom_exists(db: &Database) -> bool {
let hash_count_entry = db.get(DB_COL_ACCOUNT_BLOOM, ACCOUNT_BLOOM_HASHCOUNT_KEY)
.expect("Low-level database error");
hash_count_entry.is_some()
}
/// Account bloom upgrade routine. If bloom already present, does nothing.
/// If database empty (no best block), does nothing.
/// Can be called on upgraded database with no issues (will do nothing).
pub fn upgrade_account_bloom(db_path: &Path) -> Result<(), Error> {
let path = try!(db_path.to_str().ok_or(Error::MigrationImpossible));
trace!(target: "migration", "Account bloom upgrade at {:?}", db_path);
let source = try!(Database::open(&DatabaseConfig {
max_open_files: 64,
cache_size: None,
compaction: CompactionProfile::default(),
columns: DB_NO_OF_COLUMNS,
wal: true,
}, path));
let best_block_hash = match try!(source.get(DB_COL_EXTRA, b"best")) {
// no migration needed
None => {
trace!(target: "migration", "No best block hash, skipping");
return Ok(());
},
Some(hash) => hash,
};
let best_block_header = match try!(source.get(DB_COL_HEADERS, &best_block_hash)) {
// no best block, nothing to do
None => {
trace!(target: "migration", "No best block header, skipping");
return Ok(())
},
Some(x) => x,
};
let state_root = HeaderView::new(&best_block_header).state_root();
if check_bloom_exists(&source) {
// bloom already exists, nothing to do
trace!(target: "migration", "Bloom already present, skipping");
return Ok(())
}
println!("Adding accounts bloom (one-time upgrade)");
let db = ::std::sync::Arc::new(source);
let bloom_journal = {
let mut bloom = Bloom::new(ACCOUNT_BLOOM_SPACE, DEFAULT_ACCOUNT_PRESET);
// no difference what algorithm is passed, since there will be no writes
let state_db = journaldb::new(
db.clone(),
journaldb::Algorithm::OverlayRecent,
DB_COL_STATE);
let account_trie = try!(TrieDB::new(state_db.as_hashdb(), &state_root).map_err(|e| Error::Custom(format!("Cannot open trie: {:?}", e))));
for (ref account_key, _) in account_trie.iter() {
let account_key_hash = H256::from_slice(&account_key);
bloom.set(account_key_hash.as_slice());
}
bloom.drain_journal()
};
trace!(target: "migration", "Generated {} bloom updates", bloom_journal.entries.len());
let batch = DBTransaction::new(&db);
try!(StateDB::commit_bloom(&batch, bloom_journal).map_err(|_| Error::Custom("Failed to commit bloom".to_owned())));
try!(db.write(batch));
trace!(target: "migration", "Finished bloom update");
Ok(())
}

View File

@ -23,3 +23,9 @@ pub mod extras;
mod v9;
pub use self::v9::ToV9;
pub use self::v9::Extract;
mod account_bloom;
pub use self::account_bloom::upgrade_account_bloom;
mod v10;
pub use self::v10::ToV10;

View File

@ -0,0 +1,35 @@
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
//! This migration compresses the state db.
use util::migration::SimpleMigration;
/// Compressing migration.
#[derive(Default)]
pub struct ToV10;
impl SimpleMigration for ToV10 {
fn version(&self) -> u32 {
10
}
fn columns(&self) -> Option<u32> { Some(6) }
fn simple_migrate(&mut self, key: Vec<u8>, value: Vec<u8>) -> Option<(Vec<u8>, Vec<u8>)> {
Some((key, value))
}
}

View File

@ -25,8 +25,9 @@ use blockchain::{BlockChain, BlockProvider};
use engines::Engine;
use ids::BlockID;
use views::BlockView;
use super::state_db::StateDB;
use util::{Bytes, Hashable, HashDB, snappy, TrieDB, TrieDBMut, TrieMut};
use util::{Bytes, Hashable, HashDB, snappy, TrieDB, TrieDBMut, TrieMut, BytesConvertable};
use util::Mutex;
use util::hash::{FixedHash, H256};
use util::journaldb::{self, Algorithm, JournalDB};
@ -453,7 +454,7 @@ impl StateRebuilder {
Ok::<_, ::error::Error>(())
}));
let mut bloom = StateDB::load_bloom(&backing);
// batch trie writes
{
let mut account_trie = if self.state_root != SHA3_NULL_RLP {
@ -463,11 +464,14 @@ impl StateRebuilder {
};
for (hash, thin_rlp) in pairs {
bloom.set(hash.as_slice());
try!(account_trie.insert(&hash, &thin_rlp));
}
}
let bloom_journal = bloom.drain_journal();
let batch = backing.transaction();
try!(StateDB::commit_bloom(&batch, bloom_journal));
try!(self.db.inject(&batch));
try!(backing.write(batch).map_err(::util::UtilError::SimpleString));
trace!(target: "snapshot", "current state root: {:?}", self.state_root);

View File

@ -240,6 +240,7 @@ impl Spec {
}
}
for (address, account) in self.genesis_state.get().iter() {
db.note_account_bloom(address);
account.insert_additional(&mut AccountDBMut::new(db.as_hashdb_mut(), address));
}
assert!(db.as_hashdb().contains(&self.state_root()));

View File

@ -259,9 +259,12 @@ impl State {
/// Mutate storage of account `address` so that it is `value` for `key`.
pub fn storage_at(&self, address: &Address, key: &H256) -> H256 {
// Storage key search and update works like this:
// 1. If there's an entry for the account in the local cache check for the key and return it if found.
// 2. If there's an entry for the account in the global cache check for the key or load it into that account.
// 3. If account is missing in the global cache load it into the local cache and cache the key there.
// 1. Check bloom to see if account never used surely
// 2. If there's an entry for the account in the local cache check for the key and return it if found.
// 3. If there's an entry for the account in the global cache check for the key or load it into that account.
// 4. If account is missing in the global cache load it into the local cache and cache the key there.
// check bloom
// check local cache first without updating
{
@ -293,6 +296,7 @@ impl State {
}
}
// account is not found in the global cache, get from the DB and insert into local
if !self.db.check_account_bloom(address) { return H256::zero() }
let db = self.trie_factory.readonly(self.db.as_hashdb(), &self.root).expect(SEC_TRIE_DB_UNWRAP_STR);
let maybe_acc = match db.get(address) {
Ok(acc) => acc.map(Account::from_rlp),
@ -387,6 +391,7 @@ impl State {
for (address, ref mut a) in accounts.iter_mut() {
match a {
&mut&mut AccountEntry::Cached(ref mut account) if account.is_dirty() => {
db.note_account_bloom(&address);
let mut account_db = AccountDBMut::from_hash(db.as_hashdb_mut(), account.address_hash(address));
account.commit_storage(trie_factory, &mut account_db);
account.commit_code(&mut account_db);
@ -449,6 +454,7 @@ impl State {
pub fn populate_from(&mut self, accounts: PodState) {
assert!(self.snapshots.borrow().is_empty());
for (add, acc) in accounts.drain().into_iter() {
self.db.note_account_bloom(&add);
self.cache.borrow_mut().insert(add, AccountEntry::Cached(Account::from_pod(acc)));
}
}
@ -525,6 +531,7 @@ impl State {
Some(r) => r,
None => {
// not found in the global cache, get from the DB and insert into local
if !self.db.check_account_bloom(a) { return f(None); }
let db = self.trie_factory.readonly(self.db.as_hashdb(), &self.root).expect(SEC_TRIE_DB_UNWRAP_STR);
let mut maybe_acc = match db.get(a) {
Ok(acc) => acc.map(Account::from_rlp),
@ -559,11 +566,17 @@ impl State {
Some(Some(acc)) => self.insert_cache(a, AccountEntry::Cached(acc)),
Some(None) => self.insert_cache(a, AccountEntry::Missing),
None => {
let db = self.trie_factory.readonly(self.db.as_hashdb(), &self.root).expect(SEC_TRIE_DB_UNWRAP_STR);
let maybe_acc = match db.get(a) {
Ok(Some(acc)) => AccountEntry::Cached(Account::from_rlp(acc)),
Ok(None) => AccountEntry::Missing,
Err(e) => panic!("Potential DB corruption encountered: {}", e),
let maybe_acc = if self.db.check_account_bloom(a) {
let db = self.trie_factory.readonly(self.db.as_hashdb(), &self.root).expect(SEC_TRIE_DB_UNWRAP_STR);
let maybe_acc = match db.get(a) {
Ok(Some(acc)) => AccountEntry::Cached(Account::from_rlp(acc)),
Ok(None) => AccountEntry::Missing,
Err(e) => panic!("Potential DB corruption encountered: {}", e),
};
maybe_acc
}
else {
AccountEntry::Missing
};
self.insert_cache(a, maybe_acc);
}

View File

@ -18,8 +18,12 @@ use lru_cache::LruCache;
use util::journaldb::JournalDB;
use util::hash::{H256};
use util::hashdb::HashDB;
use util::{Arc, Address, DBTransaction, UtilError, Mutex};
use util::{Arc, Address, DBTransaction, UtilError, Mutex, Hashable, BytesConvertable};
use account::Account;
use bloomfilter::{Bloom, BloomJournal};
use util::Database;
use client::DB_COL_ACCOUNT_BLOOM;
use byteorder::{LittleEndian, ByteOrder};
const STATE_CACHE_ITEMS: usize = 65536;
@ -39,22 +43,86 @@ pub struct StateDB {
account_cache: Arc<Mutex<AccountCache>>,
cache_overlay: Vec<(Address, Option<Account>)>,
is_canon: bool,
account_bloom: Arc<Mutex<Bloom>>,
}
pub const ACCOUNT_BLOOM_SPACE: usize = 1048576;
pub const DEFAULT_ACCOUNT_PRESET: usize = 1000000;
pub const ACCOUNT_BLOOM_HASHCOUNT_KEY: &'static [u8] = b"account_hash_count";
impl StateDB {
pub fn load_bloom(db: &Database) -> Bloom {
let hash_count_entry = db.get(DB_COL_ACCOUNT_BLOOM, ACCOUNT_BLOOM_HASHCOUNT_KEY)
.expect("Low-level database error");
if hash_count_entry.is_none() {
return Bloom::new(ACCOUNT_BLOOM_SPACE, DEFAULT_ACCOUNT_PRESET);
}
let hash_count_bytes = hash_count_entry.unwrap();
assert_eq!(hash_count_bytes.len(), 1);
let hash_count = hash_count_bytes[0];
let mut bloom_parts = vec![0u64; ACCOUNT_BLOOM_SPACE / 8];
let mut key = [0u8; 8];
for i in 0..ACCOUNT_BLOOM_SPACE / 8 {
LittleEndian::write_u64(&mut key, i as u64);
bloom_parts[i] = db.get(DB_COL_ACCOUNT_BLOOM, &key).expect("low-level database error")
.and_then(|val| Some(LittleEndian::read_u64(&val[..])))
.unwrap_or(0u64);
}
let bloom = Bloom::from_parts(&bloom_parts, hash_count as u32);
trace!(target: "account_bloom", "Bloom is {:?} full, hash functions count = {:?}", bloom.how_full(), hash_count);
bloom
}
/// Create a new instance wrapping `JournalDB`
pub fn new(db: Box<JournalDB>) -> StateDB {
let bloom = Self::load_bloom(db.backing());
StateDB {
db: db,
account_cache: Arc::new(Mutex::new(AccountCache { accounts: LruCache::new(STATE_CACHE_ITEMS) })),
cache_overlay: Vec::new(),
is_canon: false,
account_bloom: Arc::new(Mutex::new(bloom)),
}
}
pub fn check_account_bloom(&self, address: &Address) -> bool {
trace!(target: "account_bloom", "Check account bloom: {:?}", address);
let bloom = self.account_bloom.lock();
bloom.check(address.sha3().as_slice())
}
pub fn note_account_bloom(&self, address: &Address) {
trace!(target: "account_bloom", "Note account bloom: {:?}", address);
let mut bloom = self.account_bloom.lock();
bloom.set(address.sha3().as_slice());
}
pub fn commit_bloom(batch: &DBTransaction, journal: BloomJournal) -> Result<(), UtilError> {
assert!(journal.hash_functions <= 255);
try!(batch.put(DB_COL_ACCOUNT_BLOOM, ACCOUNT_BLOOM_HASHCOUNT_KEY, &vec![journal.hash_functions as u8]));
let mut key = [0u8; 8];
let mut val = [0u8; 8];
for (bloom_part_index, bloom_part_value) in journal.entries {
LittleEndian::write_u64(&mut key, bloom_part_index as u64);
LittleEndian::write_u64(&mut val, bloom_part_value);
try!(batch.put(DB_COL_ACCOUNT_BLOOM, &key, &val));
}
Ok(())
}
/// Commit all recent insert operations and canonical historical commits' removals from the
/// old era to the backing database, reverting any non-canonical historical commit's inserts.
pub fn commit(&mut self, batch: &DBTransaction, now: u64, id: &H256, end: Option<(u64, H256)>) -> Result<u32, UtilError> {
{
let mut bloom_lock = self.account_bloom.lock();
try!(Self::commit_bloom(batch, bloom_lock.drain_journal()));
}
let records = try!(self.db.commit(batch, now, id, end));
if self.is_canon {
self.commit_cache();
@ -81,6 +149,7 @@ impl StateDB {
account_cache: self.account_cache.clone(),
cache_overlay: Vec::new(),
is_canon: false,
account_bloom: self.account_bloom.clone(),
}
}
@ -91,6 +160,7 @@ impl StateDB {
account_cache: self.account_cache.clone(),
cache_overlay: Vec::new(),
is_canon: true,
account_bloom: self.account_bloom.clone(),
}
}

View File

@ -326,7 +326,7 @@ pub fn get_temp_state() -> GuardedTempResult<State> {
pub fn get_temp_state_db_in(path: &Path) -> StateDB {
let db = new_db(path.to_str().expect("Only valid utf8 paths for tests."));
let journal_db = journaldb::new(db.clone(), journaldb::Algorithm::EarlyMerge, None);
let journal_db = journaldb::new(db.clone(), journaldb::Algorithm::EarlyMerge, client::DB_COL_STATE);
StateDB::new(journal_db)
}

View File

@ -29,7 +29,7 @@ use ethcore::migrations::Extract;
/// Database is assumed to be at default version, when no version file is found.
const DEFAULT_VERSION: u32 = 5;
/// Current version of database models.
const CURRENT_VERSION: u32 = 9;
const CURRENT_VERSION: u32 = 10;
/// First version of the consolidated database.
const CONSOLIDATION_VERSION: u32 = 9;
/// Defines how many items are migrated to the new version of database at once.
@ -140,7 +140,12 @@ pub fn default_migration_settings(compaction_profile: &CompactionProfile) -> Mig
/// Migrations on the consolidated database.
fn consolidated_database_migrations(compaction_profile: &CompactionProfile) -> Result<MigrationManager, Error> {
let manager = MigrationManager::new(default_migration_settings(compaction_profile));
let mut manager = MigrationManager::new(default_migration_settings(compaction_profile));
// this won't ever fire, because version will be already 9
// added because migration api should know that it should open database with 5 columns
try!(manager.add_migration(migrations::ToV9::new(Some(5), migrations::Extract::All)).map_err(|_| Error::MigrationImpossible));
// this will
try!(manager.add_migration(migrations::ToV10).map_err(|_| Error::MigrationImpossible));
Ok(manager)
}
@ -180,7 +185,6 @@ fn consolidate_database(
Ok(())
}
/// Migrates database at given position with given migration rules.
fn migrate_database(version: u32, db_path: PathBuf, mut migrations: MigrationManager) -> Result<(), Error> {
// check if migration is needed
@ -215,6 +219,12 @@ fn exists(path: &Path) -> bool {
fs::metadata(path).is_ok()
}
// in-place upgrades that do nothing when called repeatedly
fn run_inplace_upgrades(path: &Path) -> Result<(), Error> {
try!(migrations::upgrade_account_bloom(path));
Ok(())
}
/// Migrates the database.
pub fn migrate(path: &Path, pruning: Algorithm, compaction_profile: CompactionProfile) -> Result<(), Error> {
// read version file.
@ -228,6 +238,7 @@ pub fn migrate(path: &Path, pruning: Algorithm, compaction_profile: CompactionPr
// We are in the latest version, yay!
if version == CURRENT_VERSION {
try!(run_inplace_upgrades(consolidated_database_path(path).as_path()));
return Ok(())
}
@ -259,6 +270,8 @@ pub fn migrate(path: &Path, pruning: Algorithm, compaction_profile: CompactionPr
println!("Migration finished");
}
try!(run_inplace_upgrades(consolidated_database_path(path).as_path()));
// update version file.
update_version(path)
}

View File

@ -102,6 +102,12 @@ impl From<::std::io::Error> for Error {
}
}
impl From<String> for Error {
fn from(e: String) -> Self {
Error::Custom(e)
}
}
/// A generalized migration from the given db to a destination db.
pub trait Migration: 'static {
/// Number of columns in database after the migration.