fetch known code from the database during restoration

Previously, the state rebuilder kept all seen code in memory, leading to
high memory usage by the end of state restoration.
This commit is contained in:
Robert Habermeier 2016-11-11 17:18:31 +01:00
parent 1deeb0d901
commit ed135bb9dc
2 changed files with 40 additions and 27 deletions

View File

@ -19,11 +19,11 @@
use account_db::{AccountDB, AccountDBMut}; use account_db::{AccountDB, AccountDBMut};
use snapshot::Error; use snapshot::Error;
use util::{U256, FixedHash, H256, Bytes, HashDB, DBValue, SHA3_EMPTY, SHA3_NULL_RLP}; use util::{U256, FixedHash, H256, Bytes, HashDB, SHA3_EMPTY, SHA3_NULL_RLP};
use util::trie::{TrieDB, Trie}; use util::trie::{TrieDB, Trie};
use rlp::{Rlp, RlpStream, Stream, UntrustedRlp, View}; use rlp::{Rlp, RlpStream, Stream, UntrustedRlp, View};
use std::collections::{HashMap, HashSet}; use std::collections::HashSet;
// An empty account -- these are replaced with RLP null data for a space optimization. // An empty account -- these are replaced with RLP null data for a space optimization.
const ACC_EMPTY: Account = Account { const ACC_EMPTY: Account = Account {
@ -150,7 +150,6 @@ impl Account {
pub fn from_fat_rlp( pub fn from_fat_rlp(
acct_db: &mut AccountDBMut, acct_db: &mut AccountDBMut,
rlp: UntrustedRlp, rlp: UntrustedRlp,
code_map: &HashMap<H256, Bytes>,
) -> Result<(Self, Option<Bytes>), Error> { ) -> Result<(Self, Option<Bytes>), Error> {
use util::{TrieDBMut, TrieMut}; use util::{TrieDBMut, TrieMut};
@ -177,9 +176,6 @@ impl Account {
} }
CodeState::Hash => { CodeState::Hash => {
let code_hash = try!(rlp.val_at(3)); let code_hash = try!(rlp.val_at(3));
if let Some(code) = code_map.get(&code_hash) {
acct_db.emplace(code_hash.clone(), DBValue::from_slice(code));
}
(code_hash, None) (code_hash, None)
} }
@ -250,7 +246,7 @@ mod tests {
let fat_rlp = account.to_fat_rlp(&AccountDB::new(db.as_hashdb(), &addr), &mut Default::default()).unwrap(); let fat_rlp = account.to_fat_rlp(&AccountDB::new(db.as_hashdb(), &addr), &mut Default::default()).unwrap();
let fat_rlp = UntrustedRlp::new(&fat_rlp); let fat_rlp = UntrustedRlp::new(&fat_rlp);
assert_eq!(Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp, &Default::default()).unwrap().0, account); assert_eq!(Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp).unwrap().0, account);
} }
#[test] #[test]
@ -275,7 +271,7 @@ mod tests {
let fat_rlp = account.to_fat_rlp(&AccountDB::new(db.as_hashdb(), &addr), &mut Default::default()).unwrap(); let fat_rlp = account.to_fat_rlp(&AccountDB::new(db.as_hashdb(), &addr), &mut Default::default()).unwrap();
let fat_rlp = UntrustedRlp::new(&fat_rlp); let fat_rlp = UntrustedRlp::new(&fat_rlp);
assert_eq!(Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp, &Default::default()).unwrap().0, account); assert_eq!(Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp).unwrap().0, account);
} }
#[test] #[test]
@ -318,12 +314,11 @@ mod tests {
let fat_rlp1 = UntrustedRlp::new(&fat_rlp1); let fat_rlp1 = UntrustedRlp::new(&fat_rlp1);
let fat_rlp2 = UntrustedRlp::new(&fat_rlp2); let fat_rlp2 = UntrustedRlp::new(&fat_rlp2);
let code_map = HashMap::new(); let (acc, maybe_code) = Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr2), fat_rlp2).unwrap();
let (acc, maybe_code) = Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr2), fat_rlp2, &code_map).unwrap();
assert!(maybe_code.is_none()); assert!(maybe_code.is_none());
assert_eq!(acc, account2); assert_eq!(acc, account2);
let (acc, maybe_code) = Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr1), fat_rlp1, &code_map).unwrap(); let (acc, maybe_code) = Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr1), fat_rlp1).unwrap();
assert_eq!(maybe_code, Some(b"this is definitely code".to_vec())); assert_eq!(maybe_code, Some(b"this is definitely code".to_vec()));
assert_eq!(acc, account1); assert_eq!(acc, account1);
} }
@ -332,9 +327,8 @@ mod tests {
fn encoding_empty_acc() { fn encoding_empty_acc() {
let mut db = get_temp_state_db(); let mut db = get_temp_state_db();
let mut used_code = HashSet::new(); let mut used_code = HashSet::new();
let code_map = HashMap::new();
assert_eq!(ACC_EMPTY.to_fat_rlp(&AccountDB::new(db.as_hashdb(), &Address::default()), &mut used_code).unwrap(), ::rlp::NULL_RLP.to_vec()); assert_eq!(ACC_EMPTY.to_fat_rlp(&AccountDB::new(db.as_hashdb(), &Address::default()), &mut used_code).unwrap(), ::rlp::NULL_RLP.to_vec());
assert_eq!(Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &Address::default()), UntrustedRlp::new(&::rlp::NULL_RLP), &code_map).unwrap(), (ACC_EMPTY, None)); assert_eq!(Account::from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &Address::default()), UntrustedRlp::new(&::rlp::NULL_RLP)).unwrap(), (ACC_EMPTY, None));
} }
} }

View File

@ -389,7 +389,7 @@ pub fn chunk_state<'a>(db: &HashDB, root: &H256, writer: &Mutex<SnapshotWriter +
pub struct StateRebuilder { pub struct StateRebuilder {
db: Box<JournalDB>, db: Box<JournalDB>,
state_root: H256, state_root: H256,
code_map: HashMap<H256, Bytes>, // maps code hashes to code itself. known_code: HashMap<H256, H256>, // code hashes mapped to first account with this code.
missing_code: HashMap<H256, Vec<H256>>, // maps code hashes to lists of accounts missing that code. missing_code: HashMap<H256, Vec<H256>>, // maps code hashes to lists of accounts missing that code.
bloom: Bloom, bloom: Bloom,
} }
@ -400,7 +400,7 @@ impl StateRebuilder {
StateRebuilder { StateRebuilder {
db: journaldb::new(db.clone(), pruning, ::db::COL_STATE), db: journaldb::new(db.clone(), pruning, ::db::COL_STATE),
state_root: SHA3_NULL_RLP, state_root: SHA3_NULL_RLP,
code_map: HashMap::new(), known_code: HashMap::new(),
missing_code: HashMap::new(), missing_code: HashMap::new(),
bloom: StateDB::load_bloom(&*db), bloom: StateDB::load_bloom(&*db),
} }
@ -419,24 +419,26 @@ impl StateRebuilder {
let chunk_size = account_fat_rlps.len() / ::num_cpus::get() + 1; let chunk_size = account_fat_rlps.len() / ::num_cpus::get() + 1;
// new code contained within this chunk. // new code contained within this chunk.
let mut chunk_code = HashMap::new(); let mut chunk_code = Vec::new();
for (account_chunk, out_pairs_chunk) in account_fat_rlps.chunks(chunk_size).zip(pairs.chunks_mut(chunk_size)) { for (account_chunk, out_pairs_chunk) in account_fat_rlps.chunks(chunk_size).zip(pairs.chunks_mut(chunk_size)) {
let code_map = &self.code_map; let status = try!(rebuild_accounts(self.db.as_hashdb_mut(), account_chunk, out_pairs_chunk, &self.known_code));
let status = try!(rebuild_accounts(self.db.as_hashdb_mut(), account_chunk, out_pairs_chunk, code_map));
chunk_code.extend(status.new_code); chunk_code.extend(status.new_code);
// update missing code.
for (addr_hash, code_hash) in status.missing_code { for (addr_hash, code_hash) in status.missing_code {
self.missing_code.entry(code_hash).or_insert_with(Vec::new).push(addr_hash); self.missing_code.entry(code_hash).or_insert_with(Vec::new).push(addr_hash);
} }
} }
// patch up all missing code. must be done after collecting all new missing code entries. // patch up all missing code. must be done after collecting all new missing code entries.
for (code_hash, code) in chunk_code { for (code_hash, code, first_with) in chunk_code {
for addr_hash in self.missing_code.remove(&code_hash).unwrap_or_else(Vec::new) { for addr_hash in self.missing_code.remove(&code_hash).unwrap_or_else(Vec::new) {
let mut db = AccountDBMut::from_hash(self.db.as_hashdb_mut(), addr_hash); let mut db = AccountDBMut::from_hash(self.db.as_hashdb_mut(), addr_hash);
db.emplace(code_hash, DBValue::from_slice(&code)); db.emplace(code_hash, DBValue::from_slice(&code));
} }
self.code_map.insert(code_hash, code); self.known_code.insert(code_hash, first_with);
} }
let backing = self.db.backing().clone(); let backing = self.db.backing().clone();
@ -482,7 +484,8 @@ impl StateRebuilder {
#[derive(Default)] #[derive(Default)]
struct RebuiltStatus { struct RebuiltStatus {
new_code: Vec<(H256, Bytes)>, // new code that's become available. // new code that's become available. (code_hash, code, addr_hash)
new_code: Vec<(H256, Bytes, H256)>,
missing_code: Vec<(H256, H256)>, // accounts that are missing code. missing_code: Vec<(H256, H256)>, // accounts that are missing code.
} }
@ -492,7 +495,7 @@ fn rebuild_accounts(
db: &mut HashDB, db: &mut HashDB,
account_chunk: &[&[u8]], account_chunk: &[&[u8]],
out_chunk: &mut [(H256, Bytes)], out_chunk: &mut [(H256, Bytes)],
code_map: &HashMap<H256, Bytes> known_code: &HashMap<H256, H256>,
) -> Result<RebuiltStatus, ::error::Error> ) -> Result<RebuiltStatus, ::error::Error>
{ {
let mut status = RebuiltStatus::default(); let mut status = RebuiltStatus::default();
@ -503,17 +506,33 @@ fn rebuild_accounts(
let fat_rlp = try!(account_rlp.at(1)); let fat_rlp = try!(account_rlp.at(1));
let thin_rlp = { let thin_rlp = {
let mut acct_db = AccountDBMut::from_hash(db, hash);
// fill out the storage trie and code while decoding. // fill out the storage trie and code while decoding.
let (acc, maybe_code) = try!(Account::from_fat_rlp(&mut acct_db, fat_rlp, code_map)); let (acc, maybe_code) = {
let mut acct_db = AccountDBMut::from_hash(db, hash);
try!(Account::from_fat_rlp(&mut acct_db, fat_rlp))
};
let code_hash = acc.code_hash().clone(); let code_hash = acc.code_hash().clone();
match maybe_code { match maybe_code {
Some(code) => status.new_code.push((code_hash, code)), // new inline code
Some(code) => status.new_code.push((code_hash, code, hash)),
None => { None => {
if code_hash != ::util::SHA3_EMPTY && !code_map.contains_key(&code_hash) { if code_hash != ::util::SHA3_EMPTY {
status.missing_code.push((hash, code_hash)); // see if this code has already been included inline
match known_code.get(&code_hash) {
Some(&first_with) => {
// if so, load it from the database.
let code = try!(AccountDB::from_hash(db, first_with)
.get(&code_hash)
.ok_or_else(|| Error::MissingCode(vec![first_with])));
// and write it again under a different mangled key
AccountDBMut::from_hash(db, hash).emplace(code_hash, code);
}
// if not, queue it up to be filled later
None => status.missing_code.push((hash, code_hash)),
}
} }
} }
} }