Compact chunks

This commit is contained in:
arkpar 2017-03-28 16:23:09 +02:00
parent d146ae7275
commit b840ab8f8b
5 changed files with 161 additions and 66 deletions

View File

@ -23,11 +23,10 @@ use snapshot::Error;
use util::{U256, H256, Bytes, HashDB, SHA3_EMPTY, SHA3_NULL_RLP};
use util::trie::{TrieDB, Trie};
use rlp::{RlpStream, UntrustedRlp};
use itertools::Itertools;
use std::collections::HashSet;
// An empty account -- these are replaced with RLP null data for a space optimization.
// An empty account -- these were replaced with RLP null data for a space optimization in v1.
const ACC_EMPTY: BasicAccount = BasicAccount {
nonce: U256([0, 0, 0, 0]),
balance: U256([0, 0, 0, 0]),
@ -62,28 +61,19 @@ impl CodeState {
}
// walk the account's storage trie, returning a vector of RLP items containing the
// account properties and the storage. Each item contains at most `max_storage_items`
// account address hash, account properties and the storage. Each item contains at most `max_storage_items`
// storage records split according to snapshot format definition.
pub fn to_fat_rlps(acc: &BasicAccount, acct_db: &AccountDB, used_code: &mut HashSet<H256>, max_storage_items: usize) -> Result<Vec<Bytes>, Error> {
if acc == &ACC_EMPTY {
return Ok(vec![::rlp::NULL_RLP.to_vec()]);
}
pub fn to_fat_rlps(account_hash: &H256, acc: &BasicAccount, acct_db: &AccountDB, used_code: &mut HashSet<H256>, first_chunk_size: usize, max_chunk_size: usize) -> Result<Vec<Bytes>, Error> {
let db = TrieDB::new(acct_db, &acc.storage_root)?;
let mut chunks = Vec::new();
let mut db_iter = db.iter()?;
let mut target_chunk_size = first_chunk_size;
let mut account_stream = RlpStream::new_list(2);
let mut leftover: Option<Vec<u8>> = None;
loop {
account_stream.append(account_hash);
account_stream.begin_list(5);
let chunks = db.iter()?.chunks(max_storage_items);
let pair_chunks = chunks.into_iter().map(|chunk| chunk.collect());
pair_chunks.pad_using(1, |_| Vec::new(), ).map(|pairs| {
let mut stream = RlpStream::new_list(pairs.len());
for r in pairs {
let (k, v) = r?;
stream.begin_list(2).append(&k).append(&&*v);
}
let pairs_rlp = stream.out();
let mut account_stream = RlpStream::new_list(5);
account_stream.append(&acc.nonce)
.append(&acc.balance);
@ -105,9 +95,47 @@ pub fn to_fat_rlps(acc: &BasicAccount, acct_db: &AccountDB, used_code: &mut Hash
}
}
account_stream.append_raw(&pairs_rlp, 1);
Ok(account_stream.out())
}).collect()
account_stream.begin_unbounded_list();
if account_stream.len() > target_chunk_size {
// account does not fit, push an empty record to mark a new chunk
target_chunk_size = max_chunk_size;
chunks.push(Vec::new());
}
if let Some(pair) = leftover.take() {
account_stream.append_raw_checked(&pair, 1, target_chunk_size);
}
loop {
match db_iter.next() {
Some(Ok((k, v))) => {
let pair = {
let mut stream = RlpStream::new_list(2);
stream.append(&k).append(&&*v);
stream.drain()
};
if !account_stream.append_raw_checked(&pair, 1, target_chunk_size) {
account_stream.complete_unbounded_list();
let stream = ::std::mem::replace(&mut account_stream, RlpStream::new_list(2));
chunks.push(stream.out());
target_chunk_size = max_chunk_size;
leftover = Some(pair.to_vec());
break;
}
},
Some(Err(e)) => {
return Err(e.into());
},
None => {
account_stream.complete_unbounded_list();
let stream = ::std::mem::replace(&mut account_stream, RlpStream::new_list(2));
chunks.push(stream.out());
return Ok(chunks);
}
}
}
}
}
// decode a fat rlp, and rebuild the storage trie as we go.
@ -181,7 +209,7 @@ mod tests {
use snapshot::tests::helpers::fill_storage;
use util::sha3::{SHA3_EMPTY, SHA3_NULL_RLP};
use util::{Address, H256, HashDB, DBValue};
use util::{Address, H256, HashDB, DBValue, Hashable};
use rlp::UntrustedRlp;
use std::collections::HashSet;
@ -203,8 +231,8 @@ mod tests {
let thin_rlp = ::rlp::encode(&account);
assert_eq!(::rlp::decode::<BasicAccount>(&thin_rlp), account);
let fat_rlps = to_fat_rlps(&account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), usize::max_value()).unwrap();
let fat_rlp = UntrustedRlp::new(&fat_rlps[0]);
let fat_rlps = to_fat_rlps(&addr.sha3(), &account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), usize::max_value(), usize::max_value()).unwrap();
let fat_rlp = UntrustedRlp::new(&fat_rlps[0]).at(1).unwrap();
assert_eq!(from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp, H256::zero()).unwrap().0, account);
}
@ -228,8 +256,8 @@ mod tests {
let thin_rlp = ::rlp::encode(&account);
assert_eq!(::rlp::decode::<BasicAccount>(&thin_rlp), account);
let fat_rlp = to_fat_rlps(&account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), usize::max_value()).unwrap();
let fat_rlp = UntrustedRlp::new(&fat_rlp[0]);
let fat_rlp = to_fat_rlps(&addr.sha3(), &account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), usize::max_value(), usize::max_value()).unwrap();
let fat_rlp = UntrustedRlp::new(&fat_rlp[0]).at(1).unwrap();
assert_eq!(from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp, H256::zero()).unwrap().0, account);
}
@ -253,11 +281,11 @@ mod tests {
let thin_rlp = ::rlp::encode(&account);
assert_eq!(::rlp::decode::<BasicAccount>(&thin_rlp), account);
let fat_rlps = to_fat_rlps(&account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), 100).unwrap();
let fat_rlps = to_fat_rlps(&addr.sha3(), &account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), 500, 1000).unwrap();
let mut root = SHA3_NULL_RLP;
let mut restored_account = None;
for rlp in fat_rlps {
let fat_rlp = UntrustedRlp::new(&rlp);
let fat_rlp = UntrustedRlp::new(&rlp).at(1).unwrap();
restored_account = Some(from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp, root).unwrap().0);
root = restored_account.as_ref().unwrap().storage_root.clone();
}
@ -297,12 +325,12 @@ mod tests {
let mut used_code = HashSet::new();
let fat_rlp1 = to_fat_rlps(&account1, &AccountDB::new(db.as_hashdb(), &addr1), &mut used_code, usize::max_value()).unwrap();
let fat_rlp2 = to_fat_rlps(&account2, &AccountDB::new(db.as_hashdb(), &addr2), &mut used_code, usize::max_value()).unwrap();
let fat_rlp1 = to_fat_rlps(&addr1.sha3(), &account1, &AccountDB::new(db.as_hashdb(), &addr1), &mut used_code, usize::max_value(), usize::max_value()).unwrap();
let fat_rlp2 = to_fat_rlps(&addr2.sha3(), &account2, &AccountDB::new(db.as_hashdb(), &addr2), &mut used_code, usize::max_value(), usize::max_value()).unwrap();
assert_eq!(used_code.len(), 1);
let fat_rlp1 = UntrustedRlp::new(&fat_rlp1[0]);
let fat_rlp2 = UntrustedRlp::new(&fat_rlp2[0]);
let fat_rlp1 = UntrustedRlp::new(&fat_rlp1[0]).at(1).unwrap();
let fat_rlp2 = UntrustedRlp::new(&fat_rlp2[0]).at(1).unwrap();
let (acc, maybe_code) = from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr2), fat_rlp2, H256::zero()).unwrap();
assert!(maybe_code.is_none());
@ -316,9 +344,6 @@ mod tests {
#[test]
fn encoding_empty_acc() {
let mut db = get_temp_state_db();
let mut used_code = HashSet::new();
assert_eq!(to_fat_rlps(&ACC_EMPTY, &AccountDB::new(db.as_hashdb(), &Address::default()), &mut used_code, usize::max_value()).unwrap(), vec![::rlp::NULL_RLP.to_vec()]);
assert_eq!(from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &Address::default()), UntrustedRlp::new(&::rlp::NULL_RLP), H256::zero()).unwrap(), (ACC_EMPTY, None));
}
}

View File

@ -83,9 +83,6 @@ mod traits {
// Try to have chunks be around 4MB (before compression)
const PREFERRED_CHUNK_SIZE: usize = 4 * 1024 * 1024;
// Try to have chunks be around 4MB (before compression)
const MAX_STORAGE_ENTRIES_PER_ACCOUNT_RECORD: usize = 80_000;
// How many blocks to include in a snapshot, starting from the head of the chain.
const SNAPSHOT_BLOCKS: u64 = 30000;
@ -305,20 +302,9 @@ impl<'a> StateChunker<'a> {
//
// If the buffer is greater than the desired chunk size,
// this will write out the data to disk.
fn push(&mut self, account_hash: Bytes, data: Bytes, force_chunk: bool) -> Result<(), Error> {
let pair = {
let mut stream = RlpStream::new_list(2);
stream.append(&account_hash).append_raw(&data, 1);
stream.out()
};
if force_chunk || self.cur_size + pair.len() >= PREFERRED_CHUNK_SIZE {
self.write_chunk()?;
}
self.cur_size += pair.len();
self.rlps.push(pair);
fn push(&mut self, data: Bytes) -> Result<(), Error> {
self.cur_size += data.len();
self.rlps.push(data);
Ok(())
}
@ -348,6 +334,11 @@ impl<'a> StateChunker<'a> {
Ok(())
}
// Get current chunk size.
fn chunk_size(&self) -> usize {
self.cur_size
}
}
/// Walk the given state database starting from the given root,
@ -377,9 +368,12 @@ pub fn chunk_state<'a>(db: &HashDB, root: &H256, writer: &Mutex<SnapshotWriter +
let account_db = AccountDB::from_hash(db, account_key_hash);
let fat_rlps = account::to_fat_rlps(&account, &account_db, &mut used_code, MAX_STORAGE_ENTRIES_PER_ACCOUNT_RECORD)?;
let fat_rlps = account::to_fat_rlps(&account_key_hash, &account, &account_db, &mut used_code, PREFERRED_CHUNK_SIZE - chunker.chunk_size(), PREFERRED_CHUNK_SIZE)?;
for (i, fat_rlp) in fat_rlps.into_iter().enumerate() {
chunker.push(account_key.clone(), fat_rlp, i > 0)?;
if i > 0 {
chunker.write_chunk()?;
}
chunker.push(fat_rlp)?;
}
}

View File

@ -122,10 +122,9 @@ fn get_code_from_prev_chunk() {
let mut db = MemoryDB::new();
AccountDBMut::from_hash(&mut db, hash).insert(&code[..]);
let fat_rlp = account::to_fat_rlps(&acc, &AccountDB::from_hash(&db, hash), &mut used_code, usize::max_value()).unwrap();
let fat_rlp = account::to_fat_rlps(&hash, &acc, &AccountDB::from_hash(&db, hash), &mut used_code, usize::max_value(), usize::max_value()).unwrap();
let mut stream = RlpStream::new_list(1);
stream.begin_list(2).append(&hash).append_raw(&fat_rlp[0], 1);
stream.append_raw(&fat_rlp[0], 1);
stream.out()
};

View File

@ -23,11 +23,11 @@ use traits::Encodable;
struct ListInfo {
position: usize,
current: usize,
max: usize,
max: Option<usize>,
}
impl ListInfo {
fn new(position: usize, max: usize) -> ListInfo {
fn new(position: usize, max: Option<usize>) -> ListInfo {
ListInfo {
position: position,
current: 0,
@ -133,7 +133,7 @@ impl RlpStream {
self.buffer.push(0);
let position = self.buffer.len();
self.unfinished_lists.push(ListInfo::new(position, len));
self.unfinished_lists.push(ListInfo::new(position, Some(len)));
},
}
@ -141,6 +141,19 @@ impl RlpStream {
self
}
/// Declare appending the list of unknown size, chainable.
pub fn begin_unbounded_list(&mut self) -> &mut RlpStream {
self.finished_list = false;
// payload is longer than 1 byte only for lists > 55 bytes
// by pushing always this 1 byte we may avoid unnecessary shift of data
self.buffer.push(0);
let position = self.buffer.len();
self.unfinished_lists.push(ListInfo::new(position, None));
// return chainable self
self
}
/// Apends null to the end of stream, chainable.
///
/// ```rust
@ -177,6 +190,36 @@ impl RlpStream {
self
}
/// Appends raw (pre-serialised) RLP data. Checks for size oveflow.
pub fn append_raw_checked<'a>(&'a mut self, bytes: &[u8], item_count: usize, max_size: usize) -> bool {
if self.estimate_size(bytes.len()) > max_size {
return false;
}
self.append_raw(bytes, item_count);
true
}
/// Calculate total RLP size for appended payload.
pub fn estimate_size<'a>(&'a self, add: usize) -> usize {
let total_size = self.buffer.len() + add;
let mut base_size = total_size;
for list in &self.unfinished_lists[..] {
let len = total_size - list.position;
if len > 55 {
let leading_empty_bytes = (len as u64).leading_zeros() as usize / 8;
let size_bytes = 8 - leading_empty_bytes;
base_size += size_bytes;
}
}
base_size
}
/// Returns current RLP size in bytes for the data pushed into the list.
pub fn len<'a>(&'a self) -> usize {
self.estimate_size(0)
}
/// Clear the output stream so far.
///
/// ```rust
@ -246,10 +289,11 @@ impl RlpStream {
None => false,
Some(ref mut x) => {
x.current += inserted_items;
if x.current > x.max {
panic!("You cannot append more items then you expect!");
match x.max {
Some(ref max) if x.current > *max => panic!("You cannot append more items then you expect!"),
Some(ref max) => x.current == *max,
_ => false,
}
x.current == x.max
}
};
@ -273,6 +317,17 @@ impl RlpStream {
false => panic!()
}
}
/// Finalize current ubnbound list. Panics if no unbounded list has been opened.
pub fn complete_unbounded_list(&mut self) {
let list = self.unfinished_lists.pop().expect("No open list.");
if list.max.is_some() {
panic!("List type mismatch.");
}
let len = self.buffer.len() - list.position;
self.encoder().insert_list_payload(len, list.position);
self.note_appended(1);
}
}
pub struct BasicEncoder<'a> {

View File

@ -412,3 +412,25 @@ fn test_rlp_list_length_overflow() {
let as_val: Result<String, DecoderError> = rlp.val_at(0);
assert_eq!(Err(DecoderError::RlpIsTooShort), as_val);
}
#[test]
fn test_rlp_stream_size_limit() {
for limit in 40 .. 270 {
let item = [0u8; 1];
let mut stream = RlpStream::new();
while stream.append_raw_checked(&item, 1, limit) {}
assert_eq!(stream.drain().len(), limit);
}
}
#[test]
fn test_rlp_stream_unbounded_list() {
let mut stream = RlpStream::new();
stream.begin_unbounded_list();
stream.append(&40u32);
stream.append(&41u32);
assert!(!stream.is_finished());
stream.complete_unbounded_list();
assert!(stream.is_finished());
}