Compact chunks
This commit is contained in:
		
							parent
							
								
									d146ae7275
								
							
						
					
					
						commit
						b840ab8f8b
					
				| @ -23,11 +23,10 @@ use snapshot::Error; | ||||
| use util::{U256, H256, Bytes, HashDB, SHA3_EMPTY, SHA3_NULL_RLP}; | ||||
| use util::trie::{TrieDB, Trie}; | ||||
| use rlp::{RlpStream, UntrustedRlp}; | ||||
| use itertools::Itertools; | ||||
| 
 | ||||
| use std::collections::HashSet; | ||||
| 
 | ||||
| // An empty account -- these are replaced with RLP null data for a space optimization.
 | ||||
| // An empty account -- these were replaced with RLP null data for a space optimization in v1.
 | ||||
| const ACC_EMPTY: BasicAccount = BasicAccount { | ||||
| 	nonce: U256([0, 0, 0, 0]), | ||||
| 	balance: U256([0, 0, 0, 0]), | ||||
| @ -62,28 +61,19 @@ impl CodeState { | ||||
| } | ||||
| 
 | ||||
| // walk the account's storage trie, returning a vector of RLP items containing the
 | ||||
| // account properties and the storage. Each item contains at most `max_storage_items`
 | ||||
| // account address hash, account properties and the storage. Each item contains at most `max_storage_items`
 | ||||
| // storage records split according to snapshot format definition.
 | ||||
| pub fn to_fat_rlps(acc: &BasicAccount, acct_db: &AccountDB, used_code: &mut HashSet<H256>, max_storage_items: usize) -> Result<Vec<Bytes>, Error> { | ||||
| 	if acc == &ACC_EMPTY { | ||||
| 		return Ok(vec![::rlp::NULL_RLP.to_vec()]); | ||||
| 	} | ||||
| 
 | ||||
| pub fn to_fat_rlps(account_hash: &H256, acc: &BasicAccount, acct_db: &AccountDB, used_code: &mut HashSet<H256>, first_chunk_size: usize, max_chunk_size: usize) -> Result<Vec<Bytes>, Error> { | ||||
| 	let db = TrieDB::new(acct_db, &acc.storage_root)?; | ||||
| 	let mut chunks = Vec::new(); | ||||
| 	let mut db_iter = db.iter()?; | ||||
| 	let mut target_chunk_size = first_chunk_size; | ||||
| 	let mut account_stream = RlpStream::new_list(2); | ||||
| 	let mut leftover: Option<Vec<u8>> = None; | ||||
| 	loop { | ||||
| 		account_stream.append(account_hash); | ||||
| 		account_stream.begin_list(5); | ||||
| 
 | ||||
| 	let chunks = db.iter()?.chunks(max_storage_items); | ||||
| 	let pair_chunks = chunks.into_iter().map(|chunk| chunk.collect()); | ||||
| 	pair_chunks.pad_using(1, |_| Vec::new(), ).map(|pairs| { | ||||
| 		let mut stream = RlpStream::new_list(pairs.len()); | ||||
| 
 | ||||
| 		for r in pairs { | ||||
| 			let (k, v) = r?; | ||||
| 			stream.begin_list(2).append(&k).append(&&*v); | ||||
| 		} | ||||
| 
 | ||||
| 		let pairs_rlp = stream.out(); | ||||
| 
 | ||||
| 		let mut account_stream = RlpStream::new_list(5); | ||||
| 		account_stream.append(&acc.nonce) | ||||
| 					  .append(&acc.balance); | ||||
| 
 | ||||
| @ -105,9 +95,47 @@ pub fn to_fat_rlps(acc: &BasicAccount, acct_db: &AccountDB, used_code: &mut Hash | ||||
| 			} | ||||
| 		} | ||||
| 
 | ||||
| 		account_stream.append_raw(&pairs_rlp, 1); | ||||
| 		Ok(account_stream.out()) | ||||
| 	}).collect() | ||||
| 		account_stream.begin_unbounded_list(); | ||||
| 		if account_stream.len() > target_chunk_size { | ||||
| 			// account does not fit, push an empty record to mark a new chunk
 | ||||
| 			target_chunk_size = max_chunk_size; | ||||
| 			chunks.push(Vec::new()); | ||||
| 		} | ||||
| 
 | ||||
| 		if let Some(pair) = leftover.take() { | ||||
| 			account_stream.append_raw_checked(&pair, 1, target_chunk_size); | ||||
| 		} | ||||
| 
 | ||||
| 		loop { | ||||
| 			match db_iter.next() { | ||||
| 				Some(Ok((k, v))) => { | ||||
| 					let pair = { | ||||
| 						let mut stream = RlpStream::new_list(2); | ||||
| 						stream.append(&k).append(&&*v); | ||||
| 						stream.drain() | ||||
| 					}; | ||||
| 					if !account_stream.append_raw_checked(&pair, 1, target_chunk_size) { | ||||
| 						account_stream.complete_unbounded_list(); | ||||
| 						let stream = ::std::mem::replace(&mut account_stream, RlpStream::new_list(2)); | ||||
| 						chunks.push(stream.out()); | ||||
| 						target_chunk_size = max_chunk_size; | ||||
| 						leftover = Some(pair.to_vec()); | ||||
| 						break; | ||||
| 					} | ||||
| 				}, | ||||
| 				Some(Err(e)) => { | ||||
| 					return Err(e.into()); | ||||
| 				}, | ||||
| 				None => { | ||||
| 					account_stream.complete_unbounded_list(); | ||||
| 					let stream = ::std::mem::replace(&mut account_stream, RlpStream::new_list(2)); | ||||
| 					chunks.push(stream.out()); | ||||
| 					return Ok(chunks); | ||||
| 				} | ||||
| 			} | ||||
| 
 | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // decode a fat rlp, and rebuild the storage trie as we go.
 | ||||
| @ -181,7 +209,7 @@ mod tests { | ||||
| 	use snapshot::tests::helpers::fill_storage; | ||||
| 
 | ||||
| 	use util::sha3::{SHA3_EMPTY, SHA3_NULL_RLP}; | ||||
| 	use util::{Address, H256, HashDB, DBValue}; | ||||
| 	use util::{Address, H256, HashDB, DBValue, Hashable}; | ||||
| 	use rlp::UntrustedRlp; | ||||
| 
 | ||||
| 	use std::collections::HashSet; | ||||
| @ -203,8 +231,8 @@ mod tests { | ||||
| 		let thin_rlp = ::rlp::encode(&account); | ||||
| 		assert_eq!(::rlp::decode::<BasicAccount>(&thin_rlp), account); | ||||
| 
 | ||||
| 		let fat_rlps = to_fat_rlps(&account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), usize::max_value()).unwrap(); | ||||
| 		let fat_rlp = UntrustedRlp::new(&fat_rlps[0]); | ||||
| 		let fat_rlps = to_fat_rlps(&addr.sha3(), &account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), usize::max_value(), usize::max_value()).unwrap(); | ||||
| 		let fat_rlp = UntrustedRlp::new(&fat_rlps[0]).at(1).unwrap(); | ||||
| 		assert_eq!(from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp, H256::zero()).unwrap().0, account); | ||||
| 	} | ||||
| 
 | ||||
| @ -228,8 +256,8 @@ mod tests { | ||||
| 		let thin_rlp = ::rlp::encode(&account); | ||||
| 		assert_eq!(::rlp::decode::<BasicAccount>(&thin_rlp), account); | ||||
| 
 | ||||
| 		let fat_rlp = to_fat_rlps(&account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), usize::max_value()).unwrap(); | ||||
| 		let fat_rlp = UntrustedRlp::new(&fat_rlp[0]); | ||||
| 		let fat_rlp = to_fat_rlps(&addr.sha3(), &account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), usize::max_value(), usize::max_value()).unwrap(); | ||||
| 		let fat_rlp = UntrustedRlp::new(&fat_rlp[0]).at(1).unwrap(); | ||||
| 		assert_eq!(from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp, H256::zero()).unwrap().0, account); | ||||
| 	} | ||||
| 
 | ||||
| @ -253,11 +281,11 @@ mod tests { | ||||
| 		let thin_rlp = ::rlp::encode(&account); | ||||
| 		assert_eq!(::rlp::decode::<BasicAccount>(&thin_rlp), account); | ||||
| 
 | ||||
| 		let fat_rlps = to_fat_rlps(&account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), 100).unwrap(); | ||||
| 		let fat_rlps = to_fat_rlps(&addr.sha3(), &account, &AccountDB::new(db.as_hashdb(), &addr), &mut Default::default(), 500, 1000).unwrap(); | ||||
| 		let mut root = SHA3_NULL_RLP; | ||||
| 		let mut restored_account = None; | ||||
| 		for rlp in fat_rlps { | ||||
| 			let fat_rlp = UntrustedRlp::new(&rlp); | ||||
| 			let fat_rlp = UntrustedRlp::new(&rlp).at(1).unwrap(); | ||||
| 			restored_account = Some(from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr), fat_rlp, root).unwrap().0); | ||||
| 			root = restored_account.as_ref().unwrap().storage_root.clone(); | ||||
| 		} | ||||
| @ -297,12 +325,12 @@ mod tests { | ||||
| 
 | ||||
| 		let mut used_code = HashSet::new(); | ||||
| 
 | ||||
| 		let fat_rlp1 = to_fat_rlps(&account1, &AccountDB::new(db.as_hashdb(), &addr1), &mut used_code, usize::max_value()).unwrap(); | ||||
| 		let fat_rlp2 = to_fat_rlps(&account2, &AccountDB::new(db.as_hashdb(), &addr2), &mut used_code, usize::max_value()).unwrap(); | ||||
| 		let fat_rlp1 = to_fat_rlps(&addr1.sha3(), &account1, &AccountDB::new(db.as_hashdb(), &addr1), &mut used_code, usize::max_value(), usize::max_value()).unwrap(); | ||||
| 		let fat_rlp2 = to_fat_rlps(&addr2.sha3(), &account2, &AccountDB::new(db.as_hashdb(), &addr2), &mut used_code, usize::max_value(), usize::max_value()).unwrap(); | ||||
| 		assert_eq!(used_code.len(), 1); | ||||
| 
 | ||||
| 		let fat_rlp1 = UntrustedRlp::new(&fat_rlp1[0]); | ||||
| 		let fat_rlp2 = UntrustedRlp::new(&fat_rlp2[0]); | ||||
| 		let fat_rlp1 = UntrustedRlp::new(&fat_rlp1[0]).at(1).unwrap(); | ||||
| 		let fat_rlp2 = UntrustedRlp::new(&fat_rlp2[0]).at(1).unwrap(); | ||||
| 
 | ||||
| 		let (acc, maybe_code) = from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &addr2), fat_rlp2, H256::zero()).unwrap(); | ||||
| 		assert!(maybe_code.is_none()); | ||||
| @ -316,9 +344,6 @@ mod tests { | ||||
| 	#[test] | ||||
| 	fn encoding_empty_acc() { | ||||
| 		let mut db = get_temp_state_db(); | ||||
| 		let mut used_code = HashSet::new(); | ||||
| 
 | ||||
| 		assert_eq!(to_fat_rlps(&ACC_EMPTY, &AccountDB::new(db.as_hashdb(), &Address::default()), &mut used_code, usize::max_value()).unwrap(), vec![::rlp::NULL_RLP.to_vec()]); | ||||
| 		assert_eq!(from_fat_rlp(&mut AccountDBMut::new(db.as_hashdb_mut(), &Address::default()), UntrustedRlp::new(&::rlp::NULL_RLP), H256::zero()).unwrap(), (ACC_EMPTY, None)); | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @ -83,9 +83,6 @@ mod traits { | ||||
| // Try to have chunks be around 4MB (before compression)
 | ||||
| const PREFERRED_CHUNK_SIZE: usize = 4 * 1024 * 1024; | ||||
| 
 | ||||
| // Try to have chunks be around 4MB (before compression)
 | ||||
| const MAX_STORAGE_ENTRIES_PER_ACCOUNT_RECORD: usize = 80_000; | ||||
| 
 | ||||
| // How many blocks to include in a snapshot, starting from the head of the chain.
 | ||||
| const SNAPSHOT_BLOCKS: u64 = 30000; | ||||
| 
 | ||||
| @ -305,20 +302,9 @@ impl<'a> StateChunker<'a> { | ||||
| 	//
 | ||||
| 	// If the buffer is greater than the desired chunk size,
 | ||||
| 	// this will write out the data to disk.
 | ||||
| 	fn push(&mut self, account_hash: Bytes, data: Bytes, force_chunk: bool) -> Result<(), Error> { | ||||
| 		let pair = { | ||||
| 			let mut stream = RlpStream::new_list(2); | ||||
| 			stream.append(&account_hash).append_raw(&data, 1); | ||||
| 			stream.out() | ||||
| 		}; | ||||
| 
 | ||||
| 		if force_chunk || self.cur_size + pair.len() >= PREFERRED_CHUNK_SIZE { | ||||
| 			self.write_chunk()?; | ||||
| 		} | ||||
| 
 | ||||
| 		self.cur_size += pair.len(); | ||||
| 		self.rlps.push(pair); | ||||
| 
 | ||||
| 	fn push(&mut self, data: Bytes) -> Result<(), Error> { | ||||
| 		self.cur_size += data.len(); | ||||
| 		self.rlps.push(data); | ||||
| 		Ok(()) | ||||
| 	} | ||||
| 
 | ||||
| @ -348,6 +334,11 @@ impl<'a> StateChunker<'a> { | ||||
| 
 | ||||
| 		Ok(()) | ||||
| 	} | ||||
| 
 | ||||
| 	// Get current chunk size.
 | ||||
| 	fn chunk_size(&self) -> usize { | ||||
| 		self.cur_size | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| /// Walk the given state database starting from the given root,
 | ||||
| @ -377,9 +368,12 @@ pub fn chunk_state<'a>(db: &HashDB, root: &H256, writer: &Mutex<SnapshotWriter + | ||||
| 
 | ||||
| 		let account_db = AccountDB::from_hash(db, account_key_hash); | ||||
| 
 | ||||
| 		let fat_rlps = account::to_fat_rlps(&account, &account_db, &mut used_code, MAX_STORAGE_ENTRIES_PER_ACCOUNT_RECORD)?; | ||||
| 		let fat_rlps = account::to_fat_rlps(&account_key_hash, &account, &account_db, &mut used_code, PREFERRED_CHUNK_SIZE - chunker.chunk_size(), PREFERRED_CHUNK_SIZE)?; | ||||
| 		for (i, fat_rlp) in fat_rlps.into_iter().enumerate() { | ||||
| 			chunker.push(account_key.clone(), fat_rlp, i > 0)?; | ||||
| 			if i > 0 { | ||||
| 				chunker.write_chunk()?; | ||||
| 			} | ||||
| 			chunker.push(fat_rlp)?; | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
|  | ||||
| @ -122,10 +122,9 @@ fn get_code_from_prev_chunk() { | ||||
| 		let mut db = MemoryDB::new(); | ||||
| 		AccountDBMut::from_hash(&mut db, hash).insert(&code[..]); | ||||
| 
 | ||||
| 		let fat_rlp = account::to_fat_rlps(&acc, &AccountDB::from_hash(&db, hash), &mut used_code, usize::max_value()).unwrap(); | ||||
| 
 | ||||
| 		let fat_rlp = account::to_fat_rlps(&hash, &acc, &AccountDB::from_hash(&db, hash), &mut used_code, usize::max_value(), usize::max_value()).unwrap(); | ||||
| 		let mut stream = RlpStream::new_list(1); | ||||
| 		stream.begin_list(2).append(&hash).append_raw(&fat_rlp[0], 1); | ||||
| 		stream.append_raw(&fat_rlp[0], 1); | ||||
| 		stream.out() | ||||
| 	}; | ||||
| 
 | ||||
|  | ||||
| @ -23,11 +23,11 @@ use traits::Encodable; | ||||
| struct ListInfo { | ||||
| 	position: usize, | ||||
| 	current: usize, | ||||
| 	max: usize, | ||||
| 	max: Option<usize>, | ||||
| } | ||||
| 
 | ||||
| impl ListInfo { | ||||
| 	fn new(position: usize, max: usize) -> ListInfo { | ||||
| 	fn new(position: usize, max: Option<usize>) -> ListInfo { | ||||
| 		ListInfo { | ||||
| 			position: position, | ||||
| 			current: 0, | ||||
| @ -133,7 +133,7 @@ impl RlpStream { | ||||
| 				self.buffer.push(0); | ||||
| 
 | ||||
| 				let position = self.buffer.len(); | ||||
| 				self.unfinished_lists.push(ListInfo::new(position, len)); | ||||
| 				self.unfinished_lists.push(ListInfo::new(position, Some(len))); | ||||
| 			}, | ||||
| 		} | ||||
| 
 | ||||
| @ -141,6 +141,19 @@ impl RlpStream { | ||||
| 		self | ||||
| 	} | ||||
| 
 | ||||
| 
 | ||||
| 	/// Declare appending the list of unknown size, chainable.
 | ||||
| 	pub fn begin_unbounded_list(&mut self) -> &mut RlpStream { | ||||
| 		self.finished_list = false; | ||||
| 		// payload is longer than 1 byte only for lists > 55 bytes
 | ||||
| 		// by pushing always this 1 byte we may avoid unnecessary shift of data
 | ||||
| 		self.buffer.push(0); | ||||
| 		let position = self.buffer.len(); | ||||
| 		self.unfinished_lists.push(ListInfo::new(position, None)); | ||||
| 		// return chainable self
 | ||||
| 		self | ||||
| 	} | ||||
| 
 | ||||
| 	/// Apends null to the end of stream, chainable.
 | ||||
| 	///
 | ||||
| 	/// ```rust
 | ||||
| @ -177,6 +190,36 @@ impl RlpStream { | ||||
| 		self | ||||
| 	} | ||||
| 
 | ||||
| 	/// Appends raw (pre-serialised) RLP data. Checks for size oveflow.
 | ||||
| 	pub fn append_raw_checked<'a>(&'a mut self, bytes: &[u8], item_count: usize, max_size: usize) -> bool { | ||||
| 		if self.estimate_size(bytes.len()) > max_size { | ||||
| 			return false; | ||||
| 		} | ||||
| 		self.append_raw(bytes, item_count); | ||||
| 		true | ||||
| 	} | ||||
| 
 | ||||
| 	/// Calculate total RLP size for appended payload.
 | ||||
| 	pub fn estimate_size<'a>(&'a self, add: usize) -> usize { | ||||
| 		let total_size = self.buffer.len() + add; | ||||
| 		let mut base_size = total_size; | ||||
| 		for list in &self.unfinished_lists[..] { | ||||
| 			let len = total_size - list.position; | ||||
| 			if len > 55 { | ||||
| 				let leading_empty_bytes = (len as u64).leading_zeros() as usize / 8; | ||||
| 				let size_bytes = 8 - leading_empty_bytes; | ||||
| 				base_size += size_bytes; | ||||
| 			} | ||||
| 		} | ||||
| 		base_size | ||||
| 	} | ||||
| 
 | ||||
| 
 | ||||
| 	/// Returns current RLP size in bytes for the data pushed into the list.
 | ||||
| 	pub fn len<'a>(&'a self) -> usize { | ||||
| 		self.estimate_size(0) | ||||
| 	} | ||||
| 
 | ||||
| 	/// Clear the output stream so far.
 | ||||
| 	///
 | ||||
| 	/// ```rust
 | ||||
| @ -246,10 +289,11 @@ impl RlpStream { | ||||
| 			None => false, | ||||
| 			Some(ref mut x) => { | ||||
| 				x.current += inserted_items; | ||||
| 				if x.current > x.max { | ||||
| 					panic!("You cannot append more items then you expect!"); | ||||
| 				match x.max { | ||||
| 					Some(ref max) if x.current > *max => panic!("You cannot append more items then you expect!"), | ||||
| 					Some(ref max) => x.current == *max, | ||||
| 					_ => false, | ||||
| 				} | ||||
| 				x.current == x.max | ||||
| 			} | ||||
| 		}; | ||||
| 
 | ||||
| @ -273,6 +317,17 @@ impl RlpStream { | ||||
| 			false => panic!() | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	/// Finalize current ubnbound list. Panics if no unbounded list has been opened.
 | ||||
| 	pub fn complete_unbounded_list(&mut self) { | ||||
| 		let list = self.unfinished_lists.pop().expect("No open list."); | ||||
| 		if list.max.is_some() { | ||||
| 			panic!("List type mismatch."); | ||||
| 		} | ||||
| 		let len = self.buffer.len() - list.position; | ||||
| 		self.encoder().insert_list_payload(len, list.position); | ||||
| 		self.note_appended(1); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| pub struct BasicEncoder<'a> { | ||||
|  | ||||
| @ -412,3 +412,25 @@ fn test_rlp_list_length_overflow() { | ||||
| 	let as_val: Result<String, DecoderError> = rlp.val_at(0); | ||||
| 	assert_eq!(Err(DecoderError::RlpIsTooShort), as_val); | ||||
| } | ||||
| 
 | ||||
| #[test] | ||||
| fn test_rlp_stream_size_limit() { | ||||
| 	for limit in 40 .. 270 { | ||||
| 		let item = [0u8; 1]; | ||||
| 		let mut stream = RlpStream::new(); | ||||
| 		while stream.append_raw_checked(&item, 1, limit) {} | ||||
| 		assert_eq!(stream.drain().len(), limit); | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| #[test] | ||||
| fn test_rlp_stream_unbounded_list() { | ||||
| 	let mut stream = RlpStream::new(); | ||||
| 	stream.begin_unbounded_list(); | ||||
| 	stream.append(&40u32); | ||||
| 	stream.append(&41u32); | ||||
| 	assert!(!stream.is_finished()); | ||||
| 	stream.complete_unbounded_list(); | ||||
| 	assert!(stream.is_finished()); | ||||
| } | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user