2016-06-10 12:19:50 +02:00
|
|
|
// Copyright 2015, 2016 Ethcore (UK) Ltd.
|
|
|
|
// This file is part of Parity.
|
|
|
|
|
|
|
|
// Parity is free software: you can redistribute it and/or modify
|
|
|
|
// it under the terms of the GNU General Public License as published by
|
|
|
|
// the Free Software Foundation, either version 3 of the License, or
|
|
|
|
// (at your option) any later version.
|
|
|
|
|
|
|
|
// Parity is distributed in the hope that it will be useful,
|
|
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
// GNU General Public License for more details.
|
|
|
|
|
|
|
|
// You should have received a copy of the GNU General Public License
|
|
|
|
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
2016-06-13 20:00:00 +02:00
|
|
|
//! Snapshot creation helpers.
|
2016-06-10 12:19:50 +02:00
|
|
|
|
|
|
|
use std::collections::VecDeque;
|
|
|
|
use std::fs::File;
|
|
|
|
use std::io::Write;
|
2016-06-10 13:10:12 +02:00
|
|
|
use std::path::Path;
|
2016-06-10 12:19:50 +02:00
|
|
|
|
2016-06-15 17:46:40 +02:00
|
|
|
use account_db::{AccountDB, AccountDBMut};
|
2016-06-10 12:19:50 +02:00
|
|
|
use client::BlockChainClient;
|
2016-06-11 19:28:18 +02:00
|
|
|
use error::Error;
|
2016-06-10 12:19:50 +02:00
|
|
|
use ids::BlockID;
|
2016-06-10 13:10:12 +02:00
|
|
|
use views::BlockView;
|
2016-06-10 12:19:50 +02:00
|
|
|
|
2016-06-17 12:56:57 +02:00
|
|
|
use util::{Bytes, Hashable, HashDB, JournalDB, snappy, TrieDB, TrieDBMut, TrieMut};
|
2016-06-13 14:23:53 +02:00
|
|
|
use util::hash::{FixedHash, H256};
|
2016-06-15 16:42:49 +02:00
|
|
|
use util::rlp::{DecoderError, RlpStream, Stream, UntrustedRlp, View};
|
2016-06-14 13:22:15 +02:00
|
|
|
|
2016-06-15 16:42:49 +02:00
|
|
|
use self::account::Account;
|
2016-06-14 18:34:27 +02:00
|
|
|
use self::block::AbridgedBlock;
|
|
|
|
|
2016-06-15 16:42:49 +02:00
|
|
|
mod account;
|
2016-06-14 18:34:27 +02:00
|
|
|
mod block;
|
|
|
|
|
2016-06-14 13:22:15 +02:00
|
|
|
// Target size of a single chunk before compression: 16MB.
// Chunks are cut off around this size rather than exactly at it.
const PREFERRED_CHUNK_SIZE: usize = 16 * (1 << 20);
|
|
|
|
|
2016-06-14 13:37:17 +02:00
|
|
|
// shared portion of write_chunk
|
|
|
|
// returns either a (hash, compressed_size) pair or an io error.
|
|
|
|
fn write_chunk(raw_data: &[u8], compression_buffer: &mut Vec<u8>, path: &Path) -> Result<(H256, usize), Error> {
|
2016-06-16 13:29:24 +02:00
|
|
|
let compressed_size = snappy::compress_into(raw_data, compression_buffer);
|
2016-06-14 13:37:17 +02:00
|
|
|
let compressed = &compression_buffer[..compressed_size];
|
|
|
|
let hash = compressed.sha3();
|
|
|
|
|
2016-06-15 19:18:49 +02:00
|
|
|
assert!(snappy::validate_compressed_buffer(compressed));
|
|
|
|
|
2016-06-14 13:37:17 +02:00
|
|
|
let mut file_path = path.to_owned();
|
|
|
|
file_path.push(hash.hex());
|
|
|
|
|
|
|
|
let mut file = try!(File::create(file_path));
|
|
|
|
try!(file.write_all(compressed));
|
|
|
|
|
|
|
|
Ok((hash, compressed_size))
|
|
|
|
}
|
|
|
|
|
2016-06-10 12:19:50 +02:00
|
|
|
/// Used to build block chunks.
|
2016-06-13 16:29:26 +02:00
|
|
|
struct BlockChunker<'a> {
|
2016-06-10 12:19:50 +02:00
|
|
|
client: &'a BlockChainClient,
|
|
|
|
// block, receipt rlp pairs.
|
2016-06-13 16:21:23 +02:00
|
|
|
rlps: VecDeque<Bytes>,
|
2016-06-10 12:19:50 +02:00
|
|
|
current_hash: H256,
|
2016-06-13 16:21:23 +02:00
|
|
|
hashes: Vec<H256>,
|
2016-06-14 13:22:15 +02:00
|
|
|
snappy_buffer: Vec<u8>,
|
2016-06-10 12:19:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> BlockChunker<'a> {
|
2016-06-14 19:14:41 +02:00
|
|
|
// Repeatedly fill the buffers and writes out chunks, moving backwards from starting block hash.
|
2016-06-14 18:34:27 +02:00
|
|
|
// Loops until we reach the genesis, and writes out the remainder.
|
|
|
|
fn chunk_all(&mut self, genesis_hash: H256, path: &Path) -> Result<(), Error> {
|
2016-06-10 12:19:50 +02:00
|
|
|
let mut loaded_size = 0;
|
|
|
|
|
2016-06-14 18:34:27 +02:00
|
|
|
while self.current_hash != genesis_hash {
|
2016-06-10 12:19:50 +02:00
|
|
|
let block = self.client.block(BlockID::Hash(self.current_hash)).unwrap();
|
2016-06-14 18:34:27 +02:00
|
|
|
let view = BlockView::new(&block);
|
|
|
|
let abridged_rlp = AbridgedBlock::from_block_view(&view).into_inner();
|
|
|
|
|
2016-06-10 12:19:50 +02:00
|
|
|
let receipts = self.client.block_receipts(&self.current_hash).unwrap();
|
|
|
|
|
2016-06-13 16:21:23 +02:00
|
|
|
let pair = {
|
|
|
|
let mut pair_stream = RlpStream::new_list(2);
|
2016-06-14 18:34:27 +02:00
|
|
|
pair_stream.append(&abridged_rlp).append(&receipts);
|
2016-06-13 16:21:23 +02:00
|
|
|
pair_stream.out()
|
|
|
|
};
|
|
|
|
|
|
|
|
let new_loaded_size = loaded_size + pair.len();
|
2016-06-10 12:45:46 +02:00
|
|
|
|
2016-06-10 15:05:20 +02:00
|
|
|
// cut off the chunk if too large
|
|
|
|
if new_loaded_size > PREFERRED_CHUNK_SIZE {
|
2016-06-14 18:34:27 +02:00
|
|
|
let header = view.header_view();
|
|
|
|
try!(self.write_chunk(header.parent_hash(), header.number(), path));
|
|
|
|
loaded_size = pair.len();
|
2016-06-10 12:19:50 +02:00
|
|
|
} else {
|
|
|
|
loaded_size = new_loaded_size;
|
|
|
|
}
|
|
|
|
|
2016-06-13 20:48:12 +02:00
|
|
|
self.rlps.push_front(pair);
|
2016-06-14 18:34:27 +02:00
|
|
|
self.current_hash = view.header_view().parent_hash();
|
2016-06-10 12:19:50 +02:00
|
|
|
}
|
|
|
|
|
2016-06-14 18:34:27 +02:00
|
|
|
if loaded_size != 0 {
|
2016-06-14 19:14:41 +02:00
|
|
|
// we don't store the genesis block, so once we get to this point,
|
|
|
|
// the "first" block will be number 1.
|
2016-06-14 18:34:27 +02:00
|
|
|
try!(self.write_chunk(genesis_hash, 1, path));
|
2016-06-10 14:15:20 +02:00
|
|
|
}
|
|
|
|
|
2016-06-14 18:34:27 +02:00
|
|
|
Ok(())
|
2016-06-10 12:19:50 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// write out the data in the buffers to a chunk on disk
|
2016-06-14 18:34:27 +02:00
|
|
|
fn write_chunk(&mut self, parent_hash: H256, number: u64, path: &Path) -> Result<(), Error> {
|
|
|
|
trace!(target: "snapshot", "prepared block chunk with {} blocks", self.rlps.len());
|
|
|
|
let mut rlp_stream = RlpStream::new_list(self.rlps.len() + 2);
|
|
|
|
rlp_stream.append(&parent_hash).append(&number);
|
2016-06-13 16:21:23 +02:00
|
|
|
for pair in self.rlps.drain(..) {
|
2016-06-16 16:39:42 +02:00
|
|
|
rlp_stream.append_raw(&pair, 1);
|
2016-06-10 12:19:50 +02:00
|
|
|
}
|
|
|
|
|
2016-06-14 13:37:17 +02:00
|
|
|
let raw_data = rlp_stream.out();
|
|
|
|
let (hash, size) = try!(write_chunk(&raw_data, &mut self.snappy_buffer, path));
|
|
|
|
trace!(target: "snapshot", "wrote block chunk. hash: {}, size: {}, uncompressed size: {}", hash.hex(), size, raw_data.len());
|
2016-06-10 12:45:46 +02:00
|
|
|
|
2016-06-13 16:21:23 +02:00
|
|
|
self.hashes.push(hash);
|
|
|
|
Ok(())
|
2016-06-10 12:19:50 +02:00
|
|
|
}
|
2016-06-13 16:29:26 +02:00
|
|
|
}
|
2016-06-10 12:19:50 +02:00
|
|
|
|
2016-06-13 16:29:26 +02:00
|
|
|
/// Create and write out all block chunks to disk, returning a vector of all
|
|
|
|
/// the hashes of block chunks created.
|
|
|
|
///
|
|
|
|
/// The path parameter is the directory to store the block chunks in.
|
|
|
|
/// This function assumes the directory exists already.
|
|
|
|
pub fn chunk_blocks(client: &BlockChainClient, best_block_hash: H256, genesis_hash: H256, path: &Path) -> Result<Vec<H256>, Error> {
|
|
|
|
let mut chunker = BlockChunker {
|
|
|
|
client: client,
|
|
|
|
rlps: VecDeque::new(),
|
|
|
|
current_hash: best_block_hash,
|
|
|
|
hashes: Vec::new(),
|
2016-06-15 13:05:00 +02:00
|
|
|
snappy_buffer: vec![0; snappy::max_compressed_len(PREFERRED_CHUNK_SIZE)],
|
2016-06-13 16:29:26 +02:00
|
|
|
};
|
|
|
|
|
2016-06-14 18:34:27 +02:00
|
|
|
try!(chunker.chunk_all(genesis_hash, path));
|
|
|
|
|
2016-06-13 16:29:26 +02:00
|
|
|
Ok(chunker.hashes)
|
2016-06-10 17:19:55 +02:00
|
|
|
}
|
|
|
|
|
2016-06-11 19:28:18 +02:00
|
|
|
/// State trie chunker.
|
2016-06-13 16:29:26 +02:00
|
|
|
struct StateChunker<'a> {
|
2016-06-11 19:28:18 +02:00
|
|
|
hashes: Vec<H256>,
|
|
|
|
rlps: Vec<Bytes>,
|
|
|
|
cur_size: usize,
|
|
|
|
snapshot_path: &'a Path,
|
2016-06-14 13:22:15 +02:00
|
|
|
snappy_buffer: Vec<u8>,
|
2016-06-11 19:28:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> StateChunker<'a> {
|
|
|
|
// Push a key, value pair to be encoded.
|
|
|
|
//
|
|
|
|
// If the buffer is greater than the desired chunk size,
|
|
|
|
// this will write out the data to disk.
|
2016-06-16 18:30:18 +02:00
|
|
|
fn push(&mut self, account_hash: Bytes, data: Bytes) -> Result<(), Error> {
|
2016-06-13 12:04:20 +02:00
|
|
|
let pair = {
|
|
|
|
let mut stream = RlpStream::new_list(2);
|
2016-06-16 18:30:18 +02:00
|
|
|
stream.append(&account_hash).append_raw(&data, 1);
|
2016-06-13 12:04:20 +02:00
|
|
|
stream.out()
|
|
|
|
};
|
2016-06-11 19:28:18 +02:00
|
|
|
|
|
|
|
if self.cur_size + pair.len() >= PREFERRED_CHUNK_SIZE {
|
2016-06-13 16:21:23 +02:00
|
|
|
try!(self.write_chunk());
|
2016-06-11 19:28:18 +02:00
|
|
|
}
|
|
|
|
|
2016-06-13 15:07:54 +02:00
|
|
|
self.cur_size += pair.len();
|
2016-06-11 19:28:18 +02:00
|
|
|
self.rlps.push(pair);
|
2016-06-13 16:21:23 +02:00
|
|
|
|
|
|
|
Ok(())
|
2016-06-11 19:28:18 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
// Write out the buffer to disk, pushing the created chunk's hash to
|
|
|
|
// the list.
|
2016-06-13 16:21:23 +02:00
|
|
|
fn write_chunk(&mut self) -> Result<(), Error> {
|
2016-06-16 16:39:42 +02:00
|
|
|
let mut stream = RlpStream::new_list(self.rlps.len());
|
|
|
|
for rlp in self.rlps.drain(..) {
|
|
|
|
stream.append_raw(&rlp, 1);
|
|
|
|
}
|
2016-06-11 19:28:18 +02:00
|
|
|
|
2016-06-14 13:37:17 +02:00
|
|
|
let raw_data = stream.out();
|
|
|
|
let (hash, compressed_size) = try!(write_chunk(&raw_data, &mut self.snappy_buffer, self.snapshot_path));
|
|
|
|
trace!(target: "snapshot", "wrote state chunk. size: {}, uncompressed size: {}", compressed_size, raw_data.len());
|
2016-06-11 19:28:18 +02:00
|
|
|
|
|
|
|
self.hashes.push(hash);
|
|
|
|
self.cur_size = 0;
|
2016-06-13 16:21:23 +02:00
|
|
|
|
|
|
|
Ok(())
|
2016-06-11 19:28:18 +02:00
|
|
|
}
|
2016-06-13 16:29:26 +02:00
|
|
|
}
|
2016-06-11 19:28:18 +02:00
|
|
|
|
2016-06-13 16:29:26 +02:00
|
|
|
/// Walk the given state database starting from the given root,
|
|
|
|
/// creating chunks and writing them out.
|
|
|
|
///
|
|
|
|
/// Returns a list of hashes of chunks created, or any error it may
|
|
|
|
/// have encountered.
|
|
|
|
pub fn chunk_state(db: &HashDB, root: &H256, path: &Path) -> Result<Vec<H256>, Error> {
|
|
|
|
let account_view = try!(TrieDB::new(db, &root));
|
2016-06-11 19:28:18 +02:00
|
|
|
|
2016-06-13 16:29:26 +02:00
|
|
|
let mut chunker = StateChunker {
|
|
|
|
hashes: Vec::new(),
|
|
|
|
rlps: Vec::new(),
|
|
|
|
cur_size: 0,
|
|
|
|
snapshot_path: path,
|
2016-06-15 13:05:00 +02:00
|
|
|
snappy_buffer: vec![0; snappy::max_compressed_len(PREFERRED_CHUNK_SIZE)],
|
2016-06-13 16:29:26 +02:00
|
|
|
};
|
2016-06-13 13:52:41 +02:00
|
|
|
|
2016-06-13 20:00:47 +02:00
|
|
|
trace!(target: "snapshot", "beginning state chunking");
|
2016-06-13 12:04:20 +02:00
|
|
|
|
2016-06-13 16:29:26 +02:00
|
|
|
// account_key here is the address' hash.
|
|
|
|
for (account_key, account_data) in account_view.iter() {
|
2016-06-15 16:42:49 +02:00
|
|
|
let account = Account::from_thin_rlp(account_data);
|
2016-06-13 16:29:26 +02:00
|
|
|
let account_key_hash = H256::from_slice(&account_key);
|
2016-06-13 14:23:53 +02:00
|
|
|
|
2016-06-13 16:29:26 +02:00
|
|
|
let account_db = AccountDB::from_hash(db, account_key_hash);
|
2016-06-13 12:04:20 +02:00
|
|
|
|
2016-06-15 13:52:53 +02:00
|
|
|
let fat_rlp = try!(account.to_fat_rlp(&account_db, account_key_hash));
|
2016-06-13 16:29:26 +02:00
|
|
|
try!(chunker.push(account_key, fat_rlp));
|
|
|
|
}
|
2016-06-13 15:07:54 +02:00
|
|
|
|
2016-06-13 16:29:26 +02:00
|
|
|
if chunker.cur_size != 0 {
|
|
|
|
try!(chunker.write_chunk());
|
2016-06-13 12:04:20 +02:00
|
|
|
}
|
2016-06-13 16:29:26 +02:00
|
|
|
|
|
|
|
Ok(chunker.hashes)
|
2016-06-13 12:04:20 +02:00
|
|
|
}
|
|
|
|
|
2016-06-10 17:19:55 +02:00
|
|
|
/// Manifest data.
|
|
|
|
pub struct ManifestData {
|
|
|
|
/// List of state chunk hashes.
|
|
|
|
pub state_hashes: Vec<H256>,
|
|
|
|
/// List of block chunk hashes.
|
|
|
|
pub block_hashes: Vec<H256>,
|
|
|
|
/// The final, expected state root.
|
|
|
|
pub state_root: H256,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl ManifestData {
|
2016-06-15 17:46:40 +02:00
|
|
|
/// Encode the manifest data to rlp.
|
2016-06-10 17:19:55 +02:00
|
|
|
pub fn to_rlp(self) -> Bytes {
|
|
|
|
let mut stream = RlpStream::new_list(3);
|
|
|
|
stream.append(&self.state_hashes);
|
|
|
|
stream.append(&self.block_hashes);
|
|
|
|
stream.append(&self.state_root);
|
|
|
|
|
|
|
|
stream.out()
|
|
|
|
}
|
|
|
|
|
2016-06-15 17:46:40 +02:00
|
|
|
/// Try to restore manifest data from raw bytes, interpreted as RLP.
|
2016-06-10 17:19:55 +02:00
|
|
|
pub fn from_rlp(raw: &[u8]) -> Result<Self, DecoderError> {
|
|
|
|
let decoder = UntrustedRlp::new(raw);
|
|
|
|
|
2016-06-15 13:52:53 +02:00
|
|
|
let state_hashes: Vec<H256> = try!(decoder.val_at(0));
|
|
|
|
let block_hashes: Vec<H256> = try!(decoder.val_at(1));
|
|
|
|
let state_root: H256 = try!(decoder.val_at(2));
|
2016-06-10 17:19:55 +02:00
|
|
|
|
|
|
|
Ok(ManifestData {
|
|
|
|
state_hashes: state_hashes,
|
|
|
|
block_hashes: block_hashes,
|
|
|
|
state_root: state_root,
|
|
|
|
})
|
|
|
|
}
|
2016-06-15 17:46:40 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/// Used to rebuild the state trie piece by piece.
|
2016-06-17 12:56:57 +02:00
|
|
|
pub struct StateRebuilder {
|
|
|
|
db: Box<JournalDB>,
|
2016-06-15 17:46:40 +02:00
|
|
|
state_root: H256,
|
|
|
|
snappy_buffer: Vec<u8>
|
|
|
|
}
|
|
|
|
|
2016-06-17 12:56:57 +02:00
|
|
|
impl StateRebuilder {
|
2016-06-15 17:46:40 +02:00
|
|
|
/// Create a new state rebuilder to write into the given backing DB.
|
2016-06-17 12:56:57 +02:00
|
|
|
pub fn new(db: Box<JournalDB>) -> Self {
|
2016-06-15 17:46:40 +02:00
|
|
|
StateRebuilder {
|
|
|
|
db: db,
|
|
|
|
state_root: H256::zero(),
|
|
|
|
snappy_buffer: Vec::new(),
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Feed a compressed state chunk into the rebuilder.
|
|
|
|
pub fn feed(&mut self, compressed: &[u8]) -> Result<(), Error> {
|
2016-06-16 13:29:24 +02:00
|
|
|
let len = try!(snappy::decompress_into(compressed, &mut self.snappy_buffer));
|
2016-06-15 17:46:40 +02:00
|
|
|
let rlp = UntrustedRlp::new(&self.snappy_buffer[..len]);
|
|
|
|
|
|
|
|
for account_pair in rlp.iter() {
|
2016-06-15 18:41:02 +02:00
|
|
|
let hash: H256 = try!(account_pair.val_at(0));
|
|
|
|
let fat_rlp = try!(account_pair.at(1));
|
2016-06-15 17:46:40 +02:00
|
|
|
|
|
|
|
let thin_rlp = {
|
2016-06-17 12:56:57 +02:00
|
|
|
let mut acct_db = AccountDBMut::from_hash(self.db.as_hashdb_mut(), hash);
|
2016-06-15 17:46:40 +02:00
|
|
|
|
|
|
|
// fill out the storage trie and code while decoding.
|
|
|
|
let acc = try!(Account::from_fat_rlp(&mut acct_db, fat_rlp));
|
|
|
|
acc.to_thin_rlp()
|
|
|
|
};
|
|
|
|
|
2016-06-16 18:30:18 +02:00
|
|
|
let mut account_trie = if self.state_root != H256::zero() {
|
2016-06-17 12:56:57 +02:00
|
|
|
try!(TrieDBMut::from_existing(self.db.as_hashdb_mut(), &mut self.state_root))
|
2016-06-16 18:30:18 +02:00
|
|
|
} else {
|
2016-06-17 12:56:57 +02:00
|
|
|
TrieDBMut::new(self.db.as_hashdb_mut(), &mut self.state_root)
|
2016-06-16 18:30:18 +02:00
|
|
|
};
|
|
|
|
account_trie.insert(&hash, &thin_rlp);
|
2016-06-15 17:46:40 +02:00
|
|
|
}
|
|
|
|
|
2016-06-17 12:56:57 +02:00
|
|
|
try!(self.db.commit(0, &H256::zero(), None));
|
2016-06-15 17:46:40 +02:00
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Get the state root of the rebuilder.
|
|
|
|
pub fn state_root(&self) -> H256 { self.state_root }
|
2016-06-10 12:19:50 +02:00
|
|
|
}
|