From 3290f393bd185bcd61a9edc7493ff96745c35b7f Mon Sep 17 00:00:00 2001
From: Robert Habermeier
Date: Tue, 14 Jun 2016 18:34:27 +0200
Subject: [PATCH] block rlp compression

---
 ethcore/src/snapshot/block.rs | 120 ++++++++++++++++++++++++++++++++++
 ethcore/src/snapshot/mod.rs   |  50 +++++++-------
 2 files changed, 146 insertions(+), 24 deletions(-)
 create mode 100644 ethcore/src/snapshot/block.rs

diff --git a/ethcore/src/snapshot/block.rs b/ethcore/src/snapshot/block.rs
new file mode 100644
index 000000000..99e470cad
--- /dev/null
+++ b/ethcore/src/snapshot/block.rs
@@ -0,0 +1,120 @@
+// Copyright 2015, 2016 Ethcore (UK) Ltd.
+// This file is part of Parity.
+
+// Parity is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Parity is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Parity. If not, see <http://www.gnu.org/licenses/>.
+
+//! Block RLP compression.
+
+use block::Block;
+use header::Header;
+
+use views::BlockView;
+use util::rlp::{Rlp, RlpStream, Stream, View};
+use util::{Bytes, H256};
+
+// number of header fields written into an abridged block (the parent hash
+// and the block number are left out).
+const HEADER_FIELDS: usize = 11;
+// number of block-level fields written after the seal: transactions and uncles.
+const BLOCK_FIELDS: usize = 2;
+
+/// An RLP-encoded block with the parent hash and block number left out of
+/// its header. Built by `from_block_view`; `to_block` puts the two missing
+/// values back.
+pub struct AbridgedBlock {
+    // the abridged rlp; `from_raw` stores whatever it is given, unchecked.
+    rlp: Bytes,
+}
+
+impl AbridgedBlock {
+    /// Create from a vector of bytes. Does no verification.
+    pub fn from_raw(rlp: Bytes) -> Self {
+        AbridgedBlock {
+            rlp: rlp,
+        }
+    }
+
+    /// Return the inner bytes.
+    pub fn into_inner(self) -> Bytes {
+        self.rlp
+    }
+
+    /// Given a full block view, trim out the parent hash and block number,
+    /// producing new rlp.
+    ///
+    /// The list is written as the 11 remaining header fields, then one raw
+    /// item per seal field, then the transactions and the uncles.
+    pub fn from_block_view(block_view: &BlockView) -> Self {
+        let header_view = block_view.header_view();
+        let seal = header_view.seal();
+
+        // the number of seal fields is not fixed, so the list length has to
+        // be computed: kept header fields + seal fields + block fields.
+        let item_count = HEADER_FIELDS + seal.len() + BLOCK_FIELDS;
+        let mut out = RlpStream::new_list(item_count);
+
+        // header fields, in header order, minus parent hash and number.
+        out.append(&header_view.uncles_hash());
+        out.append(&header_view.author());
+        out.append(&header_view.state_root());
+        out.append(&header_view.transactions_root());
+        out.append(&header_view.receipts_root());
+        out.append(&header_view.log_bloom());
+        out.append(&header_view.difficulty());
+        out.append(&header_view.gas_limit());
+        out.append(&header_view.gas_used());
+        out.append(&header_view.timestamp());
+        out.append(&header_view.extra_data());
+
+        // each seal field goes in raw, as a single list item.
+        for raw_field in seal {
+            out.append_raw(&raw_field, 1);
+        }
+
+        // block fields.
+        out.append(&block_view.transactions());
+        out.append(&block_view.uncles());
+
+        AbridgedBlock {
+            rlp: out.out(),
+        }
+    }
+
+    /// Flesh out an abridged block view with the provided parent hash and block number.
+    ///
+    /// Will fail if contains invalid rlp.
+    ///
+    /// Expects the layout written by `from_block_view`: the 11 kept header
+    /// fields, then one raw item per seal field, then the transactions and
+    /// the uncles. Panics if the list holds fewer than 13 items.
+    pub fn to_block(&self, parent_hash: H256, number: u64) -> Block {
+        let rlp = Rlp::new(&self.rlp);
+
+        // the seal is not a nested list: `from_block_view` writes one item per
+        // seal field between the header fields and the block fields, so the
+        // seal range has to be derived from the total item count.
+        let item_count = rlp.item_count();
+        assert!(item_count >= HEADER_FIELDS + BLOCK_FIELDS, "abridged block rlp has too few items");
+        let seal_end = item_count - BLOCK_FIELDS;
+
+        let mut header = Header {
+            parent_hash: parent_hash,
+            uncles_hash: rlp.val_at(0),
+            author: rlp.val_at(1),
+            state_root: rlp.val_at(2),
+            transactions_root: rlp.val_at(3),
+            receipts_root: rlp.val_at(4),
+            log_bloom: rlp.val_at(5),
+            difficulty: rlp.val_at(6),
+            number: number,
+            gas_limit: rlp.val_at(7),
+            gas_used: rlp.val_at(8),
+            timestamp: rlp.val_at(9),
+            extra_data: rlp.val_at(10),
+            ..Default::default()
+        };
+
+        // seal fields were appended raw, so they are read back raw as well.
+        let seal: Vec<Bytes> = (HEADER_FIELDS..seal_end)
+            .map(|i| rlp.at(i).as_raw().to_owned())
+            .collect();
+
+        header.set_seal(seal);
+
+        // the two block fields always come last, whatever the seal length.
+        Block {
+            header: header,
+            transactions: rlp.val_at(seal_end),
+            uncles: rlp.val_at(seal_end + 1),
+        }
+    }
+}
\ No newline at end of file
diff --git a/ethcore/src/snapshot/mod.rs b/ethcore/src/snapshot/mod.rs
index b0e7547ae..3dfc043e2 100644
--- a/ethcore/src/snapshot/mod.rs
+++ b/ethcore/src/snapshot/mod.rs
@@ -33,6 +33,10 @@ use util::numbers::U256;
 use util::rlp::{DecoderError, Rlp, RlpStream, Stream, SHA3_NULL_RLP, UntrustedRlp, View};
 use util::snappy;
 
+use self::block::AbridgedBlock;
+
+mod block;
+
 // Try to have chunks be around 16MB (before compression)
 const PREFERRED_CHUNK_SIZE: usize = 16 * 1024 * 1024;
 
@@ -78,7 +82,6 @@ struct BlockChunker<'a> {
     client: &'a BlockChainClient,
     // block, receipt rlp pairs.
     rlps: VecDeque,
-    genesis_hash: H256,
     current_hash: H256,
     hashes: Vec,
     snappy_buffer: Vec,
@@ -86,20 +89,20 @@ struct BlockChunker<'a> {
 
 impl<'a> BlockChunker<'a> {
     // Try to fill the buffers, moving backwards from current block hash.
-    // This will return true if it created a block chunk, false otherwise.
-    fn fill_buffers(&mut self) -> bool {
+    // Loops until we reach the genesis, and writes out the remainder.
+    fn chunk_all(&mut self, genesis_hash: H256, path: &Path) -> Result<(), Error> {
         let mut loaded_size = 0;
-        let mut blocks_loaded = 0;
 
-        while loaded_size < PREFERRED_CHUNK_SIZE && self.current_hash != self.genesis_hash {
-
-            // skip compression for now
+        while self.current_hash != genesis_hash {
             let block = self.client.block(BlockID::Hash(self.current_hash)).unwrap();
+            let view = BlockView::new(&block);
+            let abridged_rlp = AbridgedBlock::from_block_view(&view).into_inner();
+
             let receipts = self.client.block_receipts(&self.current_hash).unwrap();
 
             let pair = {
                 let mut pair_stream = RlpStream::new_list(2);
-                pair_stream.append(&block).append(&receipts);
+                pair_stream.append(&abridged_rlp).append(&receipts);
                 pair_stream.out()
             };
 
@@ -107,27 +110,34 @@ impl<'a> BlockChunker<'a> {
 
             // cut off the chunk if too large
-            if new_loaded_size > PREFERRED_CHUNK_SIZE {
-                break;
+            if new_loaded_size > PREFERRED_CHUNK_SIZE && !self.rlps.is_empty() {
+                // the block being visited is not in the buffer yet, so it is the
+                // parent of the lowest block in the chunk that is written here.
+                let chunk_parent_hash = self.current_hash;
+                let chunk_first_number = view.header_view().number() + 1;
+                try!(self.write_chunk(chunk_parent_hash, chunk_first_number, path));
+                loaded_size = pair.len();
             } else {
                 loaded_size = new_loaded_size;
             }
 
             self.rlps.push_front(pair);
-            self.current_hash = BlockView::new(&block).header_view().parent_hash();
-            blocks_loaded += 1;
+            self.current_hash = view.header_view().parent_hash();
         }
 
-        if blocks_loaded > 0 {
-            trace!(target: "snapshot", "prepared block chunk with {} blocks", blocks_loaded);
+        if loaded_size != 0 {
+            // we don't store the genesis hash, so once we get to this point,
+            // the "first" block will have number 1.
+            try!(self.write_chunk(genesis_hash, 1, path));
         }
 
-        loaded_size != 0
+        Ok(())
     }
 
     // write out the data in the buffers to a chunk on disk
-    fn write_chunk(&mut self, path: &Path) -> Result<(), Error> {
-        // Todo [rob]: compress raw data, put parent hash and block number into chunk.
- let mut rlp_stream = RlpStream::new_list(self.rlps.len()); + fn write_chunk(&mut self, parent_hash: H256, number: u64, path: &Path) -> Result<(), Error> { + trace!(target: "snapshot", "prepared block chunk with {} blocks", self.rlps.len()); + let mut rlp_stream = RlpStream::new_list(self.rlps.len() + 2); + rlp_stream.append(&parent_hash).append(&number); for pair in self.rlps.drain(..) { rlp_stream.append(&pair); } @@ -150,18 +157,13 @@ pub fn chunk_blocks(client: &BlockChainClient, best_block_hash: H256, genesis_ha let mut chunker = BlockChunker { client: client, rlps: VecDeque::new(), - genesis_hash: genesis_hash, current_hash: best_block_hash, hashes: Vec::new(), snappy_buffer: vec![0; SNAPPY_BUFFER_SIZE], }; - while chunker.fill_buffers() { - try!(chunker.write_chunk(path)); - } - if chunker.rlps.len() != 0 { - try!(chunker.write_chunk(path)); - } + try!(chunker.chunk_all(genesis_hash, path)); + Ok(chunker.hashes) }