Blocks and snapshot compression (#1687)

* new Compressible rlp trait

* new Compressible rlp trait

* make compressed rlp iterable

* make compressed rlp iterable

* invalid rlp slice swapper

* switch compress to swapper, add reverse swapper test case

* add basic account compression test

* add new rlp trait

* new Compressible rlp trait

* make compressed rlp iterable

* invalid rlp slice swapper

* invalid rlp slice swapper

* switch compress to swapper, add reverse swapper test case

* switch compress to swapper, add reverse swapper test case

* add account compress/ decompress test

* make compressor cleaner, use hashmaps for swapper

* improve compression tests

* add a DecompressingDecoder, change Decoder to take refernce

* separate rlp compression related stuff

* new Compressible rlp trait

* new Compressible rlp trait

* new Compressible rlp trait

* make compressed rlp iterable

* make compressed rlp iterable

* make compressed rlp iterable

* invalid rlp slice swapper

* invalid rlp slice swapper

* invalid rlp slice swapper

* switch compress to swapper, add reverse swapper test case

* switch compress to swapper, add reverse swapper test case

* switch compress to swapper, add reverse swapper test case

* add basic account compression test

* add new rlp trait

* add account compress/ decompress test

* make compressor cleaner, use hashmaps for swapper

* improve compression tests

* add a DecompressingDecoder, change Decoder to take refernce

* separate rlp compression related stuff

* DecompressingDecoder test

* initial compressing HashDB wrapper

* remove unused test

* change CompressedDB to struct wrapper with overlay

* simplify compressor

* failed RefCell attempt

* use denote to return reference

* compiled compresseddb

* compressdb test, add overlay emplace

* fix overlay reference count handling

* add immutable compresseddb, make account use hashdb

* simplify using trait objects

* enable hashdb for account

* initial state compression attempt

* wrap state db

* add tests for analyzing db

* add account predicate

* try to compress data fields as rlp too

* remove compression for storage trie

* add a compressing migration

* more compression stats tests

* fix migration import

* nested encoding compression test

* fix decompression, move db stats tests to rlpcompression

* added malformed rlp tests, cover a few edge cases

* new CompressingEncoder struct

* extend migrations to state

* first version working on the whole db

* clean up Compressible impl

* tests cleanup

* add a testing migration

* refactor deep compression using option, add simple compression

* put tests in a module

* fix compressed overlay loading

* simple compression for snapshots

* remove unused DecompressingDecoder

* add a general compressing migration

* add more common rlps to compress

* use static slices for swapper

* add precomputed hashes and invalid rlps

* make decoder private again

* cover more cases with tests

* style

* fix weird indentation

* remove possible panic in payload_info

* make prefix checking safe

* fix db existence check

* remove db dir from test

* pass usize by value [ci skip]

* Improve comment on panic removal.

* add common blocks db rlps

* add compression to blockchain db

* add blocks db migration

* fix the migrations

* remove state compression

* add a separate snapshot swapper

* ability to use different swappers and traversal

* update tests to new interface

* clean up code ordering

* update usage

* fix compilation

* remove unnecessary changes

* move methods to functions to reduce interface

* move test to module

* update common rlps to blocks db

* move tests to tests modules

* remove redundant &
This commit is contained in:
keorn 2016-07-27 17:11:41 +02:00 committed by Gav Wood
parent 4907c5028f
commit 02cf48681d
13 changed files with 497 additions and 60 deletions

View File

@ -288,6 +288,16 @@ mod tests {
use super::*; use super::*;
use account_db::*; use account_db::*;
#[test]
fn account_compress() {
let raw = Account::new_basic(2.into(), 4.into()).rlp();
let rlp = UntrustedRlp::new(&raw);
let compact_vec = rlp.compress(RlpType::Snapshot).to_vec();
assert!(raw.len() > compact_vec.len());
let again_raw = UntrustedRlp::new(&compact_vec).decompress(RlpType::Snapshot);
assert_eq!(raw, again_raw.to_vec());
}
#[test] #[test]
fn storage_at() { fn storage_at() {
let mut db = MemoryDB::new(); let mut db = MemoryDB::new();

View File

@ -183,7 +183,7 @@ impl BlockProvider for BlockChain {
match opt { match opt {
Some(b) => { Some(b) => {
let bytes: Bytes = b.to_vec(); let bytes: Bytes = UntrustedRlp::new(&b).decompress(RlpType::Blocks).to_vec();
let mut write = self.blocks.write(); let mut write = self.blocks.write();
write.insert(hash.clone(), bytes.clone()); write.insert(hash.clone(), bytes.clone());
Some(bytes) Some(bytes)
@ -510,9 +510,11 @@ impl BlockChain {
return ImportRoute::none(); return ImportRoute::none();
} }
let compressed = UntrustedRlp::new(bytes).compress(RlpType::Blocks).to_vec();
let _lock = self.insert_lock.lock(); let _lock = self.insert_lock.lock();
// store block in db // store block in db
self.blocks_db.put(&hash, bytes).unwrap(); self.blocks_db.put(&hash, &compressed).unwrap();
let info = self.block_info(bytes); let info = self.block_info(bytes);

View File

@ -0,0 +1,21 @@
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
//! Blocks database migrations.
mod v8;
pub use self::v8::V8;

View File

@ -0,0 +1,34 @@
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
//! This migration compresses the state db.
use util::migration::SimpleMigration;
use util::rlp::{Compressible, UntrustedRlp, View, RlpType};
/// Compressing migration.
#[derive(Default)]
pub struct V8;
impl SimpleMigration for V8 {
fn version(&self) -> u32 {
8
}
fn simple_migrate(&mut self, key: Vec<u8>, value: Vec<u8>) -> Option<(Vec<u8>, Vec<u8>)> {
Some((key,UntrustedRlp::new(&value).compress(RlpType::Blocks).to_vec()))
}
}

View File

@ -1,4 +1,5 @@
//! Database migrations. //! Database migrations.
pub mod extras;
pub mod state; pub mod state;
pub mod blocks;
pub mod extras;

View File

@ -29,7 +29,7 @@ use views::{BlockView, HeaderView};
use util::{Bytes, Hashable, HashDB, JournalDB, snappy, TrieDB, TrieDBMut, TrieMut}; use util::{Bytes, Hashable, HashDB, JournalDB, snappy, TrieDB, TrieDBMut, TrieMut};
use util::hash::{FixedHash, H256}; use util::hash::{FixedHash, H256};
use util::rlp::{DecoderError, RlpStream, Stream, UntrustedRlp, View}; use util::rlp::{DecoderError, RlpStream, Stream, UntrustedRlp, View, Compressible, RlpType};
use self::account::Account; use self::account::Account;
use self::block::AbridgedBlock; use self::block::AbridgedBlock;
@ -261,7 +261,8 @@ pub fn chunk_state(db: &HashDB, root: &H256, path: &Path) -> Result<Vec<H256>, E
let account_db = AccountDB::from_hash(db, account_key_hash); let account_db = AccountDB::from_hash(db, account_key_hash);
let fat_rlp = try!(account.to_fat_rlp(&account_db)); let fat_rlp = try!(account.to_fat_rlp(&account_db));
try!(chunker.push(account_key, fat_rlp)); let compressed_rlp = UntrustedRlp::new(&fat_rlp).compress(RlpType::Snapshot).to_vec();
try!(chunker.push(account_key, compressed_rlp));
} }
if chunker.cur_size != 0 { if chunker.cur_size != 0 {
@ -400,7 +401,8 @@ fn rebuild_account_trie(db: &mut HashDB, account_chunk: &[&[u8]], out_chunk: &mu
let account_rlp = UntrustedRlp::new(account_pair); let account_rlp = UntrustedRlp::new(account_pair);
let hash: H256 = try!(account_rlp.val_at(0)); let hash: H256 = try!(account_rlp.val_at(0));
let fat_rlp = try!(account_rlp.at(1)); let decompressed = try!(account_rlp.at(1)).decompress(RlpType::Snapshot);
let fat_rlp = UntrustedRlp::new(&decompressed[..]);
let thin_rlp = { let thin_rlp = {
let mut acct_db = AccountDBMut::from_hash(db.as_hashdb_mut(), hash); let mut acct_db = AccountDBMut::from_hash(db.as_hashdb_mut(), hash);

View File

@ -26,7 +26,7 @@ use ethcore::migrations;
/// Database is assumed to be at default version, when no version file is found. /// Database is assumed to be at default version, when no version file is found.
const DEFAULT_VERSION: u32 = 5; const DEFAULT_VERSION: u32 = 5;
/// Current version of database models. /// Current version of database models.
const CURRENT_VERSION: u32 = 7; const CURRENT_VERSION: u32 = 8;
/// Defines how many items are migrated to the new version of database at once. /// Defines how many items are migrated to the new version of database at once.
const BATCH_SIZE: usize = 1024; const BATCH_SIZE: usize = 1024;
/// Version file name. /// Version file name.
@ -110,6 +110,13 @@ fn update_version(path: &Path) -> Result<(), Error> {
Ok(()) Ok(())
} }
/// State database path.
fn state_database_path(path: &Path) -> PathBuf {
let mut state_path = path.to_owned();
state_path.push("state");
state_path
}
/// Blocks database path. /// Blocks database path.
fn blocks_database_path(path: &Path) -> PathBuf { fn blocks_database_path(path: &Path) -> PathBuf {
let mut blocks_path = path.to_owned(); let mut blocks_path = path.to_owned();
@ -124,13 +131,6 @@ fn extras_database_path(path: &Path) -> PathBuf {
extras_path extras_path
} }
/// State database path.
fn state_database_path(path: &Path) -> PathBuf {
let mut state_path = path.to_owned();
state_path.push("state");
state_path
}
/// Database backup /// Database backup
fn backup_database_path(path: &Path) -> PathBuf { fn backup_database_path(path: &Path) -> PathBuf {
let mut backup_path = path.to_owned(); let mut backup_path = path.to_owned();
@ -148,7 +148,8 @@ fn default_migration_settings() -> MigrationConfig {
/// Migrations on the blocks database. /// Migrations on the blocks database.
fn blocks_database_migrations() -> Result<MigrationManager, Error> { fn blocks_database_migrations() -> Result<MigrationManager, Error> {
let manager = MigrationManager::new(default_migration_settings()); let mut manager = MigrationManager::new(default_migration_settings());
try!(manager.add_migration(migrations::blocks::V8::default()).map_err(|_| Error::MigrationImpossible));
Ok(manager) Ok(manager)
} }
@ -167,8 +168,8 @@ fn state_database_migrations(pruning: Algorithm) -> Result<MigrationManager, Err
Algorithm::OverlayRecent => manager.add_migration(migrations::state::OverlayRecentV7::default()), Algorithm::OverlayRecent => manager.add_migration(migrations::state::OverlayRecentV7::default()),
_ => return Err(Error::UnsuportedPruningMethod), _ => return Err(Error::UnsuportedPruningMethod),
}; };
try!(res.map_err(|_| Error::MigrationImpossible)); try!(res.map_err(|_| Error::MigrationImpossible));
Ok(manager) Ok(manager)
} }

View File

@ -275,4 +275,3 @@ mod tests {
test_db(&DatabaseConfig::default()); test_db(&DatabaseConfig::default());
} }
} }

106
util/src/rlp/commonrlps.rs Normal file
View File

@ -0,0 +1,106 @@
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
//! Contains RLPs used for compression.
use rlp::rlpcompression::InvalidRlpSwapper;
lazy_static! {
/// Swapper for snapshot compression.
pub static ref SNAPSHOT_RLP_SWAPPER: InvalidRlpSwapper<'static> = InvalidRlpSwapper::new(EMPTY_RLPS, INVALID_RLPS);
}
lazy_static! {
/// Swapper with common long RLPs, up to 127 can be added.
pub static ref BLOCKS_RLP_SWAPPER: InvalidRlpSwapper<'static> = InvalidRlpSwapper::new(COMMON_RLPS, INVALID_RLPS);
}
static EMPTY_RLPS: &'static [&'static [u8]] = &[
// RLP of SHA3_NULL_RLP
&[160, 86, 232, 31, 23, 27, 204, 85, 166, 255, 131, 69, 230, 146, 192, 248, 110, 91, 72, 224, 27, 153, 108, 173, 192, 1, 98, 47, 181, 227, 99, 180, 33],
// RLP of SHA3_EMPTY
&[160, 197, 210, 70, 1, 134, 247, 35, 60, 146, 126, 125, 178, 220, 199, 3, 192, 229, 0, 182, 83, 202, 130, 39, 59, 123, 250, 216, 4, 93, 133, 164, 112]
];
static COMMON_RLPS: &'static [&'static [u8]] = &[
// RLP of SHA3_NULL_RLP
&[160, 86, 232, 31, 23, 27, 204, 85, 166, 255, 131, 69, 230, 146, 192, 248, 110, 91, 72, 224, 27, 153, 108, 173, 192, 1, 98, 47, 181, 227, 99, 180, 33],
// RLP of SHA3_EMPTY
&[160, 197, 210, 70, 1, 134, 247, 35, 60, 146, 126, 125, 178, 220, 199, 3, 192, 229, 0, 182, 83, 202, 130, 39, 59, 123, 250, 216, 4, 93, 133, 164, 112],
// Other RLPs found in blocks DB using the test below.
&[160, 29, 204, 77, 232, 222, 199, 93, 122, 171, 133, 181, 103, 182, 204, 212, 26, 211, 18, 69, 27, 148, 138, 116, 19, 240, 161, 66, 253, 64, 212, 147, 71],
&[148, 50, 190, 52, 59, 148, 248, 96, 18, 77, 196, 254, 226, 120, 253, 203, 211, 140, 16, 45, 136],
&[148, 82, 188, 68, 213, 55, 131, 9, 238, 42, 191, 21, 57, 191, 113, 222, 27, 125, 123, 227, 181],
&[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
];
static INVALID_RLPS: &'static [&'static [u8]] = &[&[0x81, 0x0], &[0x81, 0x1], &[0x81, 0x2], &[0x81, 0x3], &[0x81, 0x4], &[0x81, 0x5], &[0x81, 0x6], &[0x81, 0x7], &[0x81, 0x8], &[0x81, 0x9], &[0x81, 0xa], &[0x81, 0xb], &[0x81, 0xc], &[0x81, 0xd], &[0x81, 0xe], &[0x81, 0xf], &[0x81, 0x10], &[0x81, 0x11], &[0x81, 0x12], &[0x81, 0x13], &[0x81, 0x14], &[0x81, 0x15], &[0x81, 0x16], &[0x81, 0x17], &[0x81, 0x18], &[0x81, 0x19], &[0x81, 0x1a], &[0x81, 0x1b], &[0x81, 0x1c], &[0x81, 0x1d], &[0x81, 0x1e], &[0x81, 0x1f], &[0x81, 0x20], &[0x81, 0x21], &[0x81, 0x22], &[0x81, 0x23], &[0x81, 0x24], &[0x81, 0x25], &[0x81, 0x26], &[0x81, 0x27], &[0x81, 0x28], &[0x81, 0x29], &[0x81, 0x2a], &[0x81, 0x2b], &[0x81, 0x2c], &[0x81, 0x2d], &[0x81, 0x2e], &[0x81, 0x2f], &[0x81, 0x30], &[0x81, 0x31], &[0x81, 0x32], &[0x81, 0x33], &[0x81, 0x34], &[0x81, 0x35], &[0x81, 0x36], &[0x81, 0x37], &[0x81, 0x38], &[0x81, 0x39], &[0x81, 0x3a], &[0x81, 0x3b], &[0x81, 0x3c], &[0x81, 0x3d], &[0x81, 0x3e], &[0x81, 0x3f], &[0x81, 0x40], &[0x81, 0x41], &[0x81, 0x42], &[0x81, 0x43], &[0x81, 0x44], &[0x81, 0x45], &[0x81, 0x46], &[0x81, 0x47], &[0x81, 0x48], &[0x81, 0x49], &[0x81, 0x4a], &[0x81, 0x4b], &[0x81, 0x4c], &[0x81, 0x4d], &[0x81, 0x4e], &[0x81, 0x4f], &[0x81, 0x50], &[0x81, 0x51], &[0x81, 0x52], &[0x81, 0x53], &[0x81, 0x54], &[0x81, 0x55], &[0x81, 0x56], &[0x81, 0x57], &[0x81, 0x58], &[0x81, 0x59], &[0x81, 0x5a], &[0x81, 0x5b], &[0x81, 0x5c], &[0x81, 0x5d], &[0x81, 0x5e], &[0x81, 0x5f], &[0x81, 0x60], &[0x81, 0x61], &[0x81, 0x62], &[0x81, 0x63], &[0x81, 0x64], &[0x81, 0x65], &[0x81, 0x66], &[0x81, 0x67], &[0x81, 0x68], &[0x81, 0x69], &[0x81, 0x6a], &[0x81, 0x6b], &[0x81, 0x6c], &[0x81, 0x6d], &[0x81, 0x6e], &[0x81, 0x6f], &[0x81, 0x70], &[0x81, 0x71], &[0x81, 0x72], &[0x81, 0x73], &[0x81, 0x74], &[0x81, 0x75], &[0x81, 0x76], &[0x81, 0x77], &[0x81, 0x78], &[0x81, 0x79], &[0x81, 0x7a], &[0x81, 0x7b], &[0x81, 0x7c], &[0x81, 0x7d], &[0x81, 0x7e]];
#[cfg(test)]
mod tests {
#[test]
#[ignore]
fn analyze_db() {
use rlp::{UntrustedRlp, View};
use std::collections::HashMap;
use kvdb::*;
let path = "db path".to_string();
let values: Vec<_> = Database::open_default(&path).unwrap().iter().map(|(_, v)| v).collect();
let mut rlp_counts: HashMap<_, u32> = HashMap::new();
let mut rlp_sizes: HashMap<_, u32> = HashMap::new();
fn flat_rlp<'a>(acc: &mut Vec<UntrustedRlp<'a>>, rlp: UntrustedRlp<'a>) {
match rlp.is_data() {
true => if rlp.size()>=70 {
match rlp.data() {
Ok(x) => flat_rlp(acc, UntrustedRlp::new(x)),
_ => acc.push(rlp),
}
} else {
acc.push(rlp);
},
false => for r in rlp.iter() { flat_rlp(acc, r); },
}
}
fn space_saving(bytes: &[u8]) -> u32 {
let l = bytes.len() as u32;
match l >= 2 {
true => l-2,
false => 0,
}
}
for v in values.iter() {
let rlp = UntrustedRlp::new(&v);
let mut flat = Vec::new();
flat_rlp(&mut flat, rlp);
for r in flat.iter() {
*rlp_counts.entry(r.as_raw()).or_insert(0) += 1;
*rlp_sizes.entry(r.as_raw()).or_insert(0) += space_saving(r.as_raw());
}
}
let mut size_vec: Vec<_> = rlp_sizes.iter().collect();
size_vec.sort_by(|a, b| b.1.cmp(a.1));
// Exclude rare large RLPs.
for v in size_vec.iter().filter(|v| rlp_counts.get(v.0).unwrap()>&100).take(20) {
println!("{:?}, {:?}", v, rlp_counts.get(v.0).unwrap());
}
println!("DONE");
}
}

View File

@ -51,16 +51,19 @@ mod rlperrors;
mod rlpin; mod rlpin;
mod untrusted_rlp; mod untrusted_rlp;
mod rlpstream; mod rlpstream;
mod rlpcompression;
mod commonrlps;
mod bytes; mod bytes;
#[cfg(test)] #[cfg(test)]
mod tests; mod tests;
pub use self::rlperrors::DecoderError; pub use self::rlperrors::DecoderError;
pub use self::rlptraits::{Decoder, Decodable, View, Stream, Encodable, Encoder, RlpEncodable, RlpDecodable}; pub use self::rlptraits::{Decoder, Decodable, View, Stream, Encodable, Encoder, RlpEncodable, RlpDecodable, Compressible};
pub use self::untrusted_rlp::{UntrustedRlp, UntrustedRlpIterator, PayloadInfo, Prototype}; pub use self::untrusted_rlp::{UntrustedRlp, UntrustedRlpIterator, PayloadInfo, Prototype};
pub use self::rlpin::{Rlp, RlpIterator}; pub use self::rlpin::{Rlp, RlpIterator};
pub use self::rlpstream::{RlpStream}; pub use self::rlpstream::RlpStream;
pub use self::rlpcompression::RlpType;
pub use elastic_array::ElasticArray1024; pub use elastic_array::ElasticArray1024;
use super::hash::H256; use super::hash::H256;

View File

@ -0,0 +1,245 @@
// Copyright 2015, 2016 Ethcore (UK) Ltd.
// This file is part of Parity.
// Parity is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
// Parity is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with Parity. If not, see <http://www.gnu.org/licenses/>.
use rlp::{UntrustedRlp, View, Compressible, encode, ElasticArray1024, Stream, RlpStream};
use rlp::commonrlps::{BLOCKS_RLP_SWAPPER, SNAPSHOT_RLP_SWAPPER};
use std::collections::HashMap;
/// Stores RLPs used for compression
pub struct InvalidRlpSwapper<'a> {
invalid_to_valid: HashMap<&'a [u8], &'a [u8]>,
valid_to_invalid: HashMap<&'a [u8], &'a [u8]>,
}
impl<'a> InvalidRlpSwapper<'a> {
/// Construct a swapper from a list of common RLPs
pub fn new(rlps_to_swap: &[&'a [u8]], invalid_rlps: &[&'a [u8]]) -> Self {
if rlps_to_swap.len() > 0x7e {
panic!("Invalid usage, only 127 RLPs can be swappable.");
}
let mut invalid_to_valid = HashMap::new();
let mut valid_to_invalid = HashMap::new();
for (&rlp, &invalid) in rlps_to_swap.iter().zip(invalid_rlps.iter()) {
invalid_to_valid.insert(invalid, rlp);
valid_to_invalid.insert(rlp, invalid);
}
InvalidRlpSwapper {
invalid_to_valid: invalid_to_valid,
valid_to_invalid: valid_to_invalid
}
}
/// Get a valid RLP corresponding to an invalid one
fn get_valid(&self, invalid_rlp: &[u8]) -> Option<&[u8]> {
self.invalid_to_valid.get(invalid_rlp).map(|r| r.clone())
}
/// Get an invalid RLP corresponding to a valid one
fn get_invalid(&self, valid_rlp: &[u8]) -> Option<&[u8]> {
self.valid_to_invalid.get(valid_rlp).map(|r| r.clone())
}
}
/// Type of RLP indicating its origin database.
pub enum RlpType {
/// RLP used in blocks database.
Blocks,
/// RLP used in snapshots.
Snapshot,
}
fn to_elastic(slice: &[u8]) -> ElasticArray1024<u8> {
let mut out = ElasticArray1024::new();
out.append_slice(slice);
out
}
fn map_rlp<F>(rlp: &UntrustedRlp, f: F) -> Option<ElasticArray1024<u8>> where
F: Fn(&UntrustedRlp) -> Option<ElasticArray1024<u8>> {
match rlp.iter()
.fold((false, RlpStream::new_list(rlp.item_count())),
|(is_some, mut acc), subrlp| {
let new = f(&subrlp);
if let Some(ref insert) = new {
acc.append_raw(&insert[..], 1);
} else {
acc.append_raw(subrlp.as_raw(), 1);
}
(is_some || new.is_some(), acc)
}) {
(true, s) => Some(s.drain()),
_ => None,
}
}
/// Replace common RLPs with invalid shorter ones.
fn simple_compress(rlp: &UntrustedRlp, swapper: &InvalidRlpSwapper) -> ElasticArray1024<u8> {
if rlp.is_data() {
to_elastic(swapper.get_invalid(rlp.as_raw()).unwrap_or(rlp.as_raw()))
} else {
map_rlp(rlp, |r| Some(simple_compress(r, swapper))).unwrap_or(to_elastic(rlp.as_raw()))
}
}
/// Recover valid RLP from a compressed form.
fn simple_decompress(rlp: &UntrustedRlp, swapper: &InvalidRlpSwapper) -> ElasticArray1024<u8> {
if rlp.is_data() {
to_elastic(swapper.get_valid(rlp.as_raw()).unwrap_or(rlp.as_raw()))
} else {
map_rlp(rlp, |r| Some(simple_decompress(r, swapper))).unwrap_or(to_elastic(rlp.as_raw()))
}
}
/// Replace common RLPs with invalid shorter ones, None if no compression achieved.
/// Tries to compress data insides.
fn deep_compress(rlp: &UntrustedRlp, swapper: &InvalidRlpSwapper) -> Option<ElasticArray1024<u8>> {
let simple_swap = ||
swapper.get_invalid(rlp.as_raw()).map(|b| to_elastic(&b));
if rlp.is_data() {
// Try to treat the inside as RLP.
return match rlp.payload_info() {
// Shortest decompressed account is 70, so simply try to swap the value.
Ok(ref p) if p.value_len < 70 => simple_swap(),
_ => {
if let Ok(d) = rlp.data() {
let internal_rlp = UntrustedRlp::new(d);
if let Some(new_d) = deep_compress(&internal_rlp, swapper) {
// If compressed put in a special list, with first element being invalid code.
let mut rlp = RlpStream::new_list(2);
rlp.append_raw(&[0x81, 0x7f], 1);
rlp.append_raw(&new_d[..], 1);
return Some(rlp.drain());
}
}
simple_swap()
},
};
}
// Iterate through RLP while checking if it has been compressed.
map_rlp(rlp, |r| deep_compress(r, swapper))
}
/// Recover valid RLP from a compressed form, None if no decompression achieved.
/// Tries to decompress compressed data insides.
fn deep_decompress(rlp: &UntrustedRlp, swapper: &InvalidRlpSwapper) -> Option<ElasticArray1024<u8>> {
let simple_swap = ||
swapper.get_valid(rlp.as_raw()).map(|b| to_elastic(&b));
// Simply decompress data.
if rlp.is_data() { return simple_swap(); }
match rlp.item_count() {
// Look for special compressed list, which contains nested data.
2 if rlp.at(0).map(|r| r.as_raw() == &[0x81, 0x7f]).unwrap_or(false) =>
rlp.at(1).ok().map_or(simple_swap(),
|r| deep_decompress(&r, swapper).map(|d| { let v = d.to_vec(); encode(&v) })),
// Iterate through RLP while checking if it has been compressed.
_ => map_rlp(rlp, |r| deep_decompress(r, swapper)),
}
}
impl<'a> Compressible for UntrustedRlp<'a> {
type DataType = RlpType;
fn compress(&self, t: RlpType) -> ElasticArray1024<u8> {
match t {
RlpType::Snapshot => simple_compress(self, &SNAPSHOT_RLP_SWAPPER),
RlpType::Blocks => deep_compress(self, &BLOCKS_RLP_SWAPPER).unwrap_or(to_elastic(self.as_raw())),
}
}
fn decompress(&self, t: RlpType) -> ElasticArray1024<u8> {
match t {
RlpType::Snapshot => simple_decompress(self, &SNAPSHOT_RLP_SWAPPER),
RlpType::Blocks => deep_decompress(self, &BLOCKS_RLP_SWAPPER).unwrap_or(to_elastic(self.as_raw())),
}
}
}
#[cfg(test)]
mod tests {
use rlp::{UntrustedRlp, Compressible, View, RlpType};
use rlp::rlpcompression::InvalidRlpSwapper;
#[test]
fn invalid_rlp_swapper() {
let to_swap: &[&[u8]] = &[&[0x83, b'c', b'a', b't'], &[0x83, b'd', b'o', b'g']];
let invalid_rlp: &[&[u8]] = &[&[0x81, 0x00], &[0x81, 0x01]];
let swapper = InvalidRlpSwapper::new(to_swap, invalid_rlp);
assert_eq!(Some(invalid_rlp[0]), swapper.get_invalid(&[0x83, b'c', b'a', b't']));
assert_eq!(None, swapper.get_invalid(&[0x83, b'b', b'a', b't']));
assert_eq!(Some(to_swap[1]), swapper.get_valid(invalid_rlp[1]));
}
#[test]
fn simple_compression() {
let basic_account_rlp = vec![248, 68, 4, 2, 160, 86, 232, 31, 23, 27, 204, 85, 166, 255, 131, 69, 230, 146, 192, 248, 110, 91, 72, 224, 27, 153, 108, 173, 192, 1, 98, 47, 181, 227, 99, 180, 33, 160, 197, 210, 70, 1, 134, 247, 35, 60, 146, 126, 125, 178, 220, 199, 3, 192, 229, 0, 182, 83, 202, 130, 39, 59, 123, 250, 216, 4, 93, 133, 164, 112];
let rlp = UntrustedRlp::new(&basic_account_rlp);
let compressed = rlp.compress(RlpType::Snapshot).to_vec();
assert_eq!(compressed, vec![198, 4, 2, 129, 0, 129, 1]);
let compressed_rlp = UntrustedRlp::new(&compressed);
assert_eq!(compressed_rlp.decompress(RlpType::Snapshot).to_vec(), basic_account_rlp);
}
#[test]
fn data_compression() {
let data_basic_account_rlp = vec![184, 70, 248, 68, 4, 2, 160, 86, 232, 31, 23, 27, 204, 85, 166, 255, 131, 69, 230, 146, 192, 248, 110, 91, 72, 224, 27, 153, 108, 173, 192, 1, 98, 47, 181, 227, 99, 180, 33, 160, 197, 210, 70, 1, 134, 247, 35, 60, 146, 126, 125, 178, 220, 199, 3, 192, 229, 0, 182, 83, 202, 130, 39, 59, 123, 250, 216, 4, 93, 133, 164, 112];
let data_rlp = UntrustedRlp::new(&data_basic_account_rlp);
let compressed = data_rlp.compress(RlpType::Blocks).to_vec();
assert_eq!(compressed, vec![201, 129, 127, 198, 4, 2, 129, 0, 129, 1]);
let compressed_rlp = UntrustedRlp::new(&compressed);
assert_eq!(compressed_rlp.decompress(RlpType::Blocks).to_vec(), data_basic_account_rlp);
}
#[test]
fn nested_list_rlp() {
let nested_basic_account_rlp = vec![228, 4, 226, 2, 160, 86, 232, 31, 23, 27, 204, 85, 166, 255, 131, 69, 230, 146, 192, 248, 110, 91, 72, 224, 27, 153, 108, 173, 192, 1, 98, 47, 181, 227, 99, 180, 33];
let nested_rlp = UntrustedRlp::new(&nested_basic_account_rlp);
let compressed = nested_rlp.compress(RlpType::Blocks).to_vec();
assert_eq!(compressed, vec![197, 4, 195, 2, 129, 0]);
let compressed_rlp = UntrustedRlp::new(&compressed);
assert_eq!(compressed_rlp.decompress(RlpType::Blocks).to_vec(), nested_basic_account_rlp);
let compressed = nested_rlp.compress(RlpType::Snapshot).to_vec();
assert_eq!(compressed, vec![197, 4, 195, 2, 129, 0]);
let compressed_rlp = UntrustedRlp::new(&compressed);
assert_eq!(compressed_rlp.decompress(RlpType::Snapshot).to_vec(), nested_basic_account_rlp);
}
#[test]
fn malformed_rlp() {
let malformed = vec![248, 81, 128, 128, 128, 128, 128, 160, 12, 51, 241, 93, 69, 218, 74, 138, 79, 115, 227, 44, 216, 81, 46, 132, 85, 235, 96, 45, 252, 48, 181, 29, 75, 141, 217, 215, 86, 160, 109, 130, 160, 140, 36, 93, 200, 109, 215, 100, 241, 246, 99, 135, 92, 168, 149, 170, 114, 9, 143, 4, 93, 25, 76, 54, 176, 119, 230, 170, 154, 105, 47, 121, 10, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128];
let malformed_rlp = UntrustedRlp::new(&malformed);
assert_eq!(malformed_rlp.decompress(RlpType::Blocks).to_vec(), malformed);
}
#[test]
#[ignore]
fn test_compression() {
use kvdb::*;
let path = "db to test".to_string();
let values: Vec<_> = Database::open_default(&path).unwrap().iter().map(|(_, v)| v).collect();
let mut decomp_size = 0;
let mut comp_size = 0;
for v in values.iter() {
let rlp = UntrustedRlp::new(&v);
let compressed = rlp.compress(RlpType::Blocks).to_vec();
comp_size += compressed.len();
let decompressed = rlp.decompress(RlpType::Blocks).to_vec();
decomp_size += decompressed.len();
}
println!("Decompressed bytes {:?}, compressed bytes: {:?}", decomp_size, comp_size);
assert!(decomp_size > comp_size);
}
}

View File

@ -26,8 +26,8 @@ use sha3::*;
/// Type is able to decode RLP. /// Type is able to decode RLP.
pub trait Decoder: Sized { pub trait Decoder: Sized {
/// Read a value from the RLP into a given type. /// Read a value from the RLP into a given type.
fn read_value<T, F>(&self, f: F) -> Result<T, DecoderError> fn read_value<T, F>(&self, f: &F) -> Result<T, DecoderError>
where F: FnOnce(&[u8]) -> Result<T, DecoderError>; where F: Fn(&[u8]) -> Result<T, DecoderError>;
/// Get underlying `UntrustedRLP` object. /// Get underlying `UntrustedRLP` object.
fn as_rlp(&self) -> &UntrustedRlp; fn as_rlp(&self) -> &UntrustedRlp;
@ -63,7 +63,7 @@ pub trait View<'a, 'view>: Sized {
/// Creates a new instance of `Rlp` reader /// Creates a new instance of `Rlp` reader
fn new(bytes: &'a [u8]) -> Self; fn new(bytes: &'a [u8]) -> Self;
/// The raw data of the RLP. /// The raw data of the RLP as slice.
/// ///
/// ```rust /// ```rust
/// extern crate ethcore_util as util; /// extern crate ethcore_util as util;
@ -365,3 +365,14 @@ pub trait Stream: Sized {
/// panic! if stream is not finished. /// panic! if stream is not finished.
fn out(self) -> Vec<u8>; fn out(self) -> Vec<u8>;
} }
/// Trait for compressing and decompressing RLP by replacement of common terms.
pub trait Compressible: Sized {
/// Indicates the origin of RLP to be compressed.
type DataType;
/// Compress given RLP type using appropriate methods.
fn compress(&self, t: Self::DataType) -> ElasticArray1024<u8>;
/// Decompress given RLP type using appropriate methods.
fn decompress(&self, t: Self::DataType) -> ElasticArray1024<u8>;
}

View File

@ -55,6 +55,18 @@ pub struct PayloadInfo {
pub value_len: usize, pub value_len: usize,
} }
fn calculate_payload_info(header_bytes: &[u8], len_of_len: usize) -> Result<PayloadInfo, DecoderError> {
let header_len = 1 + len_of_len;
match header_bytes.get(1) {
Some(&0) => return Err(DecoderError::RlpDataLenWithZeroPrefix),
None => return Err(DecoderError::RlpIsTooShort),
_ => (),
}
if header_bytes.len() < header_len { return Err(DecoderError::RlpIsTooShort); }
let value_len = try!(usize::from_bytes(&header_bytes[1..header_len]));
Ok(PayloadInfo::new(header_len, value_len))
}
impl PayloadInfo { impl PayloadInfo {
fn new(header_len: usize, value_len: usize) -> PayloadInfo { fn new(header_len: usize, value_len: usize) -> PayloadInfo {
PayloadInfo { PayloadInfo {
@ -68,28 +80,22 @@ impl PayloadInfo {
/// Create a new object from the given bytes RLP. The bytes /// Create a new object from the given bytes RLP. The bytes
pub fn from(header_bytes: &[u8]) -> Result<PayloadInfo, DecoderError> { pub fn from(header_bytes: &[u8]) -> Result<PayloadInfo, DecoderError> {
Ok(match header_bytes.first().cloned() { match header_bytes.first().cloned() {
None => return Err(DecoderError::RlpIsTooShort), None => Err(DecoderError::RlpIsTooShort),
Some(0...0x7f) => PayloadInfo::new(0, 1), Some(0...0x7f) => Ok(PayloadInfo::new(0, 1)),
Some(l @ 0x80...0xb7) => PayloadInfo::new(1, l as usize - 0x80), Some(l @ 0x80...0xb7) => Ok(PayloadInfo::new(1, l as usize - 0x80)),
Some(l @ 0xb8...0xbf) => { Some(l @ 0xb8...0xbf) => {
let len_of_len = l as usize - 0xb7; let len_of_len = l as usize - 0xb7;
let header_len = 1 + len_of_len; calculate_payload_info(header_bytes, len_of_len)
if header_bytes[1] == 0 { return Err(DecoderError::RlpDataLenWithZeroPrefix); }
let value_len = try!(usize::from_bytes(&header_bytes[1..header_len]));
PayloadInfo::new(header_len, value_len)
} }
Some(l @ 0xc0...0xf7) => PayloadInfo::new(1, l as usize - 0xc0), Some(l @ 0xc0...0xf7) => Ok(PayloadInfo::new(1, l as usize - 0xc0)),
Some(l @ 0xf8...0xff) => { Some(l @ 0xf8...0xff) => {
let len_of_len = l as usize - 0xf7; let len_of_len = l as usize - 0xf7;
let header_len = 1 + len_of_len; calculate_payload_info(header_bytes, len_of_len)
let value_len = try!(usize::from_bytes(&header_bytes[1..header_len]));
if header_bytes[1] == 0 { return Err(DecoderError::RlpListLenWithZeroPrefix); }
PayloadInfo::new(header_len, value_len)
}, },
// we cant reach this place, but rust requires _ to be implemented // we cant reach this place, but rust requires _ to be implemented
_ => { unreachable!(); } _ => { unreachable!(); }
}) }
} }
} }
@ -190,8 +196,8 @@ impl<'a, 'view> View<'a, 'view> for UntrustedRlp<'a> where 'a: 'view {
fn size(&self) -> usize { fn size(&self) -> usize {
match self.is_data() { match self.is_data() {
// we can safely unwrap (?) cause its data // TODO: No panic on malformed data, but ideally would Err on no PayloadInfo.
true => BasicDecoder::payload_info(self.bytes).unwrap().value_len, true => BasicDecoder::payload_info(self.bytes).map(|b| b.value_len).unwrap_or(0),
false => 0 false => 0
} }
} }
@ -342,15 +348,15 @@ impl<'a> BasicDecoder<'a> {
} }
impl<'a> Decoder for BasicDecoder<'a> { impl<'a> Decoder for BasicDecoder<'a> {
fn read_value<T, F>(&self, f: F) -> Result<T, DecoderError> fn read_value<T, F>(&self, f: &F) -> Result<T, DecoderError>
where F: FnOnce(&[u8]) -> Result<T, DecoderError> { where F: Fn(&[u8]) -> Result<T, DecoderError> {
let bytes = self.rlp.as_raw(); let bytes = self.rlp.as_raw();
match bytes.first().cloned() { match bytes.first().cloned() {
// rlp is too short // RLP is too short.
None => Err(DecoderError::RlpIsTooShort), None => Err(DecoderError::RlpIsTooShort),
// single byt value // Single byte value.
Some(l @ 0...0x7f) => Ok(try!(f(&[l]))), Some(l @ 0...0x7f) => Ok(try!(f(&[l]))),
// 0-55 bytes // 0-55 bytes
Some(l @ 0x80...0xb7) => { Some(l @ 0x80...0xb7) => {
@ -362,10 +368,9 @@ impl<'a> Decoder for BasicDecoder<'a> {
if l == 0x81 && d[0] < 0x80 { if l == 0x81 && d[0] < 0x80 {
return Err(DecoderError::RlpInvalidIndirection); return Err(DecoderError::RlpInvalidIndirection);
} }
Ok(try!(f(d))) Ok(try!(f(d)))
}, },
// longer than 55 bytes // Longer than 55 bytes.
Some(l @ 0xb8...0xbf) => { Some(l @ 0xb8...0xbf) => {
let len_of_len = l as usize - 0xb7; let len_of_len = l as usize - 0xb7;
let begin_of_value = 1 as usize + len_of_len; let begin_of_value = 1 as usize + len_of_len;
@ -380,7 +385,7 @@ impl<'a> Decoder for BasicDecoder<'a> {
} }
Ok(try!(f(&bytes[begin_of_value..last_index_of_value]))) Ok(try!(f(&bytes[begin_of_value..last_index_of_value])))
} }
// we are reading value, not a list! // We are reading value, not a list!
_ => Err(DecoderError::RlpExpectedToBeData) _ => Err(DecoderError::RlpExpectedToBeData)
} }
} }
@ -396,9 +401,7 @@ impl<'a> Decoder for BasicDecoder<'a> {
impl<T> Decodable for T where T: FromBytes { impl<T> Decodable for T where T: FromBytes {
fn decode<D>(decoder: &D) -> Result<Self, DecoderError> where D: Decoder { fn decode<D>(decoder: &D) -> Result<Self, DecoderError> where D: Decoder {
decoder.read_value(| bytes | { decoder.read_value(&|bytes: &[u8]| Ok(try!(T::from_bytes(bytes))))
Ok(try!(T::from_bytes(bytes)))
})
} }
} }
@ -416,11 +419,7 @@ impl<T> Decodable for Option<T> where T: Decodable {
impl Decodable for Vec<u8> { impl Decodable for Vec<u8> {
fn decode<D>(decoder: &D) -> Result<Self, DecoderError> where D: Decoder { fn decode<D>(decoder: &D) -> Result<Self, DecoderError> where D: Decoder {
decoder.read_value(| bytes | { decoder.read_value(&|bytes: &[u8]| Ok(bytes.to_vec()))
let mut res = vec![];
res.extend_from_slice(bytes);
Ok(res)
})
} }
} }
@ -489,6 +488,9 @@ impl RlpDecodable for u8 {
} }
} }
#[cfg(test)]
mod tests {
use rlp::{UntrustedRlp, View};
#[test] #[test]
fn test_rlp_display() { fn test_rlp_display() {
use rustc_serialize::hex::FromHex; use rustc_serialize::hex::FromHex;
@ -496,4 +498,4 @@ fn test_rlp_display() {
let rlp = UntrustedRlp::new(&data); let rlp = UntrustedRlp::new(&data);
assert_eq!(format!("{}", rlp), "[\"0x05\", \"0x010efbef67941f79b2\", \"0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421\", \"0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470\"]"); assert_eq!(format!("{}", rlp), "[\"0x05\", \"0x010efbef67941f79b2\", \"0x56e81f171bcc55a6ff8345e692c0f86e5b48e01b996cadc001622fb5e363b421\", \"0xc5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470\"]");
} }
}