This commit is contained in:
debris 2015-12-05 02:02:10 +01:00
commit 816b75cc87
5 changed files with 343 additions and 75 deletions

128
benches/trie.rs Normal file
View File

@ -0,0 +1,128 @@
#![feature(test)]
extern crate test;
extern crate rand;
extern crate ethcore_util;
#[macro_use]
extern crate log;
use test::Bencher;
use rand::random;
//use ethcore_util::BytesConvertable;
use ethcore_util::hash::*;
use ethcore_util::bytes::*;
use ethcore_util::trie::*;
use ethcore_util::sha3::*;
use ethcore_util::ToBytes::*;
/// Derives a pseudo-random word over `alphabet` from `seed`.
///
/// `seed` is first replaced by its own SHA3 hash. The word length is `min_count` plus the
/// hash byte at index 31 reduced modulo `diff_count + 1`; character `i` of the word is the
/// alphabet entry selected by hash byte `i` modulo the alphabet length.
///
/// # Panics
///
/// Panics if `min_count + diff_count > 32`.
fn random_word(alphabet: &[u8], min_count: usize, diff_count: usize, seed: &mut H256) -> Vec<u8> {
    assert!(min_count + diff_count <= 32);
    *seed = seed.sha3();
    let hashed = seed.bytes();
    let len = min_count + (hashed[31] as usize % (diff_count + 1));
    hashed[0..len]
        .iter()
        .map(|&byte| alphabet[byte as usize % alphabet.len()])
        .collect()
}
/// Derives a pseudo-random byte string from `seed`.
///
/// `seed` is first replaced by its own SHA3 hash. The result is the leading bytes of that
/// hash; its length is `min_count` plus the hash byte at index 31 reduced modulo
/// `diff_count + 1`.
///
/// # Panics
///
/// Panics if `min_count + diff_count > 32`.
fn random_bytes(min_count: usize, diff_count: usize, seed: &mut H256) -> Vec<u8> {
    assert!(min_count + diff_count <= 32);
    *seed = seed.sha3();
    let extra = seed.bytes()[31] as usize % (diff_count + 1);
    let len = min_count + extra;
    seed.bytes()[0..len].iter().cloned().collect()
}
/// Derives a pseudo-random value from `seed`.
///
/// `seed` is first replaced by its own SHA3 hash. When the first hash byte is odd the value
/// is the single hash byte at index 31; otherwise it is a copy of all the hash bytes.
fn random_value(seed: &mut H256) -> Bytes {
    *seed = seed.sha3();
    let hashed = seed.bytes();
    if hashed[0] % 2 == 1 {
        vec![hashed[31]]
    } else {
        hashed.to_vec()
    }
}
/// Benchmarks inserting 1000 entries with 6-byte keys taken straight from the seed hash
/// (`random_bytes`) into a fresh in-memory trie.
#[bench]
fn insertions_six_high(b: &mut Bencher) {
    // The data set is generated once, outside the timed closure.
    let mut seed = H256::new();
    let d: Vec<(Bytes, Bytes)> = (0..1000)
        .map(|_| {
            let key = random_bytes(6, 0, &mut seed);
            let value = random_value(&mut seed);
            (key, value)
        })
        .collect();

    b.iter(|| {
        let mut t = TrieDB::new_memory();
        for &(ref key, ref value) in d.iter() {
            t.insert(key, value);
        }
    })
}
/// Benchmarks inserting 1000 entries with 6-character keys over a 32-symbol alphabet into a
/// fresh in-memory trie, logging the trie's `hash_count` after each run.
#[bench]
fn insertions_six_mid(b: &mut Bencher) {
    let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_";

    // The data set is generated once, outside the timed closure.
    let mut seed = H256::new();
    let d: Vec<(Bytes, Bytes)> = (0..1000)
        .map(|_| {
            let key = random_word(alphabet, 6, 0, &mut seed);
            let value = random_value(&mut seed);
            (key, value)
        })
        .collect();

    b.iter(|| {
        let mut t = TrieDB::new_memory();
        for &(ref key, ref value) in d.iter() {
            t.insert(key, value);
        }
        debug!("hash_count={:?}", t.hash_count);
    })
}
/// Benchmarks inserting 1000 entries with keys of 1 to 6 characters over a 32-symbol
/// alphabet into a fresh in-memory trie.
#[bench]
fn insertions_random_mid(b: &mut Bencher) {
    let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_";

    // The data set is generated once, outside the timed closure.
    let mut seed = H256::new();
    let d: Vec<(Bytes, Bytes)> = (0..1000)
        .map(|_| {
            let key = random_word(alphabet, 1, 5, &mut seed);
            let value = random_value(&mut seed);
            (key, value)
        })
        .collect();

    b.iter(|| {
        let mut t = TrieDB::new_memory();
        for &(ref key, ref value) in d.iter() {
            t.insert(key, value);
        }
    })
}
/// Benchmarks inserting 1000 entries with 6-character keys over the alphabet `abcdef` into a
/// fresh in-memory trie.
#[bench]
fn insertions_six_low(b: &mut Bencher) {
    let alphabet = b"abcdef";

    // The data set is generated once, outside the timed closure.
    let mut seed = H256::new();
    let d: Vec<(Bytes, Bytes)> = (0..1000)
        .map(|_| {
            let key = random_word(alphabet, 6, 0, &mut seed);
            let value = random_value(&mut seed);
            (key, value)
        })
        .collect();

    b.iter(|| {
        let mut t = TrieDB::new_memory();
        for &(ref key, ref value) in d.iter() {
            t.insert(key, value);
        }
    })
}
/// Benchmarks 1000 chained SHA3 hashes, starting from `H256::new()`.
///
/// This gives a baseline for the hashing cost that the trie insertion benchmarks include.
#[bench]
fn sha3x1000(b: &mut Bencher) {
    b.iter(|| {
        let mut seed = H256::new();
        for _ in 0..1000 {
            seed = seed.sha3();
        }
        // Return the final hash so the harness observes the result; otherwise the optimizer
        // is free to discard the hashing work this benchmark is meant to measure.
        seed
    })
}

View File

@ -4,7 +4,7 @@ use std::collections::HashMap;
pub trait HashDB { pub trait HashDB {
/// Get the keys in the database together with number of underlying references. /// Get the keys in the database together with number of underlying references.
fn keys(&self) -> HashMap<H256, u32>; fn keys(&self) -> HashMap<H256, i32>;
/// Look up a given hash into the bytes that hash to it, returning None if the /// Look up a given hash into the bytes that hash to it, returning None if the
/// hash is not known. /// hash is not known.

View File

@ -116,10 +116,6 @@ impl MemoryDB {
} }
self.data.get(key).unwrap() self.data.get(key).unwrap()
} }
pub fn raw_keys(&self) -> HashMap<H256, i32> {
self.data.iter().filter_map(|(k, v)| if v.1 != 0 {Some((k.clone(), v.1))} else {None}).collect::<HashMap<H256, i32>>()
}
} }
impl HashDB for MemoryDB { impl HashDB for MemoryDB {
@ -130,8 +126,8 @@ impl HashDB for MemoryDB {
} }
} }
fn keys(&self) -> HashMap<H256, u32> { fn keys(&self) -> HashMap<H256, i32> {
self.data.iter().filter_map(|(k, v)| if v.1 > 0 {Some((k.clone(), v.1 as u32))} else {None} ).collect::<HashMap<H256, u32>>() self.data.iter().filter_map(|(k, v)| if v.1 != 0 {Some((k.clone(), v.1))} else {None}).collect::<HashMap<H256, i32>>()
} }
fn exists(&self, key: &H256) -> bool { fn exists(&self, key: &H256) -> bool {

View File

@ -136,17 +136,17 @@ impl OverlayDB {
} }
impl HashDB for OverlayDB { impl HashDB for OverlayDB {
fn keys(&self) -> HashMap<H256, u32> { fn keys(&self) -> HashMap<H256, i32> {
let mut ret: HashMap<H256, u32> = HashMap::new(); let mut ret: HashMap<H256, i32> = HashMap::new();
for (key, _) in self.backing.iterator().from_start() { for (key, _) in self.backing.iterator().from_start() {
let h = H256::from_slice(key.deref()); let h = H256::from_slice(key.deref());
let r = self.payload(&h).unwrap().1; let r = self.payload(&h).unwrap().1;
ret.insert(h, r); ret.insert(h, r as i32);
} }
for (key, refs) in self.overlay.raw_keys().into_iter() { for (key, refs) in self.overlay.keys().into_iter() {
let refs = *ret.get(&key).unwrap_or(&0u32) as i32 + refs as i32; let refs = *ret.get(&key).unwrap_or(&0) + refs;
ret.insert(key, refs as u32); ret.insert(key, refs);
} }
ret ret
} }

View File

@ -26,6 +26,67 @@ pub trait Trie {
fn remove(&mut self, key: &[u8]); fn remove(&mut self, key: &[u8]);
} }
/// Selects the set of bytes that `StandardMap::make` draws generated keys from.
pub enum Alphabet {
/// Keys are the leading bytes of the seed hash, used unmodified.
All,
/// Keys are words over the six characters `abcdef`.
Low,
/// Keys are words over the 32 characters `@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_`.
Mid,
/// Keys are words over the supplied bytes.
Custom(Bytes),
}
/// Describes a deterministic set of key/value pairs, produced by `make`.
pub struct StandardMap {
/// Character set the keys are drawn from.
alphabet: Alphabet,
/// Minimum key length in bytes.
min_key: usize,
/// Maximum number of bytes a key may have in addition to `min_key`;
/// `min_key + diff_key` must not exceed 32.
diff_key: usize,
/// Number of key/value pairs to generate.
count: usize,
}
impl StandardMap {
    /// Derives a pseudo-random byte string from `seed`.
    ///
    /// `seed` is first replaced by its own SHA3 hash. The result is the leading bytes of
    /// that hash; its length is `min_count` plus the hash byte at index 31 reduced modulo
    /// `diff_count + 1`. Panics if `min_count + diff_count > 32`.
    fn random_bytes(min_count: usize, diff_count: usize, seed: &mut H256) -> Vec<u8> {
        assert!(min_count + diff_count <= 32);
        *seed = seed.sha3();
        let extra = seed.bytes()[31] as usize % (diff_count + 1);
        let len = min_count + extra;
        seed.bytes()[0..len].iter().cloned().collect()
    }

    /// Derives a pseudo-random value from `seed`.
    ///
    /// `seed` is first replaced by its own SHA3 hash. When the first hash byte is odd the
    /// value is the single hash byte at index 31; otherwise it is a copy of all hash bytes.
    fn random_value(seed: &mut H256) -> Bytes {
        *seed = seed.sha3();
        let hashed = seed.bytes();
        if hashed[0] % 2 == 1 {
            vec![hashed[31]]
        } else {
            hashed.to_vec()
        }
    }

    /// Derives a pseudo-random word over `alphabet` from `seed`.
    ///
    /// `seed` is first replaced by its own SHA3 hash. The length is chosen as in
    /// `random_bytes`; character `i` is the alphabet entry selected by hash byte `i` modulo
    /// the alphabet length. Panics if `min_count + diff_count > 32`.
    fn random_word(alphabet: &[u8], min_count: usize, diff_count: usize, seed: &mut H256) -> Vec<u8> {
        assert!(min_count + diff_count <= 32);
        *seed = seed.sha3();
        let hashed = seed.bytes();
        let len = min_count + (hashed[31] as usize % (diff_count + 1));
        hashed[0..len]
            .iter()
            .map(|&byte| alphabet[byte as usize % alphabet.len()])
            .collect()
    }

    /// Generates the `count` key/value pairs described by this map.
    ///
    /// Generation starts from `H256::new()` and re-hashes the seed for every key and every
    /// value, so the same description always yields the same pairs in the same order.
    pub fn make(&self) -> Vec<(Bytes, Bytes)> {
        let low = b"abcdef";
        let mid = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_";
        let mut seed = H256::new();
        (0..self.count)
            .map(|_| {
                // The key must be derived before the value: both advance the shared seed.
                let key = match self.alphabet {
                    Alphabet::All => Self::random_bytes(self.min_key, self.diff_key, &mut seed),
                    Alphabet::Low => Self::random_word(low, self.min_key, self.diff_key, &mut seed),
                    Alphabet::Mid => Self::random_word(mid, self.min_key, self.diff_key, &mut seed),
                    Alphabet::Custom(ref custom) => Self::random_word(custom, self.min_key, self.diff_key, &mut seed),
                };
                let value = Self::random_value(&mut seed);
                (key, value)
            })
            .collect()
    }
}
#[derive(Eq, PartialEq, Debug)] #[derive(Eq, PartialEq, Debug)]
pub enum Node<'a> { pub enum Node<'a> {
Empty, Empty,
@ -34,11 +95,13 @@ pub enum Node<'a> {
Branch([&'a[u8]; 16], Option<&'a [u8]>) Branch([&'a[u8]; 16], Option<&'a [u8]>)
} }
#[derive(Debug)]
enum Operation { enum Operation {
New(H256, Bytes), New(H256, Bytes),
Delete(H256), Delete(H256),
} }
#[derive(Debug)]
struct Diff (Vec<Operation>); struct Diff (Vec<Operation>);
impl Diff { impl Diff {
@ -48,8 +111,9 @@ impl Diff {
/// such that the reference is valid, once applied. /// such that the reference is valid, once applied.
fn new_node(&mut self, rlp: Bytes, out: &mut RlpStream) { fn new_node(&mut self, rlp: Bytes, out: &mut RlpStream) {
if rlp.len() >= 32 { if rlp.len() >= 32 {
trace!("new_node: reference node {:?}", rlp.pretty());
let rlp_sha3 = rlp.sha3(); let rlp_sha3 = rlp.sha3();
trace!("new_node: reference node {:?} => {:?}", rlp_sha3, rlp.pretty());
out.append(&rlp_sha3); out.append(&rlp_sha3);
self.0.push(Operation::New(rlp_sha3, rlp)); self.0.push(Operation::New(rlp_sha3, rlp));
} }
@ -61,25 +125,18 @@ impl Diff {
/// Given the RLP that encodes a now-unused node, leave `diff` in such a state that it is noted. /// Given the RLP that encodes a now-unused node, leave `diff` in such a state that it is noted.
fn delete_node_sha3(&mut self, old_sha3: H256) { fn delete_node_sha3(&mut self, old_sha3: H256) {
trace!("delete_node: {:?}", old_sha3);
self.0.push(Operation::Delete(old_sha3)); self.0.push(Operation::Delete(old_sha3));
} }
fn delete_node(&mut self, old: &Rlp) { fn delete_node(&mut self, old: &Rlp) {
if old.is_data() && old.size() == 32 { if old.is_data() && old.size() == 32 {
self.0.push(Operation::Delete(H256::decode(old))); self.delete_node_sha3(H256::decode(old));
} }
} }
fn delete_node_from_slice(&mut self, old: &[u8]) { fn delete_node_from_slice(&mut self, old: &[u8]) {
let r = Rlp::new(old); self.delete_node(&Rlp::new(old));
if r.is_data() && r.size() == 32 {
self.0.push(Operation::Delete(H256::decode(&r)));
}
}
fn replace_node(&mut self, old: &Rlp, rlp: Bytes, out: &mut RlpStream) {
self.delete_node(old);
self.new_node(rlp, out);
} }
} }
@ -192,11 +249,12 @@ impl <'a>Node<'a> {
pub struct TrieDB { pub struct TrieDB {
db: Box<HashDB>, db: Box<HashDB>,
root: H256, root: H256,
pub hash_count: usize,
} }
impl fmt::Debug for TrieDB { impl fmt::Debug for TrieDB {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
try!(writeln!(f, "[")); try!(writeln!(f, "c={:?} [", self.hash_count));
let root_rlp = self.db.lookup(&self.root).expect("Trie root not found!"); let root_rlp = self.db.lookup(&self.root).expect("Trie root not found!");
try!(self.fmt_all(Node::decoded(root_rlp), f, 0)); try!(self.fmt_all(Node::decoded(root_rlp), f, 0));
writeln!(f, "]") writeln!(f, "]")
@ -209,7 +267,7 @@ enum MaybeChanged<'a> {
} }
impl TrieDB { impl TrieDB {
pub fn new_boxed(db_box: Box<HashDB>) -> Self { let mut r = TrieDB{ db: db_box, root: H256::new() }; r.set_root_rlp(&NULL_RLP); r } pub fn new_boxed(db_box: Box<HashDB>) -> Self { let mut r = TrieDB{ db: db_box, root: H256::new(), hash_count: 0 }; r.root = r.db.insert(&NULL_RLP); r }
pub fn new<T>(db: T) -> Self where T: HashDB + 'static { Self::new_boxed(Box::new(db)) } pub fn new<T>(db: T) -> Self where T: HashDB + 'static { Self::new_boxed(Box::new(db)) }
@ -220,6 +278,7 @@ impl TrieDB {
fn set_root_rlp(&mut self, root_data: &[u8]) { fn set_root_rlp(&mut self, root_data: &[u8]) {
self.db.kill(&self.root); self.db.kill(&self.root);
self.root = self.db.insert(root_data); self.root = self.db.insert(root_data);
self.hash_count += 1;
trace!("set_root_rlp {:?} {:?}", root_data.pretty(), self.root); trace!("set_root_rlp {:?} {:?}", root_data.pretty(), self.root);
} }
@ -234,6 +293,7 @@ impl TrieDB {
Operation::New(h, d) => { Operation::New(h, d) => {
trace!("TrieDB::apply +++ {:?} -> {:?}", &h, d.pretty()); trace!("TrieDB::apply +++ {:?} -> {:?}", &h, d.pretty());
self.db.emplace(h, d); self.db.emplace(h, d);
self.hash_count += 1;
} }
} }
} }
@ -272,14 +332,13 @@ impl TrieDB {
r r
} }
pub fn db_items_remaining(&self) -> HashMap<H256, u32> { pub fn db_items_remaining(&self) -> HashMap<H256, i32> {
let mut ret = self.db().keys(); let mut ret = self.db().keys();
for (k, v) in Self::to_map(self.keys()).into_iter() { for (k, v) in Self::to_map(self.keys()).into_iter() {
let old = *ret.get(&k).expect("Node in trie is not in database!"); let keycount = *ret.get(&k).unwrap_or(&0);
assert!(old >= v); match keycount == v as i32 {
match old > v { true => ret.remove(&k),
true => ret.insert(k, old - v), _ => ret.insert(k, keycount - v as i32),
_ => ret.remove(&k),
}; };
} }
ret ret
@ -437,7 +496,11 @@ impl TrieDB {
} }
else if rlp.is_data() && rlp.size() == 32 { else if rlp.is_data() && rlp.size() == 32 {
let h = H256::decode(rlp); let h = H256::decode(rlp);
let r = self.db.lookup(&h).expect("Trie root not found!"); let r = self.db.lookup(&h).unwrap_or_else(||{
println!("Node not found! rlp={:?}, node_hash={:?}", rlp.raw().pretty(), h);
println!("Diff: {:?}", diff);
panic!();
});
trace!("take_node {:?} (indirect for {:?})", rlp.raw().pretty(), r); trace!("take_node {:?} (indirect for {:?})", rlp.raw().pretty(), r);
diff.delete_node_sha3(h); diff.delete_node_sha3(h);
r r
@ -518,7 +581,7 @@ impl TrieDB {
diff.new_node(Self::compose_leaf(&partial.mid(1), value), &mut s), diff.new_node(Self::compose_leaf(&partial.mid(1), value), &mut s),
(true, i) => { // harder - original has something there already (true, i) => { // harder - original has something there already
let new = self.augmented(self.take_node(&orig.at(i), diff), &partial.mid(1), value, diff); let new = self.augmented(self.take_node(&orig.at(i), diff), &partial.mid(1), value, diff);
diff.replace_node(&orig.at(i), new, &mut s); diff.new_node(new, &mut s);
} }
(false, i) => { s.append_raw(orig.at(i).raw(), 1); }, (false, i) => { s.append_raw(orig.at(i).raw(), 1); },
} }
@ -801,28 +864,113 @@ mod tests {
use rlp; use rlp;
use env_logger; use env_logger;
use rand::random; use rand::random;
use bytes::ToPretty; use std::collections::HashSet;
use bytes::{ToPretty,Bytes};
/// Builds a random key over `alphabet`.
///
/// The key length is `min_count` when `diff_count` is zero, otherwise `min_count` plus a
/// random amount in `0..diff_count` (upper bound exclusive). Each character is drawn
/// independently from `alphabet`.
fn random_key(alphabet: &[u8], min_count: usize, diff_count: usize) -> Vec<u8> {
    let extra = if diff_count == 0 { 0 } else { random::<usize>() % diff_count };
    let len = min_count + extra;
    (0..len)
        .map(|_| alphabet[random::<usize>() % alphabet.len()])
        .collect()
}
/// Builds a value derived from the index `j`, randomly choosing between two encodings:
/// the RLP encoding of `j` itself, or the RLP encoding of an `H256` whose byte at index 31
/// is set to `j` truncated to `u8`.
fn random_value_indexed(j: usize) -> Bytes {
    if random::<usize>() % 2 == 0 {
        return rlp::encode(&j);
    }
    let mut h = H256::new();
    h.mut_bytes()[31] = j as u8;
    rlp::encode(&h)
}
/// Creates a new in-memory trie and inserts every `(key, value)` pair of `v`, in order.
fn populate_trie(v: &Vec<(Vec<u8>, Vec<u8>)>) -> TrieDB {
    let mut trie = TrieDB::new_memory();
    for &(ref key, ref val) in v.iter() {
        trie.insert(key, val);
    }
    trie
}
/// Removes the key of every pair in `v` from `t`, in order; the values are ignored.
fn unpopulate_trie(t: &mut TrieDB, v: &Vec<(Vec<u8>, Vec<u8>)>) {
    for &(ref key, _) in v.iter() {
        t.remove(key);
    }
}
/// Builds a `HashMap` from one or more comma-separated `key => value` pairs.
///
/// Pairs are inserted in the order written, so a repeated key keeps its last value.
macro_rules! map {
    ($($key:expr => $value:expr),+) => {{
        let mut built = ::std::collections::HashMap::new();
        $( built.insert($key, $value); )+
        built
    }};
}
#[test] #[test]
fn playpen() { fn playpen() {
env_logger::init().ok(); env_logger::init().ok();
let big_value = b"00000000000000000000000000000000"; let maps = map!{
"six-low" => StandardMap{alphabet: Alphabet::Low, min_key: 6, diff_key: 0, count: 1000},
"six-mid" => StandardMap{alphabet: Alphabet::Mid, min_key: 6, diff_key: 0, count: 1000},
"six-all" => StandardMap{alphabet: Alphabet::All, min_key: 6, diff_key: 0, count: 1000},
"mix-mid" => StandardMap{alphabet: Alphabet::Mid, min_key: 1, diff_key: 5, count: 1000}
};
for sm in maps {
let m = sm.1.make();
let t = populate_trie(&m);
println!("{:?}: root={:?}, hash_count={:?}", sm.0, t.root(), t.hash_count);
};
panic!();
let mut t1 = TrieDB::new_memory(); for test_i in 0..1 {
t1.insert(&[0x01, 0x23], &big_value.to_vec()); if test_i % 50 == 0 {
t1.insert(&[0x01, 0x34], &big_value.to_vec()); debug!("{:?} of 10000 stress tests done", test_i);
trace!("keys remaining {:?}", t1.db_items_remaining()); }
assert!(t1.db_items_remaining().is_empty()); let mut x: Vec<(Vec<u8>, Vec<u8>)> = Vec::new();
let mut t2 = TrieDB::new_memory(); let mut got: HashSet<Vec<u8>> = HashSet::new();
t2.insert(&[0x01], &big_value.to_vec()); let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_";
t2.insert(&[0x01, 0x23], &big_value.to_vec()); for j in 0..1000usize {
t2.insert(&[0x01, 0x34], &big_value.to_vec()); let key = random_key(alphabet, 5, 0);
t2.remove(&[0x01]); if !got.contains(&key) {
assert!(t2.db_items_remaining().is_empty()); x.push((key.clone(), random_value_indexed(j)));
/*if t1.root() != t2.root()*/ { got.insert(key);
trace!("{:?}", t1); }
trace!("{:?}", t2); }
let real = trie_root(x.clone());
let mut memtrie = populate_trie(&x);
if *memtrie.root() != real || !memtrie.db_items_remaining().is_empty() {
println!("TRIE MISMATCH");
println!("");
println!("{:?} vs {:?}", memtrie.root(), real);
for i in x.iter() {
println!("{:?} -> {:?}", i.0.pretty(), i.1.pretty());
}
println!("{:?}", memtrie);
}
assert_eq!(*memtrie.root(), real);
assert!(memtrie.db_items_remaining().is_empty());
unpopulate_trie(&mut memtrie, &x);
if *memtrie.root() != SHA3_NULL_RLP || !memtrie.db_items_remaining().is_empty() {
println!("TRIE MISMATCH");
println!("");
println!("{:?} vs {:?}", memtrie.root(), real);
for i in x.iter() {
println!("{:?} -> {:?}", i.0.pretty(), i.1.pretty());
}
println!("{:?}", memtrie);
}
assert_eq!(*memtrie.root(), SHA3_NULL_RLP);
assert!(memtrie.db_items_remaining().is_empty());
} }
} }
@ -842,6 +990,23 @@ mod tests {
#[test] #[test]
fn remove_to_empty() { fn remove_to_empty() {
let big_value = b"00000000000000000000000000000000";
let mut t1 = TrieDB::new_memory();
t1.insert(&[0x01, 0x23], &big_value.to_vec());
t1.insert(&[0x01, 0x34], &big_value.to_vec());
trace!("keys remaining {:?}", t1.db_items_remaining());
assert!(t1.db_items_remaining().is_empty());
let mut t2 = TrieDB::new_memory();
t2.insert(&[0x01], &big_value.to_vec());
t2.insert(&[0x01, 0x23], &big_value.to_vec());
t2.insert(&[0x01, 0x34], &big_value.to_vec());
t2.remove(&[0x01]);
assert!(t2.db_items_remaining().is_empty());
/*if t1.root() != t2.root()*/ {
trace!("{:?}", t1);
trace!("{:?}", t2);
}
} }
#[test] #[test]
@ -1022,29 +1187,20 @@ mod tests {
//assert!(false); //assert!(false);
} }
fn random_key() -> Vec<u8> {
let chars = b"abcdefgrstuvwABCDEFGRSTUVW";
let mut ret: Vec<u8> = Vec::new();
let r = random::<u8>() % 4 + 1;
for _ in 0..r {
ret.push(chars[random::<usize>() % chars.len()]);
}
ret
}
#[test] #[test]
fn stress() { fn stress() {
for _ in 0..5000 { for _ in 0..5000 {
let mut x: Vec<(Vec<u8>, Vec<u8>)> = Vec::new(); let mut x: Vec<(Vec<u8>, Vec<u8>)> = Vec::new();
let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_";
for j in 0..4u32 { for j in 0..4u32 {
let key = random_key(); let key = random_key(alphabet, 5, 1);
x.push((key, rlp::encode(&j))); x.push((key, rlp::encode(&j)));
} }
let real = trie_root(x.clone()); let real = trie_root(x.clone());
let memtrie = trie_root_mem(&x); let memtrie = populate_trie(&x);
let mut y = x.clone(); let mut y = x.clone();
y.sort_by(|ref a, ref b| a.0.cmp(&b.0)); y.sort_by(|ref a, ref b| a.0.cmp(&b.0));
let memtrie_sorted = trie_root_mem(&y); let memtrie_sorted = populate_trie(&y);
if *memtrie.root() != real || *memtrie_sorted.root() != real { if *memtrie.root() != real || *memtrie_sorted.root() != real {
println!("TRIE MISMATCH"); println!("TRIE MISMATCH");
println!(""); println!("");
@ -1064,18 +1220,6 @@ mod tests {
} }
} }
fn trie_root_mem(v: &Vec<(Vec<u8>, Vec<u8>)>) -> TrieDB {
let mut t = TrieDB::new_memory();
for i in 0..v.len() {
let key: &[u8]= &v[i].0;
let val: &[u8] = &v[i].1;
t.insert(&key, &val);
}
t
}
#[test] #[test]
fn test_trie_json() { fn test_trie_json() {
println!("Json trie test: "); println!("Json trie test: ");