From 53547adea8b72d36ad997894e32abf40b89bc809 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Fri, 4 Dec 2015 18:05:59 +0100 Subject: [PATCH] Fix for trie. Benchmarks for Trie. --- benches/trie.rs | 128 ++++++++++++++++++++++++++++++++++++++ src/hashdb.rs | 2 +- src/memorydb.rs | 8 +-- src/overlaydb.rs | 12 ++-- src/trie.rs | 159 +++++++++++++++++++++++++++++++++++++---------- 5 files changed, 262 insertions(+), 47 deletions(-) create mode 100644 benches/trie.rs diff --git a/benches/trie.rs b/benches/trie.rs new file mode 100644 index 000000000..0b7055bde --- /dev/null +++ b/benches/trie.rs @@ -0,0 +1,128 @@ +#![feature(test)] + +extern crate test; +extern crate rand; +extern crate ethcore_util; +#[macro_use] +extern crate log; + +use test::Bencher; +use rand::random; +//use ethcore_util::BytesConvertable; +use ethcore_util::hash::*; +use ethcore_util::bytes::*; +use ethcore_util::trie::*; +use ethcore_util::sha3::*; +use ethcore_util::ToBytes::*; + + +fn random_word(alphabet: &[u8], min_count: usize, diff_count: usize, seed: &mut H256) -> Vec { + assert!(min_count + diff_count <= 32); + *seed = seed.sha3(); + let r = min_count + (seed.bytes()[31] as usize % (diff_count + 1)); + let mut ret: Vec = Vec::with_capacity(r); + for i in 0..r { + ret.push(alphabet[seed.bytes()[i] as usize % alphabet.len()]); + } + ret +} + +fn random_bytes(min_count: usize, diff_count: usize, seed: &mut H256) -> Vec { + assert!(min_count + diff_count <= 32); + *seed = seed.sha3(); + let r = min_count + (seed.bytes()[31] as usize % (diff_count + 1)); + seed.bytes()[0..r].to_vec() +} + +fn random_value(seed: &mut H256) -> Bytes { + *seed = seed.sha3(); + match seed.bytes()[0] % 2 { + 1 => vec![seed.bytes()[31];1], + _ => seed.bytes().to_vec(), + } +} + +#[bench] +fn insertions_six_high(b: &mut Bencher) { + let mut d: Vec<(Bytes, Bytes)> = Vec::new(); + let mut seed = H256::new(); + for _ in 0..1000 { + let k = random_bytes(6, 0, &mut seed); + let v = random_value(&mut seed); + d.push((k, v)) + } + + b.iter(||{ + let mut t = TrieDB::new_memory(); + for i in d.iter() { + t.insert(&i.0, &i.1); + } + }) +} + +#[bench] +fn insertions_six_mid(b: &mut Bencher) { + let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_"; + let mut d: Vec<(Bytes, Bytes)> = Vec::new(); + let mut seed = H256::new(); + for _ in 0..1000 { + let k = random_word(alphabet, 6, 0, &mut seed); + let v = random_value(&mut seed); + d.push((k, v)) + } + b.iter(||{ + let mut t = TrieDB::new_memory(); + for i in d.iter() { + t.insert(&i.0, &i.1); + } + debug!("hash_count={:?}", t.hash_count); + }) +} + +#[bench] +fn insertions_random_mid(b: &mut Bencher) { + let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_"; + let mut d: Vec<(Bytes, Bytes)> = Vec::new(); + let mut seed = H256::new(); + for _ in 0..1000 { + let k = random_word(alphabet, 1, 5, &mut seed); + let v = random_value(&mut seed); + d.push((k, v)) + } + + b.iter(||{ + let mut t = TrieDB::new_memory(); + for i in d.iter() { + t.insert(&i.0, &i.1); + } + }) +} + +#[bench] +fn insertions_six_low(b: &mut Bencher) { + let alphabet = b"abcdef"; + let mut d: Vec<(Bytes, Bytes)> = Vec::new(); + let mut seed = H256::new(); + for _ in 0..1000 { + let k = random_word(alphabet, 6, 0, &mut seed); + let v = random_value(&mut seed); + d.push((k, v)) + } + + b.iter(||{ + let mut t = TrieDB::new_memory(); + for i in d.iter() { + t.insert(&i.0, &i.1); + } + }) +} + +#[bench] +fn sha3x1000(b: &mut Bencher) { + b.iter(||{ + let mut seed = H256::new(); + for i in 0..1000 { + seed = seed.sha3() + } + }) +} diff --git a/src/hashdb.rs b/src/hashdb.rs index f893c8df8..207883e4b 100644 --- a/src/hashdb.rs +++ b/src/hashdb.rs @@ -4,7 +4,7 @@ use std::collections::HashMap; pub trait HashDB { /// Get the keys in the database together with number of underlying references. - fn keys(&self) -> HashMap; + fn keys(&self) -> HashMap; /// Look up a given hash into the bytes that hash to it, returning None if the /// hash is not known. diff --git a/src/memorydb.rs b/src/memorydb.rs index 8c7eaff2c..680129670 100644 --- a/src/memorydb.rs +++ b/src/memorydb.rs @@ -116,10 +116,6 @@ impl MemoryDB { } self.data.get(key).unwrap() } - - pub fn raw_keys(&self) -> HashMap { - self.data.iter().filter_map(|(k, v)| if v.1 != 0 {Some((k.clone(), v.1))} else {None}).collect::>() - } } impl HashDB for MemoryDB { @@ -130,8 +126,8 @@ impl HashDB for MemoryDB { } } - fn keys(&self) -> HashMap { - self.data.iter().filter_map(|(k, v)| if v.1 > 0 {Some((k.clone(), v.1 as u32))} else {None} ).collect::>() + fn keys(&self) -> HashMap { + self.data.iter().filter_map(|(k, v)| if v.1 != 0 {Some((k.clone(), v.1))} else {None}).collect::>() } fn exists(&self, key: &H256) -> bool { diff --git a/src/overlaydb.rs b/src/overlaydb.rs index 78ca67d01..c13acfd6a 100644 --- a/src/overlaydb.rs +++ b/src/overlaydb.rs @@ -136,17 +136,17 @@ impl OverlayDB { } impl HashDB for OverlayDB { - fn keys(&self) -> HashMap { - let mut ret: HashMap = HashMap::new(); + fn keys(&self) -> HashMap { + let mut ret: HashMap = HashMap::new(); for (key, _) in self.backing.iterator().from_start() { let h = H256::from_slice(key.deref()); let r = self.payload(&h).unwrap().1; - ret.insert(h, r); + ret.insert(h, r as i32); } - for (key, refs) in self.overlay.raw_keys().into_iter() { - let refs = *ret.get(&key).unwrap_or(&0u32) as i32 + refs as i32; - ret.insert(key, refs as u32); + for (key, refs) in self.overlay.keys().into_iter() { + let refs = *ret.get(&key).unwrap_or(&0) + refs; + ret.insert(key, refs); } ret } diff --git a/src/trie.rs b/src/trie.rs index 81ac4f99b..a340d78f7 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -26,6 +26,67 @@ pub trait Trie { fn remove(&mut self, key: &[u8]); } +pub enum Alphabet { + All, + Low, + Mid, + Custom(Bytes), +} + +pub struct StandardMap { + alphabet: Alphabet, + min_key: usize, + diff_key: usize, + count: usize, +} + +impl StandardMap { + fn random_bytes(min_count: usize, diff_count: usize, seed: &mut H256) -> Vec { + assert!(min_count + diff_count <= 32); + *seed = seed.sha3(); + let r = min_count + (seed.bytes()[31] as usize % (diff_count + 1)); + seed.bytes()[0..r].to_vec() + } + + fn random_value(seed: &mut H256) -> Bytes { + *seed = seed.sha3(); + match seed.bytes()[0] % 2 { + 1 => vec![seed.bytes()[31];1], + _ => seed.bytes().to_vec(), + } + } + + fn random_word(alphabet: &[u8], min_count: usize, diff_count: usize, seed: &mut H256) -> Vec { + assert!(min_count + diff_count <= 32); + *seed = seed.sha3(); + let r = min_count + (seed.bytes()[31] as usize % (diff_count + 1)); + let mut ret: Vec = Vec::with_capacity(r); + for i in 0..r { + ret.push(alphabet[seed.bytes()[i] as usize % alphabet.len()]); + } + ret + } + + pub fn make(&self) -> Vec<(Bytes, Bytes)> { + let low = b"abcdef"; + let mid = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_"; + + let mut d: Vec<(Bytes, Bytes)> = Vec::new(); + let mut seed = H256::new(); + for _ in 0..self.count { + let k = match self.alphabet { + Alphabet::All => Self::random_bytes(self.min_key, self.diff_key, &mut seed), + Alphabet::Low => Self::random_word(low, self.min_key, self.diff_key, &mut seed), + Alphabet::Mid => Self::random_word(mid, self.min_key, self.diff_key, &mut seed), + Alphabet::Custom(ref a) => Self::random_word(&a, self.min_key, self.diff_key, &mut seed), + }; + let v = Self::random_value(&mut seed); + d.push((k, v)) + } + d + } +} + #[derive(Eq, PartialEq, Debug)] pub enum Node<'a> { Empty, @@ -34,11 +95,13 @@ pub enum Node<'a> { Branch([&'a[u8]; 16], Option<&'a [u8]>) } +#[derive(Debug)] enum Operation { New(H256, Bytes), Delete(H256), } +#[derive(Debug)] struct Diff (Vec); impl Diff { @@ -48,8 +111,9 @@ impl Diff { /// such that the reference is valid, once applied. fn new_node(&mut self, rlp: Bytes, out: &mut RlpStream) { if rlp.len() >= 32 { - trace!("new_node: reference node {:?}", rlp.pretty()); let rlp_sha3 = rlp.sha3(); + + trace!("new_node: reference node {:?} => {:?}", rlp_sha3, rlp.pretty()); out.append(&rlp_sha3); self.0.push(Operation::New(rlp_sha3, rlp)); } @@ -61,25 +125,18 @@ impl Diff { /// Given the RLP that encodes a now-unused node, leave `diff` in such a state that it is noted. fn delete_node_sha3(&mut self, old_sha3: H256) { + trace!("delete_node: {:?}", old_sha3); self.0.push(Operation::Delete(old_sha3)); } fn delete_node(&mut self, old: &Rlp) { if old.is_data() && old.size() == 32 { - self.0.push(Operation::Delete(H256::decode(old))); + self.delete_node_sha3(H256::decode(old)); } } fn delete_node_from_slice(&mut self, old: &[u8]) { - let r = Rlp::new(old); - if r.is_data() && r.size() == 32 { - self.0.push(Operation::Delete(H256::decode(&r))); - } - } - - fn replace_node(&mut self, old: &Rlp, rlp: Bytes, out: &mut RlpStream) { - self.delete_node(old); - self.new_node(rlp, out); + self.delete_node(&Rlp::new(old)); } } @@ -192,11 +249,12 @@ impl <'a>Node<'a> { pub struct TrieDB { db: Box, root: H256, + pub hash_count: usize, } impl fmt::Debug for TrieDB { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - try!(writeln!(f, "[")); + try!(writeln!(f, "c={:?} [", self.hash_count)); let root_rlp = self.db.lookup(&self.root).expect("Trie root not found!"); try!(self.fmt_all(Node::decoded(root_rlp), f, 0)); writeln!(f, "]") @@ -209,7 +267,7 @@ enum MaybeChanged<'a> { } impl TrieDB { - pub fn new_boxed(db_box: Box) -> Self { let mut r = TrieDB{ db: db_box, root: H256::new() }; r.set_root_rlp(&NULL_RLP); r } + pub fn new_boxed(db_box: Box) -> Self { let mut r = TrieDB{ db: db_box, root: H256::new(), hash_count: 0 }; r.root = r.db.insert(&NULL_RLP); r } pub fn new(db: T) -> Self where T: HashDB + 'static { Self::new_boxed(Box::new(db)) } @@ -220,6 +278,7 @@ impl TrieDB { fn set_root_rlp(&mut self, root_data: &[u8]) { self.db.kill(&self.root); self.root = self.db.insert(root_data); + self.hash_count += 1; trace!("set_root_rlp {:?} {:?}", root_data.pretty(), self.root); } @@ -234,6 +293,7 @@ impl TrieDB { Operation::New(h, d) => { trace!("TrieDB::apply +++ {:?} -> {:?}", &h, d.pretty()); self.db.emplace(h, d); + self.hash_count += 1; } } } @@ -272,14 +332,13 @@ impl TrieDB { r } - pub fn db_items_remaining(&self) -> HashMap { + pub fn db_items_remaining(&self) -> HashMap { let mut ret = self.db().keys(); for (k, v) in Self::to_map(self.keys()).into_iter() { - let old = *ret.get(&k).expect("Node in trie is not in database!"); - assert!(old >= v); - match old > v { - true => ret.insert(k, old - v), - _ => ret.remove(&k), + let keycount = *ret.get(&k).unwrap_or(&0); + match keycount == v as i32 { + true => ret.remove(&k), + _ => ret.insert(k, keycount - v as i32), }; } ret @@ -437,7 +496,11 @@ impl TrieDB { } else if rlp.is_data() && rlp.size() == 32 { let h = H256::decode(rlp); - let r = self.db.lookup(&h).expect("Trie root not found!"); + let r = self.db.lookup(&h).unwrap_or_else(||{ + println!("Node not found! rlp={:?}, node_hash={:?}", rlp.raw().pretty(), h); + println!("Diff: {:?}", diff); + panic!(); + }); trace!("take_node {:?} (indirect for {:?})", rlp.raw().pretty(), r); diff.delete_node_sha3(h); r @@ -518,7 +581,7 @@ impl TrieDB { diff.new_node(Self::compose_leaf(&partial.mid(1), value), &mut s), (true, i) => { // harder - original has something there already let new = self.augmented(self.take_node(&orig.at(i), diff), &partial.mid(1), value, diff); - diff.replace_node(&orig.at(i), new, &mut s); + diff.new_node(new, &mut s); } (false, i) => { s.append_raw(orig.at(i).raw(), 1); }, } @@ -804,22 +867,21 @@ mod tests { use std::collections::HashSet; use bytes::{ToPretty,Bytes}; - fn random_key() -> Vec { - let chars = b"abcdefgrstuvwABCDEFGRSTUVW"; + fn random_key(alphabet: &[u8], min_count: usize, diff_count: usize) -> Vec { let mut ret: Vec = Vec::new(); - let r = random::() % 4 + 1; + let r = min_count + if diff_count > 0 {random::() % diff_count} else {0}; for _ in 0..r { - ret.push(chars[random::() % chars.len()]); + ret.push(alphabet[random::() % alphabet.len()]); } ret } - - fn random_value(i: usize) -> Bytes { + + fn random_value_indexed(j: usize) -> Bytes { match random::() % 2 { - 0 => rlp::encode(&i), + 0 => rlp::encode(&j), _ => { let mut h = H256::new(); - h.mut_bytes()[31] = i as u8; + h.mut_bytes()[31] = j as u8; rlp::encode(&h) }, } @@ -842,16 +904,44 @@ mod tests { } } + macro_rules! map({$($key:expr => $value:expr),+ } => { + { + let mut m = ::std::collections::HashMap::new(); + $( + m.insert($key, $value); + )+ + m + } + };); + #[test] fn playpen() { env_logger::init().ok(); - for _ in 0..1000 { + + let maps = map!{ + "six-low" => StandardMap{alphabet: Alphabet::Low, min_key: 6, diff_key: 0, count: 1000}, + "six-mid" => StandardMap{alphabet: Alphabet::Mid, min_key: 6, diff_key: 0, count: 1000}, + "six-all" => StandardMap{alphabet: Alphabet::All, min_key: 6, diff_key: 0, count: 1000}, + "mix-mid" => StandardMap{alphabet: Alphabet::Mid, min_key: 1, diff_key: 5, count: 1000} + }; + for sm in maps { + let m = sm.1.make(); + let t = populate_trie(&m); + println!("{:?}: root={:?}, hash_count={:?}", sm.0, t.root(), t.hash_count); + }; + panic!(); + + for test_i in 0..1 { + if test_i % 50 == 0 { + debug!("{:?} of 10000 stress tests done", test_i); + } let mut x: Vec<(Vec, Vec)> = Vec::new(); let mut got: HashSet> = HashSet::new(); - for j in 0..10usize { - let key = random_key(); + let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_"; + for j in 0..1000usize { + let key = random_key(alphabet, 5, 0); if !got.contains(&key) { - x.push((key.clone(), random_value(j))); + x.push((key.clone(), random_value_indexed(j))); got.insert(key); } } @@ -1101,8 +1191,9 @@ mod tests { fn stress() { for _ in 0..5000 { let mut x: Vec<(Vec, Vec)> = Vec::new(); + let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_"; for j in 0..4u32 { - let key = random_key(); + let key = random_key(alphabet, 5, 1); x.push((key, rlp::encode(&j))); } let real = trie_root(x.clone());