From 38f0af1aa0e47195d74a9552cc0530945effe6ca Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Fri, 4 Dec 2015 20:04:26 +0100 Subject: [PATCH 1/3] Code cleanups. --- src/trie.rs | 87 +++++++++++++++++------------------------------------ 1 file changed, 27 insertions(+), 60 deletions(-) diff --git a/src/trie.rs b/src/trie.rs index a340d78f7..48fbe8d83 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -511,58 +511,24 @@ impl TrieDB { } } - /// Transform an existing extension or leaf node to an invalid single-entry branch. - /// - /// **This operation will not insert the new node nor destroy the original.** - fn transmuted_extension_to_branch(orig_partial: &NibbleSlice, orig_raw_payload: &[u8], diff: &mut Diff) -> Bytes { - trace!("transmuted_extension_to_branch"); - let mut s = RlpStream::new_list(17); - assert!(!orig_partial.is_empty()); // extension nodes are not allowed to have empty partial keys. - let index = orig_partial.at(0); - // orig is extension - orig_raw_payload is a node itself. - for i in 0..17 { - if index == i { - if orig_partial.len() > 1 { - // still need an extension - diff.new_node(Self::compose_extension(&orig_partial.mid(1), orig_raw_payload), &mut s); - } else { - // was an extension of length 1 - just redirect the payload into here. - s.append_raw(orig_raw_payload, 1); - } - } else { - s.append_empty_data(); - } - } - s.out() - } - - fn transmuted_leaf_to_branch(orig_partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { - trace!("transmuted_leaf_to_branch"); + fn augmented_into_transmuted_branch(&self, orig_is_leaf: bool, orig_partial: &NibbleSlice, orig_raw_payload: &[u8], partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { + assert!(orig_is_leaf || !orig_partial.is_empty()); // extension nodes are not allowed to have empty partial keys. let mut s = RlpStream::new_list(17); let index = if orig_partial.is_empty() {16} else {orig_partial.at(0)}; - // orig is leaf - orig_raw_payload is data representing the actual value. 
for i in 0..17 { - match (index == i, i) { - (true, 16) => // leaf entry - just replace. - { s.append(&value); }, - (true, _) => // easy - original had empty slot. - diff.new_node(Self::compose_leaf(&orig_partial.mid(1), value), &mut s), - (false, _) => { s.append_empty_data(); } + match orig_is_leaf { + // not us - empty. + _ if index != i => { s.append_empty_data(); }, + // just replace. + true if i == 16 => { s.append(&value); }, + // original has empty slot. + true => diff.new_node(Self::compose_leaf(&orig_partial.mid(1), Rlp::new(orig_raw_payload).data()), &mut s), + // + false if orig_partial.len() > 1 => diff.new_node(Self::compose_extension(&orig_partial.mid(1), orig_raw_payload), &mut s), + false => { s.append_raw(orig_raw_payload, 1); }, } - } - s.out() - } - - /// Transform an existing extension or leaf node plus a new partial/value to a two-entry branch. - /// - /// **This operation will not insert the new node nor destroy the original.** - fn transmuted_to_branch_and_augmented(&self, orig_is_leaf: bool, orig_partial: &NibbleSlice, orig_raw_payload: &[u8], partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { - trace!("transmuted_to_branch_and_augmented"); - let intermediate = match orig_is_leaf { - true => Self::transmuted_leaf_to_branch(orig_partial, Rlp::new(orig_raw_payload).data(), diff), - false => Self::transmuted_extension_to_branch(orig_partial, orig_raw_payload, diff), }; - self.augmented(&intermediate, partial, value, diff) + self.augmented(&s.out(), partial, value, diff) // TODO: implement without having to make an intermediate representation. } @@ -573,17 +539,19 @@ impl TrieDB { trace!("augmented_into_branch"); let mut s = RlpStream::new_list(17); let index = if partial.is_empty() {16} else {partial.at(0) as usize}; - for i in 0usize..17 { - match (index == i, i) { - (true, 16) => // leaf entry - just replace. - { s.append(&value); }, - (true, i) if orig.at(i).is_empty() => // easy - original had empty slot. 
- diff.new_node(Self::compose_leaf(&partial.mid(1), value), &mut s), - (true, i) => { // harder - original has something there already + for i in 0..17 { + match index == i { + // not us - leave alone. + false => { s.append_raw(orig.at(i).raw(), 1); }, + // branch-leaf entry - just replace. + true if i == 16 => { s.append(&value); }, + // original had empty slot - place a leaf there. + true if orig.at(i).is_empty() => diff.new_node(Self::compose_leaf(&partial.mid(1), value), &mut s), + // original has something there already; augment. + true => { let new = self.augmented(self.take_node(&orig.at(i), diff), &partial.mid(1), value, diff); diff.new_node(new, &mut s); } - (false, i) => { s.append_raw(orig.at(i).raw(), 1); }, } } s.out() @@ -619,17 +587,16 @@ impl TrieDB { (_, 0) => { // one of us isn't empty: transmute to branch here trace!("no-common-prefix, not-both-empty (exist={:?}; new={:?}): TRANSMUTE,AUGMENT", existing_key.len(), partial.len()); - self.transmuted_to_branch_and_augmented(is_leaf, &existing_key, old_rlp.at(1).raw(), partial, value, diff) + self.augmented_into_transmuted_branch(is_leaf, &existing_key, old_rlp.at(1).raw(), partial, value, diff) }, (_, cp) if cp == existing_key.len() => { trace!("complete-prefix (cp={:?}): AUGMENT-AT-END", cp); // fully-shared prefix for this extension: // transform to an extension + augmented version of onward node. - let downstream_node: Bytes = if is_leaf { + let downstream_node: Bytes = match is_leaf { // no onward node because we're a leaf - create fake stub and use that. 
- self.augmented(&Self::compose_stub_branch(old_rlp.at(1).data()), &partial.mid(cp), value, diff) - } else { - self.augmented(self.take_node(&old_rlp.at(1), diff), &partial.mid(cp), value, diff) + true => self.augmented(&Self::compose_stub_branch(old_rlp.at(1).data()), &partial.mid(cp), value, diff), + false => self.augmented(self.take_node(&old_rlp.at(1), diff), &partial.mid(cp), value, diff), }; Self::create_extension(&existing_key, downstream_node, diff) }, From fcae9034acbd17e136f5a062c3d4bc095c56bb3f Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Mon, 7 Dec 2015 15:00:54 +0100 Subject: [PATCH 2/3] Trie documentation and code consolidation. --- src/trie.rs | 451 ++++++++++++++++++++++++++++++---------------------- 1 file changed, 265 insertions(+), 186 deletions(-) diff --git a/src/trie.rs b/src/trie.rs index 48fbe8d83..0425565bc 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -15,17 +15,30 @@ use std::collections::HashMap; pub const NULL_RLP: [u8; 1] = [0x80; 1]; pub const SHA3_NULL_RLP: H256 = H256( [0x56, 0xe8, 0x1f, 0x17, 0x1b, 0xcc, 0x55, 0xa6, 0xff, 0x83, 0x45, 0xe6, 0x92, 0xc0, 0xf8, 0x6e, 0x5b, 0x48, 0xe0, 0x1b, 0x99, 0x6c, 0xad, 0xc0, 0x01, 0x62, 0x2f, 0xb5, 0xe3, 0x63, 0xb4, 0x21] ); +/// A key-value datastore implemented as a database-backed modified Merkle tree. pub trait Trie { + /// Return the root of the trie. fn root(&self) -> &H256; + + /// Is the trie empty? fn is_empty(&self) -> bool { *self.root() == SHA3_NULL_RLP } - // TODO: consider returning &[u8]... + /// Does the trie contain a given key? fn contains(&self, key: &[u8]) -> bool; + + /// What is the value of the given key in this trie? fn at<'a, 'key>(&'a self, key: &'key [u8]) -> Option<&'a [u8]> where 'a: 'key; + + /// Insert a `key`/`value` pair into the trie. An `empty` value is equivalent to removing + /// `key` from the trie. fn insert(&mut self, key: &[u8], value: &[u8]); + + /// Remove a `key` from the trie. Equivalent to making it equal to the empty + /// value. 
fn remove(&mut self, key: &[u8]); } +/// Alphabet to use when creating words for insertion into tries. pub enum Alphabet { All, Low, @@ -33,21 +46,25 @@ pub enum Alphabet { Custom(Bytes), } +/// Standard test map for profiling tries. pub struct StandardMap { alphabet: Alphabet, min_key: usize, - diff_key: usize, + journal_key: usize, count: usize, } impl StandardMap { - fn random_bytes(min_count: usize, diff_count: usize, seed: &mut H256) -> Vec { - assert!(min_count + diff_count <= 32); + /// Get a bunch of random bytes, at least `min_count` bytes, at most `min_count` + `journal_count` bytes. + /// `seed` is mutated pseudorandomly and used. + fn random_bytes(min_count: usize, journal_count: usize, seed: &mut H256) -> Vec { + assert!(min_count + journal_count <= 32); *seed = seed.sha3(); - let r = min_count + (seed.bytes()[31] as usize % (diff_count + 1)); + let r = min_count + (seed.bytes()[31] as usize % (journal_count + 1)); seed.bytes()[0..r].to_vec() } + /// Get a random value. Equal chance of being 1 byte as of 32. `seed` is mutated pseudorandomly and used. fn random_value(seed: &mut H256) -> Bytes { *seed = seed.sha3(); match seed.bytes()[0] % 2 { @@ -56,10 +73,12 @@ impl StandardMap { } } - fn random_word(alphabet: &[u8], min_count: usize, diff_count: usize, seed: &mut H256) -> Vec { - assert!(min_count + diff_count <= 32); + /// Get a random word of at least `min_count` bytes, at most `min_count` + `journal_count` bytes. + /// Each byte is an item from `alphabet`. `seed` is mutated pseudorandomly and used. 
+ fn random_word(alphabet: &[u8], min_count: usize, journal_count: usize, seed: &mut H256) -> Vec { + assert!(min_count + journal_count <= 32); *seed = seed.sha3(); - let r = min_count + (seed.bytes()[31] as usize % (diff_count + 1)); + let r = min_count + (seed.bytes()[31] as usize % (journal_count + 1)); let mut ret: Vec = Vec::with_capacity(r); for i in 0..r { ret.push(alphabet[seed.bytes()[i] as usize % alphabet.len()]); @@ -67,6 +86,7 @@ impl StandardMap { ret } + /// Create the standard map (set of keys and values) for the object's fields. pub fn make(&self) -> Vec<(Bytes, Bytes)> { let low = b"abcdef"; let mid = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_"; @@ -75,10 +95,10 @@ impl StandardMap { let mut seed = H256::new(); for _ in 0..self.count { let k = match self.alphabet { - Alphabet::All => Self::random_bytes(self.min_key, self.diff_key, &mut seed), - Alphabet::Low => Self::random_word(low, self.min_key, self.diff_key, &mut seed), - Alphabet::Mid => Self::random_word(mid, self.min_key, self.diff_key, &mut seed), - Alphabet::Custom(ref a) => Self::random_word(&a, self.min_key, self.diff_key, &mut seed), + Alphabet::All => Self::random_bytes(self.min_key, self.journal_key, &mut seed), + Alphabet::Low => Self::random_word(low, self.min_key, self.journal_key, &mut seed), + Alphabet::Mid => Self::random_word(mid, self.min_key, self.journal_key, &mut seed), + Alphabet::Custom(ref a) => Self::random_word(&a, self.min_key, self.journal_key, &mut seed), }; let v = Self::random_value(&mut seed); d.push((k, v)) @@ -87,6 +107,7 @@ impl StandardMap { } } +/// Type of node in the trie and essential information thereof. #[derive(Eq, PartialEq, Debug)] pub enum Node<'a> { Empty, @@ -95,19 +116,22 @@ pub enum Node<'a> { Branch([&'a[u8]; 16], Option<&'a [u8]>) } +/// Type of operation for the backing database - either a new node or a node deletion. #[derive(Debug)] enum Operation { New(H256, Bytes), Delete(H256), } +/// A journal of operations on the backing database. 
#[derive(Debug)] -struct Diff (Vec); +struct Journal (Vec); -impl Diff { - fn new() -> Diff { Diff(vec![]) } +impl Journal { + /// Create a new, empty, object. + fn new() -> Journal { Journal(vec![]) } - /// Given the RLP that encodes a node, append a reference to that node `out` and leave `diff` + /// Given the RLP that encodes a node, append a reference to that node `out` and leave `journal` /// such that the reference is valid, once applied. fn new_node(&mut self, rlp: Bytes, out: &mut RlpStream) { if rlp.len() >= 32 { @@ -123,24 +147,23 @@ impl Diff { } } - /// Given the RLP that encodes a now-unused node, leave `diff` in such a state that it is noted. + /// Given the RLP that encodes a now-unused node, leave `journal` in such a state that it is noted. fn delete_node_sha3(&mut self, old_sha3: H256) { trace!("delete_node: {:?}", old_sha3); self.0.push(Operation::Delete(old_sha3)); } - fn delete_node(&mut self, old: &Rlp) { - if old.is_data() && old.size() == 32 { - self.delete_node_sha3(H256::decode(old)); + /// Register an RLP-encoded node for deletion (given a slice), if it needs to be deleted. + fn delete_node(&mut self, old: &[u8]) { + let r = Rlp::new(old); + if r.is_data() && r.size() == 32 { + self.delete_node_sha3(H256::decode(&r)); } } - - fn delete_node_from_slice(&mut self, old: &[u8]) { - self.delete_node(&Rlp::new(old)); - } } impl <'a>Node<'a> { + /// Decode the `node_rlp` and return the Node. fn decoded(node_rlp: &'a [u8]) -> Node<'a> { let r = Rlp::new(node_rlp); match r.prototype() { @@ -168,7 +191,10 @@ impl <'a>Node<'a> { } } - // todo: should check length before encoding, cause it may just be sha3 of data + /// Encode the node into RLP. + /// + /// Will always return the direct node RLP even if it's 32 or more bytes. To get the + /// RLP which would be valid for using in another node, use `encoded_and_added()`. 
fn encoded(&self) -> Bytes { match *self { Node::Leaf(ref slice, ref value) => { @@ -202,7 +228,9 @@ impl <'a>Node<'a> { } } - fn encoded_and_added(&self, diff: &mut Diff) -> Bytes { + /// Encode the node, adding it to `journal` if necessary and return the RLP valid for + /// insertion into a parent node. + fn encoded_and_added(&self, journal: &mut Journal) -> Bytes { let mut stream = RlpStream::new(); match *self { Node::Leaf(ref slice, ref value) => { @@ -234,47 +262,96 @@ impl <'a>Node<'a> { 0 ... 31 => node, _ => { let mut stream = RlpStream::new(); - diff.new_node(node, &mut stream); + journal.new_node(node, &mut stream); stream.out() } } } } -//enum ValidationResult<'a> { - //Valid, - //Invalid { node: Node<'a>, depth: usize } -//} - +/// A `Trie` implementation using a generic `HashDB` backing database. +/// +/// Use it as a `Trie` trait object. You can use `db()` to get the backing database object, `keys` +/// to get the keys belonging to the trie in the backing database, and `db_items_remaining()` to get +/// which items in the backing database do not belong to this trie. If this is the only trie in the +/// backing database, then `db_items_remaining()` should be empty. 
+/// +/// # Example +/// ``` +/// extern crate ethcore_util; +/// use ethcore_util::trie::*; +/// fn main() { +/// let mut t = TrieDB::new_memory(); +/// assert!(t.is_empty()); +/// assert_eq!(*t.root(), SHA3_NULL_RLP); +/// t.insert(b"foo", b"bar"); +/// assert!(t.contains(b"foo")); +/// assert_eq!(t.at(b"foo").unwrap(), b"bar"); +/// assert!(t.db_items_remaining().is_empty()); +/// t.remove(b"foo"); +/// assert!(!t.contains(b"foo")); +/// assert!(t.db_items_remaining().is_empty()); +/// } +/// ``` pub struct TrieDB { db: Box, root: H256, pub hash_count: usize, } -impl fmt::Debug for TrieDB { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - try!(writeln!(f, "c={:?} [", self.hash_count)); - let root_rlp = self.db.lookup(&self.root).expect("Trie root not found!"); - try!(self.fmt_all(Node::decoded(root_rlp), f, 0)); - writeln!(f, "]") - } -} - +/// Option-like type allowing either a Node object passthrough or Bytes in the case of data alteration. enum MaybeChanged<'a> { Same(Node<'a>), Changed(Bytes), } impl TrieDB { + /// Create a new trie with the boxed backing database `box_db`. pub fn new_boxed(db_box: Box) -> Self { let mut r = TrieDB{ db: db_box, root: H256::new(), hash_count: 0 }; r.root = r.db.insert(&NULL_RLP); r } + /// Convenience function to create a new trie with the backing database `db`. pub fn new(db: T) -> Self where T: HashDB + 'static { Self::new_boxed(Box::new(db)) } + /// Convenience function to create a new trie with a new `MemoryDB` based backing database. pub fn new_memory() -> Self { Self::new(MemoryDB::new()) } + /// Get the backing database. pub fn db(&self) -> &HashDB { self.db.as_ref() } + /// Determine all the keys in the backing database that belong to the trie. + pub fn keys(&self) -> Vec { + let mut ret: Vec = Vec::new(); + ret.push(self.root.clone()); + self.accumulate_keys(self.root_node(), &mut ret); + ret + } + + /// Convert a vector of hashes to a hashmap of hash to occurances. 
+ pub fn to_map(hashes: Vec) -> HashMap { + let mut r: HashMap = HashMap::new(); + for h in hashes.into_iter() { + let c = *r.get(&h).unwrap_or(&0); + r.insert(h, c + 1); + } + r + } + + /// Determine occurances of items in the backing database which are not related to this + /// trie. + pub fn db_items_remaining(&self) -> HashMap { + let mut ret = self.db().keys(); + for (k, v) in Self::to_map(self.keys()).into_iter() { + let keycount = *ret.get(&k).unwrap_or(&0); + match keycount == v as i32 { + true => ret.remove(&k), + _ => ret.insert(k, keycount - v as i32), + }; + } + ret + } + + /// Set the trie to a new root node's RLP, inserting the new RLP into the backing database + /// and removing the old. fn set_root_rlp(&mut self, root_data: &[u8]) { self.db.kill(&self.root); self.root = self.db.insert(root_data); @@ -282,9 +359,10 @@ impl TrieDB { trace!("set_root_rlp {:?} {:?}", root_data.pretty(), self.root); } - fn apply(&mut self, diff: Diff) { - trace!("applying {:?} changes", diff.0.len()); - for d in diff.0.into_iter() { + /// Apply the items in `journal` into the backing database. + fn apply(&mut self, journal: Journal) { + trace!("applying {:?} changes", journal.0.len()); + for d in journal.0.into_iter() { match d { Operation::Delete(h) => { trace!("TrieDB::apply --- {:?}", &h); @@ -299,13 +377,7 @@ impl TrieDB { } } - pub fn keys(&self) -> Vec { - let mut ret: Vec = Vec::new(); - ret.push(self.root.clone()); - self.accumulate_keys(self.root_node(), &mut ret); - ret - } - + /// Recursion helper for `keys`. fn accumulate_keys(&self, node: Node, acc: &mut Vec) { let mut handle_payload = |payload| { let p = Rlp::new(payload); @@ -323,27 +395,17 @@ impl TrieDB { } } - fn to_map(hashes: Vec) -> HashMap { - let mut r: HashMap = HashMap::new(); - for h in hashes.into_iter() { - let c = *r.get(&h).unwrap_or(&0); - r.insert(h, c + 1); - } - r + /// Get the root node's RLP. 
+ fn root_node(&self) -> Node { + Node::decoded(self.db.lookup(&self.root).expect("Trie root not found!")) } - pub fn db_items_remaining(&self) -> HashMap { - let mut ret = self.db().keys(); - for (k, v) in Self::to_map(self.keys()).into_iter() { - let keycount = *ret.get(&k).unwrap_or(&0); - match keycount == v as i32 { - true => ret.remove(&k), - _ => ret.insert(k, keycount - v as i32), - }; - } - ret + /// Get the node described by `node` as a `Node`, resolving it via `get_raw_or_lookup`. + fn get_node<'a>(&'a self, node: &'a [u8]) -> Node { + Node::decoded(self.get_raw_or_lookup(node)) } + /// Indentation helper for `fmt_all`. fn fmt_indent(&self, f: &mut fmt::Formatter, size: usize) -> fmt::Result { for _ in 0..size { try!(write!(f, " ")); @@ -351,14 +413,7 @@ impl TrieDB { Ok(()) } - fn root_node(&self) -> Node { - Node::decoded(self.db.lookup(&self.root).expect("Trie root not found!")) - } - - fn get_node<'a>(&'a self, node: &'a [u8]) -> Node { - Node::decoded(self.get_raw_or_lookup(node)) - } - + /// Recursion helper for implementation of formatting trait. fn fmt_all(&self, node: Node, f: &mut fmt::Formatter, deepness: usize) -> fmt::Result { match node { Node::Leaf(slice, value) => try!(writeln!(f, "'{:?}: {:?}.", slice, value.pretty())), @@ -394,11 +449,16 @@ impl TrieDB { Ok(()) } + /// Return optional data for a key given as a `NibbleSlice`. Returns `None` if no data exists. fn get<'a, 'key>(&'a self, key: &NibbleSlice<'key>) -> Option<&'a [u8]> where 'a: 'key { let root_rlp = self.db.lookup(&self.root).expect("Trie root not found!"); self.get_from_node(&root_rlp, key) } + /// Recursive function to retrieve the value given a `node` and a partial `key`. `None` if no + /// value exists for the key. + /// + /// Note: Not a public API; use Trie trait functions. 
fn get_from_node<'a, 'key>(&'a self, node: &'a [u8], key: &NibbleSlice<'key>) -> Option<&'a [u8]> where 'a: 'key { match Node::decoded(node) { Node::Leaf(ref slice, ref value) if key == slice => Some(value), @@ -413,6 +473,9 @@ impl TrieDB { } } + /// Given some node-describing data `node`, return the actual node RLP. + /// This could be a simple identity operation in the case that the node is sufficiently small, but + /// may require a database lookup. fn get_raw_or_lookup<'a>(&'a self, node: &'a [u8]) -> &'a [u8] { // check if its sha3 + len let r = Rlp::new(node); @@ -422,20 +485,26 @@ impl TrieDB { } } - fn add(&mut self, key: &NibbleSlice, value: &[u8]) { + /// Insert a `key` and `value` pair into the trie. + /// + /// Note: Not a public API; use Trie trait functions. + fn insert_ns(&mut self, key: &NibbleSlice, value: &[u8]) { trace!("ADD: {:?} {:?}", key, value.pretty()); // determine what the new root is, insert new nodes and remove old as necessary. - let mut todo: Diff = Diff::new(); + let mut todo: Journal = Journal::new(); let root_rlp = self.augmented(self.db.lookup(&self.root).expect("Trie root not found!"), key, value, &mut todo); self.apply(todo); self.set_root_rlp(&root_rlp); trace!("/"); } - fn delete(&mut self, key: &NibbleSlice) { + /// Remove a `key` and `value` pair from the trie. + /// + /// Note: Not a public API; use Trie trait functions. + fn remove_ns(&mut self, key: &NibbleSlice) { trace!("DELETE: {:?}", key); // determine what the new root is, insert new nodes and remove old as necessary. - let mut todo: Diff = Diff::new(); + let mut todo: Journal = Journal::new(); match self.cleared_from_slice(self.db.lookup(&self.root).expect("Trie root not found!"), key, &mut todo) { Some(root_rlp) => { self.apply(todo); @@ -448,6 +517,7 @@ impl TrieDB { trace!("/"); } + /// Compose a leaf node in RLP given the `partial` key and `value`. 
fn compose_leaf(partial: &NibbleSlice, value: &[u8]) -> Bytes { trace!("compose_leaf {:?} {:?} ({:?})", partial, value.pretty(), partial.encoded(true).pretty()); let mut s = RlpStream::new_list(2); @@ -458,6 +528,7 @@ impl TrieDB { r } + /// Compose a raw extension/leaf node in RLP given the `partial` key, `raw_payload` and whether it `is_leaf`. fn compose_raw(partial: &NibbleSlice, raw_payload: &[u8], is_leaf: bool) -> Bytes { trace!("compose_raw {:?} {:?} {:?} ({:?})", partial, raw_payload.pretty(), is_leaf, partial.encoded(is_leaf)); let mut s = RlpStream::new_list(2); @@ -468,6 +539,7 @@ impl TrieDB { r } + /// Compose a branch node in RLP with a particular `value` sitting in the value position (17th place). fn compose_stub_branch(value: &[u8]) -> Bytes { let mut s = RlpStream::new_list(17); for _ in 0..16 { s.append_empty_data(); } @@ -475,21 +547,14 @@ impl TrieDB { s.out() } + /// Compose an extension node's RLP with the `partial` key and `raw_payload`. fn compose_extension(partial: &NibbleSlice, raw_payload: &[u8]) -> Bytes { Self::compose_raw(partial, raw_payload, false) } - fn create_extension(partial: &NibbleSlice, downstream_node: Bytes, diff: &mut Diff) -> Bytes { - trace!("create_extension partial: {:?}, downstream_node: {:?}", partial, downstream_node.pretty()); - let mut s = RlpStream::new_list(2); - s.append(&partial.encoded(false)); - diff.new_node(downstream_node, &mut s); - s.out() - } - - /// Return the bytes encoding the node represented by `rlp`. It will be unlinked from - /// the trie. - fn take_node<'a, 'rlp_view>(&'a self, rlp: &'rlp_view Rlp<'a>, diff: &mut Diff) -> &'a [u8] where 'a: 'rlp_view { + /// Return the bytes encoding the node represented by `rlp`. `journal` will record necessary + /// removal instructions from the backing database. 
+ fn take_node<'a, 'rlp_view>(&'a self, rlp: &'rlp_view Rlp<'a>, journal: &mut Journal) -> &'a [u8] where 'a: 'rlp_view { if rlp.is_list() { trace!("take_node {:?} (inline)", rlp.raw().pretty()); rlp.raw() @@ -498,11 +563,11 @@ impl TrieDB { let h = H256::decode(rlp); let r = self.db.lookup(&h).unwrap_or_else(||{ println!("Node not found! rlp={:?}, node_hash={:?}", rlp.raw().pretty(), h); - println!("Diff: {:?}", diff); + println!("Journal: {:?}", journal); panic!(); }); trace!("take_node {:?} (indirect for {:?})", rlp.raw().pretty(), r); - diff.delete_node_sha3(h); + journal.delete_node_sha3(h); r } else { @@ -511,61 +576,15 @@ impl TrieDB { } } - fn augmented_into_transmuted_branch(&self, orig_is_leaf: bool, orig_partial: &NibbleSlice, orig_raw_payload: &[u8], partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { - assert!(orig_is_leaf || !orig_partial.is_empty()); // extension nodes are not allowed to have empty partial keys. - let mut s = RlpStream::new_list(17); - let index = if orig_partial.is_empty() {16} else {orig_partial.at(0)}; - for i in 0..17 { - match orig_is_leaf { - // not us - empty. - _ if index != i => { s.append_empty_data(); }, - // just replace. - true if i == 16 => { s.append(&value); }, - // original has empty slot. - true => diff.new_node(Self::compose_leaf(&orig_partial.mid(1), Rlp::new(orig_raw_payload).data()), &mut s), - // - false if orig_partial.len() > 1 => diff.new_node(Self::compose_extension(&orig_partial.mid(1), orig_raw_payload), &mut s), - false => { s.append_raw(orig_raw_payload, 1); }, - } - }; - self.augmented(&s.out(), partial, value, diff) - // TODO: implement without having to make an intermediate representation. - } - - /// Given a branch node's RLP `orig` together with a `partial` key and `value`, return the - /// RLP-encoded node that accomodates the trie with the new entry. Mutate `diff` so that - /// once applied the returned node is valid. 
- fn augmented_into_branch(&self, orig: &Rlp, partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { - trace!("augmented_into_branch"); - let mut s = RlpStream::new_list(17); - let index = if partial.is_empty() {16} else {partial.at(0) as usize}; - for i in 0..17 { - match index == i { - // not us - leave alone. - false => { s.append_raw(orig.at(i).raw(), 1); }, - // branch-leaf entry - just replace. - true if i == 16 => { s.append(&value); }, - // original had empty slot - place a leaf there. - true if orig.at(i).is_empty() => diff.new_node(Self::compose_leaf(&partial.mid(1), value), &mut s), - // original has something there already; augment. - true => { - let new = self.augmented(self.take_node(&orig.at(i), diff), &partial.mid(1), value, diff); - diff.new_node(new, &mut s); - } - } - } - s.out() - } - /// Determine the RLP of the node, assuming we're inserting `partial` into the /// node currently of data `old`. This will *not* delete any hash of `old` from the database; /// it will just return the new RLP that includes the new node. /// - /// The database will be updated so as to make the returned RLP valid through inserting + /// `journal` will record the database updates so as to make the returned RLP valid through inserting /// and deleting nodes as necessary. /// /// **This operation will not insert the new node nor destroy the original.** - fn augmented(&self, old: &[u8], partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { + fn augmented(&self, old: &[u8], partial: &NibbleSlice, value: &[u8], journal: &mut Journal) -> Bytes { trace!("augmented (old: {:?}, partial: {:?}, value: {:?})", old.pretty(), partial, value.pretty()); // already have an extension. either fast_forward, cleve or transmute_to_branch. let old_rlp = Rlp::new(old); @@ -573,7 +592,24 @@ impl TrieDB { Prototype::List(17) => { trace!("branch: ROUTE,AUGMENT"); // already have a branch. route and augment. 
- self.augmented_into_branch(&old_rlp, partial, value, diff) + let mut s = RlpStream::new_list(17); + let index = if partial.is_empty() {16} else {partial.at(0) as usize}; + for i in 0..17 { + match index == i { + // not us - leave alone. + false => { s.append_raw(old_rlp.at(i).raw(), 1); }, + // branch-leaf entry - just replace. + true if i == 16 => { s.append(&value); }, + // original had empty slot - place a leaf there. + true if old_rlp.at(i).is_empty() => journal.new_node(Self::compose_leaf(&partial.mid(1), value), &mut s), + // original has something there already; augment. + true => { + let new = self.augmented(self.take_node(&old_rlp.at(i), journal), &partial.mid(1), value, journal); + journal.new_node(new, &mut s); + } + } + } + s.out() }, Prototype::List(2) => { let existing_key_rlp = old_rlp.at(0); @@ -587,7 +623,24 @@ impl TrieDB { (_, 0) => { // one of us isn't empty: transmute to branch here trace!("no-common-prefix, not-both-empty (exist={:?}; new={:?}): TRANSMUTE,AUGMENT", existing_key.len(), partial.len()); - self.augmented_into_transmuted_branch(is_leaf, &existing_key, old_rlp.at(1).raw(), partial, value, diff) + assert!(is_leaf || !existing_key.is_empty()); // extension nodes are not allowed to have empty partial keys. + let mut s = RlpStream::new_list(17); + let index = if existing_key.is_empty() {16} else {existing_key.at(0)}; + for i in 0..17 { + match is_leaf { + // not us - empty. + _ if index != i => { s.append_empty_data(); }, + // branch-value: just replace. + true if i == 16 => { s.append_raw(old_rlp.at(1).raw(), 1); }, + // direct extension: just replace. + false if existing_key.len() == 1 => { s.append_raw(old_rlp.at(1).raw(), 1); }, + // original has empty slot. + true => journal.new_node(Self::compose_leaf(&existing_key.mid(1), old_rlp.at(1).data()), &mut s), + // additional work required after branching. 
+ false => journal.new_node(Self::compose_extension(&existing_key.mid(1), old_rlp.at(1).raw()), &mut s), + } + }; + self.augmented(&s.out(), partial, value, journal) }, (_, cp) if cp == existing_key.len() => { trace!("complete-prefix (cp={:?}): AUGMENT-AT-END", cp); @@ -595,10 +648,15 @@ impl TrieDB { // transform to an extension + augmented version of onward node. let downstream_node: Bytes = match is_leaf { // no onward node because we're a leaf - create fake stub and use that. - true => self.augmented(&Self::compose_stub_branch(old_rlp.at(1).data()), &partial.mid(cp), value, diff), - false => self.augmented(self.take_node(&old_rlp.at(1), diff), &partial.mid(cp), value, diff), + true => self.augmented(&Self::compose_stub_branch(old_rlp.at(1).data()), &partial.mid(cp), value, journal), + false => self.augmented(self.take_node(&old_rlp.at(1), journal), &partial.mid(cp), value, journal), }; - Self::create_extension(&existing_key, downstream_node, diff) + + trace!("create_extension partial: {:?}, downstream_node: {:?}", existing_key, downstream_node.pretty()); + let mut s = RlpStream::new_list(2); + s.append(&existing_key.encoded(false)); + journal.new_node(downstream_node, &mut s); + s.out() }, (_, cp) => { // partially-shared prefix for this extension: @@ -612,12 +670,12 @@ impl TrieDB { // low (farther from root) let low = Self::compose_raw(&existing_key.mid(cp), old_rlp.at(1).raw(), is_leaf); - let augmented_low = self.augmented(&low, &partial.mid(cp), value, diff); + let augmented_low = self.augmented(&low, &partial.mid(cp), value, journal); // high (closer to root) let mut s = RlpStream::new_list(2); s.append(&existing_key.encoded_leftmost(cp, false)); - diff.new_node(augmented_low, &mut s); + journal.new_node(augmented_low, &mut s); s.out() }, } @@ -630,6 +688,7 @@ impl TrieDB { } } + /// Given a `MaybeChanged` result `n`, return the node's RLP regardless of whether it changed. 
fn encoded(n: MaybeChanged) -> Bytes { match n { MaybeChanged::Same(n) => n.encoded(), @@ -637,18 +696,20 @@ impl TrieDB { } } - fn fixed_indirection<'a>(n: Node<'a>, diff: &mut Diff) -> MaybeChanged<'a> { + /// Fix the node payload's sizes in `n`, replacing any over-size payloads with the hashed reference + /// and placing the payload DB insertions in the `journal`. + fn fixed_indirection<'a>(n: Node<'a>, journal: &mut Journal) -> MaybeChanged<'a> { match n { Node::Extension(partial, payload) if payload.len() >= 32 && Rlp::new(payload).is_list() => { // make indirect - MaybeChanged::Changed(Node::Extension(partial, &Node::decoded(payload).encoded_and_added(diff)).encoded()) + MaybeChanged::Changed(Node::Extension(partial, &Node::decoded(payload).encoded_and_added(journal)).encoded()) }, Node::Branch(payloads, value) => { // check each child isn't too big // TODO OPTIMISE - should really check at the point of (re-)constructing the branch. for i in 0..16 { if payloads[i].len() >= 32 && Rlp::new(payloads[i]).is_list() { - let n = Node::decoded(payloads[i]).encoded_and_added(diff); + let n = Node::decoded(payloads[i]).encoded_and_added(journal); let mut new_nodes = payloads; new_nodes[i] = &n; return MaybeChanged::Changed(Node::Branch(new_nodes, value).encoded()) @@ -668,8 +729,11 @@ impl TrieDB { /// - Extension node followed by anything other than a Branch node. /// - Extension node with a child which has too many bytes to be inline. /// + /// `journal` will record the database updates so as to make the returned RLP valid through inserting + /// and deleting nodes as necessary. 
+ /// /// **This operation will not insert the new node nor destroy the original.** - fn fixed<'a, 'b>(&'a self, n: Node<'b>, diff: &mut Diff) -> MaybeChanged<'b> where 'a: 'b { + fn fixed<'a, 'b>(&'a self, n: Node<'b>, journal: &mut Journal) -> MaybeChanged<'b> where 'a: 'b { trace!("fixed node={:?}", n); match n { Node::Branch(nodes, node_value) => { @@ -681,9 +745,6 @@ impl TrieDB { Many, }; let mut used_index = UsedIndex::None; - // 0-15 -> index of a non-null branch - // 16 -> no non-null branch - // 17 -> multiple non-null branches for i in 0..16 { match (nodes[i] == NULL_RLP, &used_index) { (false, &UsedIndex::None) => used_index = UsedIndex::One(i as u8), @@ -699,17 +760,17 @@ impl TrieDB { // TODO: OPTIMISE: - don't call fixed again but put the right node in straight away here. // call fixed again since the transmute may cause invalidity. let new_partial: [u8; 1] = [a; 1]; - MaybeChanged::Changed(Self::encoded(self.fixed(Node::Extension(NibbleSlice::new_offset(&new_partial[..], 1), nodes[a as usize]), diff))) + MaybeChanged::Changed(Self::encoded(self.fixed(Node::Extension(NibbleSlice::new_offset(&new_partial[..], 1), nodes[a as usize]), journal))) }, (UsedIndex::None, Some(value)) => { // one leaf value // transmute to leaf. // call fixed again since the transmute may cause invalidity. - MaybeChanged::Changed(Self::encoded(self.fixed(Node::Leaf(NibbleSlice::new(&b""[..]), value), diff))) + MaybeChanged::Changed(Self::encoded(self.fixed(Node::Leaf(NibbleSlice::new(&b""[..]), value), journal))) } _ => { // onwards node(s) and/or leaf // no transmute needed, but should still fix the indirection. 
trace!("no-transmute: FIXINDIRECTION"); - Self::fixed_indirection(Node::Branch(nodes, node_value), diff) + Self::fixed_indirection(Node::Branch(nodes, node_value), journal) }, } }, @@ -717,16 +778,16 @@ impl TrieDB { match Node::decoded(self.get_raw_or_lookup(payload)) { Node::Extension(sub_partial, sub_payload) => { // combine with node below - diff.delete_node_from_slice(payload); - MaybeChanged::Changed(Self::encoded(Self::fixed_indirection(Node::Extension(NibbleSlice::new_composed(&partial, &sub_partial), sub_payload), diff))) + journal.delete_node(payload); + MaybeChanged::Changed(Self::encoded(Self::fixed_indirection(Node::Extension(NibbleSlice::new_composed(&partial, &sub_partial), sub_payload), journal))) }, Node::Leaf(sub_partial, sub_value) => { // combine with node below - diff.delete_node_from_slice(payload); - MaybeChanged::Changed(Self::encoded(Self::fixed_indirection(Node::Leaf(NibbleSlice::new_composed(&partial, &sub_partial), sub_value), diff))) + journal.delete_node(payload); + MaybeChanged::Changed(Self::encoded(Self::fixed_indirection(Node::Leaf(NibbleSlice::new_composed(&partial, &sub_partial), sub_value), journal))) }, // no change, might still have an oversize node inline - fix indirection - _ => Self::fixed_indirection(n, diff), + _ => Self::fixed_indirection(n, journal), } }, // leaf or empty. no change. @@ -739,34 +800,40 @@ impl TrieDB { /// it will just return the new RLP that represents the new node. /// `None` may be returned should no change be needed. /// - /// The database will be updated so as to make the returned RLP valid through inserting + /// `journal` will record the database updates so as to make the returned RLP valid through inserting /// and deleting nodes as necessary. 
/// /// **This operation will not insert the new node nor destroy the original.** - fn cleared_from_slice(&self, old: &[u8], partial: &NibbleSlice, diff: &mut Diff) -> Option { - self.cleared(Node::decoded(old), partial, diff) + fn cleared_from_slice(&self, old: &[u8], partial: &NibbleSlice, journal: &mut Journal) -> Option { + self.cleared(Node::decoded(old), partial, journal) } - fn cleared(&self, n: Node, partial: &NibbleSlice, diff: &mut Diff) -> Option { + /// Compose the RLP of the node equivalent to `n` except with the `partial` key removed from its (sub-)trie. + /// + /// `journal` will record the database updates so as to make the returned RLP valid through inserting + /// and deleting nodes as necessary. + /// + /// **This operation will not insert the new node nor destroy the original.** + fn cleared(&self, n: Node, partial: &NibbleSlice, journal: &mut Journal) -> Option { trace!("cleared old={:?}, partial={:?})", n, partial); match (n, partial.is_empty()) { (Node::Empty, _) => None, (Node::Branch(_, None), true) => { None }, - (Node::Branch(payloads, _), true) => Some(Self::encoded(self.fixed(Node::Branch(payloads, None), diff))), // matched as leaf-branch - give back fixed branch with it. + (Node::Branch(payloads, _), true) => Some(Self::encoded(self.fixed(Node::Branch(payloads, None), journal))), // matched as leaf-branch - give back fixed branch with it. (Node::Branch(payloads, value), false) => { // Branch with partial left - route, clear, fix. let i: usize = partial.at(0) as usize; trace!("branch-with-partial node[{:?}]={:?}", i, payloads[i].pretty()); - self.cleared(self.get_node(payloads[i]), &partial.mid(1), diff).map(|new_payload| { + self.cleared(self.get_node(payloads[i]), &partial.mid(1), journal).map(|new_payload| { trace!("branch-new-payload={:?}; delete-old={:?}", new_payload.pretty(), payloads[i].pretty()); // downsteam node needed to be changed. 
- diff.delete_node_from_slice(payloads[i]); + journal.delete_node(payloads[i]); // return fixed up new node. let mut new_payloads = payloads; new_payloads[i] = &new_payload; - Self::encoded(self.fixed(Node::Branch(new_payloads, value), diff)) + Self::encoded(self.fixed(Node::Branch(new_payloads, value), journal)) }) }, (Node::Leaf(node_partial, _), _) => { @@ -785,12 +852,12 @@ impl TrieDB { cp if cp == node_partial.len() => { trace!("matching-prefix (cp={:?}): SKIP,CLEAR,FIXUP", cp); // key at end of extension - skip, clear, fix - self.cleared(self.get_node(node_payload), &partial.mid(node_partial.len()), diff).map(|new_payload| { + self.cleared(self.get_node(node_payload), &partial.mid(node_partial.len()), journal).map(|new_payload| { trace!("extension-new-payload={:?}; delete-old={:?}", new_payload.pretty(), node_payload.pretty()); // downsteam node needed to be changed. - diff.delete_node_from_slice(node_payload); + journal.delete_node(node_payload); // return fixed up new node. - Self::encoded(self.fixed(Node::Extension(node_partial, &new_payload), diff)) + Self::encoded(self.fixed(Node::Extension(node_partial, &new_payload), journal)) }) }, _ => None, // key in the middle of an extension - doesn't exist. 
@@ -812,11 +879,23 @@ impl Trie for TrieDB { } fn insert(&mut self, key: &[u8], value: &[u8]) { - self.add(&NibbleSlice::new(key), value); + match value.is_empty() { + false => self.insert_ns(&NibbleSlice::new(key), value), + true => self.remove_ns(&NibbleSlice::new(key)), + } } fn remove(&mut self, key: &[u8]) { - self.delete(&NibbleSlice::new(key)); + self.remove_ns(&NibbleSlice::new(key)); + } +} + +impl fmt::Debug for TrieDB { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + try!(writeln!(f, "c={:?} [", self.hash_count)); + let root_rlp = self.db.lookup(&self.root).expect("Trie root not found!"); + try!(self.fmt_all(Node::decoded(root_rlp), f, 0)); + writeln!(f, "]") } } @@ -834,9 +913,9 @@ mod tests { use std::collections::HashSet; use bytes::{ToPretty,Bytes}; - fn random_key(alphabet: &[u8], min_count: usize, diff_count: usize) -> Vec { + fn random_key(alphabet: &[u8], min_count: usize, journal_count: usize) -> Vec { let mut ret: Vec = Vec::new(); - let r = min_count + if diff_count > 0 {random::() % diff_count} else {0}; + let r = min_count + if journal_count > 0 {random::() % journal_count} else {0}; for _ in 0..r { ret.push(alphabet[random::() % alphabet.len()]); } @@ -885,18 +964,18 @@ mod tests { fn playpen() { env_logger::init().ok(); - let maps = map!{ - "six-low" => StandardMap{alphabet: Alphabet::Low, min_key: 6, diff_key: 0, count: 1000}, - "six-mid" => StandardMap{alphabet: Alphabet::Mid, min_key: 6, diff_key: 0, count: 1000}, - "six-all" => StandardMap{alphabet: Alphabet::All, min_key: 6, diff_key: 0, count: 1000}, - "mix-mid" => StandardMap{alphabet: Alphabet::Mid, min_key: 1, diff_key: 5, count: 1000} + /*let maps = map!{ + "six-low" => StandardMap{alphabet: Alphabet::Low, min_key: 6, journal_key: 0, count: 1000}, + "six-mid" => StandardMap{alphabet: Alphabet::Mid, min_key: 6, journal_key: 0, count: 1000}, + "six-all" => StandardMap{alphabet: Alphabet::All, min_key: 6, journal_key: 0, count: 1000}, + "mix-mid" => StandardMap{alphabet: 
Alphabet::Mid, min_key: 1, journal_key: 5, count: 1000} }; for sm in maps { let m = sm.1.make(); let t = populate_trie(&m); println!("{:?}: root={:?}, hash_count={:?}", sm.0, t.root(), t.hash_count); - }; - panic!(); + };*/ +// panic!(); for test_i in 0..1 { if test_i % 50 == 0 { @@ -1156,7 +1235,7 @@ mod tests { #[test] fn stress() { - for _ in 0..5000 { + for _ in 0..500 { let mut x: Vec<(Vec, Vec)> = Vec::new(); let alphabet = b"@QWERTYUIOPASDFGHJKLZXCVBNM[/]^_"; for j in 0..4u32 { From 03674cad3be346b00e19641a7c254dbaa15fbc85 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Mon, 7 Dec 2015 17:20:15 +0100 Subject: [PATCH 3/3] Documentation. --- src/error.rs | 3 +++ src/hash.rs | 6 +++++- src/hashdb.rs | 2 ++ src/trie.rs | 1 + 4 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/error.rs b/src/error.rs index f6c64a54f..71514d045 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,3 +1,5 @@ +//! General error types for use in ethcore. + use rustc_serialize::hex::*; #[derive(Debug)] @@ -6,6 +8,7 @@ pub enum BaseDataError { } #[derive(Debug)] +/// General error type which should be capable of representing all errors in ethcore. pub enum EthcoreError { FromHex(FromHexError), BaseData(BaseDataError), diff --git a/src/hash.rs b/src/hash.rs index 5662d713c..b51bbdacf 100644 --- a/src/hash.rs +++ b/src/hash.rs @@ -1,3 +1,5 @@ +//! General hash types, a fixed-size raw-data type used as the output of hash functions. + use std::str::FromStr; use std::fmt; use std::ops; @@ -11,7 +13,9 @@ use bytes::BytesConvertable; use math::log2; use uint::U256; -/// types implementing FixedHash must be also BytesConvertable +/// Trait for a fixed-size byte array to be used as the output of hash functions. +/// +/// Note: types implementing `FixedHash` must be also `BytesConvertable`. 
pub trait FixedHash: Sized + BytesConvertable { fn new() -> Self; fn random() -> Self; diff --git a/src/hashdb.rs b/src/hashdb.rs index 207883e4b..e63d031e5 100644 --- a/src/hashdb.rs +++ b/src/hashdb.rs @@ -1,7 +1,9 @@ +//! Database of byte-slices keyed to their Keccak hash. use hash::*; use bytes::*; use std::collections::HashMap; +/// Trait modelling datastore keyed by a 32-byte Keccak hash. pub trait HashDB { /// Get the keys in the database together with number of underlying references. fn keys(&self) -> HashMap; diff --git a/src/trie.rs b/src/trie.rs index 0425565bc..5c4420787 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -1,3 +1,4 @@ +//! Key-value datastore with a modified Merkle tree. extern crate rand; use std::fmt;