From 2b09521b5632478039cae28311c220b48d6570cb Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Tue, 1 Dec 2015 01:12:06 +0100 Subject: [PATCH] Main logic for insertion into trie. --- src/nibbleslice.rs | 12 +++ src/rlp.rs | 6 +- src/trie.rs | 252 ++++++++++++++++++++++++++++++++++++++------- 3 files changed, 229 insertions(+), 41 deletions(-) diff --git a/src/nibbleslice.rs b/src/nibbleslice.rs index 6ccf695c0..9656306e5 100644 --- a/src/nibbleslice.rs +++ b/src/nibbleslice.rs @@ -87,6 +87,18 @@ impl<'a> NibbleSlice<'a> { } r } + + pub fn encoded_leftmost(&self, n: usize, is_leaf: bool) -> Bytes { + let l = min(self.len(), n); + let mut r = Bytes::with_capacity(l / 2 + 1); + let mut i = l % 2; + r.push(if i == 1 {0x10 + self.at(0)} else {0} + if is_leaf {0x20} else {0}); + while i < l { + r.push(self.at(i) * 16 + self.at(i + 1)); + i += 2; + } + r + } } impl<'a> PartialEq for NibbleSlice<'a> { diff --git a/src/rlp.rs b/src/rlp.rs index e8562828a..aca12281f 100644 --- a/src/rlp.rs +++ b/src/rlp.rs @@ -167,8 +167,8 @@ impl<'a> Rlp<'a> { /// assert_eq!(dog, &[0x83, b'd', b'o', b'g']); /// } /// ``` - pub fn data(&self) -> &[u8] { - self.rlp.data() + pub fn raw(&self) -> &[u8] { + self.rlp.raw() } /// Returns number of rlp items. @@ -348,7 +348,7 @@ impl<'a> UntrustedRlp<'a> { /// assert_eq!(dog, &[0x83, b'd', b'o', b'g']); /// } /// ``` - pub fn data(&self) -> &[u8] { + pub fn raw(&self) -> &[u8] { self.bytes } diff --git a/src/trie.rs b/src/trie.rs index 4aebc688f..9d004c9dd 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -4,6 +4,7 @@ use hash::*; use nibbleslice::*; use bytes::*; use rlp::*; +use log::*; pub const NULL_RLP: [u8; 1] = [0x80; 1]; pub const SHA3_NULL_RLP: H256 = H256( [0x56, 0xe8, 0x1f, 0x17, 0x1b, 0xcc, 0x55, 0xa6, 0xff, 0x83, 0x45, 0xe6, 0x92, 0xc0, 0xf8, 0x6e, 0x5b, 0x48, 0xe0, 0x1b, 0x99, 0x6c, 0xad, 0xc0, 0x01, 0x62, 0x2f, 0xb5, 0xe3, 0x63, 0xb4, 0x21] ); @@ -29,13 +30,44 @@ pub struct TrieDB { root: H256, } -struct Diff { - new: Vec<(H256, Bytes)>, - old: Vec, +enum Operation { + New(H256, Bytes), + Delete(H256), } +struct Diff (Vec) + impl Diff { - pub fn new() -> Diff { Diff { new: vec![], old: vec![] }} + fn new() -> Diff { Diff(vec![]) } + + /// Given the RLP that encodes a node, append a reference to that node `out` and leave `diff` + /// such that the reference is valid, once applied. + fn new_node(&mut self, Bytes rlp, out: &mut RlpStream) { + if (rlp.len() >= 32) { + let rlp_sha3 = rlp.sha3(); + out.append(&rlp_sha3); + self.operations.push(Operation::New(rlp_sha3, rlp)); + } + else { + out.append_raw(&rlp); + } + } + + /// Given the RLP that encodes a now-unused node, leave `diff` in such a state that it is noted. + fn delete_node_sha3(&mut self, old_sha3: H256) { + self.operations.push(Operation::Delete(old_sha3)); + } + + fn delete_node(&mut self, old: &Rlp) { + if (old.is_data() && old.size() == 32) { + self.operations.push(Operation::Delete(H256::decode(old))); + } + } + + fn replace_node(&mut self, old: &Rlp, Bytes rlp, out: &mut RlpStream) { + self.delete_node(old); + self.new_node(rlp, &mut out); + } } impl TrieDB { @@ -57,70 +89,214 @@ impl TrieDB { fn add(&mut self, key: &NibbleSlice, value: &[u8]) { // determine what the new root is, insert new nodes and remove old as necessary. - let todo = { - let root_rlp = self.db.lookup(&self.root).expect("Trie root not found!"); - self.merge(root_rlp, key, value) - }; - self.apply(todo.1); - self.set_root_rlp(&todo.0); + let todo: Diff = Diff::new(); + let root_rlp = self.inject(self.db.lookup(&self.root).expect("Trie root not found!"), key, value, &mut todo); + self.apply(todo); + self.set_root_rlp(&root_rlp); } fn apply(&mut self, diff: Diff) { - for d in diff.old.iter() { - self.db.kill(&d); - } - for d in diff.new.into_iter() { - self.db.emplace(d.0, d.1); + for d in diff.operations.into_iter() { + match d { + Operation::Delete(h) => { + trace!("TrieDB::apply --- {:?}", &h); + self.db.kill(&h); + }, + Operation::New(h, d) => { + trace!("TrieDB::apply +++ {:?} -> {:?}", &h, &d); + self.db.emplace(h, d); + } + } } } - /// Determine the RLP of the node, assuming we're inserting `partial_key` into the - /// node at `old`. This will *not* delete the old mode; it will just return the new RLP - /// that includes the new node. + /// Return the bytes encoding the node represented by `rlp`. It will be unlinked from + /// the trie. + fn take_node(&self, rlp: &Rlp, &mut diff) -> Bytes { + if (rlp.is_data()) { + Bytes::decode(rlp) + } + else { + let h = H256::decode(rlp); + let r = self.db.lookup(&h).as_vec(); + diff.delete_node(h); + r + } + } + + fn inject_and_replace(&self, old: &[u8], old_sha3: H256, partial: &NibbleSlice, value: &[u8], diff: &mut Diff, out: &mut RlpStream) { + diff.new_node(self.inject(old, partial, value, diff), &mut out); + diff.delete_node(old, old_sha3); + } + + /// Transform an existing extension or leaf node plus a new partial/value to a two-entry branch. + /// + /// **This operation will not insert the new node nor destroy the original.** + fn transmute_to_branch_and_inject(&self, orig_is_leaf: bool, orig_partial: &NibbleSlice, orig_raw_payload: &[u8], partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { + let intermediate = match orig_is_leaf { + true => Self::transmute_leaf_to_branch(orig_partial, orig_raw_payload, &mut diff), + false => Self::transmute_extension_to_branch(orig_partial, orig_raw_payload, &mut diff), + }; + self.inject(&intermediate, partial, value, &mut diff) + // TODO: implement without having to make an intermediate representation. + } + + /// Transform an existing extension or leaf node to an invalid single-entry branch. + /// + /// **This operation will not insert the new node nor destroy the original.** + fn transmute_extension_to_branch(orig_partial: &NibbleSlice, orig_raw_payload: &[u8], diff: &mut Diff) -> Bytes { + let mut s = RLPStream::new_list(17); + assert!(!orig_partial.is_empty()); // extension nodes are not allowed to have empty partial keys. + let index = orig_partial.at(0); + // orig is extension - orig_payload is a node itself. + for i in 0..17 { + if index == i { + if orig_partial.len() > 1 { + // still need an extension + diff.new_node(compose_extension(orig_partial.mid(1), orig_raw_payload), &mut s); + } else { + // was an extension of length 1 - just redirect the payload into here. + s.append_raw(orig_payload.raw()); + } + } else { + s.append_null_data(); + } + } + s.out() + } + + fn transmute_leaf_to_branch(orig_partial: &NibbleSlice, orig_raw_payload: &[u8], diff: &mut Diff) -> Bytes { + let mut s = RLPStream::new_list(17); + let index = orig_partial.is_empty() ? 16 : orig_partial.at(0); + // orig is leaf - orig_payload is data representing the actual value. + for i in 0..17 { + if index == i { + // this is our node. + diff.new_node(compose_raw(orig_partial.mid(if i == 16 {0} else {1}), orig_raw_payload, true), &mut s); + } else { + s.append_null_data(); + } + } + s.out() + } + + /// Given a branch node's RLP `orig` together with a `partial` key and `value`, return the + /// RLP-encoded node that accomodates the trie with the new entry. Mutate `diff` so that + /// once applied the returned node is valid. + fn injected_into_branch(&self, orig: &Rlp, partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { + RlpStream s; + let index = partial.is_empty() ? 16 : partial.at(0); + for i in 0..17 { + if index == i && { + // this is our node. + if (orig.at(i).is_empty()) { + // easy - original had empty slot. + diff.new_node(compose_leaf(partial.mid(if i == 16 {0} else {1}), value), &mut s); + } else if (i == 16) { + // leaf entry - just replace. + let new = compose_leaf(partial.mid(if i == 16 {0} else {1}), value); + diff.replace_node(orig.at(i).raw(), new, &mut s), + } else { + // harder - original has something there already + let new = self.inject(orig.at(i).raw(), partial.mid(1), value, &mut diff); + diff.replace_node(orig.at(i).raw(), new, &mut s) + } + } else { + s.append_raw(orig.at(i).raw()); + } + } + s + } + + /// Determine the RLP of the node, assuming we're inserting `partial` into the + /// node currently of data `old`. This will *not* delete any hash of `old` from the database; + /// it will just return the new RLP that includes the new node. /// /// The database will be updated so as to make the returned RLP valid through inserting /// and deleting nodes as necessary. - fn merge(&self, old: &[u8], partial_key: &NibbleSlice, value: &[u8]) -> (Bytes, Diff) { - let o = Rlp::new(old); - match o.prototype() { + /// + /// **This operation will not insert the new node now destroy the original.** + fn inject(&self, old: &[u8], partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { + // already have an extension. either fast_forward, cleve or transmute_to_branch. + let old_rlp = Rlp::new(old); + match old_rlp.prototype() { Prototype::List(17) => { - // already have a branch. route and merge. - unimplemented!(); + // already have a branch. route and inject. + self.injected_into_branch(old_rlp, partial, value, &mut diff) }, Prototype::List(2) => { - let their_key_rlp = o.at(0); - let (them, _) = NibbleSlice::from_encoded(their_key_rlp.data()); - match partial_key.common_prefix(&them) { + let their_key_rlp = old_rlp.at(0); + let (them, is_leaf) = NibbleSlice::from_encoded(their_key_rlp.data()); + + match partial.common_prefix(&them) { + 0 if partial.is_empty() && them.is_empty() => { + // both empty: just replace. + compose_leaf(partial, value) + }, 0 => { - // transmute to branch here + // one of us isn't empty: transmute to branch here + transmute_to_branch_and_inject(is_leaf, them, old_rlp.at(1).raw()) }, cp if cp == them.len() => { - // fast-forward + // fully-shared prefix for this extension: + // skip to the end of this extension and continue the inject there. + let n = self.take_node(old_rlp.at(1).raw()); + let downstream_node = self.inject(&n, partial.mid(cp), value, &mut diff); + let mut s = RlpStream::new_list(2); + s.append_raw(old_rlp.at(0).raw()); + diff.new_node(downstream_node, &mut s); + s.out() }, - _ => { - // cleve into two + branch in the middle + cp => { + // partially-shared prefix for this extension: + // split into two extensions, high and low, pass the + // low through inject with the value before inserting the result + // into high to create the new. + + // TODO: optimise by doing this without creating injected_low. + + // low (farther from root) + let low = Self::compose_raw(them.mid(cp), old_rlp.at(1).raw(), is_leaf); + let injected_low = self.inject(&low, partial.mid(cp), value, &mut diff); + + // high (closer to root) + let mut s = RlpStream::new_list(2); + s.append(them.encoded_leftmost(cp, false)); + diff.new_node(injected_low, &mut s); + s.out() }, } - // already have an extension. either fast_forward, cleve or transmute_to_branch. - unimplemented!(); }, Prototype::Data(0) => { - (Self::compose_extension(partial_key, value, true), Diff::new()) + (Self::compose_leaf(partial, value, true), Diff::new()) }, _ => panic!("Invalid RLP for node."), } } - fn compose_extension(partial_key: &NibbleSlice, value: &[u8], is_leaf: bool) -> Bytes { - println!("compose_extension {:?} {:?} {:?} ({:?})", partial_key, value, is_leaf, partial_key.encoded(is_leaf)); + fn compose_raw(partial: &NibbleSlice, raw_payload: &[u8], bool is_leaf) -> Bytes { + println!("compose_raw {:?} {:?} {:?} ({:?})", partial, value, is_leaf, partial.encoded(is_leaf)); let mut s = RlpStream::new_list(2); - s.append(&partial_key.encoded(is_leaf)); - s.append(&value.to_vec()); // WTF?!?! - //s.append(value); // <-- should be. + s.append(&partial.encoded(is_leaf)); + s.append_raw(raw_payload); let r = s.out(); println!("output: -> {:?}", &r); r } + + fn compose_leaf(partial: &NibbleSlice, value: &[u8]) -> Bytes { + println!("compose_leaf {:?} {:?} ({:?})", partial, value, partial.encoded(true)); + let mut s = RlpStream::new_list(2); + s.append(&partial.encoded(true)); + s.append(value); + let r = s.out(); + println!("output: -> {:?}", &r); + r + } + + fn compose_extension(partial: &NibbleSlice, raw_payload: &[u8]) -> Bytes { + Self::compose_raw(partial, raw_payload, false) + } } impl Trie for TrieDB {