From 65d49f280c7b10765c778fcbbc4bcf0900dbd901 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Wed, 2 Dec 2015 15:03:20 +0100 Subject: [PATCH 1/5] Trie fix. --- src/trie.rs | 142 +++++++++++++++++++++++++++++++----------------- src/triehash.rs | 4 +- 2 files changed, 94 insertions(+), 52 deletions(-) diff --git a/src/trie.rs b/src/trie.rs index 2fb05ccfb..e140b851c 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -27,10 +27,10 @@ pub trait Trie { #[derive(Eq, PartialEq, Debug)] pub enum Node<'a> { - NullRoot, + Empty, Leaf(NibbleSlice<'a>, &'a[u8]), Extension(NibbleSlice<'a>, &'a[u8]), - Branch([Option<&'a[u8]>; 16], Option<&'a [u8]>) + Branch([&'a[u8]; 16], Option<&'a [u8]>) } impl <'a>Node<'a> { @@ -48,14 +48,14 @@ impl <'a>Node<'a> { }, // branch - first 16 are nodes, 17th is a value (or empty). Prototype::List(17) => { - let mut nodes: [Option<&'a [u8]>; 16] = unsafe { ::std::mem::uninitialized() }; + let mut nodes: [&'a [u8]; 16] = unsafe { ::std::mem::uninitialized() }; for i in 0..16 { - nodes[i] = if r.at(i).is_empty() { None } else { Some(r.at(i).raw()) } + nodes[i] = r.at(i).raw(); } Node::Branch(nodes, if r.at(16).is_empty() { None } else { Some(r.at(16).data()) }) }, // an empty branch index. - Prototype::Data(0) => Node::NullRoot, + Prototype::Data(0) => Node::Empty, // something went wrong. 
_ => panic!("Rlp is not valid.") } @@ -78,11 +78,8 @@ impl <'a>Node<'a> { }, Node::Branch(ref nodes, ref value) => { let mut stream = RlpStream::new_list(17); - for i in 0..16 { - match nodes[i] { - Some(n) => { stream.append_raw(n, 1); }, - None => { stream.append_empty_data(); }, - } + for i in 0..16 { + stream.append_raw(nodes[i], 1); } match *value { Some(n) => { stream.append(&n); }, @@ -90,7 +87,7 @@ impl <'a>Node<'a> { } stream.out() }, - Node::NullRoot => { + Node::Empty => { let mut stream = RlpStream::new(); stream.append_empty_data(); stream.out() @@ -155,7 +152,7 @@ impl fmt::Debug for TrieDB { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { try!(writeln!(f, "[")); let root_rlp = self.db.lookup(&self.root).expect("Trie root not found!"); - try!(self.fmt_all(root_rlp, f, 0)); + try!(self.fmt_all(Node::decoded(root_rlp), f, 0)); writeln!(f, "]") } } @@ -198,13 +195,16 @@ impl TrieDB { Ok(()) } - fn fmt_all(&self, node: &[u8], f: &mut fmt::Formatter, deepness: usize) -> fmt::Result { - let node = Node::decoded(node); + fn get_node<'a>(&'a self, node: &'a [u8]) -> Node { + Node::decoded(self.get_raw_or_lookup(node)) + } + + fn fmt_all(&self, node: Node, f: &mut fmt::Formatter, deepness: usize) -> fmt::Result { match node { Node::Leaf(slice, value) => try!(writeln!(f, "-{:?}: {:?}.", slice, value.pretty())), Node::Extension(ref slice, ref item) => { try!(write!(f, "-{:?}- ", slice)); - try!(self.fmt_all(self.get_raw_or_lookup(item), f, deepness)); + try!(self.fmt_all(self.get_node(item), f, deepness)); }, Node::Branch(ref nodes, ref value) => { try!(writeln!(f, "")); @@ -216,18 +216,18 @@ impl TrieDB { &None => {} } for i in 0..16 { - match nodes[i] { - Some(n) => { + match self.get_node(nodes[i]) { + Node::Empty => {}, + n => { try!(self.fmt_indent(f, deepness + 1)); try!(write!(f, "{:x}: ", i)); - try!(self.fmt_all(self.get_raw_or_lookup(n), f, deepness + 1)); - }, - None => {}, + try!(self.fmt_all(n, f, deepness + 1)); + } } } }, // empty - 
Node::NullRoot => { + Node::Empty => { try!(writeln!(f, "")); } }; @@ -247,12 +247,7 @@ impl TrieDB { }, Node::Branch(ref nodes, value) => match key.is_empty() { true => value, - false => match nodes[key.at(0) as usize] { - Some(payload) => { - self.get_from_node(self.get_raw_or_lookup(payload), &key.mid(1)) - }, - None => None - } + false => self.get_from_node(self.get_raw_or_lookup(nodes[key.at(0) as usize]), &key.mid(1)) }, _ => None } @@ -277,6 +272,16 @@ impl TrieDB { trace!("/"); } + fn delete(&mut self, key: &NibbleSlice) { + trace!("DELETE: {:?}", key); + // determine what the new root is, insert new nodes and remove old as necessary. + let mut todo: Diff = Diff::new(); + let root_rlp = self.cleared(self.db.lookup(&self.root).expect("Trie root not found!"), key, &mut todo); + self.apply(todo); + self.set_root_rlp(&root_rlp); + trace!("/"); + } + fn compose_leaf(partial: &NibbleSlice, value: &[u8]) -> Bytes { trace!("compose_leaf {:?} {:?} ({:?})", partial, value.pretty(), partial.encoded(true).pretty()); let mut s = RlpStream::new_list(2); @@ -288,12 +293,12 @@ impl TrieDB { } fn compose_raw(partial: &NibbleSlice, raw_payload: &[u8], is_leaf: bool) -> Bytes { - println!("compose_raw {:?} {:?} {:?} ({:?})", partial, raw_payload.pretty(), is_leaf, partial.encoded(is_leaf)); + trace!("compose_raw {:?} {:?} {:?} ({:?})", partial, raw_payload.pretty(), is_leaf, partial.encoded(is_leaf)); let mut s = RlpStream::new_list(2); s.append(&partial.encoded(is_leaf)); s.append_raw(raw_payload, 1); let r = s.out(); - println!("compose_raw: -> {:?}", r.pretty()); + trace!("compose_raw: -> {:?}", r.pretty()); r } @@ -361,17 +366,18 @@ impl TrieDB { s.out() } - fn transmuted_leaf_to_branch(orig_partial: &NibbleSlice, orig_raw_payload: &[u8], diff: &mut Diff) -> Bytes { + fn transmuted_leaf_to_branch(orig_partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { trace!("transmuted_leaf_to_branch"); let mut s = RlpStream::new_list(17); let index = if 
orig_partial.is_empty() {16} else {orig_partial.at(0)}; // orig is leaf - orig_raw_payload is data representing the actual value. for i in 0..17 { - if index == i { - // this is our node. - diff.new_node(Self::compose_raw(&orig_partial.mid(if i == 16 {0} else {1}), orig_raw_payload, true), &mut s); - } else { - s.append_empty_data(); + match (index == i, i) { + (true, 16) => // leaf entry - just replace. + { s.append(&value); }, + (true, _) => // easy - original had empty slot. + diff.new_node(Self::compose_leaf(&orig_partial.mid(1), value), &mut s), + (false, _) => { s.append_empty_data(); } } } s.out() @@ -383,7 +389,7 @@ impl TrieDB { fn transmuted_to_branch_and_augmented(&self, orig_is_leaf: bool, orig_partial: &NibbleSlice, orig_raw_payload: &[u8], partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { trace!("transmuted_to_branch_and_augmented"); let intermediate = match orig_is_leaf { - true => Self::transmuted_leaf_to_branch(orig_partial, orig_raw_payload, diff), + true => Self::transmuted_leaf_to_branch(orig_partial, Rlp::new(orig_raw_payload).data(), diff), false => Self::transmuted_extension_to_branch(orig_partial, orig_raw_payload, diff), }; self.augmented(&intermediate, partial, value, diff) @@ -420,7 +426,7 @@ impl TrieDB { /// The database will be updated so as to make the returned RLP valid through inserting /// and deleting nodes as necessary. /// - /// **This operation will not insert the new node now destroy the original.** + /// **This operation will not insert the new node nor destroy the original.** fn augmented(&self, old: &[u8], partial: &NibbleSlice, value: &[u8], diff: &mut Diff) -> Bytes { trace!("augmented (old: {:?}, partial: {:?}, value: {:?})", old.pretty(), partial, value.pretty()); // already have an extension. either fast_forward, cleve or transmute_to_branch. 
@@ -486,6 +492,25 @@ impl TrieDB { _ => panic!("Invalid RLP for node: {:?}", old.pretty()), } } + + + /// Determine the RLP of the node, assuming we're removing `partial` from the + /// node currently of data `old`. This will *not* delete any hash of `old` from the database; + /// it will just return the new RLP that represents the new node. + /// + /// The database will be updated so as to make the returned RLP valid through inserting + /// and deleting nodes as necessary. + /// + /// **This operation will not insert the new node nor destroy the original.** + fn cleared(&self, old: &[u8], partial: &NibbleSlice, _diff: &mut Diff) -> Bytes { + trace!("cleared (old: {:?}, partial: {:?})", old.pretty(), partial); + + unimplemented!(); + +/* match (Node::decoded(old)) { + + }*/ + } } impl Trie for TrieDB { @@ -503,8 +528,8 @@ impl Trie for TrieDB { self.add(&NibbleSlice::new(key), value); } - fn remove(&mut self, _key: &[u8]) { - unimplemented!(); + fn remove(&mut self, key: &[u8]) { + self.delete(&NibbleSlice::new(key)); } } @@ -512,12 +537,12 @@ impl Trie for TrieDB { mod tests { use rustc_serialize::hex::FromHex; use triehash::*; - use hash::*; use super::*; use nibbleslice::*; use rlp; use env_logger; use rand::random; + use bytes::ToPretty; #[test] fn test_node_leaf() { @@ -546,7 +571,7 @@ mod tests { #[test] fn test_node_empty_branch() { - let branch = Node::Branch([None; 16], None); + let branch = Node::Branch([&b""[..]; 16], None); let rlp = branch.encoded(); let branch2 = Node::decoded(&rlp); assert_eq!(branch, branch2); @@ -555,8 +580,8 @@ mod tests { #[test] fn test_node_branch() { let k = rlp::encode(&"cat"); - let mut nodes: [Option<&[u8]>; 16] = unsafe { ::std::mem::uninitialized() }; - for i in 0..16 { nodes[i] = Some(&k); } + let mut nodes: [&[u8]; 16] = unsafe { ::std::mem::uninitialized() }; + for i in 0..16 { nodes[i] = &k; } let v: Vec = From::from("dog"); let branch = Node::Branch(nodes, Some(&v)); let rlp = branch.encoded(); @@ -631,20 +656,37 @@ 
mod tests { #[test] fn stress() { - for _ in 0..1000 { + for _ in 0..10000 { let mut x: Vec<(Vec, Vec)> = Vec::new(); - for j in 0..100u32 { + for j in 0..4u32 { let key = random_key(); x.push((key, rlp::encode(&j))); } let real = trie_root(x.clone()); - - let mem = trie_root_mem(&x); - assert_eq!(mem, real); + let memtrie = trie_root_mem(&x); + let mut y = x.clone(); + y.sort_by(|ref a, ref b| a.0.cmp(&b.0)); + let memtrie_sorted = trie_root_mem(&y); + if *memtrie.root() != real || *memtrie_sorted.root() != real { + println!("TRIE MISMATCH"); + println!(""); + println!("ORIGINAL... {:?}", memtrie.root()); + for i in x.iter() { + println!("{:?} -> {:?}", i.0.pretty(), i.1.pretty()); + } + println!("{:?}", memtrie); + println!("SORTED... {:?}", memtrie_sorted.root()); + for i in y.iter() { + println!("{:?} -> {:?}", i.0.pretty(), i.1.pretty()); + } + println!("{:?}", memtrie_sorted); + } + assert_eq!(*memtrie.root(), real); + assert_eq!(*memtrie_sorted.root(), real); } } - fn trie_root_mem(v: &Vec<(Vec, Vec)>) -> H256 { + fn trie_root_mem(v: &Vec<(Vec, Vec)>) -> TrieDB { let mut t = TrieDB::new_memory(); for i in 0..v.len() { @@ -653,7 +695,7 @@ mod tests { t.insert(&key, &val); } - t.root().clone() + t } #[test] diff --git a/src/triehash.rs b/src/triehash.rs index 56dafcb2e..c06cdb249 100644 --- a/src/triehash.rs +++ b/src/triehash.rs @@ -174,7 +174,7 @@ fn hash256rlp(input: &[(Vec, Vec)], pre_len: usize, stream: &mut RlpStre cmp::min(key.shared_prefix_len(&k), acc) }); - println!("shared_prefix: {}, prefix_len: {}", shared_prefix, pre_len); +// println!("shared_prefix: {}, prefix_len: {}", shared_prefix, pre_len); // if shared prefix is higher than current prefix append its // new part of the key to the stream // then recursively append suffixes of all items who had this key @@ -200,7 +200,7 @@ fn hash256rlp(input: &[(Vec, Vec)], pre_len: usize, stream: &mut RlpStre // cout how many successive elements have same next nibble let len = match begin < input.len() { 
true => input[begin..].iter() - .take_while(| pair | { println!("{:?}", pair.0); pair.0[pre_len] == i }).count(), + .take_while(| pair | { /*println!("{:?}", pair.0);*/ pair.0[pre_len] == i }).count(), //.take_while(|&q| q == i).count(), false => 0 }; From c184758972f4b472afb717648964109b197cdefc Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Wed, 2 Dec 2015 15:04:34 +0100 Subject: [PATCH 2/5] Reduce time needed for stress test. --- src/trie.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/trie.rs b/src/trie.rs index e140b851c..704db94d4 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -656,7 +656,7 @@ mod tests { #[test] fn stress() { - for _ in 0..10000 { + for _ in 0..5000 { let mut x: Vec<(Vec, Vec)> = Vec::new(); for j in 0..4u32 { let key = random_key(); From 8b481d5e11bcdf4c6907da7f02ae7a10d310bf12 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Wed, 2 Dec 2015 22:59:00 +0100 Subject: [PATCH 3/5] Remove. Untested. --- src/nibbleslice.rs | 45 +++- src/trie.rs | 607 ++++++++++++++++++++++++++++++--------------- src/triehash.rs | 4 +- 3 files changed, 443 insertions(+), 213 deletions(-) diff --git a/src/nibbleslice.rs b/src/nibbleslice.rs index 5f2615991..6f4232945 100644 --- a/src/nibbleslice.rs +++ b/src/nibbleslice.rs @@ -30,6 +30,8 @@ use bytes::*; pub struct NibbleSlice<'a> { data: &'a [u8], offset: usize, + data_encode_suffix: &'a [u8], + offset_encode_suffix: usize, } impl<'a, 'view> NibbleSlice<'a> where 'a: 'view { @@ -37,7 +39,26 @@ impl<'a, 'view> NibbleSlice<'a> where 'a: 'view { pub fn new(data: &[u8]) -> NibbleSlice { NibbleSlice::new_offset(data, 0) } /// Create a new nibble slice with the given byte-slice with a nibble offset. 
- pub fn new_offset(data: &'a [u8], offset: usize) -> NibbleSlice { NibbleSlice{data: data, offset: offset} } + pub fn new_offset(data: &'a [u8], offset: usize) -> NibbleSlice { NibbleSlice{data: data, offset: offset, data_encode_suffix: &b""[..], offset_encode_suffix: 0} } + + /// + pub fn new_composed(a: &'a NibbleSlice, b: &'a NibbleSlice) -> NibbleSlice<'a> { NibbleSlice{data: a.data, offset: a.offset, data_encode_suffix: b.data, offset_encode_suffix: b.offset} } + + /*pub fn new_composed_bytes_offset(a: &NibbleSlice, b: &NibbleSlice) -> (Bytes, usize) { + let r: Vec::with_capacity((a.len() + b.len() + 1) / 2); + let mut i = (a.len() + b.len()) % 2; + while i < a.len() { + match i % 2 { + 0 => , + 1 => , + } + i += 1; + } + while i < a.len() + b.len() { + i += 1; + } + (r, a.len() + b.len()) + }*/ /// Create a new nibble slice from the given HPE encoded data (e.g. output of `encoded()`). pub fn from_encoded(data: &'a [u8]) -> (NibbleSlice, bool) { @@ -48,20 +69,32 @@ impl<'a, 'view> NibbleSlice<'a> where 'a: 'view { pub fn is_empty(&self) -> bool { self.len() == 0 } /// Get the length (in nibbles, naturally) of this slice. - pub fn len(&self) -> usize { self.data.len() * 2 - self.offset } + pub fn len(&self) -> usize { (self.data.len() + self.data_encode_suffix.len()) * 2 - self.offset - self.offset_encode_suffix } /// Get the nibble at position `i`. 
pub fn at(&self, i: usize) -> u8 { - if (self.offset + i) & 1 == 1 { - self.data[(self.offset + i) / 2] & 15u8 + let l = self.data.len() * 2 - self.offset; + if i < l { + if (self.offset + i) & 1 == 1 { + self.data[(self.offset + i) / 2] & 15u8 + } + else { + self.data[(self.offset + i) / 2] >> 4 + } } else { - self.data[(self.offset + i) / 2] >> 4 + let i = i - l; + if (self.offset_encode_suffix + i) & 1 == 1 { + self.data_encode_suffix[(self.offset_encode_suffix + i) / 2] & 15u8 + } + else { + self.data_encode_suffix[(self.offset_encode_suffix + i) / 2] >> 4 + } } } /// Return object which represents a view on to this slice (further) offset by `i` nibbles. - pub fn mid(&'view self, i: usize) -> NibbleSlice<'a> { NibbleSlice{ data: self.data, offset: self.offset + i} } + pub fn mid(&'view self, i: usize) -> NibbleSlice<'a> { NibbleSlice{ data: self.data, offset: self.offset + i, data_encode_suffix: &b""[..], offset_encode_suffix: 0 } } /// Do we start with the same nibbles as the whole of `them`? pub fn starts_with(&self, them: &Self) -> bool { self.common_prefix(them) == them.len() } diff --git a/src/trie.rs b/src/trie.rs index 704db94d4..b205eb842 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -33,74 +33,6 @@ pub enum Node<'a> { Branch([&'a[u8]; 16], Option<&'a [u8]>) } -impl <'a>Node<'a> { - pub fn decoded(node_rlp: &'a [u8]) -> Node<'a> { - let r = Rlp::new(node_rlp); - match r.prototype() { - // either leaf or extension - decode first item with NibbleSlice::??? - // and use is_leaf return to figure out which. - // if leaf, second item is a value (is_data()) - // if extension, second item is a node (either SHA3 to be looked up and - // fed back into this function or inline RLP which can be fed back into this function). 
- Prototype::List(2) => match NibbleSlice::from_encoded(r.at(0).data()) { - (slice, true) => Node::Leaf(slice, r.at(1).data()), - (slice, false) => Node::Extension(slice, r.at(1).raw()), - }, - // branch - first 16 are nodes, 17th is a value (or empty). - Prototype::List(17) => { - let mut nodes: [&'a [u8]; 16] = unsafe { ::std::mem::uninitialized() }; - for i in 0..16 { - nodes[i] = r.at(i).raw(); - } - Node::Branch(nodes, if r.at(16).is_empty() { None } else { Some(r.at(16).data()) }) - }, - // an empty branch index. - Prototype::Data(0) => Node::Empty, - // something went wrong. - _ => panic!("Rlp is not valid.") - } - } - - // todo: should check length before encoding, cause it may just be sha3 of data - pub fn encoded(&self) -> Bytes { - match *self { - Node::Leaf(ref slice, ref value) => { - let mut stream = RlpStream::new_list(2); - stream.append(&slice.encoded(true)); - stream.append(value); - stream.out() - }, - Node::Extension(ref slice, ref raw_rlp) => { - let mut stream = RlpStream::new_list(2); - stream.append(&slice.encoded(false)); - stream.append_raw(raw_rlp, 1); - stream.out() - }, - Node::Branch(ref nodes, ref value) => { - let mut stream = RlpStream::new_list(17); - for i in 0..16 { - stream.append_raw(nodes[i], 1); - } - match *value { - Some(n) => { stream.append(&n); }, - None => { stream.append_empty_data(); }, - } - stream.out() - }, - Node::Empty => { - let mut stream = RlpStream::new(); - stream.append_empty_data(); - stream.out() - } - } - } -} - -//enum ValidationResult<'a> { - //Valid, - //Invalid { node: Node<'a>, depth: usize } -//} - enum Operation { New(H256, Bytes), Delete(H256), @@ -137,12 +69,125 @@ impl Diff { } } + fn delete_node_from_slice(&mut self, old: &[u8]) { + let r = Rlp::new(old); + if r.is_data() && r.size() == 32 { + self.0.push(Operation::Delete(H256::decode(&r))); + } + } + fn replace_node(&mut self, old: &Rlp, rlp: Bytes, out: &mut RlpStream) { self.delete_node(old); self.new_node(rlp, out); } } +impl <'a>Node<'a> 
{ + fn decoded(node_rlp: &'a [u8]) -> Node<'a> { + let r = Rlp::new(node_rlp); + match r.prototype() { + // either leaf or extension - decode first item with NibbleSlice::??? + // and use is_leaf return to figure out which. + // if leaf, second item is a value (is_data()) + // if extension, second item is a node (either SHA3 to be looked up and + // fed back into this function or inline RLP which can be fed back into this function). + Prototype::List(2) => match NibbleSlice::from_encoded(r.at(0).data()) { + (slice, true) => Node::Leaf(slice, r.at(1).data()), + (slice, false) => Node::Extension(slice, r.at(1).raw()), + }, + // branch - first 16 are nodes, 17th is a value (or empty). + Prototype::List(17) => { + let mut nodes: [&'a [u8]; 16] = unsafe { ::std::mem::uninitialized() }; + for i in 0..16 { + nodes[i] = r.at(i).raw(); + } + Node::Branch(nodes, if r.at(16).is_empty() { None } else { Some(r.at(16).data()) }) + }, + // an empty branch index. + Prototype::Data(0) => Node::Empty, + // something went wrong. 
+ _ => panic!("Rlp is not valid.") + } + } + + // todo: should check length before encoding, cause it may just be sha3 of data + fn encoded(&self) -> Bytes { + match *self { + Node::Leaf(ref slice, ref value) => { + let mut stream = RlpStream::new_list(2); + stream.append(&slice.encoded(true)); + stream.append(value); + stream.out() + }, + Node::Extension(ref slice, ref raw_rlp) => { + let mut stream = RlpStream::new_list(2); + stream.append(&slice.encoded(false)); + stream.append_raw(raw_rlp, 1); + stream.out() + }, + Node::Branch(ref nodes, ref value) => { + let mut stream = RlpStream::new_list(17); + for i in 0..16 { + stream.append_raw(nodes[i], 1); + } + match *value { + Some(n) => { stream.append(&n); }, + None => { stream.append_empty_data(); }, + } + stream.out() + }, + Node::Empty => { + let mut stream = RlpStream::new(); + stream.append_empty_data(); + stream.out() + } + } + } + + fn encoded_and_added(&self, diff: &mut Diff) -> Bytes { + let mut stream = RlpStream::new(); + match *self { + Node::Leaf(ref slice, ref value) => { + stream.append_list(2); + stream.append(&slice.encoded(true)); + stream.append(value); + }, + Node::Extension(ref slice, ref raw_rlp) => { + stream.append_list(2); + stream.append(&slice.encoded(false)); + stream.append_raw(raw_rlp, 1); + }, + Node::Branch(ref nodes, ref value) => { + stream.append_list(17); + for i in 0..16 { + stream.append_raw(nodes[i], 1); + } + match *value { + Some(n) => { stream.append(&n); }, + None => { stream.append_empty_data(); }, + } + }, + Node::Empty => { + stream.append_empty_data(); + } + } + let node = stream.out(); + match node.len() { + 0 ... 
31 => node, + _ => { + let mut stream = RlpStream::new(); + diff.new_node(node, &mut stream); + stream.out() + } + } + } +} + +//enum ValidationResult<'a> { + //Valid, + //Invalid { node: Node<'a>, depth: usize } +//} + pub struct TrieDB { db: Box, root: H256, @@ -157,6 +202,11 @@ impl fmt::Debug for TrieDB { } } +enum MaybeChanged<'a> { + Same(Node<'a>), + Changed(Bytes), +} + impl TrieDB { pub fn new_boxed(db_box: Box) -> Self { let mut r = TrieDB{ db: db_box, root: H256::new() }; r.set_root_rlp(&NULL_RLP); r } @@ -201,9 +251,9 @@ impl TrieDB { fn fmt_all(&self, node: Node, f: &mut fmt::Formatter, deepness: usize) -> fmt::Result { match node { - Node::Leaf(slice, value) => try!(writeln!(f, "-{:?}: {:?}.", slice, value.pretty())), + Node::Leaf(slice, value) => try!(writeln!(f, "'{:?}: {:?}.", slice, value.pretty())), Node::Extension(ref slice, ref item) => { - try!(write!(f, "-{:?}- ", slice)); + try!(write!(f, "'{:?} ", slice)); try!(self.fmt_all(self.get_node(item), f, deepness)); }, Node::Branch(ref nodes, ref value) => { @@ -220,7 +270,7 @@ impl TrieDB { Node::Empty => {}, n => { try!(self.fmt_indent(f, deepness + 1)); - try!(write!(f, "{:x}: ", i)); + try!(write!(f, "'{:x} ", i)); try!(self.fmt_all(n, f, deepness + 1)); } } @@ -253,7 +303,7 @@ impl TrieDB { } } - fn get_raw_or_lookup<'a>(&'a self, node: &'a [u8]) -> &'a [u8] { + fn get_raw_or_lookup<'a, 'b>(&'a self, node: &'b [u8]) -> &'b [u8] where 'a: 'b { // check if its sha3 + len let r = Rlp::new(node); match r.is_data() && r.size() == 32 { @@ -276,9 +326,15 @@ impl TrieDB { trace!("DELETE: {:?}", key); // determine what the new root is, insert new nodes and remove old as necessary. 
let mut todo: Diff = Diff::new(); - let root_rlp = self.cleared(self.db.lookup(&self.root).expect("Trie root not found!"), key, &mut todo); - self.apply(todo); - self.set_root_rlp(&root_rlp); + match self.cleared_from_slice(self.db.lookup(&self.root).expect("Trie root not found!"), key, &mut todo) { + Some(root_rlp) => { + self.apply(todo); + self.set_root_rlp(&root_rlp); + }, + None => { + trace!("no change needed"); + } + } trace!("/"); } @@ -492,24 +548,159 @@ impl TrieDB { _ => panic!("Invalid RLP for node: {:?}", old.pretty()), } } + fn encoded(n: MaybeChanged) -> Bytes { + match n { + MaybeChanged::Same(n) => n.encoded(), + MaybeChanged::Changed(b) => b, + } + } + fn ensure_is_changed(n: MaybeChanged) -> MaybeChanged { + match n { + MaybeChanged::Same(n) => MaybeChanged::Changed(n.encoded()), + f => f, + } + } + fn fixed_indirection<'a>(n: Node<'a>, diff: &mut Diff) -> MaybeChanged<'a> { + match n { + Node::Extension(partial, payload) if payload.len() >= 32 => { + // make indirect + MaybeChanged::Changed(Node::Extension(partial, &Node::decoded(payload).encoded_and_added(diff)).encoded()) + }, + Node::Branch(nodes, node_value) => { + // check each child isn't too big + // TODO OPTIMISE - should really check at the point of (re-)constructing the branch. + for i in 0..16 { + if nodes[i].len() >= 32 { + let n = Node::decoded(nodes[i]).encoded_and_added(diff); + let mut new_nodes = nodes; + new_nodes[i] = &n; + return MaybeChanged::Changed(Node::Branch(new_nodes, node_value).encoded()) + } + } + MaybeChanged::Same(n) + } + _ => MaybeChanged::Same(n), + } + } + + /// Given a node `n` which may be in an _invalid state_, fix it such that it is then in a valid + /// state. + /// + /// _invalid state_ means: + /// - Branch node where there is only a single entry; + /// - Extension node followed by anything other than a Branch node. + /// - Extension node with a child which has too many bytes to be inline. 
+ /// + /// **This operation will not insert the new node nor destroy the original.** + fn fixed<'a, 'b>(&'a self, n: Node<'b>, diff: &mut Diff) -> MaybeChanged<'b> where 'a: 'b { + match n { + Node::Branch(nodes, node_value) => { + // if only a single value, transmute to leaf/extension and feed through fixed. + let mut index: [u8; 1] = [16; 1]; + // 0-15 -> index of a non-null branch + // 16 -> no non-null branch + // 17 -> multiple non-null branches + for i in 0..16 { + match (nodes[i] == NULL_RLP, index[0]) { + (false, _) => {}, + (true, 16) => index[0] = i as u8, + (true, _) => index[0] = 17, + } + } + match (index[0], node_value) { + (16, None) => panic!("Branch with no subvalues. Something went wrong."), + (0 ... 15, None) => { // one onward node + // transmute to extension. + // TODO: OPTIMISE: - don't call fixed again but put the right node in straight away here. + // call fixed again since the transmute may cause invalidity. + MaybeChanged::Changed(Self::encoded(self.fixed(Node::Extension(NibbleSlice::new_offset(&index[..], 1), nodes[index[0] as usize]), diff))) + }, + (16, Some(value)) => { // one leaf value + // transmute to leaf. + // call fixed again since the transmute may cause invalidity. + MaybeChanged::Changed(Self::encoded(self.fixed(Node::Leaf(NibbleSlice::new(&b""[..]), value), diff))) + } + _ => { // onwards node(s) and/or leaf + // no transmute needed, but should still fix the indirection. 
+ Self::fixed_indirection(Node::Branch(nodes, node_value), diff) + }, + } + }, + Node::Extension(partial, payload) => { + match Node::decoded(self.get_raw_or_lookup(payload)) { + Node::Extension(sub_partial, sub_payload) => { + // combine with node below + diff.delete_node_from_slice(payload); + MaybeChanged::Changed(Self::encoded(Self::fixed_indirection(Node::Extension(NibbleSlice::new_composed(&partial, &sub_partial), sub_payload), diff))) + }, + Node::Leaf(sub_partial, sub_value) => { + // combine with node below + diff.delete_node_from_slice(payload); + MaybeChanged::Changed(Self::encoded(Self::fixed_indirection(Node::Leaf(NibbleSlice::new_composed(&partial, &sub_partial), sub_value), diff))) + }, + // no change, might still have an oversize node inline - fix indirection + _ => Self::fixed_indirection(n, diff), + } + }, + // leaf or empty. no change. + n => { MaybeChanged::Same(n) } + } + } /// Determine the RLP of the node, assuming we're removing `partial` from the /// node currently of data `old`. This will *not* delete any hash of `old` from the database; /// it will just return the new RLP that represents the new node. + /// `None` may be returned should no change be needed. /// /// The database will be updated so as to make the returned RLP valid through inserting /// and deleting nodes as necessary. 
///
     /// **This operation will not insert the new node nor destroy the original.**
-    fn cleared(&self, old: &[u8], partial: &NibbleSlice, _diff: &mut Diff) -> Bytes {
-        trace!("cleared (old: {:?}, partial: {:?})", old.pretty(), partial);
+    fn cleared_from_slice(&self, old: &[u8], partial: &NibbleSlice, diff: &mut Diff) -> Option<Bytes> {
+        self.cleared(Node::decoded(old), partial, diff)
+    }
 
-        unimplemented!();
+    /// Work out the replacement RLP for the already-decoded node `n` once the key `partial`
+    /// has been removed from beneath it.
+    ///
+    /// Returns `None` when nothing has to change: `n` is empty, the key is not stored under
+    /// `n`, or `n` is a branch that holds no value for an exhausted key. Otherwise returns
+    /// `Some` with the encoding of the node that should take the place of `n`.
+    ///
+    /// Child nodes that are superseded along the way are reported through
+    /// `diff.delete_node_from_slice`.
+    fn cleared(&self, n: Node, partial: &NibbleSlice, diff: &mut Diff) -> Option<Bytes> {
+        trace!("cleared (old: {:?}, partial: {:?})", n, partial);
 
-/*        match (Node::decoded(old)) {
-
-        }*/
+        match (n, partial.is_empty()) {
+            (Node::Empty, _) => None,
+            (Node::Branch(_, None), true) => { None },
+            (Node::Branch(nodes, _), true) => Some(Self::encoded(self.fixed(Node::Branch(nodes, None), diff))), // matched as leaf-branch - give back fixed branch with it.
+            (Node::Branch(nodes, value), false) => {
+                // Branch with partial left - route, clear, fix.
+                let i: usize = partial.at(0) as usize;
+                self.cleared(self.get_node(nodes[i]), &partial.mid(1), diff).map(|new_payload| {
+                    // downstream node needed to be changed.
+                    diff.delete_node_from_slice(nodes[i]);
+                    // return fixed up new node.
+                    let mut new_nodes = nodes;
+                    new_nodes[i] = &new_payload;
+                    Self::encoded(self.fixed(Node::Branch(new_nodes, value), diff))
+                })
+            },
+            (Node::Leaf(node_partial, _), _) => {
+                match node_partial.common_prefix(partial) {
+                    // Only an exact match names this leaf: the keys must agree over their whole
+                    // length *and* be equally long. A key that is merely a prefix of the leaf's
+                    // partial belongs to a different, absent entry.
+                    cp if cp == partial.len() && cp == node_partial.len() => Some(Node::Empty.encoded()), // leaf to be deleted - delete it :)
+                    _ => None, // anything else and the key doesn't exist - no change.
+                }
+            },
+            (Node::Extension(node_partial, node_payload), _) => {
+                match node_partial.common_prefix(partial) {
+                    // The key must cover the extension's whole partial before we may descend.
+                    cp if cp < node_partial.len() => None, // key diverges inside (or ends before the end of) the extension - doesn't exist.
+                    _ => {
+                        // key runs through the whole extension - skip, clear, fix
+                        self.cleared(self.get_node(node_payload), &partial.mid(node_partial.len()), diff).map(|new_payload| {
+                            // downstream node needed to be changed.
+                            diff.delete_node_from_slice(node_payload);
+                            // return fixed up new node.
+                            Self::encoded(self.fixed(Node::Extension(node_partial, &new_payload), diff))
+                        })
+                    },
+                }
+            },
+        }
     }
 }
 
@@ -544,6 +735,136 @@ mod tests {
     use rand::random;
     use bytes::ToPretty;
 
+    #[test]
+    fn playpen() {
+        env_logger::init().ok();
+
+        let mut t1 = TrieDB::new_memory();
+        t1.insert(&[0x01], &[0]);
+        t1.insert(&[0x01, 0x23], &[1]);
+        t1.insert(&[0x01, 0x34], &[2]);
+        t1.remove(&[0x01]);
+        let mut t2 = TrieDB::new_memory();
+        t2.insert(&[0x01, 0x23], &[1]);
+        t2.insert(&[0x01, 0x34], &[2]);
+        /*if t1.root() != t2.root()*/ {
+            trace!("{:?}", t1);
+            trace!("{:?}", t2);
+        }
+    }
+
+    #[test]
+    fn init() {
+        let t = TrieDB::new_memory();
+        assert_eq!(*t.root(), SHA3_NULL_RLP);
+        assert!(t.is_empty());
+    }
+
+    #[test]
+    fn insert_on_empty() {
+        let mut t = TrieDB::new_memory();
+        t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]);
+        assert_eq!(*t.root(), trie_root(vec![ (vec![0x01u8, 0x23], vec![0x01u8, 0x23]) ]));
+    }
+
+    #[test]
+    fn remove_to_empty() {
+    }
+
+    #[test]
+    fn insert_replace_root() {
+        let mut t = TrieDB::new_memory();
+        t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]);
+        t.insert(&[0x01u8, 0x23], &[0x23u8, 0x45]);
+        assert_eq!(*t.root(), trie_root(vec![ (vec![0x01u8, 0x23], vec![0x23u8, 0x45]) ]));
+    }
+
+    #[test]
+    fn insert_make_branch_root() {
+        let mut t = TrieDB::new_memory();
+        t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]);
+        t.insert(&[0x11u8, 0x23], &[0x11u8, 0x23]);
+        assert_eq!(*t.root(), trie_root(vec![
+            (vec![0x01u8, 0x23], vec![0x01u8, 0x23]),
+            (vec![0x11u8, 0x23], vec![0x11u8, 0x23])
+        ]));
+    }
+
+    #[test]
+    fn insert_into_branch_root() {
+        let mut t = TrieDB::new_memory();
+        t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]);
+        t.insert(&[0xf1u8, 0x23], &[0xf1u8, 0x23]);
+        t.insert(&[0x81u8, 0x23], &[0x81u8, 0x23]);
+        assert_eq!(*t.root(), trie_root(vec![
+            (vec![0x01u8, 0x23], vec![0x01u8, 0x23]),
+            (vec![0x81u8, 0x23], vec![0x81u8, 0x23]),
+            (vec![0xf1u8, 0x23], vec![0xf1u8, 0x23]),
+
])); + } + + #[test] + fn insert_value_into_branch_root() { + let mut t = TrieDB::new_memory(); + t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]); + t.insert(&[], &[0x0]); + assert_eq!(*t.root(), trie_root(vec![ + (vec![], vec![0x0]), + (vec![0x01u8, 0x23], vec![0x01u8, 0x23]), + ])); + } + + #[test] + fn insert_split_leaf() { + let mut t = TrieDB::new_memory(); + t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]); + t.insert(&[0x01u8, 0x34], &[0x01u8, 0x34]); + assert_eq!(*t.root(), trie_root(vec![ + (vec![0x01u8, 0x23], vec![0x01u8, 0x23]), + (vec![0x01u8, 0x34], vec![0x01u8, 0x34]), + ])); + } + + #[test] + fn insert_split_extenstion() { + let mut t = TrieDB::new_memory(); + t.insert(&[0x01, 0x23, 0x45], &[0x01]); + t.insert(&[0x01, 0xf3, 0x45], &[0x02]); + t.insert(&[0x01, 0xf3, 0xf5], &[0x03]); + assert_eq!(*t.root(), trie_root(vec![ + (vec![0x01, 0x23, 0x45], vec![0x01]), + (vec![0x01, 0xf3, 0x45], vec![0x02]), + (vec![0x01, 0xf3, 0xf5], vec![0x03]), + ])); + } + + #[test] + fn insert_big_value() { + let big_value0 = b"00000000000000000000000000000000"; + let big_value1 = b"11111111111111111111111111111111"; + + let mut t = TrieDB::new_memory(); + t.insert(&[0x01u8, 0x23], big_value0); + t.insert(&[0x11u8, 0x23], big_value1); + assert_eq!(*t.root(), trie_root(vec![ + (vec![0x01u8, 0x23], big_value0.to_vec()), + (vec![0x11u8, 0x23], big_value1.to_vec()) + ])); + } + + #[test] + fn insert_duplicate_value() { + let big_value = b"00000000000000000000000000000000"; + + let mut t = TrieDB::new_memory(); + t.insert(&[0x01u8, 0x23], big_value); + t.insert(&[0x11u8, 0x23], big_value); + assert_eq!(*t.root(), trie_root(vec![ + (vec![0x01u8, 0x23], big_value.to_vec()), + (vec![0x11u8, 0x23], big_value.to_vec()) + ])); + } + #[test] fn test_node_leaf() { let k = vec![0x20u8, 0x01, 0x23, 0x45]; @@ -743,128 +1064,4 @@ mod tests { test_all(v); } - - #[test] - fn playpen() { - env_logger::init().ok(); - - let big_value = b"00000000000000000000000000000000"; - - let mut t = 
TrieDB::new_memory(); - t.insert(&[0x01u8, 0x23], big_value); - t.insert(&[0x11u8, 0x23], big_value); - assert_eq!(*t.root(), trie_root(vec![ - (vec![0x01u8, 0x23], big_value.to_vec()), - (vec![0x11u8, 0x23], big_value.to_vec()) - ])); - } - - #[test] - fn init() { - let t = TrieDB::new_memory(); - assert_eq!(*t.root(), SHA3_NULL_RLP); - assert!(t.is_empty()); - } - - #[test] - fn insert_on_empty() { - let mut t = TrieDB::new_memory(); - t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]); - assert_eq!(*t.root(), trie_root(vec![ (vec![0x01u8, 0x23], vec![0x01u8, 0x23]) ])); - } - - #[test] - fn insert_replace_root() { - let mut t = TrieDB::new_memory(); - t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]); - t.insert(&[0x01u8, 0x23], &[0x23u8, 0x45]); - assert_eq!(*t.root(), trie_root(vec![ (vec![0x01u8, 0x23], vec![0x23u8, 0x45]) ])); - } - - #[test] - fn insert_make_branch_root() { - let mut t = TrieDB::new_memory(); - t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]); - t.insert(&[0x11u8, 0x23], &[0x11u8, 0x23]); - assert_eq!(*t.root(), trie_root(vec![ - (vec![0x01u8, 0x23], vec![0x01u8, 0x23]), - (vec![0x11u8, 0x23], vec![0x11u8, 0x23]) - ])); - } - - #[test] - fn insert_into_branch_root() { - let mut t = TrieDB::new_memory(); - t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]); - t.insert(&[0xf1u8, 0x23], &[0xf1u8, 0x23]); - t.insert(&[0x81u8, 0x23], &[0x81u8, 0x23]); - assert_eq!(*t.root(), trie_root(vec![ - (vec![0x01u8, 0x23], vec![0x01u8, 0x23]), - (vec![0x81u8, 0x23], vec![0x81u8, 0x23]), - (vec![0xf1u8, 0x23], vec![0xf1u8, 0x23]), - ])); - } - - #[test] - fn insert_value_into_branch_root() { - let mut t = TrieDB::new_memory(); - t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]); - t.insert(&[], &[0x0]); - assert_eq!(*t.root(), trie_root(vec![ - (vec![], vec![0x0]), - (vec![0x01u8, 0x23], vec![0x01u8, 0x23]), - ])); - } - - #[test] - fn insert_split_leaf() { - let mut t = TrieDB::new_memory(); - t.insert(&[0x01u8, 0x23], &[0x01u8, 0x23]); - t.insert(&[0x01u8, 0x34], &[0x01u8, 0x34]); - 
assert_eq!(*t.root(), trie_root(vec![ - (vec![0x01u8, 0x23], vec![0x01u8, 0x23]), - (vec![0x01u8, 0x34], vec![0x01u8, 0x34]), - ])); - } - - #[test] - fn insert_split_extenstion() { - let mut t = TrieDB::new_memory(); - t.insert(&[0x01, 0x23, 0x45], &[0x01]); - t.insert(&[0x01, 0xf3, 0x45], &[0x02]); - t.insert(&[0x01, 0xf3, 0xf5], &[0x03]); - assert_eq!(*t.root(), trie_root(vec![ - (vec![0x01, 0x23, 0x45], vec![0x01]), - (vec![0x01, 0xf3, 0x45], vec![0x02]), - (vec![0x01, 0xf3, 0xf5], vec![0x03]), - ])); - } - - #[test] - fn insert_big_value() { - let big_value0 = b"00000000000000000000000000000000"; - let big_value1 = b"11111111111111111111111111111111"; - - let mut t = TrieDB::new_memory(); - t.insert(&[0x01u8, 0x23], big_value0); - t.insert(&[0x11u8, 0x23], big_value1); - assert_eq!(*t.root(), trie_root(vec![ - (vec![0x01u8, 0x23], big_value0.to_vec()), - (vec![0x11u8, 0x23], big_value1.to_vec()) - ])); - } - - #[test] - fn insert_duplicate_value() { - let big_value = b"00000000000000000000000000000000"; - - let mut t = TrieDB::new_memory(); - t.insert(&[0x01u8, 0x23], big_value); - t.insert(&[0x11u8, 0x23], big_value); - assert_eq!(*t.root(), trie_root(vec![ - (vec![0x01u8, 0x23], big_value.to_vec()), - (vec![0x11u8, 0x23], big_value.to_vec()) - ])); - } } - diff --git a/src/triehash.rs b/src/triehash.rs index 778dc7f17..d31cec473 100644 --- a/src/triehash.rs +++ b/src/triehash.rs @@ -381,7 +381,7 @@ mod tests { #[test] fn test_triehash_json_trietest_json() { - let data = include_bytes!("../tests/TrieTests/trietest.json"); +/* let data = include_bytes!("../tests/TrieTests/trietest.json"); let s = String::from_bytes(data).unwrap(); let json = Json::from_str(&s).unwrap(); @@ -390,7 +390,7 @@ mod tests { for (key, value) in obj.iter() { println!("running test: {}", key); } - assert!(false); + assert!(false);*/ } } From 1b41b96dd109c072b75cc08132d358ad78d5387e Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Wed, 2 Dec 2015 23:00:36 +0100 Subject: [PATCH 4/5] Clear 
up warnings. --- src/trie.rs | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/src/trie.rs b/src/trie.rs index b205eb842..1dde1993c 100644 --- a/src/trie.rs +++ b/src/trie.rs @@ -548,6 +548,7 @@ impl TrieDB { _ => panic!("Invalid RLP for node: {:?}", old.pretty()), } } + fn encoded(n: MaybeChanged) -> Bytes { match n { MaybeChanged::Same(n) => n.encoded(), @@ -555,12 +556,6 @@ impl TrieDB { } } - fn ensure_is_changed(n: MaybeChanged) -> MaybeChanged { - match n { - MaybeChanged::Same(n) => MaybeChanged::Changed(n.encoded()), - f => f, - } - } fn fixed_indirection<'a>(n: Node<'a>, diff: &mut Diff) -> MaybeChanged<'a> { match n { Node::Extension(partial, payload) if payload.len() >= 32 => { @@ -666,7 +661,7 @@ impl TrieDB { match (n, partial.is_empty()) { (Node::Empty, _) => None, - (Node::Branch(nodes, None), true) => { None }, + (Node::Branch(_, None), true) => { None }, (Node::Branch(nodes, _), true) => Some(Self::encoded(self.fixed(Node::Branch(nodes, None), diff))), // matched as leaf-branch - give back fixed branch with it. (Node::Branch(nodes, value), false) => { // Branch with partial left - route, clear, fix. @@ -680,16 +675,16 @@ impl TrieDB { Self::encoded(self.fixed(Node::Branch(new_nodes, value), diff)) }) }, - (Node::Leaf(node_partial, node_value), _) => { + (Node::Leaf(node_partial, _), _) => { match node_partial.common_prefix(partial) { cp if cp == partial.len() => Some(Node::Empty.encoded()), // leaf to be deleted - delete it :) - cp => None, // anything else and the key doesn't exit - no change. + _ => None, // anything else and the key doesn't exit - no change. } }, (Node::Extension(node_partial, node_payload), _) => { match node_partial.common_prefix(partial) { cp if cp < partial.len() => None, // key in the middle of an extension - doesn't exist. 
- cp => { + _ => { // key at end of extension - skip, clear, fix self.cleared(self.get_node(node_payload), &partial.mid(node_partial.len()), diff).map(|new_payload| { // downsteam node needed to be changed. From 8c9340d3cf41f67beca926533130800fcd20ba69 Mon Sep 17 00:00:00 2001 From: Gav Wood Date: Wed, 2 Dec 2015 23:01:32 +0100 Subject: [PATCH 5/5] Remove unused uses. --- src/triehash.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/triehash.rs b/src/triehash.rs index d31cec473..c12971112 100644 --- a/src/triehash.rs +++ b/src/triehash.rs @@ -278,10 +278,7 @@ fn test_hex_prefix_encode() { #[cfg(test)] mod tests { use std::str::FromStr; - use std::collections::BTreeMap; use rustc_serialize::hex::FromHex; - use rustc_serialize::json::Json; - use bytes::*; use hash::*; use triehash::*;