From 485d4aa8f3d536f8dca91b51ab4c2e4c22a3e6d5 Mon Sep 17 00:00:00 2001 From: Jim Posen Date: Fri, 1 Jun 2018 00:42:45 -0700 Subject: [PATCH] Fix XOR distance calculation in discovery Kademlia impl (#8589) * network-devp2p: Test for discovery bucket insertion. All test values are randomly generated and the assertions are checked manually. Test fails because distance metric is implemented incorrectly. * network-devp2p: Fix discovery distance function. The Kademlia distance function (XOR) was implemented incorrectly as a population count. * network-devp2p: Refactor nearest_node_entries to be on instance. Optimizations are possible with more access to the discovery state. * network-devp2p: Fix loss of precision in nearest_node_entries. * network-devp2p: More efficient nearest node search. The discovery algorithm to identify the nearest k nodes does not need to scan all entries in all buckets. --- util/network-devp2p/src/discovery.rs | 221 +++++++++++++++++++++------ 1 file changed, 172 insertions(+), 49 deletions(-) diff --git a/util/network-devp2p/src/discovery.rs b/util/network-devp2p/src/discovery.rs index f14cd5ba6..af43546a5 100644 --- a/util/network-devp2p/src/discovery.rs +++ b/util/network-devp2p/src/discovery.rs @@ -16,7 +16,7 @@ use ethcore_bytes::Bytes; use std::net::SocketAddr; -use std::collections::{HashSet, HashMap, BTreeMap, VecDeque}; +use std::collections::{HashSet, HashMap, VecDeque}; use std::mem; use std::default::Default; use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH}; @@ -34,9 +34,8 @@ use network::IpFilter; use PROTOCOL_VERSION; -const ADDRESS_BYTES_SIZE: u32 = 32; // Size of address type in bytes. -const ADDRESS_BITS: u32 = 8 * ADDRESS_BYTES_SIZE; // Denoted by n in [Kademlia]. -const NODE_BINS: u32 = ADDRESS_BITS - 1; // Size of m_state (excludes root, which is us). +const ADDRESS_BYTES_SIZE: usize = 32; // Size of address type in bytes. +const ADDRESS_BITS: usize = 8 * ADDRESS_BYTES_SIZE; // Denoted by n in [Kademlia]. const DISCOVERY_MAX_STEPS: u16 = 8; // Max iterations of discovery. (discover) const BUCKET_SIZE: usize = 16; // Denoted by k in [Kademlia]. Number of nodes stored in each bucket. const ALPHA: usize = 3; // Denoted by \alpha in [Kademlia]. Number of concurrent FindNode requests. @@ -119,7 +118,7 @@ impl Discovery { discovery_round: 0, discovery_id: NodeId::new(), discovery_nodes: HashSet::new(), - node_buckets: (0..NODE_BINS).map(|_| NodeBucket::new()).collect(), + node_buckets: (0..ADDRESS_BITS).map(|_| NodeBucket::new()).collect(), udp_socket: socket, send_queue: VecDeque::new(), check_timestamps: true, @@ -155,8 +154,16 @@ impl Discovery { fn update_node(&mut self, e: NodeEntry) { trace!(target: "discovery", "Inserting {:?}", &e); let id_hash = keccak(e.id); + let dist = match Discovery::distance(&self.id_hash, &id_hash) { + Some(dist) => dist, + None => { + warn!(target: "discovery", "Attempted to update own entry: {:?}", e); + return; + } + }; + let ping = { - let bucket = &mut self.node_buckets[Discovery::distance(&self.id_hash, &id_hash) as usize]; + let bucket = &mut self.node_buckets[dist]; let updated = if let Some(node) = bucket.nodes.iter_mut().find(|n| n.address.id == e.id) { node.address = e.clone(); node.timeout = None; @@ -181,7 +188,15 @@ impl Discovery { /// Removes the timeout of a given NodeId if it can be found in one of the discovery buckets fn clear_ping(&mut self, id: &NodeId) { - let bucket = &mut self.node_buckets[Discovery::distance(&self.id_hash, &keccak(id)) as usize]; + let dist = match Discovery::distance(&self.id_hash, &keccak(id)) { + Some(dist) => dist, + None => { + warn!(target: "discovery", "Received ping from self"); + return + } + }; + + let bucket = &mut self.node_buckets[dist]; if let Some(node) = bucket.nodes.iter_mut().find(|n| &n.address.id == id) { node.timeout = None; } @@ -212,7 +227,7 @@ impl Discovery { trace!(target: "discovery", "Starting round {:?}", self.discovery_round); let mut tried_count = 0; { - let nearest = Discovery::nearest_node_entries(&self.discovery_id, &self.node_buckets).into_iter(); + let nearest = self.nearest_node_entries(&self.discovery_id).into_iter(); let nearest = nearest.filter(|x| !self.discovery_nodes.contains(&x.id)).take(ALPHA).collect::>(); for r in nearest { let rlp = encode_list(&(&[self.discovery_id.clone()][..])); @@ -233,17 +248,17 @@ impl Discovery { self.discovery_round += 1; } - fn distance(a: &H256, b: &H256) -> u32 { - let d = *a ^ *b; - let mut ret:u32 = 0; - for i in 0..32 { - let mut v: u8 = d[i]; - while v != 0 { - v >>= 1; - ret += 1; + /// The base 2 log of the distance between a and b using the XOR metric. + fn distance(a: &H256, b: &H256) -> Option { + for i in (0..ADDRESS_BYTES_SIZE).rev() { + let byte_index = ADDRESS_BYTES_SIZE - i - 1; + let d: u8 = a[byte_index] ^ b[byte_index]; + if d != 0 { + let high_bit_index = 7 - d.leading_zeros() as usize; + return Some(i * 8 + high_bit_index); } } - ret + None // a and b are equal, so log distance is -inf } fn ping(&mut self, node: &NodeEndpoint) { @@ -286,36 +301,53 @@ impl Discovery { Ok(()) } - fn nearest_node_entries(target: &NodeId, buckets: &[NodeBucket]) -> Vec { - let mut found: BTreeMap> = BTreeMap::new(); - let mut count = 0; + fn nearest_node_entries(&self, target: &NodeId) -> Vec { let target_hash = keccak(target); + let target_distance = self.id_hash ^ target_hash; - // Sort nodes by distance to target - for bucket in buckets { - for node in &bucket.nodes { - let distance = Discovery::distance(&target_hash, &node.id_hash); - found.entry(distance).or_insert_with(Vec::new).push(&node.address); - if count == BUCKET_SIZE { - // delete the most distant element - let remove = { - let (key, last) = found.iter_mut().next_back().expect("Last element is always Some when count > 0"); - last.pop(); - if last.is_empty() { Some(key.clone()) } else { None } - }; - if let Some(remove) = remove { - found.remove(&remove); - } - } - else { - count += 1; + let mut ret = Vec::::with_capacity(BUCKET_SIZE); + + // Sort bucket entries by distance to target and append to end of result vector. + let append_bucket = |results: &mut Vec, bucket: &NodeBucket| -> bool { + let mut sorted_entries: Vec<&BucketEntry> = bucket.nodes.iter().collect(); + sorted_entries.sort_unstable_by_key(|entry| entry.id_hash ^ target_hash); + + let remaining_capacity = results.capacity() - results.len(); + let to_append = if remaining_capacity < sorted_entries.len() { + &sorted_entries[0..remaining_capacity] + } else { + &sorted_entries + }; + for entry in to_append.iter() { + results.push(entry.address.clone()); + } + results.len() == results.capacity() + }; + + // This algorithm leverages the structure of the routing table to efficiently find the + // nearest entries to a target hash. First, we compute the XOR distance from this node to + // the target. On a first pass, we iterate from the MSB of the distance, stopping at any + // buckets where the distance bit is set, and skipping the buckets where it is unset. These + // must be in order the nearest to the target. On a second pass, we traverse from LSB to + // MSB, appending the buckets skipped on the first pass. The reason this works is that all + // entries in bucket i have a common prefix of length exactly 32 - i - 1 with the ID of this + // node. + + for i in 0..ADDRESS_BITS { + if ((target_distance[i / 8] << (i % 8)) & 0x80) != 0 { + let bucket = &self.node_buckets[ADDRESS_BITS - i - 1]; + if !bucket.nodes.is_empty() && append_bucket(&mut ret, bucket) { + return ret; } } } - - let mut ret:Vec = Vec::new(); - for nodes in found.values() { - ret.extend(nodes.iter().map(|&n| n.clone())); + for i in (0..ADDRESS_BITS).rev() { + if ((target_distance[i / 8] << (i % 8)) & 0x80) == 0 { + let bucket = &self.node_buckets[ADDRESS_BITS - i - 1]; + if !bucket.nodes.is_empty() && append_bucket(&mut ret, bucket) { + return ret; + } + } } ret } @@ -453,7 +485,7 @@ impl Discovery { let target: NodeId = rlp.val_at(0)?; let timestamp: u64 = rlp.val_at(1)?; self.check_timestamp(timestamp)?; - let nearest = Discovery::nearest_node_entries(&target, &self.node_buckets); + let nearest = self.nearest_node_entries(&target); if nearest.is_empty() { return Ok(None); } @@ -614,7 +646,7 @@ mod tests { } discovery2.round(); } - assert_eq!(Discovery::nearest_node_entries(&NodeId::new(), &discovery2.node_buckets).len(), 3) + assert_eq!(discovery2.nearest_node_entries(&NodeId::new()).len(), 3) } #[test] @@ -625,7 +657,7 @@ mod tests { for _ in 0..1200 { discovery.add_node(NodeEntry { id: NodeId::random(), endpoint: ep.clone() }); } - assert!(Discovery::nearest_node_entries(&NodeId::new(), &discovery.node_buckets).len() <= 16); + assert!(discovery.nearest_node_entries(&NodeId::new()).len() <= 16); let removed = discovery.check_expired(true).len(); assert!(removed > 0); } @@ -633,23 +665,114 @@ mod tests { #[test] fn find_nearest_saturated() { use super::*; - let mut buckets: Vec<_> = (0..256).map(|_| NodeBucket::new()).collect(); + + let key = Random.generate().unwrap(); let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40447").unwrap(), udp_port: 40447 }; + let mut discovery = Discovery::new(&key, ep.address.clone(), ep.clone(), 0, IpFilter::default()); + for _ in 0..(16 + 10) { - buckets[0].nodes.push_back(BucketEntry { + discovery.node_buckets[0].nodes.push_back(BucketEntry { address: NodeEntry { id: NodeId::new(), endpoint: ep.clone() }, timeout: None, id_hash: keccak(NodeId::new()), }); } - let nearest = Discovery::nearest_node_entries(&NodeId::new(), &buckets); + let nearest = discovery.nearest_node_entries(&NodeId::new()); assert_eq!(nearest.len(), 16) } + #[test] + fn routing_table_insertions_lookups() { + use super::*; + let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40448").unwrap(), udp_port: 40447 }; + let node_ids_hex: [&str; 32] = [ + "22536fa57acc12c4993295cbc26fef4550513496712b301ad2283d356c8108521244a362e64e6d907a0d0b4e65526699c5ae3cfebfc680505fe3b33d50672835", + "22c482f42401546f8dd7ed6b1c0cad976da6630730f1116614579ccb084791a528ff2676bfe94434de80e5d7e479f1ea1d7737077da3bd5e69a0f3e5bf596091", + "234c73e3a8f6835a7f9a9d2a896bff4908d66d21d5433a2c37d94f1fa9a6ca17d02388f31013ff87e3ad86506e76bd1006b9cac3815974a2b47c8d4f2124697e", + "2a5aaf4e2046c521e890dc82313c6151a55078f045a7e3d259f168238d029271cdd9a0943468d45c1e36a34a8a6d4de4b0262e48d3c8cfdd4c2aab5df42926b9", + "341d8c94d9670461186cfc1f66d4246cb12384940e9f621ec8d6c216b5d037cde5f7a41b70474ca36ced4a4f2fe91c9dc5a24a128414672661f78e8611d54bfd", + "3d9fd01851f3ae1bfd06b48e89738f29f9a2b4dce3ab7864df4fccca55d1ac88044956ba47d0c4cb44a19924626a3a3aa5a4de8958365cb7385111ce7b929200", + "406d5507a7fbc194a495800ae8cf408093336febc24d03d6c63756f522274ab02146ceb1b0213291a9a1544680503837519f88f1e8677d921de62c82935b4e6c", + "4c537f00805f320616ee49c7bc36e1d7e52a04a782b0cc00fd3d6b77200b027cef5f875ed38f1167fef4b02d7bd49a661812301d9d680bb62297131204c035f9", + "4fc8e3fdbdd7acad82b283ac52c121b805f3b15ffcaa6b2ca67b9e375aa88e978951ffa3d03ee13be99f0ee987db0bbfc6a7ca02b175e9123d79826025b4089d", + "55b5042a6910bc908a0520966e8cbcc92ac299bdb7efbfbcf703df1506fa0f9b09c5eeb930080de848d2864cca71f885942852c51233db0ee46fe0447306d61f", + "5d24f28b350c4c37fc4dad7f418e029992c9e4ac356bb3d9a1356ba1076339863c05044d7ceba233c65779401f8a3b38fe67b6a592c1be4834dc869f7bb932eb", + "5f6edaf2f2ae3003f4b4ff90b8e71a717c832c71a634d96e77fe046f9a88adc8de5718ff3c47659aea4cead5376df5b731e1b6530e6b0999f56ad75d4dabd3f6", + "6214c04211efe91abd23d65e2dc8e711b06d4fb13dcfd65b691dc51f58455b2145f9b38f523b72a45a12705a28d389308a34455720d774c9b805326df42b5a63", + "69df92573ddbbce88b72a930843dbb70728b2a020e0cc4e8ba805dcf7f19297bfc5def4ca447e9e6ec66971be1815b8f49042720431f698b6a87a185d94fa6c8", + "72ffc23de007cf8b6f4a117f7427b532d05861c314344ffa265175f57ee45dae041a710a4dc74124dba1dabdc0f52dfd21e3154d1d4285aab529810c6161d623", + "80b567f279a9512f3a66ebd8f87a93acd4d50bf66f5eff6d04039c1f5838e37021e981539659b33e0644b243fc9671209a80cbef40d1bcf7c7117d353cb45532", + "9009dc9e3bf50595f84271f46d4c7a5ad6971f7d2ffce1905bfc40a407d34fc5e2dcebd92746eadcd2c5fa4d5aaccb0e01b542d506b361851df3f19e6bc629a3", + "95264f56e091efeba911003fd01eeb2c81f6fc4bb7b10c92e4c7bfaf460b7246d232e61ad8a223d74870981a84e15b2d5134c25d931cb860c6912b20a2d3ac01", + "96013a472a9f7ff9c5c76b5ca958f14ee510d826703aa41d4c88eac51d30d14229b9f19f6e0469c37aaa6d2136a978a4aaa38ca766f48e53e569f84e44252962", + "a513c988cf8480ad2992caa64e3fa059ce07efda260dfeefed78e1d41ea3f97844603b8a9737eb633086fd9ac2f201200cb656cda8a91bf6cc500d6039db6f53", + "ab3311f38e3641c8b3b1fd36dd7f94b148166e267258e840d29d1859537c74f202bd3342359b3623f96c23fa662d1b65182a898bf20343744b37cb265182e500", + "ac8f41dbd637891a08c9cf715c23577bdd431ba40231682a5a9ba7fd6cb6d66c04f63d6d65c7d9f8737e641e05fdbeede57138a174f0d55e7835575dd6cddd98", + "accdad251888d53e4e18efee1e0d749d050216b14896efb657e9c7b1b78dab82a5b6fb3234017aa19a2f50475d73960f352d308b2e0e841cbebaf418362a4f21", + "b138622208f74d2b8e8fc10bcd4cf3302685cd77d339280a939474b92be8b93e441c50709e25c82cc88a2a4207e9f2938912d60600226efe322b43c6ef5e7aef", + "b4f64e1fa6a5cd6198b2515bde63fbdabaf7e7a31dbaf5369babbda4b8cd0bf5025ac4b7d2d6e6e3bc76c890df585d28d4815e464c8792ef677df9206864a12b", + "c1136e08a27c93812ae2dd47201d9e81c82d1995001b88dba9eec700e1d3385dfaf7ae834226c3c90a138f1808cd10b5502f49ee774a2bc707f34bd7d160b7bd", + "c203ae9b5d1953b0ac462e66338800ec26982e2af54bd444fc8978973191633d4f483e31b28233c07bb99f34d57c680fa5f8e093e64f13b235005b7ab6e2d594", + "c2e1067c58a9948e773e0a3637d946e26d95762f89ec9d35e2ad84f770309d94168d4e112c78d62b60efc6216bc5d31475f24307b1b8e0fa8dcbb18a10cb85f5", + "d60ecb1a89e0d5aeff14c9a95da9f5492eb15871c53563b86b7c5ddf0da74b4c29e682fdd22aae2290e0b16ef4b6d707ef55396ca98f755c95b689cf65ce5f80", + "df5ad4ea6242929df86f2162d1cc62b0e0a6f0a03428a39dea98f6a689335b5ceaf1f0696c17b717b141aeb45a29108d95c3a7d2d1d0bb3441219504ae672917", + "e1268f5dd9552a11989df9d4953bb388e7466711b2bd9882a3ed4d0767a21f046c53c20f9a18d66bae1d6a5544492857ddecb0b5b4818bd4557be252ddd66c71", + "e626019dc0b50b9e254461f19d29e69a4669c5256134a6352c6c30d3bc55d201a5b43fc2e006556cfaf29765b683e807e03093798942826244e4ee9e47c75d3f", + ]; + let node_entries = node_ids_hex.iter() + .map(|node_id_hex| NodeId::from_str(node_id_hex).unwrap()) + .map(|node_id| NodeEntry { id: node_id, endpoint: ep.clone() }) + .collect::>(); + + let secret_hex = "6c71d1b8930d29e6371be1081f2c909c64b46440a1716314c3c9df995cb3aed1"; + let key = Secret::from_str(secret_hex) + .and_then(|secret| KeyPair::from_secret(secret)) + .unwrap(); + let mut discovery = Discovery::new(&key, ep.address.clone(), ep.clone(), 0, IpFilter::default()); + + node_entries.iter().for_each(|entry| discovery.update_node(entry.clone())); + + let expected_bucket_sizes = vec![ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7, 8, 12 + ]; + let actual_bucket_sizes = discovery.node_buckets.iter() + .map(|ref bucket| bucket.nodes.len()) + .collect::>(); + assert_eq!(actual_bucket_sizes, expected_bucket_sizes); + + for entry in &node_entries { + let nearest = discovery.nearest_node_entries(&entry.id); + assert_eq!(nearest.len(), 16); + assert_eq!(nearest[0].id, entry.id); + + let mut expected_ids: Vec = node_entries.iter().map(|entry| entry.id).collect(); + expected_ids.sort_unstable_by_key(|id| keccak(id) ^ keccak(entry.id)); + expected_ids.resize(BUCKET_SIZE, NodeId::default()); + + let actual_ids: Vec = nearest.iter().map(|entry| entry.id).collect(); + assert_eq!(actual_ids, expected_ids); + } + } + #[test] fn packets() { let key = Random.generate().unwrap(); - let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40447").unwrap(), udp_port: 40447 }; + let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40449").unwrap(), udp_port: 40449 }; let mut discovery = Discovery::new(&key, ep.address.clone(), ep.clone(), 0, IpFilter::default()); discovery.check_timestamps = false; let from = SocketAddr::from_str("99.99.99.99:40445").unwrap();