Fix XOR distance calculation in discovery Kademlia impl (#8589)

* network-devp2p: Test for discovery bucket insertion.

All test values are randomly generated and the assertions are checked manually.
Test fails because distance metric is implemented incorrectly.

* network-devp2p: Fix discovery distance function.

The Kademlia distance function (XOR) was implemented incorrectly as a population count.

* network-devp2p: Refactor nearest_node_entries to be on instance.

Optimizations are possible with more access to the discovery state.

* network-devp2p: Fix loss of precision in nearest_node_entries.

* network-devp2p: More efficient nearest node search.

The discovery algorithm to identify the nearest k nodes does not need to scan
all entries in all buckets.
This commit is contained in:
Jim Posen 2018-06-01 00:42:45 -07:00 committed by Marek Kotewicz
parent 799ae29ac4
commit 485d4aa8f3

View File

@ -16,7 +16,7 @@
use ethcore_bytes::Bytes;
use std::net::SocketAddr;
use std::collections::{HashSet, HashMap, BTreeMap, VecDeque};
use std::collections::{HashSet, HashMap, VecDeque};
use std::mem;
use std::default::Default;
use std::time::{Duration, Instant, SystemTime, UNIX_EPOCH};
@ -34,9 +34,8 @@ use network::IpFilter;
use PROTOCOL_VERSION;
const ADDRESS_BYTES_SIZE: u32 = 32; // Size of address type in bytes.
const ADDRESS_BITS: u32 = 8 * ADDRESS_BYTES_SIZE; // Denoted by n in [Kademlia].
const NODE_BINS: u32 = ADDRESS_BITS - 1; // Size of m_state (excludes root, which is us).
const ADDRESS_BYTES_SIZE: usize = 32; // Size of address type in bytes.
const ADDRESS_BITS: usize = 8 * ADDRESS_BYTES_SIZE; // Denoted by n in [Kademlia].
const DISCOVERY_MAX_STEPS: u16 = 8; // Max iterations of discovery. (discover)
const BUCKET_SIZE: usize = 16; // Denoted by k in [Kademlia]. Number of nodes stored in each bucket.
const ALPHA: usize = 3; // Denoted by \alpha in [Kademlia]. Number of concurrent FindNode requests.
@ -119,7 +118,7 @@ impl Discovery {
discovery_round: 0,
discovery_id: NodeId::new(),
discovery_nodes: HashSet::new(),
node_buckets: (0..NODE_BINS).map(|_| NodeBucket::new()).collect(),
node_buckets: (0..ADDRESS_BITS).map(|_| NodeBucket::new()).collect(),
udp_socket: socket,
send_queue: VecDeque::new(),
check_timestamps: true,
@ -155,8 +154,16 @@ impl Discovery {
fn update_node(&mut self, e: NodeEntry) {
trace!(target: "discovery", "Inserting {:?}", &e);
let id_hash = keccak(e.id);
let dist = match Discovery::distance(&self.id_hash, &id_hash) {
Some(dist) => dist,
None => {
warn!(target: "discovery", "Attempted to update own entry: {:?}", e);
return;
}
};
let ping = {
let bucket = &mut self.node_buckets[Discovery::distance(&self.id_hash, &id_hash) as usize];
let bucket = &mut self.node_buckets[dist];
let updated = if let Some(node) = bucket.nodes.iter_mut().find(|n| n.address.id == e.id) {
node.address = e.clone();
node.timeout = None;
@ -181,7 +188,15 @@ impl Discovery {
/// Removes the timeout of a given NodeId if it can be found in one of the discovery buckets
fn clear_ping(&mut self, id: &NodeId) {
let bucket = &mut self.node_buckets[Discovery::distance(&self.id_hash, &keccak(id)) as usize];
let dist = match Discovery::distance(&self.id_hash, &keccak(id)) {
Some(dist) => dist,
None => {
warn!(target: "discovery", "Received ping from self");
return
}
};
let bucket = &mut self.node_buckets[dist];
if let Some(node) = bucket.nodes.iter_mut().find(|n| &n.address.id == id) {
node.timeout = None;
}
@ -212,7 +227,7 @@ impl Discovery {
trace!(target: "discovery", "Starting round {:?}", self.discovery_round);
let mut tried_count = 0;
{
let nearest = Discovery::nearest_node_entries(&self.discovery_id, &self.node_buckets).into_iter();
let nearest = self.nearest_node_entries(&self.discovery_id).into_iter();
let nearest = nearest.filter(|x| !self.discovery_nodes.contains(&x.id)).take(ALPHA).collect::<Vec<_>>();
for r in nearest {
let rlp = encode_list(&(&[self.discovery_id.clone()][..]));
@ -233,17 +248,17 @@ impl Discovery {
self.discovery_round += 1;
}
fn distance(a: &H256, b: &H256) -> u32 {
let d = *a ^ *b;
let mut ret:u32 = 0;
for i in 0..32 {
let mut v: u8 = d[i];
while v != 0 {
v >>= 1;
ret += 1;
/// The base 2 log of the distance between a and b using the XOR metric.
fn distance(a: &H256, b: &H256) -> Option<usize> {
for i in (0..ADDRESS_BYTES_SIZE).rev() {
let byte_index = ADDRESS_BYTES_SIZE - i - 1;
let d: u8 = a[byte_index] ^ b[byte_index];
if d != 0 {
let high_bit_index = 7 - d.leading_zeros() as usize;
return Some(i * 8 + high_bit_index);
}
}
ret
None // a and b are equal, so log distance is -inf
}
fn ping(&mut self, node: &NodeEndpoint) {
@ -286,36 +301,53 @@ impl Discovery {
Ok(())
}
fn nearest_node_entries(target: &NodeId, buckets: &[NodeBucket]) -> Vec<NodeEntry> {
let mut found: BTreeMap<u32, Vec<&NodeEntry>> = BTreeMap::new();
let mut count = 0;
fn nearest_node_entries(&self, target: &NodeId) -> Vec<NodeEntry> {
let target_hash = keccak(target);
let target_distance = self.id_hash ^ target_hash;
// Sort nodes by distance to target
for bucket in buckets {
for node in &bucket.nodes {
let distance = Discovery::distance(&target_hash, &node.id_hash);
found.entry(distance).or_insert_with(Vec::new).push(&node.address);
if count == BUCKET_SIZE {
// delete the most distant element
let remove = {
let (key, last) = found.iter_mut().next_back().expect("Last element is always Some when count > 0");
last.pop();
if last.is_empty() { Some(key.clone()) } else { None }
};
if let Some(remove) = remove {
found.remove(&remove);
}
}
else {
count += 1;
let mut ret = Vec::<NodeEntry>::with_capacity(BUCKET_SIZE);
// Sort bucket entries by distance to target and append to end of result vector.
let append_bucket = |results: &mut Vec<NodeEntry>, bucket: &NodeBucket| -> bool {
let mut sorted_entries: Vec<&BucketEntry> = bucket.nodes.iter().collect();
sorted_entries.sort_unstable_by_key(|entry| entry.id_hash ^ target_hash);
let remaining_capacity = results.capacity() - results.len();
let to_append = if remaining_capacity < sorted_entries.len() {
&sorted_entries[0..remaining_capacity]
} else {
&sorted_entries
};
for entry in to_append.iter() {
results.push(entry.address.clone());
}
results.len() == results.capacity()
};
// This algorithm leverages the structure of the routing table to efficiently find the
// nearest entries to a target hash. First, we compute the XOR distance from this node to
// the target. On a first pass, we iterate from the MSB of the distance, stopping at any
// buckets where the distance bit is set, and skipping the buckets where it is unset. These
// must be in order the nearest to the target. On a second pass, we traverse from LSB to
// MSB, appending the buckets skipped on the first pass. The reason this works is that all
// entries in bucket i have a common prefix of length exactly 32 - i - 1 with the ID of this
// node.
for i in 0..ADDRESS_BITS {
if ((target_distance[i / 8] << (i % 8)) & 0x80) != 0 {
let bucket = &self.node_buckets[ADDRESS_BITS - i - 1];
if !bucket.nodes.is_empty() && append_bucket(&mut ret, bucket) {
return ret;
}
}
}
let mut ret:Vec<NodeEntry> = Vec::new();
for nodes in found.values() {
ret.extend(nodes.iter().map(|&n| n.clone()));
for i in (0..ADDRESS_BITS).rev() {
if ((target_distance[i / 8] << (i % 8)) & 0x80) == 0 {
let bucket = &self.node_buckets[ADDRESS_BITS - i - 1];
if !bucket.nodes.is_empty() && append_bucket(&mut ret, bucket) {
return ret;
}
}
}
ret
}
@ -453,7 +485,7 @@ impl Discovery {
let target: NodeId = rlp.val_at(0)?;
let timestamp: u64 = rlp.val_at(1)?;
self.check_timestamp(timestamp)?;
let nearest = Discovery::nearest_node_entries(&target, &self.node_buckets);
let nearest = self.nearest_node_entries(&target);
if nearest.is_empty() {
return Ok(None);
}
@ -614,7 +646,7 @@ mod tests {
}
discovery2.round();
}
assert_eq!(Discovery::nearest_node_entries(&NodeId::new(), &discovery2.node_buckets).len(), 3)
assert_eq!(discovery2.nearest_node_entries(&NodeId::new()).len(), 3)
}
#[test]
@ -625,7 +657,7 @@ mod tests {
for _ in 0..1200 {
discovery.add_node(NodeEntry { id: NodeId::random(), endpoint: ep.clone() });
}
assert!(Discovery::nearest_node_entries(&NodeId::new(), &discovery.node_buckets).len() <= 16);
assert!(discovery.nearest_node_entries(&NodeId::new()).len() <= 16);
let removed = discovery.check_expired(true).len();
assert!(removed > 0);
}
@ -633,23 +665,114 @@ mod tests {
#[test]
fn find_nearest_saturated() {
use super::*;
let mut buckets: Vec<_> = (0..256).map(|_| NodeBucket::new()).collect();
let key = Random.generate().unwrap();
let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40447").unwrap(), udp_port: 40447 };
let mut discovery = Discovery::new(&key, ep.address.clone(), ep.clone(), 0, IpFilter::default());
for _ in 0..(16 + 10) {
buckets[0].nodes.push_back(BucketEntry {
discovery.node_buckets[0].nodes.push_back(BucketEntry {
address: NodeEntry { id: NodeId::new(), endpoint: ep.clone() },
timeout: None,
id_hash: keccak(NodeId::new()),
});
}
let nearest = Discovery::nearest_node_entries(&NodeId::new(), &buckets);
let nearest = discovery.nearest_node_entries(&NodeId::new());
assert_eq!(nearest.len(), 16)
}
#[test]
fn routing_table_insertions_lookups() {
use super::*;
let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40448").unwrap(), udp_port: 40447 };
let node_ids_hex: [&str; 32] = [
"22536fa57acc12c4993295cbc26fef4550513496712b301ad2283d356c8108521244a362e64e6d907a0d0b4e65526699c5ae3cfebfc680505fe3b33d50672835",
"22c482f42401546f8dd7ed6b1c0cad976da6630730f1116614579ccb084791a528ff2676bfe94434de80e5d7e479f1ea1d7737077da3bd5e69a0f3e5bf596091",
"234c73e3a8f6835a7f9a9d2a896bff4908d66d21d5433a2c37d94f1fa9a6ca17d02388f31013ff87e3ad86506e76bd1006b9cac3815974a2b47c8d4f2124697e",
"2a5aaf4e2046c521e890dc82313c6151a55078f045a7e3d259f168238d029271cdd9a0943468d45c1e36a34a8a6d4de4b0262e48d3c8cfdd4c2aab5df42926b9",
"341d8c94d9670461186cfc1f66d4246cb12384940e9f621ec8d6c216b5d037cde5f7a41b70474ca36ced4a4f2fe91c9dc5a24a128414672661f78e8611d54bfd",
"3d9fd01851f3ae1bfd06b48e89738f29f9a2b4dce3ab7864df4fccca55d1ac88044956ba47d0c4cb44a19924626a3a3aa5a4de8958365cb7385111ce7b929200",
"406d5507a7fbc194a495800ae8cf408093336febc24d03d6c63756f522274ab02146ceb1b0213291a9a1544680503837519f88f1e8677d921de62c82935b4e6c",
"4c537f00805f320616ee49c7bc36e1d7e52a04a782b0cc00fd3d6b77200b027cef5f875ed38f1167fef4b02d7bd49a661812301d9d680bb62297131204c035f9",
"4fc8e3fdbdd7acad82b283ac52c121b805f3b15ffcaa6b2ca67b9e375aa88e978951ffa3d03ee13be99f0ee987db0bbfc6a7ca02b175e9123d79826025b4089d",
"55b5042a6910bc908a0520966e8cbcc92ac299bdb7efbfbcf703df1506fa0f9b09c5eeb930080de848d2864cca71f885942852c51233db0ee46fe0447306d61f",
"5d24f28b350c4c37fc4dad7f418e029992c9e4ac356bb3d9a1356ba1076339863c05044d7ceba233c65779401f8a3b38fe67b6a592c1be4834dc869f7bb932eb",
"5f6edaf2f2ae3003f4b4ff90b8e71a717c832c71a634d96e77fe046f9a88adc8de5718ff3c47659aea4cead5376df5b731e1b6530e6b0999f56ad75d4dabd3f6",
"6214c04211efe91abd23d65e2dc8e711b06d4fb13dcfd65b691dc51f58455b2145f9b38f523b72a45a12705a28d389308a34455720d774c9b805326df42b5a63",
"69df92573ddbbce88b72a930843dbb70728b2a020e0cc4e8ba805dcf7f19297bfc5def4ca447e9e6ec66971be1815b8f49042720431f698b6a87a185d94fa6c8",
"72ffc23de007cf8b6f4a117f7427b532d05861c314344ffa265175f57ee45dae041a710a4dc74124dba1dabdc0f52dfd21e3154d1d4285aab529810c6161d623",
"80b567f279a9512f3a66ebd8f87a93acd4d50bf66f5eff6d04039c1f5838e37021e981539659b33e0644b243fc9671209a80cbef40d1bcf7c7117d353cb45532",
"9009dc9e3bf50595f84271f46d4c7a5ad6971f7d2ffce1905bfc40a407d34fc5e2dcebd92746eadcd2c5fa4d5aaccb0e01b542d506b361851df3f19e6bc629a3",
"95264f56e091efeba911003fd01eeb2c81f6fc4bb7b10c92e4c7bfaf460b7246d232e61ad8a223d74870981a84e15b2d5134c25d931cb860c6912b20a2d3ac01",
"96013a472a9f7ff9c5c76b5ca958f14ee510d826703aa41d4c88eac51d30d14229b9f19f6e0469c37aaa6d2136a978a4aaa38ca766f48e53e569f84e44252962",
"a513c988cf8480ad2992caa64e3fa059ce07efda260dfeefed78e1d41ea3f97844603b8a9737eb633086fd9ac2f201200cb656cda8a91bf6cc500d6039db6f53",
"ab3311f38e3641c8b3b1fd36dd7f94b148166e267258e840d29d1859537c74f202bd3342359b3623f96c23fa662d1b65182a898bf20343744b37cb265182e500",
"ac8f41dbd637891a08c9cf715c23577bdd431ba40231682a5a9ba7fd6cb6d66c04f63d6d65c7d9f8737e641e05fdbeede57138a174f0d55e7835575dd6cddd98",
"accdad251888d53e4e18efee1e0d749d050216b14896efb657e9c7b1b78dab82a5b6fb3234017aa19a2f50475d73960f352d308b2e0e841cbebaf418362a4f21",
"b138622208f74d2b8e8fc10bcd4cf3302685cd77d339280a939474b92be8b93e441c50709e25c82cc88a2a4207e9f2938912d60600226efe322b43c6ef5e7aef",
"b4f64e1fa6a5cd6198b2515bde63fbdabaf7e7a31dbaf5369babbda4b8cd0bf5025ac4b7d2d6e6e3bc76c890df585d28d4815e464c8792ef677df9206864a12b",
"c1136e08a27c93812ae2dd47201d9e81c82d1995001b88dba9eec700e1d3385dfaf7ae834226c3c90a138f1808cd10b5502f49ee774a2bc707f34bd7d160b7bd",
"c203ae9b5d1953b0ac462e66338800ec26982e2af54bd444fc8978973191633d4f483e31b28233c07bb99f34d57c680fa5f8e093e64f13b235005b7ab6e2d594",
"c2e1067c58a9948e773e0a3637d946e26d95762f89ec9d35e2ad84f770309d94168d4e112c78d62b60efc6216bc5d31475f24307b1b8e0fa8dcbb18a10cb85f5",
"d60ecb1a89e0d5aeff14c9a95da9f5492eb15871c53563b86b7c5ddf0da74b4c29e682fdd22aae2290e0b16ef4b6d707ef55396ca98f755c95b689cf65ce5f80",
"df5ad4ea6242929df86f2162d1cc62b0e0a6f0a03428a39dea98f6a689335b5ceaf1f0696c17b717b141aeb45a29108d95c3a7d2d1d0bb3441219504ae672917",
"e1268f5dd9552a11989df9d4953bb388e7466711b2bd9882a3ed4d0767a21f046c53c20f9a18d66bae1d6a5544492857ddecb0b5b4818bd4557be252ddd66c71",
"e626019dc0b50b9e254461f19d29e69a4669c5256134a6352c6c30d3bc55d201a5b43fc2e006556cfaf29765b683e807e03093798942826244e4ee9e47c75d3f",
];
let node_entries = node_ids_hex.iter()
.map(|node_id_hex| NodeId::from_str(node_id_hex).unwrap())
.map(|node_id| NodeEntry { id: node_id, endpoint: ep.clone() })
.collect::<Vec<_>>();
let secret_hex = "6c71d1b8930d29e6371be1081f2c909c64b46440a1716314c3c9df995cb3aed1";
let key = Secret::from_str(secret_hex)
.and_then(|secret| KeyPair::from_secret(secret))
.unwrap();
let mut discovery = Discovery::new(&key, ep.address.clone(), ep.clone(), 0, IpFilter::default());
node_entries.iter().for_each(|entry| discovery.update_node(entry.clone()));
let expected_bucket_sizes = vec![
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 3, 7, 8, 12
];
let actual_bucket_sizes = discovery.node_buckets.iter()
.map(|ref bucket| bucket.nodes.len())
.collect::<Vec<_>>();
assert_eq!(actual_bucket_sizes, expected_bucket_sizes);
for entry in &node_entries {
let nearest = discovery.nearest_node_entries(&entry.id);
assert_eq!(nearest.len(), 16);
assert_eq!(nearest[0].id, entry.id);
let mut expected_ids: Vec<NodeId> = node_entries.iter().map(|entry| entry.id).collect();
expected_ids.sort_unstable_by_key(|id| keccak(id) ^ keccak(entry.id));
expected_ids.resize(BUCKET_SIZE, NodeId::default());
let actual_ids: Vec<NodeId> = nearest.iter().map(|entry| entry.id).collect();
assert_eq!(actual_ids, expected_ids);
}
}
#[test]
fn packets() {
let key = Random.generate().unwrap();
let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40447").unwrap(), udp_port: 40447 };
let ep = NodeEndpoint { address: SocketAddr::from_str("127.0.0.1:40449").unwrap(), udp_port: 40449 };
let mut discovery = Discovery::new(&key, ep.address.clone(), ep.clone(), 0, IpFilter::default());
discovery.check_timestamps = false;
let from = SocketAddr::from_str("99.99.99.99:40445").unwrap();