From 2cc1c92901e9d54d4cd83181ecf22c687c1326b1 Mon Sep 17 00:00:00 2001 From: Vurich Date: Wed, 19 Jul 2017 12:07:34 +0200 Subject: [PATCH] Unroll one more loop I also tried unrolling the 256-iteration loop further below, but it actually caused a slowdown (my guess is that either branch prediction stopped kicking in or the instruction cache was being polluted). --- Cargo.lock | 6 +++--- ethash/src/compute.rs | 25 ++++++++++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3e88b40cb..79217f7e2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -256,7 +256,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "crunchy" -version = "0.1.0" +version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -365,7 +365,7 @@ dependencies = [ name = "ethash" version = "1.7.0" dependencies = [ - "crunchy 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "crunchy 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)", "parking_lot 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "primal 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2949,7 +2949,7 @@ dependencies = [ "checksum core-foundation 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "20a6d0448d3a99d977ae4a2aa5a98d886a923e863e81ad9ff814645b6feb3bbd" "checksum core-foundation-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "05eed248dc504a5391c63794fe4fb64f46f071280afaa1b73308f3c0ce4574c5" "checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97" -"checksum crunchy 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b56bdac907d4b64254aed43964b11dd334bc46e0826a0e3add75caddfaf90175" +"checksum crunchy 0.1.1 
(registry+https://github.com/rust-lang/crates.io-index)" = "803315676d3fc09db67636977586262e7d1a6c5bdb291810efd5ceb48a0d5d58" "checksum crypt32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e34988f7e069e0b2f3bfc064295161e489b2d4e04a2e4248fb94360cdf00b4ec" "checksum ctrlc 1.1.1 (git+https://github.com/paritytech/rust-ctrlc.git)" = "" "checksum daemonize 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "271ec51b7e0bee92f0d04601422c73eb76ececf197026711c97ad25038a010cf" diff --git a/ethash/src/compute.rs b/ethash/src/compute.rs index 67b1ea678..21dd0a7c7 100644 --- a/ethash/src/compute.rs +++ b/ethash/src/compute.rs @@ -298,7 +298,7 @@ fn hash_compute(light: &Light, full_size: usize, header_hash: &H256, nonce: u64) let index = fnv_hash( f_mix.get_unchecked(0).as_words().get_unchecked(0) ^ i, - *mix.get_unchecked(0).as_words().get_unchecked(i_usize % MIX_WORDS) + *mix.get_unchecked(0).as_words().get_unchecked(i as usize % MIX_WORDS) ) % num_full_pages; unroll! { @@ -324,14 +324,18 @@ fn hash_compute(light: &Light, full_size: usize, header_hash: &H256, nonce: u64) } } + debug_assert_eq!(MIX_WORDS / 4, 8); + // compress mix - for i in 0..(MIX_WORDS / 4) { - let w = i * 4; - let mut reduction = *mix.get_unchecked(0).as_words().get_unchecked(w + 0); - reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 1); - reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 2); - reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 3); - *mix.get_unchecked_mut(0).as_words_mut().get_unchecked_mut(i) = reduction; + unroll! 
{ + for i in 0..8 { + let w = i * 4; + let mut reduction = *mix.get_unchecked(0).as_words().get_unchecked(w + 0); + reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 1); + reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 2); + reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 3); + *mix.get_unchecked_mut(0).as_words_mut().get_unchecked_mut(i) = reduction; + } } let mut mix_hash = [0u8; 32]; @@ -356,13 +360,15 @@ fn calculate_dag_item(node_index: u32, cache: &[Node]) -> Node { *ret.as_words_mut().get_unchecked_mut(0) ^= node_index; sha3::sha3_512(ret.bytes.as_mut_ptr(), ret.bytes.len(), ret.bytes.as_ptr(), ret.bytes.len()); - for i in 0..ETHASH_DATASET_PARENTS { + for i_usize in 0..ETHASH_DATASET_PARENTS { + let i = i_usize as u32; let parent_index = fnv_hash(node_index ^ i, *ret.as_words().get_unchecked(i as usize % NODE_WORDS)) % num_parent_nodes as u32; let parent = cache.get_unchecked(parent_index as usize); for w in 0..NODE_WORDS { *ret.as_words_mut().get_unchecked_mut(w) = fnv_hash(*ret.as_words().get_unchecked(w), *parent.as_words().get_unchecked(w)); } } + sha3::sha3_512(ret.bytes.as_mut_ptr(), ret.bytes.len(), ret.bytes.as_ptr(), ret.bytes.len()); ret } @@ -386,6 +392,7 @@ fn light_new(block_number: u64) -> Light { sha3::sha3_512(nodes.get_unchecked_mut(i).bytes.as_mut_ptr(), NODE_BYTES, nodes.get_unchecked(i - 1).bytes.as_ptr(), NODE_BYTES); } + // This _should_ get unrolled by the compiler, since it's not using the loop variable. for _ in 0..ETHASH_CACHE_ROUNDS { for i in 0..num_nodes { let idx = *nodes.get_unchecked_mut(i).as_words().get_unchecked(0) as usize % num_nodes;