Unroll one more loop
I also tried unrolling the 256-iteration loop further below, but it actually caused a slowdown (my guess is that either branch prediction stopped kicking in or the instruction cache was being polluted).
This commit is contained in:
parent
d51958dbf5
commit
2cc1c92901
6
Cargo.lock
generated
6
Cargo.lock
generated
@ -256,7 +256,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "crunchy"
|
||||
version = "0.1.0"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
@ -365,7 +365,7 @@ dependencies = [
|
||||
name = "ethash"
|
||||
version = "1.7.0"
|
||||
dependencies = [
|
||||
"crunchy 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"crunchy 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"log 0.3.7 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"parking_lot 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"primal 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
@ -2949,7 +2949,7 @@ dependencies = [
|
||||
"checksum core-foundation 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "20a6d0448d3a99d977ae4a2aa5a98d886a923e863e81ad9ff814645b6feb3bbd"
|
||||
"checksum core-foundation-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "05eed248dc504a5391c63794fe4fb64f46f071280afaa1b73308f3c0ce4574c5"
|
||||
"checksum crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)" = "0c5ea215664ca264da8a9d9c3be80d2eaf30923c259d03e870388eb927508f97"
|
||||
"checksum crunchy 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b56bdac907d4b64254aed43964b11dd334bc46e0826a0e3add75caddfaf90175"
|
||||
"checksum crunchy 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "803315676d3fc09db67636977586262e7d1a6c5bdb291810efd5ceb48a0d5d58"
|
||||
"checksum crypt32-sys 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e34988f7e069e0b2f3bfc064295161e489b2d4e04a2e4248fb94360cdf00b4ec"
|
||||
"checksum ctrlc 1.1.1 (git+https://github.com/paritytech/rust-ctrlc.git)" = "<none>"
|
||||
"checksum daemonize 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "271ec51b7e0bee92f0d04601422c73eb76ececf197026711c97ad25038a010cf"
|
||||
|
@ -298,7 +298,7 @@ fn hash_compute(light: &Light, full_size: usize, header_hash: &H256, nonce: u64)
|
||||
|
||||
let index = fnv_hash(
|
||||
f_mix.get_unchecked(0).as_words().get_unchecked(0) ^ i,
|
||||
*mix.get_unchecked(0).as_words().get_unchecked(i_usize % MIX_WORDS)
|
||||
*mix.get_unchecked(0).as_words().get_unchecked(i as usize % MIX_WORDS)
|
||||
) % num_full_pages;
|
||||
|
||||
unroll! {
|
||||
@ -324,14 +324,18 @@ fn hash_compute(light: &Light, full_size: usize, header_hash: &H256, nonce: u64)
|
||||
}
|
||||
}
|
||||
|
||||
debug_assert_eq!(MIX_WORDS / 4, 8);
|
||||
|
||||
// compress mix
|
||||
for i in 0..(MIX_WORDS / 4) {
|
||||
let w = i * 4;
|
||||
let mut reduction = *mix.get_unchecked(0).as_words().get_unchecked(w + 0);
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 1);
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 2);
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 3);
|
||||
*mix.get_unchecked_mut(0).as_words_mut().get_unchecked_mut(i) = reduction;
|
||||
unroll! {
|
||||
for i in 0..8 {
|
||||
let w = i * 4;
|
||||
let mut reduction = *mix.get_unchecked(0).as_words().get_unchecked(w + 0);
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 1);
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 2);
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 3);
|
||||
*mix.get_unchecked_mut(0).as_words_mut().get_unchecked_mut(i) = reduction;
|
||||
}
|
||||
}
|
||||
|
||||
let mut mix_hash = [0u8; 32];
|
||||
@ -356,13 +360,15 @@ fn calculate_dag_item(node_index: u32, cache: &[Node]) -> Node {
|
||||
*ret.as_words_mut().get_unchecked_mut(0) ^= node_index;
|
||||
sha3::sha3_512(ret.bytes.as_mut_ptr(), ret.bytes.len(), ret.bytes.as_ptr(), ret.bytes.len());
|
||||
|
||||
for i in 0..ETHASH_DATASET_PARENTS {
|
||||
for i_usize in 0..ETHASH_DATASET_PARENTS {
|
||||
let i = i_usize as u32;
|
||||
let parent_index = fnv_hash(node_index ^ i, *ret.as_words().get_unchecked(i as usize % NODE_WORDS)) % num_parent_nodes as u32;
|
||||
let parent = cache.get_unchecked(parent_index as usize);
|
||||
for w in 0..NODE_WORDS {
|
||||
*ret.as_words_mut().get_unchecked_mut(w) = fnv_hash(*ret.as_words().get_unchecked(w), *parent.as_words().get_unchecked(w));
|
||||
}
|
||||
}
|
||||
|
||||
sha3::sha3_512(ret.bytes.as_mut_ptr(), ret.bytes.len(), ret.bytes.as_ptr(), ret.bytes.len());
|
||||
ret
|
||||
}
|
||||
@ -386,6 +392,7 @@ fn light_new(block_number: u64) -> Light {
|
||||
sha3::sha3_512(nodes.get_unchecked_mut(i).bytes.as_mut_ptr(), NODE_BYTES, nodes.get_unchecked(i - 1).bytes.as_ptr(), NODE_BYTES);
|
||||
}
|
||||
|
||||
// This _should_ get unrolled by the compiler, since it's not using the loop variable.
|
||||
for _ in 0..ETHASH_CACHE_ROUNDS {
|
||||
for i in 0..num_nodes {
|
||||
let idx = *nodes.get_unchecked_mut(i).as_words().get_unchecked(0) as usize % num_nodes;
|
||||
|
Loading…
Reference in New Issue
Block a user