Fix unsoundness in ethash's unsafe code (#6140)
* Fix benchmarks
* Fix unsoundness in uses of unsafety
* Remove most uses of unsafe indexing and ptr::copy_nonoverlapping

  This commit also includes a completely absurd optimisation that I promise is an honest win. You can check the benchmarks, I barely believe it myself.
* Add safety comment
* Add more safety comments
This commit is contained in:
parent
671ed1b9db
commit
e84f308264
@ -123,7 +123,9 @@ impl Light {
|
||||
}
|
||||
let num_nodes = cache_size / NODE_BYTES;
|
||||
let mut nodes: Vec<Node> = Vec::with_capacity(num_nodes);
|
||||
nodes.resize(num_nodes, unsafe { mem::uninitialized() });
|
||||
|
||||
unsafe { nodes.set_len(num_nodes) };
|
||||
|
||||
let buf = unsafe { slice::from_raw_parts_mut(nodes.as_mut_ptr() as *mut u8, cache_size) };
|
||||
file.read_exact(buf)?;
|
||||
Ok(Light {
|
||||
@ -208,17 +210,20 @@ pub fn slow_get_seedhash(block_number: u64) -> H256 {
|
||||
SeedHashCompute::resume_compute_seedhash([0u8; 32], 0, block_number / ETHASH_EPOCH_LENGTH)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn fnv_hash(x: u32, y: u32) -> u32 {
|
||||
return x.wrapping_mul(FNV_PRIME) ^ y;
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn sha3_512(input: &[u8], output: &mut [u8]) {
|
||||
unsafe { sha3::sha3_512(output.as_mut_ptr(), output.len(), input.as_ptr(), input.len()) };
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn sha3_512_inplace(input: &mut [u8]) {
|
||||
// This is safe since `sha3_*` uses an internal buffer and copies the result to the output. This
|
||||
// means that we can reuse the input buffer for both input and output.
|
||||
unsafe { sha3::sha3_512(input.as_mut_ptr(), input.len(), input.as_ptr(), input.len()) };
|
||||
}
|
||||
|
||||
fn get_cache_size(block_number: u64) -> usize {
|
||||
let mut sz: u64 = CACHE_BYTES_INIT + CACHE_BYTES_GROWTH * (block_number / ETHASH_EPOCH_LENGTH);
|
||||
sz = sz - NODE_BYTES as u64;
|
||||
@ -228,7 +233,6 @@ fn get_cache_size(block_number: u64) -> usize {
|
||||
sz as usize
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn get_data_size(block_number: u64) -> usize {
|
||||
let mut sz: u64 = DATASET_BYTES_INIT + DATASET_BYTES_GROWTH * (block_number / ETHASH_EPOCH_LENGTH);
|
||||
sz = sz - ETHASH_MIX_BYTES as u64;
|
||||
@ -238,7 +242,6 @@ fn get_data_size(block_number: u64) -> usize {
|
||||
sz as usize
|
||||
}
|
||||
|
||||
|
||||
/// Difficulty quick check for POW preverification
|
||||
///
|
||||
/// `header_hash` The hash of the header
|
||||
@ -246,18 +249,28 @@ fn get_data_size(block_number: u64) -> usize {
|
||||
/// `mix_hash` The mix digest hash
|
||||
/// Boundary recovered from mix hash
|
||||
pub fn quick_get_difficulty(header_hash: &H256, nonce: u64, mix_hash: &H256) -> H256 {
|
||||
let mut buf = [0u8; 64 + 32];
|
||||
unsafe { ptr::copy_nonoverlapping(header_hash.as_ptr(), buf.as_mut_ptr(), 32) };
|
||||
unsafe { ptr::copy_nonoverlapping(mem::transmute(&nonce), buf[32..].as_mut_ptr(), 8) };
|
||||
unsafe {
|
||||
// This is safe - the `sha3_512` call below reads the first 40 bytes (which we explicitly set
|
||||
// with two `copy_nonoverlapping` calls) but writes the first 64, and then we explicitly write
|
||||
// the next 32 bytes before we read the whole thing with `sha3_256`.
|
||||
//
|
||||
// This cannot be elided by the compiler as it doesn't know the implementation of
|
||||
// `sha3_512`.
|
||||
let mut buf: [u8; 64 + 32] = mem::uninitialized();
|
||||
|
||||
unsafe { sha3::sha3_512(buf.as_mut_ptr(), 64, buf.as_ptr(), 40) };
|
||||
unsafe { ptr::copy_nonoverlapping(mix_hash.as_ptr(), buf[64..].as_mut_ptr(), 32) };
|
||||
ptr::copy_nonoverlapping(header_hash.as_ptr(), buf.as_mut_ptr(), 32);
|
||||
ptr::copy_nonoverlapping(mem::transmute(&nonce), buf[32..].as_mut_ptr(), 8);
|
||||
|
||||
sha3::sha3_512(buf.as_mut_ptr(), 64, buf.as_ptr(), 40);
|
||||
ptr::copy_nonoverlapping(mix_hash.as_ptr(), buf[64..].as_mut_ptr(), 32);
|
||||
|
||||
// This is initialized in `sha3_256`
|
||||
let mut hash: [u8; 32] = mem::uninitialized();
|
||||
sha3::sha3_256(hash.as_mut_ptr(), hash.len(), buf.as_ptr(), buf.len());
|
||||
|
||||
let mut hash = [0u8; 32];
|
||||
unsafe { sha3::sha3_256(hash.as_mut_ptr(), hash.len(), buf.as_ptr(), buf.len()) };
|
||||
hash.as_mut_ptr();
|
||||
hash
|
||||
}
|
||||
}
|
||||
|
||||
/// Calculate the light client data
|
||||
/// `light` - The light client handler
|
||||
@ -269,39 +282,97 @@ pub fn light_compute(light: &Light, header_hash: &H256, nonce: u64) -> ProofOfWo
|
||||
}
|
||||
|
||||
fn hash_compute(light: &Light, full_size: usize, header_hash: &H256, nonce: u64) -> ProofOfWork {
|
||||
macro_rules! make_const_array {
|
||||
($n:expr, $value:expr) => {{
|
||||
// We use explicit lifetimes to ensure that val's borrow is invalidated until the
|
||||
// transmuted val dies.
|
||||
unsafe fn make_const_array<'a, T, U>(val: &'a mut [T]) -> &'a mut [U; $n] {
|
||||
use ::std::mem;
|
||||
|
||||
debug_assert_eq!(val.len() * mem::size_of::<T>(), $n * mem::size_of::<U>());
|
||||
mem::transmute(val.as_mut_ptr())
|
||||
}
|
||||
|
||||
make_const_array($value)
|
||||
}}
|
||||
}
|
||||
|
||||
#[repr(C)]
|
||||
struct MixBuf {
|
||||
half_mix: Node,
|
||||
compress_bytes: [u8; MIX_WORDS],
|
||||
};
|
||||
|
||||
if full_size % MIX_WORDS != 0 {
|
||||
panic!("Unaligned full size");
|
||||
}
|
||||
// pack hash and nonce together into first 40 bytes of s_mix
|
||||
let mut s_mix: [Node; MIX_NODES + 1] = [Node::default(), Node::default(), Node::default()];
|
||||
unsafe { ptr::copy_nonoverlapping(header_hash.as_ptr(), s_mix.get_unchecked_mut(0).bytes.as_mut_ptr(), 32) };
|
||||
unsafe { ptr::copy_nonoverlapping(mem::transmute(&nonce), s_mix.get_unchecked_mut(0).bytes[32..].as_mut_ptr(), 8) };
|
||||
|
||||
// You may be asking yourself: what in the name of Crypto Jesus is going on here? So: we need
|
||||
// `half_mix` and `compress_bytes` in a single array later down in the code (we hash them
|
||||
// together to create `value`) so that we can hash the full array. However, we do a bunch of
|
||||
// reading and writing to these variables first. We originally allocated two arrays and then
|
||||
// stuck them together with `ptr::copy_nonoverlapping` at the end, but this method is
|
||||
// _significantly_ faster - by my benchmarks, a consistent 3-5%. This is the most ridiculous
|
||||
// optimization I have ever done and I am so sorry. I can only chalk it up to cache locality
|
||||
// improvements, since I can't imagine that 3-5% of our runtime is taken up by catting two
|
||||
// arrays together.
|
||||
let mut buf: MixBuf = MixBuf {
|
||||
half_mix: unsafe {
|
||||
// Pack `header_hash` and `nonce` together
|
||||
// We explicitly write the first 40 bytes, leaving the last 24 as uninitialized. Then
|
||||
// `sha3_512` reads the first 40 bytes (4th parameter) and overwrites the entire array,
|
||||
// leaving it fully initialized.
|
||||
let mut out: [u8; NODE_BYTES] = mem::uninitialized();
|
||||
|
||||
ptr::copy_nonoverlapping(
|
||||
header_hash.as_ptr(),
|
||||
out.as_mut_ptr(),
|
||||
header_hash.len(),
|
||||
);
|
||||
ptr::copy_nonoverlapping(
|
||||
mem::transmute(&nonce),
|
||||
out[header_hash.len()..].as_mut_ptr(),
|
||||
mem::size_of::<u64>(),
|
||||
);
|
||||
|
||||
// compute sha3-512 hash and replicate across mix
|
||||
unsafe {
|
||||
sha3::sha3_512(s_mix.get_unchecked_mut(0).bytes.as_mut_ptr(), NODE_BYTES, s_mix.get_unchecked(0).bytes.as_ptr(), 40);
|
||||
let (f_mix, mut mix) = s_mix.split_at_mut(1);
|
||||
for w in 0..MIX_WORDS {
|
||||
*mix.get_unchecked_mut(0).as_words_mut().get_unchecked_mut(w) = *f_mix.get_unchecked(0).as_words().get_unchecked(w % NODE_WORDS);
|
||||
}
|
||||
sha3::sha3_512(
|
||||
out.as_mut_ptr(),
|
||||
NODE_BYTES,
|
||||
out.as_ptr(),
|
||||
header_hash.len() + mem::size_of::<u64>()
|
||||
);
|
||||
|
||||
Node { bytes: out }
|
||||
},
|
||||
// This is fully initialized before being read, see `let mut compress = ...` below
|
||||
compress_bytes: unsafe { mem::uninitialized() },
|
||||
};
|
||||
|
||||
let mut mix: [_; MIX_NODES] = [buf.half_mix.clone(), buf.half_mix.clone()];
|
||||
|
||||
let page_size = 4 * MIX_WORDS;
|
||||
let num_full_pages = (full_size / page_size) as u32;
|
||||
let cache: &[Node] = &light.cache; // deref once for better performance
|
||||
// deref once for better performance
|
||||
let cache: &[Node] = &light.cache;
|
||||
let first_val = buf.half_mix.as_words()[0];
|
||||
|
||||
debug_assert_eq!(ETHASH_ACCESSES, 64);
|
||||
debug_assert_eq!(MIX_NODES, 2);
|
||||
debug_assert_eq!(NODE_WORDS, 16);
|
||||
|
||||
unroll! {
|
||||
// ETHASH_ACCESSES
|
||||
for i_usize in 0..64 {
|
||||
let i = i_usize as u32;
|
||||
for i in 0..ETHASH_ACCESSES as u32 {
|
||||
let index = {
|
||||
// This is trivially safe, but does not work on big-endian. The safety of this is
|
||||
// asserted in debug builds (see the definition of `make_const_array!`).
|
||||
let mix_words: &mut [u32; MIX_WORDS] = unsafe {
|
||||
make_const_array!(MIX_WORDS, &mut mix)
|
||||
};
|
||||
|
||||
let index = fnv_hash(
|
||||
f_mix.get_unchecked(0).as_words().get_unchecked(0) ^ i,
|
||||
*mix.get_unchecked(0).as_words().get_unchecked(i as usize % MIX_WORDS)
|
||||
) % num_full_pages;
|
||||
fnv_hash(
|
||||
first_val ^ i,
|
||||
mix_words[i as usize % MIX_WORDS]
|
||||
) % num_full_pages
|
||||
};
|
||||
|
||||
unroll! {
|
||||
// MIX_NODES
|
||||
@ -314,74 +385,92 @@ fn hash_compute(light: &Light, full_size: usize, header_hash: &H256, nonce: u64)
|
||||
unroll! {
|
||||
// NODE_WORDS
|
||||
for w in 0..16 {
|
||||
*mix.get_unchecked_mut(n).as_words_mut().get_unchecked_mut(w) =
|
||||
mix[n].as_words_mut()[w] =
|
||||
fnv_hash(
|
||||
*mix.get_unchecked(n).as_words().get_unchecked(w),
|
||||
*tmp_node.as_words().get_unchecked(w),
|
||||
mix[n].as_words()[w],
|
||||
tmp_node.as_words()[w],
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mix_words: [u32; MIX_WORDS] = unsafe { mem::transmute(mix) };
|
||||
|
||||
{
|
||||
// This is an uninitialized buffer to begin with, but we iterate precisely `compress.len()`
|
||||
// times and set each index, leaving the array fully initialized. THIS ONLY WORKS ON LITTLE-
|
||||
// ENDIAN MACHINES. See a future PR to make this and the rest of the code work correctly on
|
||||
// big-endian arches like mips.
|
||||
let mut compress: &mut [u32; MIX_WORDS / 4] = unsafe {
|
||||
make_const_array!(MIX_WORDS / 4, &mut buf.compress_bytes)
|
||||
};
|
||||
|
||||
// Compress mix
|
||||
debug_assert_eq!(MIX_WORDS / 4, 8);
|
||||
|
||||
// compress mix
|
||||
unroll! {
|
||||
for i in 0..8 {
|
||||
let w = i * 4;
|
||||
let mut reduction = *mix.get_unchecked(0).as_words().get_unchecked(w + 0);
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 1);
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 2);
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 3);
|
||||
*mix.get_unchecked_mut(0).as_words_mut().get_unchecked_mut(i) = reduction;
|
||||
|
||||
let mut reduction = mix_words[w + 0];
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ mix_words[w + 1];
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ mix_words[w + 2];
|
||||
reduction = reduction.wrapping_mul(FNV_PRIME) ^ mix_words[w + 3];
|
||||
compress[i] = reduction;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut mix_hash = [0u8; 32];
|
||||
let mut buf = [0u8; 32 + 64];
|
||||
ptr::copy_nonoverlapping(f_mix.get_unchecked_mut(0).bytes.as_ptr(), buf.as_mut_ptr(), 64);
|
||||
ptr::copy_nonoverlapping(mix.get_unchecked_mut(0).bytes.as_ptr(), buf[64..].as_mut_ptr(), 32);
|
||||
ptr::copy_nonoverlapping(mix.get_unchecked_mut(0).bytes.as_ptr(), mix_hash.as_mut_ptr(), 32);
|
||||
let mut value: H256 = [0u8; 32];
|
||||
sha3::sha3_256(value.as_mut_ptr(), value.len(), buf.as_ptr(), buf.len());
|
||||
let mix_hash = buf.compress_bytes;
|
||||
|
||||
let value: H256 = unsafe {
|
||||
// We can interpret the buffer as an array of `u8`s, since it's `repr(C)`.
|
||||
let read_ptr: *const u8 = mem::transmute(&buf);
|
||||
// We overwrite the second half since `sha3_256` has an internal buffer and so allows
|
||||
// overlapping arrays as input.
|
||||
let write_ptr: *mut u8 = mem::transmute(&mut buf.compress_bytes);
|
||||
sha3::sha3_256(
|
||||
write_ptr,
|
||||
buf.compress_bytes.len(),
|
||||
read_ptr,
|
||||
buf.half_mix.bytes.len() + buf.compress_bytes.len(),
|
||||
);
|
||||
buf.compress_bytes
|
||||
};
|
||||
|
||||
ProofOfWork {
|
||||
mix_hash: mix_hash,
|
||||
value: value,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn calculate_dag_item(node_index: u32, cache: &[Node]) -> Node {
|
||||
unsafe {
|
||||
let num_parent_nodes = cache.len();
|
||||
let init = cache.get_unchecked(node_index as usize % num_parent_nodes);
|
||||
let mut ret = init.clone();
|
||||
*ret.as_words_mut().get_unchecked_mut(0) ^= node_index;
|
||||
sha3::sha3_512(ret.bytes.as_mut_ptr(), ret.bytes.len(), ret.bytes.as_ptr(), ret.bytes.len());
|
||||
let mut ret = cache[node_index as usize % num_parent_nodes].clone();
|
||||
ret.as_words_mut()[0] ^= node_index;
|
||||
|
||||
sha3_512_inplace(&mut ret.bytes);
|
||||
|
||||
debug_assert_eq!(NODE_WORDS, 16);
|
||||
for i in 0..ETHASH_DATASET_PARENTS as u32 {
|
||||
let parent_index = fnv_hash(node_index ^ i, *ret.as_words().get_unchecked(i as usize % NODE_WORDS)) % num_parent_nodes as u32;
|
||||
let parent = cache.get_unchecked(parent_index as usize);
|
||||
let parent_index = fnv_hash(
|
||||
node_index ^ i,
|
||||
ret.as_words()[i as usize % NODE_WORDS],
|
||||
) % num_parent_nodes as u32;
|
||||
let parent = &cache[parent_index as usize];
|
||||
|
||||
unroll! {
|
||||
for w in 0..16 {
|
||||
*ret.as_words_mut().get_unchecked_mut(w) =
|
||||
fnv_hash(
|
||||
*ret.as_words().get_unchecked(w),
|
||||
*parent.as_words().get_unchecked(w)
|
||||
);
|
||||
ret.as_words_mut()[w] = fnv_hash(ret.as_words()[w], parent.as_words()[w]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sha3::sha3_512(ret.bytes.as_mut_ptr(), ret.bytes.len(), ret.bytes.as_ptr(), ret.bytes.len());
|
||||
sha3_512_inplace(&mut ret.bytes);
|
||||
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
fn light_new<T: AsRef<Path>>(cache_dir: T, block_number: u64) -> Light {
|
||||
let seed_compute = SeedHashCompute::new();
|
||||
@ -391,9 +480,11 @@ fn light_new<T: AsRef<Path>>(cache_dir: T, block_number: u64) -> Light {
|
||||
assert!(cache_size % NODE_BYTES == 0, "Unaligned cache size");
|
||||
let num_nodes = cache_size / NODE_BYTES;
|
||||
|
||||
let mut nodes = Vec::with_capacity(num_nodes);
|
||||
nodes.resize(num_nodes, Node::default());
|
||||
let mut nodes: Vec<Node> = Vec::with_capacity(num_nodes);
|
||||
unsafe {
|
||||
// Use uninit instead of unnecessarily writing `size_of::<Node>() * num_nodes` 0s
|
||||
nodes.set_len(num_nodes);
|
||||
|
||||
sha3_512(&seedhash[0..32], &mut nodes.get_unchecked_mut(0).bytes);
|
||||
for i in 1..num_nodes {
|
||||
sha3::sha3_512(nodes.get_unchecked_mut(i).bytes.as_mut_ptr(), NODE_BYTES, nodes.get_unchecked(i - 1).bytes.as_ptr(), NODE_BYTES);
|
||||
|
@ -143,9 +143,11 @@ mod benchmarks {
|
||||
|
||||
#[bench]
|
||||
fn bench_light_compute(b: &mut Bencher) {
|
||||
use ::std::env;
|
||||
|
||||
let hash = [0xf5, 0x7e, 0x6f, 0x3a, 0xcf, 0xc0, 0xdd, 0x4b, 0x5b, 0xf2, 0xbe, 0xe4, 0x0a, 0xb3, 0x35, 0x8a, 0xa6, 0x87, 0x73, 0xa8, 0xd0, 0x9f, 0x5e, 0x59, 0x5e, 0xab, 0x55, 0x94, 0x05, 0x52, 0x7d, 0x72];
|
||||
let nonce = 0xd7b3ac70a301a249;
|
||||
let light = Light::new(486382);
|
||||
let light = Light::new(env::temp_dir(), 486382);
|
||||
|
||||
b.iter(|| light_compute(&light, &hash, nonce));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user