From e84f3082649e2c4d40d33e47564019905a255650 Mon Sep 17 00:00:00 2001 From: Jef Date: Fri, 28 Jul 2017 14:26:22 +0200 Subject: [PATCH] Fix unsoundness in ethash's unsafe code (#6140) * Fix benchmarks * Fix unsoundness in uses of unsafety * Remove most uses of unsafe indexing and ptr::copy_nonoverlapping This commit also includes a completely absurd optimisation that I promise is an honest win. You can check the benchmarks, I barely believe it myself. * Add safety comment * Add more safety comments --- ethash/src/compute.rs | 287 +++++++++++++++++++++++++++--------------- ethash/src/lib.rs | 4 +- 2 files changed, 192 insertions(+), 99 deletions(-) diff --git a/ethash/src/compute.rs b/ethash/src/compute.rs index 1bc546c84..ab2c758df 100644 --- a/ethash/src/compute.rs +++ b/ethash/src/compute.rs @@ -123,7 +123,9 @@ impl Light { } let num_nodes = cache_size / NODE_BYTES; let mut nodes: Vec = Vec::with_capacity(num_nodes); - nodes.resize(num_nodes, unsafe { mem::uninitialized() }); + + unsafe { nodes.set_len(num_nodes) }; + let buf = unsafe { slice::from_raw_parts_mut(nodes.as_mut_ptr() as *mut u8, cache_size) }; file.read_exact(buf)?; Ok(Light { @@ -208,17 +210,20 @@ pub fn slow_get_seedhash(block_number: u64) -> H256 { SeedHashCompute::resume_compute_seedhash([0u8; 32], 0, block_number / ETHASH_EPOCH_LENGTH) } -#[inline] fn fnv_hash(x: u32, y: u32) -> u32 { return x.wrapping_mul(FNV_PRIME) ^ y; } -#[inline] fn sha3_512(input: &[u8], output: &mut [u8]) { unsafe { sha3::sha3_512(output.as_mut_ptr(), output.len(), input.as_ptr(), input.len()) }; } -#[inline] +fn sha3_512_inplace(input: &mut [u8]) { + // This is safe since `sha3_*` uses an internal buffer and copies the result to the output. This + // means that we can reuse the input buffer for both input and output. + unsafe { sha3::sha3_512(input.as_mut_ptr(), input.len(), input.as_ptr(), input.len()) }; +} + fn get_cache_size(block_number: u64) -> usize { let mut sz: u64 = CACHE_BYTES_INIT + CACHE_BYTES_GROWTH * (block_number / ETHASH_EPOCH_LENGTH); sz = sz - NODE_BYTES as u64; @@ -228,7 +233,6 @@ fn get_cache_size(block_number: u64) -> usize { sz as usize } -#[inline] fn get_data_size(block_number: u64) -> usize { let mut sz: u64 = DATASET_BYTES_INIT + DATASET_BYTES_GROWTH * (block_number / ETHASH_EPOCH_LENGTH); sz = sz - ETHASH_MIX_BYTES as u64; @@ -238,7 +242,6 @@ fn get_data_size(block_number: u64) -> usize { sz as usize } - /// Difficulty quick check for POW preverification /// /// `header_hash` The hash of the header @@ -246,17 +249,27 @@ fn get_data_size(block_number: u64) -> usize { /// `mix_hash` The mix digest hash /// Boundary recovered from mix hash pub fn quick_get_difficulty(header_hash: &H256, nonce: u64, mix_hash: &H256) -> H256 { - let mut buf = [0u8; 64 + 32]; - unsafe { ptr::copy_nonoverlapping(header_hash.as_ptr(), buf.as_mut_ptr(), 32) }; - unsafe { ptr::copy_nonoverlapping(mem::transmute(&nonce), buf[32..].as_mut_ptr(), 8) }; + unsafe { + // This is safe - the `sha3_512` call below reads the first 40 bytes (which we explicitly set + // with two `copy_nonoverlapping` calls) but writes the first 64, and then we explicitly write + // the next 32 bytes before we read the whole thing with `sha3_256`. + // + // This cannot be elided by the compiler as it doesn't know the implementation of + // `sha3_512`. + let mut buf: [u8; 64 + 32] = mem::uninitialized(); - unsafe { sha3::sha3_512(buf.as_mut_ptr(), 64, buf.as_ptr(), 40) }; - unsafe { ptr::copy_nonoverlapping(mix_hash.as_ptr(), buf[64..].as_mut_ptr(), 32) }; + ptr::copy_nonoverlapping(header_hash.as_ptr(), buf.as_mut_ptr(), 32); + ptr::copy_nonoverlapping(mem::transmute(&nonce), buf[32..].as_mut_ptr(), 8); - let mut hash = [0u8; 32]; - unsafe { sha3::sha3_256(hash.as_mut_ptr(), hash.len(), buf.as_ptr(), buf.len()) }; - hash.as_mut_ptr(); - hash + sha3::sha3_512(buf.as_mut_ptr(), 64, buf.as_ptr(), 40); + ptr::copy_nonoverlapping(mix_hash.as_ptr(), buf[64..].as_mut_ptr(), 32); + + // This is initialized in `sha3_256` + let mut hash: [u8; 32] = mem::uninitialized(); + sha3::sha3_256(hash.as_mut_ptr(), hash.len(), buf.as_ptr(), buf.len()); + + hash + } } /// Calculate the light client data @@ -269,118 +282,194 @@ pub fn light_compute(light: &Light, header_hash: &H256, nonce: u64) -> ProofOfWo } fn hash_compute(light: &Light, full_size: usize, header_hash: &H256, nonce: u64) -> ProofOfWork { + macro_rules! make_const_array { + ($n:expr, $value:expr) => {{ + // We use explicit lifetimes to ensure that val's borrow is invalidated until the + // transmuted val dies. + unsafe fn make_const_array<'a, T, U>(val: &'a mut [T]) -> &'a mut [U; $n] { + use ::std::mem; + + debug_assert_eq!(val.len() * mem::size_of::(), $n * mem::size_of::()); + mem::transmute(val.as_mut_ptr()) + } + + make_const_array($value) + }} + } + + #[repr(C)] + struct MixBuf { + half_mix: Node, + compress_bytes: [u8; MIX_WORDS], + }; + if full_size % MIX_WORDS != 0 { panic!("Unaligned full size"); } - // pack hash and nonce together into first 40 bytes of s_mix - let mut s_mix: [Node; MIX_NODES + 1] = [Node::default(), Node::default(), Node::default()]; - unsafe { ptr::copy_nonoverlapping(header_hash.as_ptr(), s_mix.get_unchecked_mut(0).bytes.as_mut_ptr(), 32) }; - unsafe { ptr::copy_nonoverlapping(mem::transmute(&nonce), s_mix.get_unchecked_mut(0).bytes[32..].as_mut_ptr(), 8) }; - // compute sha3-512 hash and replicate across mix - unsafe { - sha3::sha3_512(s_mix.get_unchecked_mut(0).bytes.as_mut_ptr(), NODE_BYTES, s_mix.get_unchecked(0).bytes.as_ptr(), 40); - let (f_mix, mut mix) = s_mix.split_at_mut(1); - for w in 0..MIX_WORDS { - *mix.get_unchecked_mut(0).as_words_mut().get_unchecked_mut(w) = *f_mix.get_unchecked(0).as_words().get_unchecked(w % NODE_WORDS); - } + // You may be asking yourself: what in the name of Crypto Jesus is going on here? So: we need + // `half_mix` and `compress_bytes` in a single array later down in the code (we hash them + // together to create `value`) so that we can hash the full array. However, we do a bunch of + // reading and writing to these variables first. We originally allocated two arrays and then + // stuck them together with `ptr::copy_nonoverlapping` at the end, but this method is + // _significantly_ faster - by my benchmarks, a consistent 3-5%. This is the most ridiculous + // optimization I have ever done and I am so sorry. I can only chalk it up to cache locality + // improvements, since I can't imagine that 3-5% of our runtime is taken up by catting two + // arrays together. + let mut buf: MixBuf = MixBuf { + half_mix: unsafe { + // Pack `header_hash` and `nonce` together + // We explicitly write the first 40 bytes, leaving the last 24 as uninitialized. Then + // `sha3_512` reads the first 40 bytes (4th parameter) and overwrites the entire array, + // leaving it fully initialized. + let mut out: [u8; NODE_BYTES] = mem::uninitialized(); - let page_size = 4 * MIX_WORDS; - let num_full_pages = (full_size / page_size) as u32; - let cache: &[Node] = &light.cache; // deref once for better performance + ptr::copy_nonoverlapping( + header_hash.as_ptr(), + out.as_mut_ptr(), + header_hash.len(), + ); + ptr::copy_nonoverlapping( + mem::transmute(&nonce), + out[header_hash.len()..].as_mut_ptr(), + mem::size_of::(), + ); - debug_assert_eq!(ETHASH_ACCESSES, 64); - debug_assert_eq!(MIX_NODES, 2); - debug_assert_eq!(NODE_WORDS, 16); + // compute sha3-512 hash and replicate across mix + sha3::sha3_512( + out.as_mut_ptr(), + NODE_BYTES, + out.as_ptr(), + header_hash.len() + mem::size_of::() + ); + + Node { bytes: out } + }, + // This is fully initialized before being read, see `let mut compress = ...` below + compress_bytes: unsafe { mem::uninitialized() }, + }; + + let mut mix: [_; MIX_NODES] = [buf.half_mix.clone(), buf.half_mix.clone()]; + + let page_size = 4 * MIX_WORDS; + let num_full_pages = (full_size / page_size) as u32; + // deref once for better performance + let cache: &[Node] = &light.cache; + let first_val = buf.half_mix.as_words()[0]; + + debug_assert_eq!(MIX_NODES, 2); + debug_assert_eq!(NODE_WORDS, 16); + + for i in 0..ETHASH_ACCESSES as u32 { + let index = { + // This is trivially safe, but does not work on big-endian. The safety of this is + // asserted in debug builds (see the definition of `make_const_array!`). + let mix_words: &mut [u32; MIX_WORDS] = unsafe { + make_const_array!(MIX_WORDS, &mut mix) + }; + + fnv_hash( + first_val ^ i, + mix_words[i as usize % MIX_WORDS] + ) % num_full_pages + }; unroll! { - // ETHASH_ACCESSES - for i_usize in 0..64 { - let i = i_usize as u32; - - let index = fnv_hash( - f_mix.get_unchecked(0).as_words().get_unchecked(0) ^ i, - *mix.get_unchecked(0).as_words().get_unchecked(i as usize % MIX_WORDS) - ) % num_full_pages; + // MIX_NODES + for n in 0..2 { + let tmp_node = calculate_dag_item( + index * MIX_NODES as u32 + n as u32, + cache, + ); unroll! { - // MIX_NODES - for n in 0..2 { - let tmp_node = calculate_dag_item( - index * MIX_NODES as u32 + n as u32, - cache, - ); - - unroll! { - // NODE_WORDS - for w in 0..16 { - *mix.get_unchecked_mut(n).as_words_mut().get_unchecked_mut(w) = - fnv_hash( - *mix.get_unchecked(n).as_words().get_unchecked(w), - *tmp_node.as_words().get_unchecked(w), - ); - } - } + // NODE_WORDS + for w in 0..16 { + mix[n].as_words_mut()[w] = + fnv_hash( + mix[n].as_words()[w], + tmp_node.as_words()[w], + ); } } } } + } + let mix_words: [u32; MIX_WORDS] = unsafe { mem::transmute(mix) }; + + { + // This is an uninitialized buffer to begin with, but we iterate precisely `compress.len()` + // times and set each index, leaving the array fully initialized. THIS ONLY WORKS ON LITTLE- + // ENDIAN MACHINES. See a future PR to make this and the rest of the code work correctly on + // big-endian arches like mips. + let mut compress: &mut [u32; MIX_WORDS / 4] = unsafe { + make_const_array!(MIX_WORDS / 4, &mut buf.compress_bytes) + }; + + // Compress mix debug_assert_eq!(MIX_WORDS / 4, 8); - - // compress mix unroll! { for i in 0..8 { let w = i * 4; - let mut reduction = *mix.get_unchecked(0).as_words().get_unchecked(w + 0); - reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 1); - reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 2); - reduction = reduction.wrapping_mul(FNV_PRIME) ^ *mix.get_unchecked(0).as_words().get_unchecked(w + 3); - *mix.get_unchecked_mut(0).as_words_mut().get_unchecked_mut(i) = reduction; + + let mut reduction = mix_words[w + 0]; + reduction = reduction.wrapping_mul(FNV_PRIME) ^ mix_words[w + 1]; + reduction = reduction.wrapping_mul(FNV_PRIME) ^ mix_words[w + 2]; + reduction = reduction.wrapping_mul(FNV_PRIME) ^ mix_words[w + 3]; + compress[i] = reduction; } } + } - let mut mix_hash = [0u8; 32]; - let mut buf = [0u8; 32 + 64]; - ptr::copy_nonoverlapping(f_mix.get_unchecked_mut(0).bytes.as_ptr(), buf.as_mut_ptr(), 64); - ptr::copy_nonoverlapping(mix.get_unchecked_mut(0).bytes.as_ptr(), buf[64..].as_mut_ptr(), 32); - ptr::copy_nonoverlapping(mix.get_unchecked_mut(0).bytes.as_ptr(), mix_hash.as_mut_ptr(), 32); - let mut value: H256 = [0u8; 32]; - sha3::sha3_256(value.as_mut_ptr(), value.len(), buf.as_ptr(), buf.len()); - ProofOfWork { - mix_hash: mix_hash, - value: value, - } + let mix_hash = buf.compress_bytes; + + let value: H256 = unsafe { + // We can interpret the buffer as an array of `u8`s, since it's `repr(C)`. + let read_ptr: *const u8 = mem::transmute(&buf); + // We overwrite the second half since `sha3_256` has an internal buffer and so allows + // overlapping arrays as input. + let write_ptr: *mut u8 = mem::transmute(&mut buf.compress_bytes); + sha3::sha3_256( + write_ptr, + buf.compress_bytes.len(), + read_ptr, + buf.half_mix.bytes.len() + buf.compress_bytes.len(), + ); + buf.compress_bytes + }; + + ProofOfWork { + mix_hash: mix_hash, + value: value, } } fn calculate_dag_item(node_index: u32, cache: &[Node]) -> Node { - unsafe { - let num_parent_nodes = cache.len(); - let init = cache.get_unchecked(node_index as usize % num_parent_nodes); - let mut ret = init.clone(); - *ret.as_words_mut().get_unchecked_mut(0) ^= node_index; - sha3::sha3_512(ret.bytes.as_mut_ptr(), ret.bytes.len(), ret.bytes.as_ptr(), ret.bytes.len()); + let num_parent_nodes = cache.len(); + let mut ret = cache[node_index as usize % num_parent_nodes].clone(); + ret.as_words_mut()[0] ^= node_index; - debug_assert_eq!(NODE_WORDS, 16); - for i in 0..ETHASH_DATASET_PARENTS as u32 { - let parent_index = fnv_hash(node_index ^ i, *ret.as_words().get_unchecked(i as usize % NODE_WORDS)) % num_parent_nodes as u32; - let parent = cache.get_unchecked(parent_index as usize); + sha3_512_inplace(&mut ret.bytes); - unroll! { - for w in 0..16 { - *ret.as_words_mut().get_unchecked_mut(w) = - fnv_hash( - *ret.as_words().get_unchecked(w), - *parent.as_words().get_unchecked(w) - ); - } + debug_assert_eq!(NODE_WORDS, 16); + for i in 0..ETHASH_DATASET_PARENTS as u32 { + let parent_index = fnv_hash( + node_index ^ i, + ret.as_words()[i as usize % NODE_WORDS], + ) % num_parent_nodes as u32; + let parent = &cache[parent_index as usize]; + + unroll! { + for w in 0..16 { + ret.as_words_mut()[w] = fnv_hash(ret.as_words()[w], parent.as_words()[w]); } } - - sha3::sha3_512(ret.bytes.as_mut_ptr(), ret.bytes.len(), ret.bytes.as_ptr(), ret.bytes.len()); - ret } + + sha3_512_inplace(&mut ret.bytes); + + ret } fn light_new>(cache_dir: T, block_number: u64) -> Light { @@ -391,9 +480,11 @@ fn light_new>(cache_dir: T, block_number: u64) -> Light { assert!(cache_size % NODE_BYTES == 0, "Unaligned cache size"); let num_nodes = cache_size / NODE_BYTES; - let mut nodes = Vec::with_capacity(num_nodes); - nodes.resize(num_nodes, Node::default()); + let mut nodes: Vec = Vec::with_capacity(num_nodes); unsafe { + // Use uninit instead of unnecessarily writing `size_of::() * num_nodes` 0s + nodes.set_len(num_nodes); + sha3_512(&seedhash[0..32], &mut nodes.get_unchecked_mut(0).bytes); for i in 1..num_nodes { sha3::sha3_512(nodes.get_unchecked_mut(i).bytes.as_mut_ptr(), NODE_BYTES, nodes.get_unchecked(i - 1).bytes.as_ptr(), NODE_BYTES); diff --git a/ethash/src/lib.rs b/ethash/src/lib.rs index a9f563af3..9112546c4 100644 --- a/ethash/src/lib.rs +++ b/ethash/src/lib.rs @@ -143,9 +143,11 @@ mod benchmarks { #[bench] fn bench_light_compute(b: &mut Bencher) { + use ::std::env; + let hash = [0xf5, 0x7e, 0x6f, 0x3a, 0xcf, 0xc0, 0xdd, 0x4b, 0x5b, 0xf2, 0xbe, 0xe4, 0x0a, 0xb3, 0x35, 0x8a, 0xa6, 0x87, 0x73, 0xa8, 0xd0, 0x9f, 0x5e, 0x59, 0x5e, 0xab, 0x55, 0x94, 0x05, 0x52, 0x7d, 0x72]; let nonce = 0xd7b3ac70a301a249; - let light = Light::new(486382); + let light = Light::new(env::temp_dir(), 486382); b.iter(|| light_compute(&light, &hash, nonce)); }