From 0fd52176dce0f9801ce0cd9c23ce38e187e23fbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Mon, 7 Mar 2016 16:26:35 +0100 Subject: [PATCH 01/12] Fixing tests in bigint and util --- test.sh | 9 ++++++- util/bigint/src/uint.rs | 52 -------------------------------------- util/src/lib.rs | 56 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 53 deletions(-) diff --git a/test.sh b/test.sh index 0f5edb0d1..dd71d120a 100755 --- a/test.sh +++ b/test.sh @@ -1,4 +1,11 @@ #!/bin/sh # Running Parity Full Test Sute -cargo test --features ethcore/json-tests $1 -p ethash -p ethcore-util -p ethcore -p ethsync -p ethcore-rpc -p parity +cargo test --features ethcore/json-tests $1 \ + -p ethash \ + -p ethcore-util \ + -p ethcore \ + -p ethsync \ + -p ethcore-rpc \ + -p parity \ + -p bigint diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index bd57e9d6d..62fcd8c6e 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -1948,58 +1948,6 @@ mod tests { assert_eq!(U256([1, 0, 0, 0]), result); } - #[test] - fn u256_multi_muls() { - use hash::*; - - let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0])); - assert_eq!(U256([0, 0, 0, 0]), result); - - let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([1, 0, 0, 0])); - assert_eq!(U256([1, 0, 0, 0]), result); - - let (result, _) = U256([5, 0, 0, 0]).overflowing_mul(U256([5, 0, 0, 0])); - assert_eq!(U256([25, 0, 0, 0]), result); - - let (result, _) = U256([0, 5, 0, 0]).overflowing_mul(U256([0, 5, 0, 0])); - assert_eq!(U256([0, 0, 25, 0]), result); - - let (result, _) = U256([0, 0, 0, 1]).overflowing_mul(U256([1, 0, 0, 0])); - assert_eq!(U256([0, 0, 0, 1]), result); - - let (result, _) = U256([0, 0, 0, 5]).overflowing_mul(U256([2, 0, 0, 0])); - assert_eq!(U256([0, 0, 0, 10]), result); - - let (result, _) = U256([0, 0, 1, 0]).overflowing_mul(U256([0, 5, 0, 0])); - assert_eq!(U256([0, 0, 0, 5]), result); - - let (result, _) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0])); - assert_eq!(U256([0, 0, 0, 0]), result); - - let (result, _) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 5, 0, 0])); - assert_eq!(U256([0, 10, 0, 0]), result); - - let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX])); - assert_eq!(U256([0, 0, 0, ::std::u64::MAX]), result); - - let x1 = U256::from_str("0000000000000000000000000000000000000000000000000000012365124623").unwrap(); - let x2sqr_right = U256::from_str("000000000000000000000000000000000000000000014baeef72e0378e2328c9").unwrap(); - let x1sqr = x1 * x1; - assert_eq!(H256::from(x2sqr_right), H256::from(x1sqr)); - let x1cube = x1sqr * x1; - let x1cube_right = U256::from_str("0000000000000000000000000000000001798acde139361466f712813717897b").unwrap(); - assert_eq!(H256::from(x1cube_right), H256::from(x1cube)); - let x1quad = x1cube * x1; - let x1quad_right = U256::from_str("000000000000000000000001adbdd6bd6ff027485484b97f8a6a4c7129756dd1").unwrap(); - assert_eq!(H256::from(x1quad_right), H256::from(x1quad)); - let x1penta = x1quad * x1; - let x1penta_right = U256::from_str("00000000000001e92875ac24be246e1c57e0507e8c46cc8d233b77f6f4c72993").unwrap(); - assert_eq!(H256::from(x1penta_right), H256::from(x1penta)); - let x1septima = x1penta * x1; - let x1septima_right = U256::from_str("00022cca1da3f6e5722b7d3cc5bbfb486465ebc5a708dd293042f932d7eee119").unwrap(); - assert_eq!(H256::from(x1septima_right), H256::from(x1septima)); - } - #[test] fn u256_multi_muls_overflow() { let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0])); diff --git a/util/src/lib.rs b/util/src/lib.rs index a50ba8da4..344da0980 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -167,3 +167,59 @@ pub use io::*; pub use log::*; pub use kvdb::*; +#[cfg(test)] +mod tests { + use super::numbers::*; + use std::str::FromStr; + + #[test] + fn u256_multi_muls() { + + let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0])); + assert_eq!(U256([0, 0, 0, 0]), result); + + let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([1, 0, 0, 0])); + assert_eq!(U256([1, 0, 0, 0]), result); + + let (result, _) = U256([5, 0, 0, 0]).overflowing_mul(U256([5, 0, 0, 0])); + assert_eq!(U256([25, 0, 0, 0]), result); + + let (result, _) = U256([0, 5, 0, 0]).overflowing_mul(U256([0, 5, 0, 0])); + assert_eq!(U256([0, 0, 25, 0]), result); + + let (result, _) = U256([0, 0, 0, 1]).overflowing_mul(U256([1, 0, 0, 0])); + assert_eq!(U256([0, 0, 0, 1]), result); + + let (result, _) = U256([0, 0, 0, 5]).overflowing_mul(U256([2, 0, 0, 0])); + assert_eq!(U256([0, 0, 0, 10]), result); + + let (result, _) = U256([0, 0, 1, 0]).overflowing_mul(U256([0, 5, 0, 0])); + assert_eq!(U256([0, 0, 0, 5]), result); + + let (result, _) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0])); + assert_eq!(U256([0, 0, 0, 0]), result); + + let (result, _) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 5, 0, 0])); + assert_eq!(U256([0, 10, 0, 0]), result); + + let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX])); + assert_eq!(U256([0, 0, 0, ::std::u64::MAX]), result); + + let x1 = U256::from_str("0000000000000000000000000000000000000000000000000000012365124623").unwrap(); + let x2sqr_right = U256::from_str("000000000000000000000000000000000000000000014baeef72e0378e2328c9").unwrap(); + let x1sqr = x1 * x1; + assert_eq!(H256::from(x2sqr_right), H256::from(x1sqr)); + let x1cube = x1sqr * x1; + let x1cube_right = U256::from_str("0000000000000000000000000000000001798acde139361466f712813717897b").unwrap(); + assert_eq!(H256::from(x1cube_right), H256::from(x1cube)); + let x1quad = x1cube * x1; + let x1quad_right = U256::from_str("000000000000000000000001adbdd6bd6ff027485484b97f8a6a4c7129756dd1").unwrap(); + assert_eq!(H256::from(x1quad_right), H256::from(x1quad)); + let x1penta = x1quad * x1; + let x1penta_right = U256::from_str("00000000000001e92875ac24be246e1c57e0507e8c46cc8d233b77f6f4c72993").unwrap(); + assert_eq!(H256::from(x1penta_right), H256::from(x1penta)); + let x1septima = x1penta * x1; + let x1septima_right = U256::from_str("00022cca1da3f6e5722b7d3cc5bbfb486465ebc5a708dd293042f932d7eee119").unwrap(); + assert_eq!(H256::from(x1septima_right), H256::from(x1septima)); + } +} From 4717be07d647c9a3fa1e0da5ec636a3d67a94d3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Mon, 7 Mar 2016 16:17:14 +0100 Subject: [PATCH 02/12] Optimizing mul_u32 --- util/benches/bigint.rs | 2 +- util/bigint/src/uint.rs | 49 ++++++++++++----------------------------- 2 files changed, 15 insertions(+), 36 deletions(-) diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs index 575164cb6..80c4ce1d8 100644 --- a/util/benches/bigint.rs +++ b/util/benches/bigint.rs @@ -79,7 +79,7 @@ fn u256_full_mul(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); (0..n).fold(U256([rand::random::(), rand::random::(), rand::random::(), rand::random::()]), - |old, new| { + |old, _new| { let U512(ref u512words) = old.full_mul(U256([rand::random::(), rand::random::(), rand::random::(), rand::random::()])); U256([u512words[0], u512words[2], u512words[2], u512words[3]]) }) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index 62fcd8c6e..ad4f0a99c 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -711,52 +711,31 @@ macro_rules! construct_uint { #[allow(dead_code)] // not used when multiplied with inline assembly /// Multiplication by u32 fn mul_u32(self, other: u32) -> Self { - let $name(ref arr) = self; - let mut carry = [0u64; $n_words]; - let mut ret = [0u64; $n_words]; - for i in 0..$n_words { - let upper = other as u64 * (arr[i] >> 32); - let lower = other as u64 * (arr[i] & 0xFFFFFFFF); - - ret[i] = lower.wrapping_add(upper << 32); - - if i < $n_words - 1 { - carry[i + 1] = upper >> 32; - if ret[i] < lower { - carry[i + 1] += 1; - } - } - } - $name(ret) + $name(carry) + let (ret, overflow) = self.overflowing_mul_u32(other); + panic_on_overflow!(overflow); + ret } #[allow(dead_code)] // not used when multiplied with inline assembly /// Overflowing multiplication by u32 fn overflowing_mul_u32(self, other: u32) -> (Self, bool) { let $name(ref arr) = self; - let mut carry = [0u64; $n_words]; + let o = other as u64; + let mut carry = [0u64; $n_words + 1]; let mut ret = [0u64; $n_words]; - let mut overflow = false; + for i in 0..$n_words { - let upper = other as u64 * (arr[i] >> 32); - let lower = other as u64 * (arr[i] & 0xFFFFFFFF); + let upper = o * (arr[i] >> 32); + let lower = o * (arr[i] & 0xFFFFFFFF); - ret[i] = lower.wrapping_add(upper << 32); + let (res1, overflow1) = lower.overflowing_add(upper << 32); + let (res2, overflow2) = res1.overflowing_add(carry[i]); - if i < $n_words - 1 { - carry[i + 1] = upper >> 32; - if ret[i] < lower { - carry[i + 1] += 1; - } - } else if (upper >> 32) > 0 || ret[i] < lower { - overflow = true - } + ret[i] = res2; + carry[i + 1] = (upper >> 32) + overflow1 as u64 + overflow2 as u64; } - let result = overflowing!( - $name(ret).overflowing_add($name(carry)), - overflow - ); - (result, overflow) + + ($name(ret), carry[$n_words] > 0) } } From cc0adf544208a36786e0ffa7f219ff79983d380b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Mon, 7 Mar 2016 17:06:08 +0100 Subject: [PATCH 03/12] Optimizing and simplifying add and shl --- util/bigint/src/uint.rs | 89 ++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 55 deletions(-) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index ad4f0a99c..47a975d5f 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -71,29 +71,19 @@ macro_rules! uint_overflowing_add_reg { ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({ let $name(ref me) = $self_expr; let $name(ref you) = $other; + let mut ret = [0u64; $n_words]; - let mut carry = [0u64; $n_words]; - let mut b_carry = false; - let mut overflow = false; + let mut carry = [0u64; $n_words + 1]; for i in 0..$n_words { - ret[i] = me[i].wrapping_add(you[i]); + let (res1, overflow1) = me[i].overflowing_add(you[i]); + let (res2, overflow2) = res1.overflowing_add(carry[i]); - if ret[i] < me[i] { - if i < $n_words - 1 { - carry[i + 1] = 1; - b_carry = true; - } else { - overflow = true; - } - } - } - if b_carry { - let ret = overflowing!($name(ret).overflowing_add($name(carry)), overflow); - (ret, overflow) - } else { - ($name(ret), overflow) + ret[i] = res2; + carry[i+1] = overflow1 as u64 + overflow2 as u64; } + + ($name(ret), carry[$n_words] > 0) }) } @@ -673,37 +663,10 @@ macro_rules! construct_uint { } fn overflowing_shl(self, shift32: u32) -> ($name, bool) { - let $name(ref original) = self; - let mut ret = [0u64; $n_words]; let shift = shift32 as usize; - let word_shift = shift / 64; - let bit_shift = shift % 64; - for i in 0..$n_words { - // Shift - if i + word_shift < $n_words { - ret[i + word_shift] += original[i] << bit_shift; - } - // Carry - if bit_shift > 0 && i + word_shift + 1 < $n_words { - ret[i + word_shift + 1] += original[i] >> (64 - bit_shift); - } - } - // Detecting overflow - let last = $n_words - word_shift - if bit_shift > 0 { 1 } else { 0 }; - let overflow = if bit_shift > 0 { - (original[last] >> (64 - bit_shift)) > 0 - } else if word_shift > 0 { - original[last] > 0 - } else { - false - }; - for i in last+1..$n_words-1 { - if original[i] > 0 { - return ($name(ret), true); - } - } - ($name(ret), overflow) + let res = self << shift; + (res, self != (res >> shift)) } } @@ -987,14 +950,15 @@ macro_rules! construct_uint { let mut ret = [0u64; $n_words]; let word_shift = shift / 64; let bit_shift = shift % 64; - for i in 0..$n_words { - // Shift - if i + word_shift < $n_words { - ret[i + word_shift] += original[i] << bit_shift; - } - // Carry - if bit_shift > 0 && i + word_shift + 1 < $n_words { - ret[i + word_shift + 1] += original[i] >> (64 - bit_shift); + + // shift + for i in word_shift..$n_words { + ret[i] += original[i - word_shift] << bit_shift; + } + // carry + if bit_shift > 0 { + for i in word_shift+1..$n_words { + ret[i] += original[i - 1 - word_shift] >> (64 - bit_shift); } } $name(ret) @@ -1672,6 +1636,11 @@ mod tests { #[test] pub fn uint256_shl_overflow() { + assert_eq!( + U256::from_str("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() + << 4, + U256::from_str("fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0").unwrap() + ); assert_eq!( U256::from_str("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() .overflowing_shl(4), @@ -1681,6 +1650,16 @@ mod tests { #[test] pub fn uint256_shl_overflow_words() { + assert_eq!( + U256::from_str("0000000000000001ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() + << 64, + U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap() + ); + assert_eq!( + U256::from_str("0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() + << 64, + U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap() + ); assert_eq!( U256::from_str("0000000000000001ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() .overflowing_shl(64), From e7be3c5378c50f2a11d053003946d958a93edf1b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Mon, 7 Mar 2016 17:09:19 +0100 Subject: [PATCH 04/12] Simplifing mul_u32 and add carry --- util/bigint/src/uint.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index 47a975d5f..789fc744e 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -73,17 +73,17 @@ macro_rules! uint_overflowing_add_reg { let $name(ref you) = $other; let mut ret = [0u64; $n_words]; - let mut carry = [0u64; $n_words + 1]; + let mut carry = 0u64; for i in 0..$n_words { let (res1, overflow1) = me[i].overflowing_add(you[i]); - let (res2, overflow2) = res1.overflowing_add(carry[i]); + let (res2, overflow2) = res1.overflowing_add(carry); ret[i] = res2; - carry[i+1] = overflow1 as u64 + overflow2 as u64; + carry = overflow1 as u64 + overflow2 as u64; } - ($name(ret), carry[$n_words] > 0) + ($name(ret), carry > 0) }) } @@ -684,21 +684,21 @@ macro_rules! construct_uint { fn overflowing_mul_u32(self, other: u32) -> (Self, bool) { let $name(ref arr) = self; let o = other as u64; - let mut carry = [0u64; $n_words + 1]; let mut ret = [0u64; $n_words]; + let mut carry = 0; for i in 0..$n_words { let upper = o * (arr[i] >> 32); let lower = o * (arr[i] & 0xFFFFFFFF); let (res1, overflow1) = lower.overflowing_add(upper << 32); - let (res2, overflow2) = res1.overflowing_add(carry[i]); + let (res2, overflow2) = res1.overflowing_add(carry); ret[i] = res2; - carry[i + 1] = (upper >> 32) + overflow1 as u64 + overflow2 as u64; + carry = (upper >> 32) + overflow1 as u64 + overflow2 as u64; } - ($name(ret), carry[$n_words] > 0) + ($name(ret), carry > 0) } } From c5840be1cb1a55d0daf1c208dbfd3c7bcdea6c7c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Mon, 7 Mar 2016 18:36:17 +0100 Subject: [PATCH 05/12] Small improvements --- util/bigint/src/uint.rs | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index 789fc744e..8fbaca532 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -379,11 +379,24 @@ macro_rules! uint_overflowing_mul_reg { ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut res = $name::from(0u64); let mut overflow = false; - for i in 0..(2 * $n_words) { - let v = overflowing!($self_expr.overflowing_mul_u32(($other >> (32 * i)).low_u32()), overflow); - let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow); - res = overflowing!(res.overflowing_add(res2), overflow); + + let mut current = $other; + let mut current_shift = 0; + let mut current_u32; + let mut i = 0; + + while i < 2*$n_words { + current_u32 = current.low_u32(); + + let v = overflowing!($self_expr.overflowing_mul_u32(current_u32), overflow); + let v_shifted = overflowing!(v.overflowing_shl(current_shift), overflow); + res = overflowing!(res.overflowing_add(v_shifted), overflow); + + current = current >> 32; + current_shift += 32; + i += 1; } + (res, overflow) }) } @@ -973,6 +986,7 @@ macro_rules! construct_uint { let mut ret = [0u64; $n_words]; let word_shift = shift / 64; let bit_shift = shift % 64; + for i in word_shift..$n_words { // Shift ret[i - word_shift] += original[i] >> bit_shift; @@ -989,9 +1003,11 @@ macro_rules! construct_uint { fn cmp(&self, other: &$name) -> Ordering { let &$name(ref me) = self; let &$name(ref you) = other; - for i in 0..$n_words { - if me[$n_words - 1 - i] < you[$n_words - 1 - i] { return Ordering::Less; } - if me[$n_words - 1 - i] > you[$n_words - 1 - i] { return Ordering::Greater; } + let mut i = $n_words; + while i > 0 { + i -= 1; + if me[i] < you[i] { return Ordering::Less; } + if me[i] > you[i] { return Ordering::Greater; } } Ordering::Equal } From 76865694ce705b945655b685e05ef21ade44e8e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Mon, 7 Mar 2016 19:03:29 +0100 Subject: [PATCH 06/12] Subtraction optimization --- util/bigint/src/uint.rs | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index 8fbaca532..0e8d1e7b7 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -166,9 +166,22 @@ macro_rules! uint_overflowing_add { #[cfg(not(all(asm_available, target_arch="x86_64")))] macro_rules! uint_overflowing_sub { ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ - let res = overflowing!((!$other).overflowing_add(From::from(1u64))); - let res = overflowing!($self_expr.overflowing_add(res)); - (res, $self_expr < $other) + let $name(ref me) = $self_expr; + let $name(ref you) = $other; + + let mut ret = [0u64; $n_words]; + let mut carry = 0u64; + + for i in 0..$n_words { + let (res1, overflow1) = me[i].overflowing_sub(you[i]); + let (res2, overflow2) = res1.overflowing_sub(carry); + + ret[i] = res2; + carry = overflow1 as u64 + overflow2 as u64; + } + + ($name(ret), carry > 0) + }) } From 17b2d2a2d71897d55bf869d7da194bc730a71a75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Tue, 8 Mar 2016 01:13:00 +0100 Subject: [PATCH 07/12] Implementing mul and full_mul --- util/bigint/src/uint.rs | 180 +++++++++++++++++++++------------------- 1 file changed, 96 insertions(+), 84 deletions(-) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index 0e8d1e7b7..6a6658235 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -390,27 +390,47 @@ macro_rules! uint_overflowing_mul { macro_rules! uint_overflowing_mul_reg { ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ - let mut res = $name::from(0u64); - let mut overflow = false; + let $name(ref me) = $self_expr; + let $name(ref you) = $other; + let mut ret = [0u64; 2*$n_words]; - let mut current = $other; - let mut current_shift = 0; - let mut current_u32; - let mut i = 0; + for i in 0..$n_words { + let mut carry2 = 0u64; + let (b_u, b_l) = (you[i] >> 32, you[i] & 0xFFFFFFFF); - while i < 2*$n_words { - current_u32 = current.low_u32(); + for j in 0..$n_words { + let a = me[j]; - let v = overflowing!($self_expr.overflowing_mul_u32(current_u32), overflow); - let v_shifted = overflowing!(v.overflowing_shl(current_shift), overflow); - res = overflowing!(res.overflowing_add(v_shifted), overflow); + // multiply parts + let (c_l, overflow_l) = mul_u32(a, b_l as u32, ret[j + i]); + let (c_u, overflow_u) = mul_u32(a, b_u as u32, c_l >> 32); - current = current >> 32; - current_shift += 32; - i += 1; + // This won't overflow + ret[j + i] = (c_l & 0xFFFFFFFF) + (c_u << 32); + + // carry1 = overflow_l + (c_u >> 32) + (overflow_u << 32) + carry2 + c0; + let (ca1, c1) = overflow_l.overflowing_add((c_u >> 32) + (overflow_u << 32)); + let (ca1, c2) = ca1.overflowing_add(ret[j + i + 1]); + let (ca1, c3) = ca1.overflowing_add(carry2); + + ret[j + i + 1] = ca1; + + // Will never overflow + carry2 = (overflow_u >> 32) + c1 as u64 + c2 as u64 + c3 as u64; + } } - (res, overflow) + let mut res = [0u64; $n_words]; + let mut overflow = false; + for i in 0..$n_words { + res[i] = ret[i]; + } + + for i in $n_words..2*$n_words { + overflow |= ret[i] != 0; + } + + ($name(res), overflow) }) } @@ -438,6 +458,19 @@ macro_rules! panic_on_overflow { } } +#[inline(always)] +fn mul_u32(a: u64, b: u32, carry: u64) -> (u64, u64) { + let b = b as u64; + let upper = b * (a >> 32); + let lower = b * (a & 0xFFFFFFFF); + + let (res1, overflow1) = lower.overflowing_add(upper << 32); + let (res2, overflow2) = res1.overflowing_add(carry); + + let carry = (upper >> 32) + overflow1 as u64 + overflow2 as u64; + (res2, carry) +} + /// Large, fixed-length unsigned integer type. pub trait Uint: Sized + Default + FromStr + From + fmt::Debug + fmt::Display + PartialOrd + Ord + PartialEq + Eq + Hash { @@ -496,9 +529,6 @@ pub trait Uint: Sized + Default + FromStr + From + fmt::Debug + fmt::Displa /// Returns negation of this `Uint` and overflow (always true) fn overflowing_neg(self) -> (Self, bool); - - /// Shifts this `Uint` and returns overflow - fn overflowing_shl(self, shift: u32) -> (Self, bool); } macro_rules! construct_uint { @@ -687,13 +717,6 @@ macro_rules! construct_uint { fn overflowing_neg(self) -> ($name, bool) { (!self, true) } - - fn overflowing_shl(self, shift32: u32) -> ($name, bool) { - let shift = shift32 as usize; - - let res = self << shift; - (res, self != (res >> shift)) - } } impl $name { @@ -709,19 +732,13 @@ macro_rules! construct_uint { /// Overflowing multiplication by u32 fn overflowing_mul_u32(self, other: u32) -> (Self, bool) { let $name(ref arr) = self; - let o = other as u64; let mut ret = [0u64; $n_words]; let mut carry = 0; for i in 0..$n_words { - let upper = o * (arr[i] >> 32); - let lower = o * (arr[i] & 0xFFFFFFFF); - - let (res1, overflow1) = lower.overflowing_add(upper << 32); - let (res2, overflow2) = res1.overflowing_add(carry); - - ret[i] = res2; - carry = (upper >> 32) + overflow1 as u64 + overflow2 as u64; + let (res, carry2) = mul_u32(arr[i], other, carry); + ret[i] = res; + carry = carry2; } ($name(ret), carry > 0) @@ -1233,10 +1250,37 @@ impl U256 { /// No overflow possible #[cfg(not(all(asm_available, target_arch="x86_64")))] pub fn full_mul(self, other: U256) -> U512 { - let self_512 = U512::from(self); - let other_512 = U512::from(other); - let (result, _) = self_512.overflowing_mul(other_512); - result + let U256(ref me) = self; + let U256(ref you) = other; + let mut ret = [0u64; 8]; + + for i in 0..4 { + let mut carry2 = 0u64; + let (b_u, b_l) = (you[i] >> 32, you[i] & 0xFFFFFFFF); + + for j in 0..4 { + let a = me[j]; + + // multiply parts + let (c_l, overflow_l) = mul_u32(a, b_l as u32, ret[j + i]); + let (c_u, overflow_u) = mul_u32(a, b_u as u32, c_l >> 32); + + // This won't overflow + ret[j + i] = (c_l & 0xFFFFFFFF) + (c_u << 32); + + // carry1 = overflow_l + (c_u >> 32) + (overflow_u << 32) + carry2 + c0; + let (ca1, c1) = overflow_l.overflowing_add((c_u >> 32) + (overflow_u << 32)); + let (ca1, c2) = ca1.overflowing_add(ret[j + i + 1]); + let (ca1, c3) = ca1.overflowing_add(carry2); + + ret[j + i + 1] = ca1; + + // Will never overflow + carry2 = (overflow_u >> 32) + c1 as u64 + c2 as u64 + c3 as u64; + } + } + + U512(ret) } } @@ -1502,6 +1546,18 @@ mod tests { //// TODO: bit inversion } + #[test] + pub fn uint256_simple_mul() { + let a = U256::from_str("10000000000000000").unwrap(); + let b = U256::from_str("10000000000000000").unwrap(); + + let c = U256::from_str("100000000000000000000000000000000").unwrap(); + println!("Multiplying"); + let result = a.overflowing_mul(b); + println!("Got result"); + assert_eq!(result, (c, false)) + } + #[test] pub fn uint256_extreme_bitshift_test() { //// Shifting a u64 by 64 bits gives an undefined value, so make sure that @@ -1664,21 +1720,16 @@ mod tests { } #[test] - pub fn uint256_shl_overflow() { + pub fn uint256_shl() { assert_eq!( U256::from_str("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() << 4, U256::from_str("fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0").unwrap() ); - assert_eq!( - U256::from_str("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() - .overflowing_shl(4), - (U256::from_str("fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0").unwrap(), true) - ); } #[test] - pub fn uint256_shl_overflow_words() { + pub fn uint256_shl_words() { assert_eq!( U256::from_str("0000000000000001ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() << 64, @@ -1689,45 +1740,6 @@ mod tests { << 64, U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap() ); - assert_eq!( - U256::from_str("0000000000000001ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() - .overflowing_shl(64), - (U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap(), true) - ); - assert_eq!( - U256::from_str("0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() - .overflowing_shl(64), - (U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap(), false) - ); - } - - #[test] - pub fn uint256_shl_overflow_words2() { - assert_eq!( - U256::from_str("00000000000000000000000000000001ffffffffffffffffffffffffffffffff").unwrap() - .overflowing_shl(128), - (U256::from_str("ffffffffffffffffffffffffffffffff00000000000000000000000000000000").unwrap(), true) - ); - assert_eq!( - U256::from_str("00000000000000000000000000000000ffffffffffffffffffffffffffffffff").unwrap() - .overflowing_shl(128), - (U256::from_str("ffffffffffffffffffffffffffffffff00000000000000000000000000000000").unwrap(), false) - ); - assert_eq!( - U256::from_str("00000000000000000000000000000000ffffffffffffffffffffffffffffffff").unwrap() - .overflowing_shl(129), - (U256::from_str("fffffffffffffffffffffffffffffffe00000000000000000000000000000000").unwrap(), true) - ); - } - - - #[test] - pub fn uint256_shl_overflow2() { - assert_eq!( - U256::from_str("0fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap() - .overflowing_shl(4), - (U256::from_str("fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0").unwrap(), false) - ); } #[test] From 9ae2341ba9ba33af6e0baa4e0a9ee335c90bf56a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Tue, 8 Mar 2016 10:05:46 +0100 Subject: [PATCH 08/12] Couple of more aggresive optimizations --- util/bigint/src/uint.rs | 81 +++++++++++++++++++++++------------------ 1 file changed, 46 insertions(+), 35 deletions(-) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index 6a6658235..68af6dad8 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -395,28 +395,31 @@ macro_rules! uint_overflowing_mul_reg { let mut ret = [0u64; 2*$n_words]; for i in 0..$n_words { + if you[i] == 0 { + continue; + } + let mut carry2 = 0u64; - let (b_u, b_l) = (you[i] >> 32, you[i] & 0xFFFFFFFF); + let (b_u, b_l) = split(you[i]); for j in 0..$n_words { - let a = me[j]; + if me[j] == 0 { + continue; + } + + let a = split(me[j]); // multiply parts - let (c_l, overflow_l) = mul_u32(a, b_l as u32, ret[j + i]); - let (c_u, overflow_u) = mul_u32(a, b_u as u32, c_l >> 32); + let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]); + let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32); + ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32); - // This won't overflow - ret[j + i] = (c_l & 0xFFFFFFFF) + (c_u << 32); + // Only single overflow possible here + let carry = (c_u >> 32) + (overflow_u << 32) + overflow_l + carry2; + let (carry, o) = carry.overflowing_add(ret[i + j + 1]); - // carry1 = overflow_l + (c_u >> 32) + (overflow_u << 32) + carry2 + c0; - let (ca1, c1) = overflow_l.overflowing_add((c_u >> 32) + (overflow_u << 32)); - let (ca1, c2) = ca1.overflowing_add(ret[j + i + 1]); - let (ca1, c3) = ca1.overflowing_add(carry2); - - ret[j + i + 1] = ca1; - - // Will never overflow - carry2 = (overflow_u >> 32) + c1 as u64 + c2 as u64 + c3 as u64; + ret[i + j + 1] = carry; + carry2 = o as u64; } } @@ -459,10 +462,9 @@ macro_rules! panic_on_overflow { } #[inline(always)] -fn mul_u32(a: u64, b: u32, carry: u64) -> (u64, u64) { - let b = b as u64; - let upper = b * (a >> 32); - let lower = b * (a & 0xFFFFFFFF); +fn mul_u32(a: (u64, u64), b: u64, carry: u64) -> (u64, u64) { + let upper = b * a.0; + let lower = b * a.1; let (res1, overflow1) = lower.overflowing_add(upper << 32); let (res2, overflow2) = res1.overflowing_add(carry); @@ -471,6 +473,11 @@ fn mul_u32(a: u64, b: u32, carry: u64) -> (u64, u64) { (res2, carry) } +#[inline(always)] +fn split(a: u64) -> (u64, u64) { + (a >> 32, a & 0xFFFFFFFF) +} + /// Large, fixed-length unsigned integer type. pub trait Uint: Sized + Default + FromStr + From + fmt::Debug + fmt::Display + PartialOrd + Ord + PartialEq + Eq + Hash { @@ -734,9 +741,10 @@ macro_rules! construct_uint { let $name(ref arr) = self; let mut ret = [0u64; $n_words]; let mut carry = 0; + let o = other as u64; for i in 0..$n_words { - let (res, carry2) = mul_u32(arr[i], other, carry); + let (res, carry2) = mul_u32(split(arr[i]), o, carry); ret[i] = res; carry = carry2; } @@ -1255,28 +1263,31 @@ impl U256 { let mut ret = [0u64; 8]; for i in 0..4 { + if you[i] == 0 { + continue; + } + let mut carry2 = 0u64; - let (b_u, b_l) = (you[i] >> 32, you[i] & 0xFFFFFFFF); + let (b_u, b_l) = split(you[i]); for j in 0..4 { - let a = me[j]; + if me[j] == 0 { + continue; + } + + let a = split(me[j]); // multiply parts - let (c_l, overflow_l) = mul_u32(a, b_l as u32, ret[j + i]); - let (c_u, overflow_u) = mul_u32(a, b_u as u32, c_l >> 32); + let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]); + let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32); + ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32); - // This won't overflow - ret[j + i] = (c_l & 0xFFFFFFFF) + (c_u << 32); + // Only single overflow possible here + let carry = (c_u >> 32) + (overflow_u << 32) + overflow_l + carry2; + let (carry, o) = carry.overflowing_add(ret[i + j + 1]); - // carry1 = overflow_l + (c_u >> 32) + (overflow_u << 32) + carry2 + c0; - let (ca1, c1) = overflow_l.overflowing_add((c_u >> 32) + (overflow_u << 32)); - let (ca1, c2) = ca1.overflowing_add(ret[j + i + 1]); - let (ca1, c3) = ca1.overflowing_add(carry2); - - ret[j + i + 1] = ca1; - - // Will never overflow - carry2 = (overflow_u >> 32) + c1 as u64 + c2 as u64 + c3 as u64; + ret[i + j + 1] = carry; + carry2 = o as u64; } } From c47209e9bf25b72684aa4699c6a70a1dfe6307b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Tue, 8 Mar 2016 12:09:04 +0100 Subject: [PATCH 09/12] Using better subtraction when optimizations are enabled --- util/bigint/src/uint.rs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index 68af6dad8..801c5f5bd 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -165,6 +165,12 @@ macro_rules! uint_overflowing_add { #[cfg(not(all(asm_available, target_arch="x86_64")))] macro_rules! uint_overflowing_sub { + ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ + uint_overflowing_sub_reg!($name, $n_words, $self_expr, $other) + }) +} + +macro_rules! uint_overflowing_sub_reg { ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ let $name(ref me) = $self_expr; let $name(ref you) = $other; @@ -255,9 +261,7 @@ macro_rules! uint_overflowing_sub { (U512(result), overflow != 0) }); ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ - let res = overflowing!((!$other).overflowing_add(From::from(1u64))); - let res = overflowing!($self_expr.overflowing_add(res)); - (res, $self_expr < $other) + uint_overflowing_sub_reg!($name, $n_words, $self_expr, $other) }) } From 389779d86ca0d714cff0a8861a0f9ac58219d6d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Wed, 9 Mar 2016 00:05:47 +0100 Subject: [PATCH 10/12] Updating benchmarks to avoid inlining/optimizing --- util/benches/bigint.rs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs index 80c4ce1d8..3f4164d18 100644 --- a/util/benches/bigint.rs +++ b/util/benches/bigint.rs @@ -50,9 +50,16 @@ fn u256_sub(b: &mut Bencher) { fn u512_sub(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); - (0..n).fold(U512([rand::random::(), rand::random::(), rand::random::(), rand::random::(), - rand::random::(), rand::random::(), rand::random::(), rand::random::()]), - |old, new| { old.overflowing_sub(U512([0, 0, 0, 0, 0, 0, 0, new])).0 }) + (0..n).fold( + U512([ + rand::random::(), rand::random::(), rand::random::(), rand::random::(), + rand::random::(), rand::random::(), rand::random::(), rand::random::() + ]), + |old, new| { + let p = new % 2; + old.overflowing_sub(U512([p, p, p, p, p, p, p, new])).0 + } + ) }); } From b3fc16ed9a51e91736a26c7e95cf210399a70d8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Wed, 9 Mar 2016 11:32:23 +0100 Subject: [PATCH 11/12] Fixing bug in multiplication implementation --- util/bigint/src/uint.rs | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index 801c5f5bd..69aaa5809 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -407,7 +407,7 @@ macro_rules! uint_overflowing_mul_reg { let (b_u, b_l) = split(you[i]); for j in 0..$n_words { - if me[j] == 0 { + if me[j] == 0 && carry2 == 0 { continue; } @@ -1640,6 +1640,14 @@ mod tests { assert_eq!(U256::from(1u64) * U256::from(10u64), U256::from(10u64)); } + #[test] + pub fn uint256_mul2() { + let a = U512::from_str("10000000000000000fffffffffffffffe").unwrap(); + let b = U512::from_str("ffffffffffffffffffffffffffffffff").unwrap(); + + assert_eq!(a * b, U512::from_str("10000000000000000fffffffffffffffcffffffffffffffff0000000000000002").unwrap()); + } + #[test] pub fn uint256_overflowing_mul() { assert_eq!( From 79d2beb42aaeb1bec47031b55f723cedadf5d54b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= Date: Wed, 9 Mar 2016 11:50:35 +0100 Subject: [PATCH 12/12] Same bug in full_mul --- util/bigint/src/uint.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs index 69aaa5809..698b12f42 100644 --- a/util/bigint/src/uint.rs +++ b/util/bigint/src/uint.rs @@ -1275,7 +1275,7 @@ impl U256 { let (b_u, b_l) = split(you[i]); for j in 0..4 { - if me[j] == 0 { + if me[j] == 0 && carry2 == 0 { continue; }