From 0fd52176dce0f9801ce0cd9c23ce38e187e23fbf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Mon, 7 Mar 2016 16:26:35 +0100
Subject: [PATCH 01/12] Fixing tests in bigint and util

---
 test.sh                 |  9 ++++++-
 util/bigint/src/uint.rs | 52 --------------------------------------
 util/src/lib.rs         | 56 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 53 deletions(-)

diff --git a/test.sh b/test.sh
index 0f5edb0d1..dd71d120a 100755
--- a/test.sh
+++ b/test.sh
@@ -1,4 +1,11 @@
 #!/bin/sh
 # Running Parity Full Test Sute
 
-cargo test --features ethcore/json-tests $1 -p ethash -p ethcore-util -p ethcore -p ethsync -p ethcore-rpc -p parity
+cargo test --features ethcore/json-tests $1 \
+	-p ethash \
+	-p ethcore-util \
+	-p ethcore \
+	-p ethsync \
+	-p ethcore-rpc \
+	-p parity \
+	-p bigint
diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index bd57e9d6d..62fcd8c6e 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -1948,58 +1948,6 @@ mod tests {
 		assert_eq!(U256([1, 0, 0, 0]), result);
 	}
 
-	#[test]
-	fn u256_multi_muls() {
-		use hash::*;
-
-		let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0]));
-		assert_eq!(U256([0, 0, 0, 0]), result);
-
-		let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([1, 0, 0, 0]));
-		assert_eq!(U256([1, 0, 0, 0]), result);
-
-		let (result, _) = U256([5, 0, 0, 0]).overflowing_mul(U256([5, 0, 0, 0]));
-		assert_eq!(U256([25, 0, 0, 0]), result);
-
-		let (result, _) = U256([0, 5, 0, 0]).overflowing_mul(U256([0, 5, 0, 0]));
-		assert_eq!(U256([0, 0, 25, 0]), result);
-
-		let (result, _) = U256([0, 0, 0, 1]).overflowing_mul(U256([1, 0, 0, 0]));
-		assert_eq!(U256([0, 0, 0, 1]), result);
-
-		let (result, _) = U256([0, 0, 0, 5]).overflowing_mul(U256([2, 0, 0, 0]));
-		assert_eq!(U256([0, 0, 0, 10]), result);
-
-		let (result, _) = U256([0, 0, 1, 0]).overflowing_mul(U256([0, 5, 0, 0]));
-		assert_eq!(U256([0, 0, 0, 5]), result);
-
-		let (result, _) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0]));
-		assert_eq!(U256([0, 0, 0, 0]), result);
-
-		let (result, _) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 5, 0, 0]));
-		assert_eq!(U256([0, 10, 0, 0]), result);
-
-		let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
-		assert_eq!(U256([0, 0, 0, ::std::u64::MAX]), result);
-
-		let x1 = U256::from_str("0000000000000000000000000000000000000000000000000000012365124623").unwrap();
-		let x2sqr_right = U256::from_str("000000000000000000000000000000000000000000014baeef72e0378e2328c9").unwrap();
-		let x1sqr = x1 * x1;
-		assert_eq!(H256::from(x2sqr_right), H256::from(x1sqr));
-		let x1cube = x1sqr * x1;
-		let x1cube_right = U256::from_str("0000000000000000000000000000000001798acde139361466f712813717897b").unwrap();
-		assert_eq!(H256::from(x1cube_right), H256::from(x1cube));
-		let x1quad = x1cube * x1;
-		let x1quad_right = U256::from_str("000000000000000000000001adbdd6bd6ff027485484b97f8a6a4c7129756dd1").unwrap();
-		assert_eq!(H256::from(x1quad_right), H256::from(x1quad));
-		let x1penta = x1quad * x1;
-		let x1penta_right = U256::from_str("00000000000001e92875ac24be246e1c57e0507e8c46cc8d233b77f6f4c72993").unwrap();
-		assert_eq!(H256::from(x1penta_right), H256::from(x1penta));
-		let x1septima = x1penta * x1;
-		let x1septima_right = U256::from_str("00022cca1da3f6e5722b7d3cc5bbfb486465ebc5a708dd293042f932d7eee119").unwrap();
-		assert_eq!(H256::from(x1septima_right), H256::from(x1septima));
-	}
-
     #[test]
     fn u256_multi_muls_overflow() {
 		let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0]));
diff --git a/util/src/lib.rs b/util/src/lib.rs
index a50ba8da4..344da0980 100644
--- a/util/src/lib.rs
+++ b/util/src/lib.rs
@@ -167,3 +167,59 @@ pub use io::*;
 pub use log::*;
 pub use kvdb::*;
 
+#[cfg(test)]
+mod tests {
+	use super::numbers::*;
+	use std::str::FromStr;
+
+	#[test]
+	fn u256_multi_muls() {
+
+		let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0]));
+		assert_eq!(U256([0, 0, 0, 0]), result);
+
+		let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([1, 0, 0, 0]));
+		assert_eq!(U256([1, 0, 0, 0]), result);
+
+		let (result, _) = U256([5, 0, 0, 0]).overflowing_mul(U256([5, 0, 0, 0]));
+		assert_eq!(U256([25, 0, 0, 0]), result);
+
+		let (result, _) = U256([0, 5, 0, 0]).overflowing_mul(U256([0, 5, 0, 0]));
+		assert_eq!(U256([0, 0, 25, 0]), result);
+
+		let (result, _) = U256([0, 0, 0, 1]).overflowing_mul(U256([1, 0, 0, 0]));
+		assert_eq!(U256([0, 0, 0, 1]), result);
+
+		let (result, _) = U256([0, 0, 0, 5]).overflowing_mul(U256([2, 0, 0, 0]));
+		assert_eq!(U256([0, 0, 0, 10]), result);
+
+		let (result, _) = U256([0, 0, 1, 0]).overflowing_mul(U256([0, 5, 0, 0]));
+		assert_eq!(U256([0, 0, 0, 5]), result);
+
+		let (result, _) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0]));
+		assert_eq!(U256([0, 0, 0, 0]), result);
+
+		let (result, _) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 5, 0, 0]));
+		assert_eq!(U256([0, 10, 0, 0]), result);
+
+		let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
+		assert_eq!(U256([0, 0, 0, ::std::u64::MAX]), result);
+
+		let x1 = U256::from_str("0000000000000000000000000000000000000000000000000000012365124623").unwrap();
+		let x2sqr_right = U256::from_str("000000000000000000000000000000000000000000014baeef72e0378e2328c9").unwrap();
+		let x1sqr = x1 * x1;
+		assert_eq!(H256::from(x2sqr_right), H256::from(x1sqr));
+		let x1cube = x1sqr * x1;
+		let x1cube_right = U256::from_str("0000000000000000000000000000000001798acde139361466f712813717897b").unwrap();
+		assert_eq!(H256::from(x1cube_right), H256::from(x1cube));
+		let x1quad = x1cube * x1;
+		let x1quad_right = U256::from_str("000000000000000000000001adbdd6bd6ff027485484b97f8a6a4c7129756dd1").unwrap();
+		assert_eq!(H256::from(x1quad_right), H256::from(x1quad));
+		let x1penta = x1quad * x1;
+		let x1penta_right = U256::from_str("00000000000001e92875ac24be246e1c57e0507e8c46cc8d233b77f6f4c72993").unwrap();
+		assert_eq!(H256::from(x1penta_right), H256::from(x1penta));
+		let x1septima = x1penta * x1;
+		let x1septima_right = U256::from_str("00022cca1da3f6e5722b7d3cc5bbfb486465ebc5a708dd293042f932d7eee119").unwrap();
+		assert_eq!(H256::from(x1septima_right), H256::from(x1septima));
+	}
+}

From 4717be07d647c9a3fa1e0da5ec636a3d67a94d3a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Mon, 7 Mar 2016 16:17:14 +0100
Subject: [PATCH 02/12] Optimizing mul_u32

---
 util/benches/bigint.rs  |  2 +-
 util/bigint/src/uint.rs | 49 ++++++++++++-----------------------------
 2 files changed, 15 insertions(+), 36 deletions(-)

diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
index 575164cb6..80c4ce1d8 100644
--- a/util/benches/bigint.rs
+++ b/util/benches/bigint.rs
@@ -79,7 +79,7 @@ fn u256_full_mul(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
 		(0..n).fold(U256([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]),
-			|old, new| {
+			|old, _new| {
 				let U512(ref u512words) = old.full_mul(U256([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]));
 				U256([u512words[0], u512words[2], u512words[2], u512words[3]])
 			})
diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index 62fcd8c6e..ad4f0a99c 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -711,52 +711,31 @@ macro_rules! construct_uint {
 			#[allow(dead_code)] // not used when multiplied with inline assembly
 			/// Multiplication by u32
 			fn mul_u32(self, other: u32) -> Self {
-				let $name(ref arr) = self;
-				let mut carry = [0u64; $n_words];
-				let mut ret = [0u64; $n_words];
-				for i in 0..$n_words {
-					let upper = other as u64 * (arr[i] >> 32);
-					let lower = other as u64 * (arr[i] & 0xFFFFFFFF);
-
-					ret[i] = lower.wrapping_add(upper << 32);
-
-					if i < $n_words - 1 {
-						carry[i + 1] = upper >> 32;
-						if ret[i] < lower {
-							carry[i + 1] += 1;
-						}
-					}
-				}
-				$name(ret) + $name(carry)
+				let (ret, overflow) = self.overflowing_mul_u32(other);
+				panic_on_overflow!(overflow);
+				ret
 			}
 
 			#[allow(dead_code)] // not used when multiplied with inline assembly
 			/// Overflowing multiplication by u32
 			fn overflowing_mul_u32(self, other: u32) -> (Self, bool) {
 				let $name(ref arr) = self;
-				let mut carry = [0u64; $n_words];
+				let o = other as u64;
+				let mut carry = [0u64; $n_words + 1];
 				let mut ret = [0u64; $n_words];
-				let mut overflow = false;
+
 				for i in 0..$n_words {
-					let upper = other as u64 * (arr[i] >> 32);
-					let lower = other as u64 * (arr[i] & 0xFFFFFFFF);
+					let upper = o * (arr[i] >> 32);
+					let lower = o * (arr[i] & 0xFFFFFFFF);
 
-					ret[i] = lower.wrapping_add(upper << 32);
+					let (res1, overflow1) = lower.overflowing_add(upper << 32);
+					let (res2, overflow2) = res1.overflowing_add(carry[i]);
 
-					if i < $n_words - 1 {
-						carry[i + 1] = upper >> 32;
-						if ret[i] < lower {
-							carry[i + 1] += 1;
-						}
-					} else if (upper >> 32) > 0 || ret[i] < lower {
-						overflow = true
-					}
+					ret[i] = res2;
+					carry[i + 1] = (upper >> 32) + overflow1 as u64 + overflow2 as u64;
 				}
-				let result = overflowing!(
-					$name(ret).overflowing_add($name(carry)),
-					overflow
-					);
-				(result, overflow)
+
+				($name(ret), carry[$n_words] > 0)
 			}
 		}
 

From cc0adf544208a36786e0ffa7f219ff79983d380b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Mon, 7 Mar 2016 17:06:08 +0100
Subject: [PATCH 03/12] Optimizing and simplifying add and shl

---
 util/bigint/src/uint.rs | 89 ++++++++++++++++-------------------------
 1 file changed, 34 insertions(+), 55 deletions(-)

diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index ad4f0a99c..47a975d5f 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -71,29 +71,19 @@ macro_rules! uint_overflowing_add_reg {
 	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
 		let $name(ref me) = $self_expr;
 		let $name(ref you) = $other;
+
 		let mut ret = [0u64; $n_words];
-		let mut carry = [0u64; $n_words];
-		let mut b_carry = false;
-		let mut overflow = false;
+		let mut carry = [0u64; $n_words + 1];
 
 		for i in 0..$n_words {
-			ret[i] = me[i].wrapping_add(you[i]);
+			let (res1, overflow1) = me[i].overflowing_add(you[i]);
+			let (res2, overflow2) = res1.overflowing_add(carry[i]);
 
-			if ret[i] < me[i] {
-				if i < $n_words - 1 {
-					carry[i + 1] = 1;
-					b_carry = true;
-				} else {
-					overflow = true;
-				}
-			}
-		}
-		if b_carry {
-			let ret = overflowing!($name(ret).overflowing_add($name(carry)), overflow);
-			(ret, overflow)
-		} else {
-			($name(ret), overflow)
+			ret[i] = res2;
+			carry[i+1] = overflow1 as u64 + overflow2 as u64;
 		}
+
+		($name(ret), carry[$n_words] > 0)
 	})
 }
 
@@ -673,37 +663,10 @@ macro_rules! construct_uint {
 			}
 
 			fn overflowing_shl(self, shift32: u32) -> ($name, bool) {
-				let $name(ref original) = self;
-				let mut ret = [0u64; $n_words];
 				let shift = shift32 as usize;
-				let word_shift = shift / 64;
-				let bit_shift = shift % 64;
-				for i in 0..$n_words {
-					// Shift
-					if i + word_shift < $n_words {
-						ret[i + word_shift] += original[i] << bit_shift;
-					}
-					// Carry
-					if bit_shift > 0 && i + word_shift + 1 < $n_words {
-						ret[i + word_shift + 1] += original[i] >> (64 - bit_shift);
-					}
-				}
-				// Detecting overflow
-				let last = $n_words - word_shift - if bit_shift > 0 { 1 } else { 0 };
-				let overflow = if bit_shift > 0 {
-					(original[last] >> (64 - bit_shift)) > 0
-				} else if word_shift > 0 {
-					original[last] > 0
-				} else {
-					false
-				};
 
-				for i in last+1..$n_words-1 {
-					if original[i] > 0 {
-						return ($name(ret), true);
-					}
-				}
-				($name(ret), overflow)
+				let res = self << shift;
+				(res, self != (res >> shift))
 			}
 		}
 
@@ -987,14 +950,15 @@ macro_rules! construct_uint {
 				let mut ret = [0u64; $n_words];
 				let word_shift = shift / 64;
 				let bit_shift = shift % 64;
-				for i in 0..$n_words {
-					// Shift
-					if i + word_shift < $n_words {
-						ret[i + word_shift] += original[i] << bit_shift;
-					}
-					// Carry
-					if bit_shift > 0 && i + word_shift + 1 < $n_words {
-						ret[i + word_shift + 1] += original[i] >> (64 - bit_shift);
+
+				// shift
+				for i in word_shift..$n_words {
+					ret[i] += original[i - word_shift] << bit_shift;
+				}
+				// carry
+				if bit_shift > 0 {
+					for i in word_shift+1..$n_words {
+						ret[i] += original[i - 1 - word_shift] >> (64 - bit_shift);
 					}
 				}
 				$name(ret)
@@ -1672,6 +1636,11 @@ mod tests {
 
 	#[test]
 	pub fn uint256_shl_overflow() {
+		assert_eq!(
+			U256::from_str("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
+			<< 4,
+			U256::from_str("fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0").unwrap()
+		);
 		assert_eq!(
 			U256::from_str("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
 			.overflowing_shl(4),
@@ -1681,6 +1650,16 @@ mod tests {
 
 	#[test]
 	pub fn uint256_shl_overflow_words() {
+		assert_eq!(
+			U256::from_str("0000000000000001ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
+			<< 64,
+			U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap()
+		);
+		assert_eq!(
+			U256::from_str("0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
+			<< 64,
+			U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap()
+		);
 		assert_eq!(
 			U256::from_str("0000000000000001ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
 			.overflowing_shl(64),

From e7be3c5378c50f2a11d053003946d958a93edf1b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Mon, 7 Mar 2016 17:09:19 +0100
Subject: [PATCH 04/12] Simplifying mul_u32 and add carry

---
 util/bigint/src/uint.rs | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index 47a975d5f..789fc744e 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -73,17 +73,17 @@ macro_rules! uint_overflowing_add_reg {
 		let $name(ref you) = $other;
 
 		let mut ret = [0u64; $n_words];
-		let mut carry = [0u64; $n_words + 1];
+		let mut carry = 0u64;
 
 		for i in 0..$n_words {
 			let (res1, overflow1) = me[i].overflowing_add(you[i]);
-			let (res2, overflow2) = res1.overflowing_add(carry[i]);
+			let (res2, overflow2) = res1.overflowing_add(carry);
 
 			ret[i] = res2;
-			carry[i+1] = overflow1 as u64 + overflow2 as u64;
+			carry = overflow1 as u64 + overflow2 as u64;
 		}
 
-		($name(ret), carry[$n_words] > 0)
+		($name(ret), carry > 0)
 	})
 }
 
@@ -684,21 +684,21 @@ macro_rules! construct_uint {
 			fn overflowing_mul_u32(self, other: u32) -> (Self, bool) {
 				let $name(ref arr) = self;
 				let o = other as u64;
-				let mut carry = [0u64; $n_words + 1];
 				let mut ret = [0u64; $n_words];
+				let mut carry = 0;
 
 				for i in 0..$n_words {
 					let upper = o * (arr[i] >> 32);
 					let lower = o * (arr[i] & 0xFFFFFFFF);
 
 					let (res1, overflow1) = lower.overflowing_add(upper << 32);
-					let (res2, overflow2) = res1.overflowing_add(carry[i]);
+					let (res2, overflow2) = res1.overflowing_add(carry);
 
 					ret[i] = res2;
-					carry[i + 1] = (upper >> 32) + overflow1 as u64 + overflow2 as u64;
+					carry = (upper >> 32) + overflow1 as u64 + overflow2 as u64;
 				}
 
-				($name(ret), carry[$n_words] > 0)
+				($name(ret), carry > 0)
 			}
 		}
 

From c5840be1cb1a55d0daf1c208dbfd3c7bcdea6c7c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Mon, 7 Mar 2016 18:36:17 +0100
Subject: [PATCH 05/12] Small improvements

---
 util/bigint/src/uint.rs | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index 789fc744e..8fbaca532 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -379,11 +379,24 @@ macro_rules! uint_overflowing_mul_reg {
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut res = $name::from(0u64);
 		let mut overflow = false;
-		for i in 0..(2 * $n_words) {
-			let v = overflowing!($self_expr.overflowing_mul_u32(($other >> (32 * i)).low_u32()), overflow);
-			let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow);
-			res = overflowing!(res.overflowing_add(res2), overflow);
+
+		let mut current = $other;
+		let mut current_shift = 0;
+		let mut current_u32;
+		let mut i = 0;
+
+		while i < 2*$n_words {
+			current_u32 = current.low_u32();
+
+			let v = overflowing!($self_expr.overflowing_mul_u32(current_u32), overflow);
+			let v_shifted = overflowing!(v.overflowing_shl(current_shift), overflow);
+			res = overflowing!(res.overflowing_add(v_shifted), overflow);
+
+			current = current >> 32;
+			current_shift += 32;
+			i += 1;
 		}
+
 		(res, overflow)
 	})
 }
@@ -973,6 +986,7 @@ macro_rules! construct_uint {
 				let mut ret = [0u64; $n_words];
 				let word_shift = shift / 64;
 				let bit_shift = shift % 64;
+
 				for i in word_shift..$n_words {
 					// Shift
 					ret[i - word_shift] += original[i] >> bit_shift;
@@ -989,9 +1003,11 @@ macro_rules! construct_uint {
 			fn cmp(&self, other: &$name) -> Ordering {
 				let &$name(ref me) = self;
 				let &$name(ref you) = other;
-				for i in 0..$n_words {
-					if me[$n_words - 1 - i] < you[$n_words - 1 - i] { return Ordering::Less; }
-					if me[$n_words - 1 - i] > you[$n_words - 1 - i] { return Ordering::Greater; }
+				let mut i = $n_words;
+				while i > 0 {
+					i -= 1;
+					if me[i] < you[i] { return Ordering::Less; }
+					if me[i] > you[i] { return Ordering::Greater; }
 				}
 				Ordering::Equal
 			}

From 76865694ce705b945655b685e05ef21ade44e8e9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Mon, 7 Mar 2016 19:03:29 +0100
Subject: [PATCH 06/12] Subtraction optimization

---
 util/bigint/src/uint.rs | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index 8fbaca532..0e8d1e7b7 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -166,9 +166,22 @@ macro_rules! uint_overflowing_add {
 #[cfg(not(all(asm_available, target_arch="x86_64")))]
 macro_rules! uint_overflowing_sub {
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
-		let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
-		let res = overflowing!($self_expr.overflowing_add(res));
-		(res, $self_expr < $other)
+		let $name(ref me) = $self_expr;
+		let $name(ref you) = $other;
+
+		let mut ret = [0u64; $n_words];
+		let mut carry = 0u64;
+
+		for i in 0..$n_words {
+			let (res1, overflow1) = me[i].overflowing_sub(you[i]);
+			let (res2, overflow2) = res1.overflowing_sub(carry);
+
+			ret[i] = res2;
+			carry = overflow1 as u64 + overflow2 as u64;
+		}
+
+		($name(ret), carry > 0)
+
 	})
 }
 

From 17b2d2a2d71897d55bf869d7da194bc730a71a75 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Tue, 8 Mar 2016 01:13:00 +0100
Subject: [PATCH 07/12] Implementing mul and full_mul

---
 util/bigint/src/uint.rs | 180 +++++++++++++++++++++-------------------
 1 file changed, 96 insertions(+), 84 deletions(-)

diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index 0e8d1e7b7..6a6658235 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -390,27 +390,47 @@ macro_rules! uint_overflowing_mul {
 
 macro_rules! uint_overflowing_mul_reg {
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
-		let mut res = $name::from(0u64);
-		let mut overflow = false;
+		let $name(ref me) = $self_expr;
+		let $name(ref you) = $other;
+		let mut ret = [0u64; 2*$n_words];
 
-		let mut current = $other;
-		let mut current_shift = 0;
-		let mut current_u32;
-		let mut i = 0;
+		for i in 0..$n_words {
+			let mut carry2 = 0u64;
+			let (b_u, b_l) = (you[i] >> 32, you[i] & 0xFFFFFFFF);
 
-		while i < 2*$n_words {
-			current_u32 = current.low_u32();
+			for j in 0..$n_words {
+				let a = me[j];
 
-			let v = overflowing!($self_expr.overflowing_mul_u32(current_u32), overflow);
-			let v_shifted = overflowing!(v.overflowing_shl(current_shift), overflow);
-			res = overflowing!(res.overflowing_add(v_shifted), overflow);
+				// multiply parts
+				let (c_l, overflow_l) = mul_u32(a, b_l as u32, ret[j + i]);
+				let (c_u, overflow_u) = mul_u32(a, b_u as u32, c_l >> 32);
 
-			current = current >> 32;
-			current_shift += 32;
-			i += 1;
+				// This won't overflow
+				ret[j + i] = (c_l & 0xFFFFFFFF) + (c_u << 32);
+
+				// carry1 = overflow_l + (c_u >> 32) + (overflow_u << 32) + carry2 + c0;
+				let (ca1, c1) = overflow_l.overflowing_add((c_u >> 32) + (overflow_u << 32));
+				let (ca1, c2) = ca1.overflowing_add(ret[j + i + 1]);
+				let (ca1, c3) = ca1.overflowing_add(carry2);
+
+				ret[j + i + 1] = ca1;
+
+				// Will never overflow
+				carry2 = (overflow_u >> 32) + c1 as u64 + c2 as u64 + c3 as u64;
+			}
 		}
 
-		(res, overflow)
+		let mut res = [0u64; $n_words];
+		let mut overflow = false;
+		for i in 0..$n_words {
+			res[i] = ret[i];
+		}
+
+		for i in $n_words..2*$n_words {
+			overflow |= ret[i] != 0;
+		}
+
+		($name(res), overflow)
 	})
 }
 
@@ -438,6 +458,19 @@ macro_rules! panic_on_overflow {
 	}
 }
 
+#[inline(always)]
+fn mul_u32(a: u64, b: u32, carry: u64) -> (u64, u64) {
+	let b = b as u64;
+	let upper = b * (a >> 32);
+	let lower = b * (a & 0xFFFFFFFF);
+
+	let (res1, overflow1) = lower.overflowing_add(upper << 32);
+	let (res2, overflow2) = res1.overflowing_add(carry);
+
+	let carry = (upper >> 32) + overflow1 as u64 + overflow2 as u64;
+	(res2, carry)
+}
+
 /// Large, fixed-length unsigned integer type.
 pub trait Uint: Sized + Default + FromStr + From<u64> + fmt::Debug + fmt::Display + PartialOrd + Ord + PartialEq + Eq + Hash {
 
@@ -496,9 +529,6 @@ pub trait Uint: Sized + Default + FromStr + From<u64> + fmt::Debug + fmt::Displa
 
 	/// Returns negation of this `Uint` and overflow (always true)
 	fn overflowing_neg(self) -> (Self, bool);
-
-	/// Shifts this `Uint` and returns overflow
-	fn overflowing_shl(self, shift: u32) -> (Self, bool);
 }
 
 macro_rules! construct_uint {
@@ -687,13 +717,6 @@ macro_rules! construct_uint {
 			fn overflowing_neg(self) -> ($name, bool) {
 				(!self, true)
 			}
-
-			fn overflowing_shl(self, shift32: u32) -> ($name, bool) {
-				let shift = shift32 as usize;
-
-				let res = self << shift;
-				(res, self != (res >> shift))
-			}
 		}
 
 		impl $name {
@@ -709,19 +732,13 @@ macro_rules! construct_uint {
 			/// Overflowing multiplication by u32
 			fn overflowing_mul_u32(self, other: u32) -> (Self, bool) {
 				let $name(ref arr) = self;
-				let o = other as u64;
 				let mut ret = [0u64; $n_words];
 				let mut carry = 0;
 
 				for i in 0..$n_words {
-					let upper = o * (arr[i] >> 32);
-					let lower = o * (arr[i] & 0xFFFFFFFF);
-
-					let (res1, overflow1) = lower.overflowing_add(upper << 32);
-					let (res2, overflow2) = res1.overflowing_add(carry);
-
-					ret[i] = res2;
-					carry = (upper >> 32) + overflow1 as u64 + overflow2 as u64;
+					let (res, carry2) = mul_u32(arr[i], other, carry);
+					ret[i] = res;
+					carry = carry2;
 				}
 
 				($name(ret), carry > 0)
@@ -1233,10 +1250,37 @@ impl U256 {
 	/// No overflow possible
 	#[cfg(not(all(asm_available, target_arch="x86_64")))]
 	pub fn full_mul(self, other: U256) -> U512 {
-		let self_512 = U512::from(self);
-		let other_512 = U512::from(other);
-		let (result, _) = self_512.overflowing_mul(other_512);
-		result
+		let U256(ref me) = self;
+		let U256(ref you) = other;
+		let mut ret = [0u64; 8];
+
+		for i in 0..4 {
+			let mut carry2 = 0u64;
+			let (b_u, b_l) = (you[i] >> 32, you[i] & 0xFFFFFFFF);
+
+			for j in 0..4 {
+				let a = me[j];
+
+				// multiply parts
+				let (c_l, overflow_l) = mul_u32(a, b_l as u32, ret[j + i]);
+				let (c_u, overflow_u) = mul_u32(a, b_u as u32, c_l >> 32);
+
+				// This won't overflow
+				ret[j + i] = (c_l & 0xFFFFFFFF) + (c_u << 32);
+
+				// carry1 = overflow_l + (c_u >> 32) + (overflow_u << 32) + carry2 + c0;
+				let (ca1, c1) = overflow_l.overflowing_add((c_u >> 32) + (overflow_u << 32));
+				let (ca1, c2) = ca1.overflowing_add(ret[j + i + 1]);
+				let (ca1, c3) = ca1.overflowing_add(carry2);
+
+				ret[j + i + 1] = ca1;
+
+				// Will never overflow
+				carry2 = (overflow_u >> 32) + c1 as u64 + c2 as u64 + c3 as u64;
+			}
+		}
+
+		U512(ret)
 	}
 }
 
@@ -1502,6 +1546,18 @@ mod tests {
 		//// TODO: bit inversion
 	}
 
+	#[test]
+	pub fn uint256_simple_mul() {
+		let a = U256::from_str("10000000000000000").unwrap();
+		let b = U256::from_str("10000000000000000").unwrap();
+
+		let c = U256::from_str("100000000000000000000000000000000").unwrap();
+		println!("Multiplying");
+		let result = a.overflowing_mul(b);
+		println!("Got result");
+		assert_eq!(result, (c, false))
+	}
+
 	#[test]
 	pub fn uint256_extreme_bitshift_test() {
 		//// Shifting a u64 by 64 bits gives an undefined value, so make sure that
@@ -1664,21 +1720,16 @@ mod tests {
 	}
 
 	#[test]
-	pub fn uint256_shl_overflow() {
+	pub fn uint256_shl() {
 		assert_eq!(
 			U256::from_str("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
 			<< 4,
 			U256::from_str("fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0").unwrap()
 		);
-		assert_eq!(
-			U256::from_str("7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
-			.overflowing_shl(4),
-			(U256::from_str("fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0").unwrap(), true)
-		);
 	}
 
 	#[test]
-	pub fn uint256_shl_overflow_words() {
+	pub fn uint256_shl_words() {
 		assert_eq!(
 			U256::from_str("0000000000000001ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
 			<< 64,
@@ -1689,45 +1740,6 @@ mod tests {
 			<< 64,
 			U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap()
 		);
-		assert_eq!(
-			U256::from_str("0000000000000001ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
-			.overflowing_shl(64),
-			(U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap(), true)
-		);
-		assert_eq!(
-			U256::from_str("0000000000000000ffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
-			.overflowing_shl(64),
-			(U256::from_str("ffffffffffffffffffffffffffffffffffffffffffffffff0000000000000000").unwrap(), false)
-		);
-	}
-
-	#[test]
-	pub fn uint256_shl_overflow_words2() {
-		assert_eq!(
-			U256::from_str("00000000000000000000000000000001ffffffffffffffffffffffffffffffff").unwrap()
-			.overflowing_shl(128),
-			(U256::from_str("ffffffffffffffffffffffffffffffff00000000000000000000000000000000").unwrap(), true)
-		);
-		assert_eq!(
-			U256::from_str("00000000000000000000000000000000ffffffffffffffffffffffffffffffff").unwrap()
-			.overflowing_shl(128),
-			(U256::from_str("ffffffffffffffffffffffffffffffff00000000000000000000000000000000").unwrap(), false)
-		);
-		assert_eq!(
-			U256::from_str("00000000000000000000000000000000ffffffffffffffffffffffffffffffff").unwrap()
-			.overflowing_shl(129),
-			(U256::from_str("fffffffffffffffffffffffffffffffe00000000000000000000000000000000").unwrap(), true)
-		);
-	}
-
-
-	#[test]
-	pub fn uint256_shl_overflow2() {
-		assert_eq!(
-			U256::from_str("0fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff").unwrap()
-			.overflowing_shl(4),
-			(U256::from_str("fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff0").unwrap(), false)
-		);
 	}
 
 	#[test]

From 9ae2341ba9ba33af6e0baa4e0a9ee335c90bf56a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Tue, 8 Mar 2016 10:05:46 +0100
Subject: [PATCH 08/12] Couple of more aggressive optimizations

---
 util/bigint/src/uint.rs | 81 +++++++++++++++++++++++------------------
 1 file changed, 46 insertions(+), 35 deletions(-)

diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index 6a6658235..68af6dad8 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -395,28 +395,31 @@ macro_rules! uint_overflowing_mul_reg {
 		let mut ret = [0u64; 2*$n_words];
 
 		for i in 0..$n_words {
+			if you[i] == 0 {
+				continue;
+			}
+
 			let mut carry2 = 0u64;
-			let (b_u, b_l) = (you[i] >> 32, you[i] & 0xFFFFFFFF);
+			let (b_u, b_l) = split(you[i]);
 
 			for j in 0..$n_words {
-				let a = me[j];
+				if me[j] == 0 {
+					continue;
+				}
+
+				let a = split(me[j]);
 
 				// multiply parts
-				let (c_l, overflow_l) = mul_u32(a, b_l as u32, ret[j + i]);
-				let (c_u, overflow_u) = mul_u32(a, b_u as u32, c_l >> 32);
+				let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]);
+				let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32);
+				ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32);
 
-				// This won't overflow
-				ret[j + i] = (c_l & 0xFFFFFFFF) + (c_u << 32);
+				// Only single overflow possible here
+				let carry = (c_u >> 32) + (overflow_u << 32) + overflow_l + carry2;
+				let (carry, o) = carry.overflowing_add(ret[i + j + 1]);
 
-				// carry1 = overflow_l + (c_u >> 32) + (overflow_u << 32) + carry2 + c0;
-				let (ca1, c1) = overflow_l.overflowing_add((c_u >> 32) + (overflow_u << 32));
-				let (ca1, c2) = ca1.overflowing_add(ret[j + i + 1]);
-				let (ca1, c3) = ca1.overflowing_add(carry2);
-
-				ret[j + i + 1] = ca1;
-
-				// Will never overflow
-				carry2 = (overflow_u >> 32) + c1 as u64 + c2 as u64 + c3 as u64;
+				ret[i + j + 1] = carry;
+				carry2 = o as u64;
 			}
 		}
 
@@ -459,10 +462,9 @@ macro_rules! panic_on_overflow {
 }
 
 #[inline(always)]
-fn mul_u32(a: u64, b: u32, carry: u64) -> (u64, u64) {
-	let b = b as u64;
-	let upper = b * (a >> 32);
-	let lower = b * (a & 0xFFFFFFFF);
+fn mul_u32(a: (u64, u64), b: u64, carry: u64) -> (u64, u64) {
+	let upper = b * a.0;
+	let lower = b * a.1;
 
 	let (res1, overflow1) = lower.overflowing_add(upper << 32);
 	let (res2, overflow2) = res1.overflowing_add(carry);
@@ -471,6 +473,11 @@ fn mul_u32(a: u64, b: u32, carry: u64) -> (u64, u64) {
 	(res2, carry)
 }
 
+#[inline(always)]
+fn split(a: u64) -> (u64, u64) {
+	(a >> 32, a & 0xFFFFFFFF)
+}
+
 /// Large, fixed-length unsigned integer type.
 pub trait Uint: Sized + Default + FromStr + From<u64> + fmt::Debug + fmt::Display + PartialOrd + Ord + PartialEq + Eq + Hash {
 
@@ -734,9 +741,10 @@ macro_rules! construct_uint {
 				let $name(ref arr) = self;
 				let mut ret = [0u64; $n_words];
 				let mut carry = 0;
+				let o = other as u64;
 
 				for i in 0..$n_words {
-					let (res, carry2) = mul_u32(arr[i], other, carry);
+					let (res, carry2) = mul_u32(split(arr[i]), o, carry);
 					ret[i] = res;
 					carry = carry2;
 				}
@@ -1255,28 +1263,31 @@ impl U256 {
 		let mut ret = [0u64; 8];
 
 		for i in 0..4 {
+			if you[i] == 0 {
+				continue;
+			}
+
 			let mut carry2 = 0u64;
-			let (b_u, b_l) = (you[i] >> 32, you[i] & 0xFFFFFFFF);
+			let (b_u, b_l) = split(you[i]);
 
 			for j in 0..4 {
-				let a = me[j];
+				if me[j] == 0 {
+					continue;
+				}
+
+				let a = split(me[j]);
 
 				// multiply parts
-				let (c_l, overflow_l) = mul_u32(a, b_l as u32, ret[j + i]);
-				let (c_u, overflow_u) = mul_u32(a, b_u as u32, c_l >> 32);
+				let (c_l, overflow_l) = mul_u32(a, b_l, ret[i + j]);
+				let (c_u, overflow_u) = mul_u32(a, b_u, c_l >> 32);
+				ret[i + j] = (c_l & 0xFFFFFFFF) + (c_u << 32);
 
-				// This won't overflow
-				ret[j + i] = (c_l & 0xFFFFFFFF) + (c_u << 32);
+				// Only a single overflow is possible here
+				let carry = (c_u >> 32) + (overflow_u << 32) + overflow_l + carry2;
+				let (carry, o) = carry.overflowing_add(ret[i + j + 1]);
 
-				// carry1 = overflow_l + (c_u >> 32) + (overflow_u << 32) + carry2 + c0;
-				let (ca1, c1) = overflow_l.overflowing_add((c_u >> 32) + (overflow_u << 32));
-				let (ca1, c2) = ca1.overflowing_add(ret[j + i + 1]);
-				let (ca1, c3) = ca1.overflowing_add(carry2);
-
-				ret[j + i + 1] = ca1;
-
-				// Will never overflow
-				carry2 = (overflow_u >> 32) + c1 as u64 + c2 as u64 + c3 as u64;
+				ret[i + j + 1] = carry;
+				carry2 = o as u64;
 			}
 		}
 

From c47209e9bf25b72684aa4699c6a70a1dfe6307b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Tue, 8 Mar 2016 12:09:04 +0100
Subject: [PATCH 09/12] Using better subtraction when optimizations are enabled

---
 util/bigint/src/uint.rs | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index 68af6dad8..801c5f5bd 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -165,6 +165,12 @@ macro_rules! uint_overflowing_add {
 
 #[cfg(not(all(asm_available, target_arch="x86_64")))]
 macro_rules! uint_overflowing_sub {
+	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		uint_overflowing_sub_reg!($name, $n_words, $self_expr, $other)
+	})
+}
+
+macro_rules! uint_overflowing_sub_reg {
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let $name(ref me) = $self_expr;
 		let $name(ref you) = $other;
@@ -255,9 +261,7 @@ macro_rules! uint_overflowing_sub {
 		(U512(result), overflow != 0)
 	});
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
-		let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
-		let res = overflowing!($self_expr.overflowing_add(res));
-		(res, $self_expr < $other)
+		uint_overflowing_sub_reg!($name, $n_words, $self_expr, $other)
 	})
 }
 

From 389779d86ca0d714cff0a8861a0f9ac58219d6d1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Wed, 9 Mar 2016 00:05:47 +0100
Subject: [PATCH 10/12] Updating benchmarks to avoid inlining/optimizing

---
 util/benches/bigint.rs | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
index 80c4ce1d8..3f4164d18 100644
--- a/util/benches/bigint.rs
+++ b/util/benches/bigint.rs
@@ -50,9 +50,16 @@ fn u256_sub(b: &mut Bencher) {
 fn u512_sub(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
-		(0..n).fold(U512([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(),
-				rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]),
-			|old, new| { old.overflowing_sub(U512([0, 0, 0, 0, 0, 0, 0, new])).0 })
+		(0..n).fold(
+			U512([
+				rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(),
+				rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()
+			]),
+			|old, new| {
+				let p = new % 2;
+				old.overflowing_sub(U512([p, p, p, p, p, p, p, new])).0
+			}
+		)
 	});
 }
 

From b3fc16ed9a51e91736a26c7e95cf210399a70d8d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Wed, 9 Mar 2016 11:32:23 +0100
Subject: [PATCH 11/12] Fixing bug in multiplication implementation

---
 util/bigint/src/uint.rs | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index 801c5f5bd..69aaa5809 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -407,7 +407,7 @@ macro_rules! uint_overflowing_mul_reg {
 			let (b_u, b_l) = split(you[i]);
 
 			for j in 0..$n_words {
-				if me[j] == 0 {
+				if me[j] == 0 && carry2 == 0 {
 					continue;
 				}
 
@@ -1640,6 +1640,14 @@ mod tests {
 		assert_eq!(U256::from(1u64) * U256::from(10u64), U256::from(10u64));
 	}
 
+	#[test]
+	pub fn uint256_mul2() {
+		let a = U512::from_str("10000000000000000fffffffffffffffe").unwrap();
+		let b = U512::from_str("ffffffffffffffffffffffffffffffff").unwrap();
+
+		assert_eq!(a * b, U512::from_str("10000000000000000fffffffffffffffcffffffffffffffff0000000000000002").unwrap());
+	}
+
 	#[test]
 	pub fn uint256_overflowing_mul() {
 		assert_eq!(

From 79d2beb42aaeb1bec47031b55f723cedadf5d54b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tomasz=20Drwi=C4=99ga?= <tomasz@ethcore.io>
Date: Wed, 9 Mar 2016 11:50:35 +0100
Subject: [PATCH 12/12] Same bug in full_mul

---
 util/bigint/src/uint.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/util/bigint/src/uint.rs b/util/bigint/src/uint.rs
index 69aaa5809..698b12f42 100644
--- a/util/bigint/src/uint.rs
+++ b/util/bigint/src/uint.rs
@@ -1275,7 +1275,7 @@ impl U256 {
 			let (b_u, b_l) = split(you[i]);
 
 			for j in 0..4 {
-				if me[j] == 0 {
+				if me[j] == 0 && carry2 == 0 {
 					continue;
 				}