From dd8652dbf41715dcb44e7fa4c835634b99c1d771 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Wed, 24 Feb 2016 21:17:29 +0300
Subject: [PATCH 01/23] u256 to inline assembly opt

---
 util/benches/bigint.rs | 37 +++++++++++++++++
 util/src/lib.rs        |  1 +
 util/src/uint.rs       | 90 ++++++++++++++++++++++++++++++------------
 3 files changed, 102 insertions(+), 26 deletions(-)
 create mode 100644 util/benches/bigint.rs

diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
new file mode 100644
index 000000000..6ba30d88c
--- /dev/null
+++ b/util/benches/bigint.rs
@@ -0,0 +1,37 @@
+// Copyright 2015, 2016 Ethcore (UK) Ltd.
+// This file is part of Parity.
+
+// Parity is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Parity is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Parity.  If not, see <http://www.gnu.org/licenses/>.
+
+//! benchmarking for bigint
+//! should be started with:
+//! ```bash
+//! multirust run nightly cargo bench
+//! ```
+
+#![feature(test)]
+
+extern crate test;
+extern crate ethcore_util;
+
+use test::{Bencher, black_box};
+use ethcore_util::uint::*;
+
+#[bench]
+fn u256_first_degree(b: &mut test::Bencher) {
+	b.iter(|| {
+		let n = black_box(10000);
+		(0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 })
+	});
+}
diff --git a/util/src/lib.rs b/util/src/lib.rs
index 2b7438cf3..1f04240dc 100644
--- a/util/src/lib.rs
+++ b/util/src/lib.rs
@@ -16,6 +16,7 @@
 
 #![warn(missing_docs)]
 #![cfg_attr(feature="dev", feature(plugin))]
+#![cfg_attr(feature="dev", feature(asm))]
 #![cfg_attr(feature="dev", plugin(clippy))]
 
 // Clippy settings
diff --git a/util/src/uint.rs b/util/src/uint.rs
index 6490cbd9b..8266aff42 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -51,6 +51,64 @@ macro_rules! impl_map_from {
 	}
 }
 
+macro_rules! overflowing_add_regular {
+	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
+		let $name(ref me) = $self_expr;
+		let $name(ref you) = $other;
+		let mut ret = [0u64; $n_words];
+		let mut carry = [0u64; $n_words];
+		let mut b_carry = false;
+		let mut overflow = false;
+
+		for i in 0..$n_words {
+			ret[i] = me[i].wrapping_add(you[i]);
+
+			if ret[i] < me[i] {
+				if i < $n_words - 1 {
+					carry[i + 1] = 1;
+					b_carry = true;
+				} else {
+					overflow = true;
+				}
+			}
+		}
+		if b_carry {
+			let ret = overflowing!($name(ret).overflowing_add($name(carry)), overflow);
+			(ret, overflow)
+		} else {
+			($name(ret), overflow)
+		}
+	})
+}
+
+macro_rules! overflowing_add_u256_asm {
+	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
+		let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
+		let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
+
+		let overflow: u8;
+		unsafe {
+			asm!("
+				xor %al, %al
+				adc $9, %r8
+				adc $10, %r9
+				adc $11, %r10
+				adc $12, %r11
+				adc $$0, %al"
+				: "={r8}"(result[0]), "={r9}"(result[1]), "={r10}"(result[2]), "={r11}"(result[3]), "={al}"(overflow)
+				: "{r8}"(self_t[0]), "{r9}"(self_t[1]), "{r10}"(self_t[2]), "{r11}"(self_t[3]), "m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3])
+				:
+				:
+			);
+		}
+		(U256(result), overflow != 0)
+	});
+	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
+		overflowing_add_regular!($name, $n_words, $self_expr, $other)
+	)
+}
+
 macro_rules! overflowing {
 	($op: expr, $overflow: expr) => (
 		{
@@ -297,32 +355,14 @@ macro_rules! construct_uint {
 				(res, overflow)
 			}
 
+			#[cfg(all(feature = "dev", target_arch = "x86_64"))]
 			fn overflowing_add(self, other: $name) -> ($name, bool) {
-				let $name(ref me) = self;
-				let $name(ref you) = other;
-				let mut ret = [0u64; $n_words];
-				let mut carry = [0u64; $n_words];
-				let mut b_carry = false;
-				let mut overflow = false;
+				overflowing_add_u256_asm!($name, $n_words, self, other)
+			}
 
-				for i in 0..$n_words {
-					ret[i] = me[i].wrapping_add(you[i]);
-
-					if ret[i] < me[i] {
-						if i < $n_words - 1 {
-							carry[i + 1] = 1;
-							b_carry = true;
-						} else {
-							overflow = true;
-						}
-					}
-				}
-				if b_carry {
-					let ret = overflowing!($name(ret).overflowing_add($name(carry)), overflow);
-					(ret, overflow)
-				} else {
-					($name(ret), overflow)
-				}
+			#[cfg(not(all(feature = "dev", target_arch = "x86_64")))]
+			fn overflowing_add(self, other: $name) -> ($name, bool) {
+				overflowing_add_regular!($name, $n_words, self, other)
 			}
 
 			fn overflowing_sub(self, other: $name) -> ($name, bool) {
@@ -1171,8 +1211,6 @@ mod tests {
 		);
 	}
 
-
-
 	#[test]
 	#[should_panic]
 	pub fn uint256_mul_overflow_panic() {

From 476bb85d414ab5ae0ee1d67435757a517a3ff430 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Wed, 24 Feb 2016 21:36:31 +0300
Subject: [PATCH 02/23] r m/r + setc/xor

---
 util/src/uint.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 8266aff42..db8792592 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -91,13 +91,13 @@ macro_rules! overflowing_add_u256_asm {
 		unsafe {
 			asm!("
 				xor %al, %al
-				adc $9, %r8
-				adc $10, %r9
-				adc $11, %r10
-				adc $12, %r11
-				adc $$0, %al"
-				: "={r8}"(result[0]), "={r9}"(result[1]), "={r10}"(result[2]), "={r11}"(result[3]), "={al}"(overflow)
-				: "{r8}"(self_t[0]), "{r9}"(self_t[1]), "{r10}"(self_t[2]), "{r11}"(self_t[3]), "m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3])
+                adc $9, $0
+                adc $10, $1
+                adc $11, $2
+                adc $12, $3
+                adc $$0, %al"
+             	: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
+				: "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
 				:
 				:
 			);

From 7821505139c32d8326dae60efe9b6b63e8e7530f Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Wed, 24 Feb 2016 23:08:21 +0300
Subject: [PATCH 03/23] sub x64 optimize

---
 util/benches/bigint.rs | 11 +++++++++-
 util/src/uint.rs       | 49 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 52 insertions(+), 8 deletions(-)

diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
index 6ba30d88c..524d31508 100644
--- a/util/benches/bigint.rs
+++ b/util/benches/bigint.rs
@@ -29,9 +29,18 @@ use test::{Bencher, black_box};
 use ethcore_util::uint::*;
 
 #[bench]
-fn u256_first_degree(b: &mut test::Bencher) {
+fn u256_add(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
 		(0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 })
 	});
 }
+
+#[bench]
+fn u256_sub(b: &mut Bencher) {
+	b.iter(|| {
+		let n = black_box(10000);
+		(0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 })
+	});
+}
+
diff --git a/util/src/uint.rs b/util/src/uint.rs
index db8792592..147b83e42 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -41,6 +41,8 @@ use from_json::*;
 use rustc_serialize::hex::ToHex;
 use serde;
 
+#[cfg_attr(x64_asm_optimizations, all(feature = "dev", target_arch = "x86_64"))]
+
 macro_rules! impl_map_from {
 	($thing:ident, $from:ty, $to:ty) => {
 		impl From<$from> for $thing {
@@ -81,7 +83,7 @@ macro_rules! overflowing_add_regular {
 	})
 }
 
-macro_rules! overflowing_add_u256_asm {
+macro_rules! add_64x_optimized {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
 		let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
@@ -90,12 +92,38 @@ macro_rules! overflowing_add_u256_asm {
 		let overflow: u8;
 		unsafe {
 			asm!("
-				xor %al, %al
                 adc $9, $0
                 adc $10, $1
                 adc $11, $2
                 adc $12, $3
-                adc $$0, %al"
+                setc %al"
+             	: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
+				: "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
+				:
+				:
+			);
+		}
+		(U256(result), overflow != 0)
+	});
+	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
+		overflowing_add_regular!($name, $n_words, $self_expr, $other)
+	)
+}
+
+macro_rules! sub_64x_optimized {
+	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
+		let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
+		let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
+
+		let overflow: u8;
+		unsafe {
+			asm!("
+                sbb $9, %r8
+                sbb $10, %r9
+                sbb $11, %r10
+                sbb $12, %r11
+                setb %al"
              	: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
 				: "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
 				:
@@ -355,16 +383,23 @@ macro_rules! construct_uint {
 				(res, overflow)
 			}
 
-			#[cfg(all(feature = "dev", target_arch = "x86_64"))]
+			/// Optimized instructions
+			#[cfg(x64_asm_optimizations)]
+			#[inline]
 			fn overflowing_add(self, other: $name) -> ($name, bool) {
-				overflowing_add_u256_asm!($name, $n_words, self, other)
+				add_64x_optimized!($name, $n_words, self, other)
 			}
-
-			#[cfg(not(all(feature = "dev", target_arch = "x86_64")))]
+			#[cfg(not(x64_asm_optimizations))]
 			fn overflowing_add(self, other: $name) -> ($name, bool) {
 				overflowing_add_regular!($name, $n_words, self, other)
 			}
 
+			#[cfg(x64_asm_optimizations)]
+			#[inline]
+			fn overflowing_sub(self, other: $name) -> ($name, bool) {
+				sub_64x_optimized!($name, $n_words, self, other)
+			}
+			#[cfg(not(x64_asm_optimizations))]
 			fn overflowing_sub(self, other: $name) -> ($name, bool) {
 				let res = overflowing!((!other).overflowing_add(From::from(1u64)));
 				let res = overflowing!(self.overflowing_add(res));

From ccaa1946810e5566304f5e6e5cc2f33883f2a99b Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 02:00:34 +0300
Subject: [PATCH 04/23] mul, bench showtime

---
 util/benches/bigint.rs |  20 ++++-
 util/src/uint.rs       | 161 +++++++++++++++++++++++++++++++++--------
 2 files changed, 150 insertions(+), 31 deletions(-)

diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
index 524d31508..38ce10a4a 100644
--- a/util/benches/bigint.rs
+++ b/util/benches/bigint.rs
@@ -21,6 +21,7 @@
 //! ```
 
 #![feature(test)]
+#![feature(asm)]
 
 extern crate test;
 extern crate ethcore_util;
@@ -40,7 +41,24 @@ fn u256_add(b: &mut Bencher) {
 fn u256_sub(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
-		(0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 })
+		(0..n).fold(U256::zero(), |old, new| { old.overflowing_sub(U256::from(new)).0 })
+	});
+}
+
+#[bench]
+fn u256_mul(b: &mut Bencher) {
+	b.iter(|| {
+		let n = black_box(10000);
+		(0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_mul(U256::from(new)).0 })
+	});
+}
+
+
+#[bench]
+fn u128_mul(b: &mut Bencher) {
+	b.iter(|| {
+		let n = black_box(10000);
+		(0..n).fold(U128([12345u64, 0u64]), |old, new| { old.overflowing_mul(U128::from(new)).0 })
 	});
 }
 
diff --git a/util/src/uint.rs b/util/src/uint.rs
index 147b83e42..38b4e4906 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -41,8 +41,6 @@ use from_json::*;
 use rustc_serialize::hex::ToHex;
 use serde;
 
-#[cfg_attr(x64_asm_optimizations, all(feature = "dev", target_arch = "x86_64"))]
-
 macro_rules! impl_map_from {
 	($thing:ident, $from:ty, $to:ty) => {
 		impl From<$from> for $thing {
@@ -53,7 +51,8 @@ macro_rules! impl_map_from {
 	}
 }
 
-macro_rules! overflowing_add_regular {
+#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
+macro_rules! uint_overflowing_add {
 	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
 		let $name(ref me) = $self_expr;
 		let $name(ref you) = $other;
@@ -83,7 +82,8 @@ macro_rules! overflowing_add_regular {
 	})
 }
 
-macro_rules! add_64x_optimized {
+#[cfg(all(feature="dev", target_arch = "x86_64"))]
+macro_rules! uint_overflowing_add {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
 		let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
@@ -110,7 +110,17 @@ macro_rules! add_64x_optimized {
 	)
 }
 
-macro_rules! sub_64x_optimized {
+#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
+macro_rules! uint_overflowing_sub {
+	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
+		let res = overflowing!($self_expr.overflowing_add(res));
+		(res, $self_expr < $other)
+	})
+}
+
+#[cfg(all(feature="dev", target_arch = "x86_64"))]
+macro_rules! uint_overflowing_sub {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
 		let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
@@ -137,6 +147,119 @@ macro_rules! sub_64x_optimized {
 	)
 }
 
+#[cfg(all(feature="dev", target_arch = "x86_64"))]
+macro_rules! uint_overflowing_mul {
+	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
+		let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
+		let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
+
+		let overflow: u8;
+		unsafe {
+			asm!("
+				mov $5, %rax
+				mulq $9
+				mov %rax, %r8
+				adc $6, %rdx
+				pushf
+
+				mov %rdx, %rax
+				mulq $9
+				popf
+				adc $$0, %rax
+				adc $7, %rdx
+				pushf
+				mov %rax, %r9
+
+
+				mov %rdx, %rax
+				mulq $9
+				popf
+				adc $$0, %rax
+				adc $8, %rdx
+				pushf
+				mov %rax, %r10
+
+				mov %rdx, %rax
+				mulq $9
+				popf
+				adc $$0, %rax
+				mov %rax, %r11
+				mov %rdx, %rcx
+
+				mov $5, %rax
+				mulq $10
+				adc %rax, %r9
+				adc $6, %rdx
+				pushf
+
+				mov %rdx, %rax
+				mulq $10
+				popf
+				adc %rax, %r10
+				adc $7, %rdx
+				pushf
+
+				mov %rdx, %rax
+				mulq $10
+				popf
+				adc %rax, %r11
+				pushf
+				or %rax, %rcx
+
+				mov $5, %rax
+				mulq $11
+				popf
+				adc %rax, %r10
+				adc $6, %rdx
+				pushf
+
+				mov %rdx, %rax
+				mulq $11
+				popf
+				adc %rax, %r11
+				pushf
+				or %rdx, %rcx
+
+				mov $5, %rax
+				mulq $12
+				popf
+				adc %rax, %r11
+			    or %rdx, %rcx
+                "
+				: /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]),
+				  /* $3 */ "={r11}"(result[3]), /* $4 */  "={rcx}"(overflow)
+
+				: /* $5 */ "m"(self_t[0]), /* $6 */ "m"(self_t[1]), /* $7 */  "m"(self_t[2]),
+				  /* $8 */ "m"(self_t[3]), /* $9 */ "m"(other_t[0]), /* $10 */ "m"(other_t[1]),
+				  /* $11 */ "m"(other_t[2]), /* $12 */ "m"(other_t[3])
+				: "rax", "rdx"
+				:
+
+			);
+		}
+		(U256(result), overflow > 0)
+	});
+	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
+		overflowing_mul_regular!($name, $n_words, $self_expr, $other)
+	)
+}
+
+#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
+macro_rules! uint_overflowing_mul {
+	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let mut res = $name::from(0u64);
+		let mut overflow = false;
+		// TODO: be more efficient about this
+		for i in 0..(2 * $n_words) {
+			let v = overflowing!($self_expr.overflowing_mul_u32(($other >> (32 * i)).low_u32()), overflow);
+			let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow);
+			res = overflowing!(res.overflowing_add(res2), overflow);
+		}
+		(res, overflow)
+	})
+}
+
 macro_rules! overflowing {
 	($op: expr, $overflow: expr) => (
 		{
@@ -384,38 +507,16 @@ macro_rules! construct_uint {
 			}
 
 			/// Optimized instructions
-			#[cfg(x64_asm_optimizations)]
-			#[inline]
 			fn overflowing_add(self, other: $name) -> ($name, bool) {
-				add_64x_optimized!($name, $n_words, self, other)
-			}
-			#[cfg(not(x64_asm_optimizations))]
-			fn overflowing_add(self, other: $name) -> ($name, bool) {
-				overflowing_add_regular!($name, $n_words, self, other)
+				uint_overflowing_add!($name, $n_words, self, other)
 			}
 
-			#[cfg(x64_asm_optimizations)]
-			#[inline]
 			fn overflowing_sub(self, other: $name) -> ($name, bool) {
-				sub_64x_optimized!($name, $n_words, self, other)
-			}
-			#[cfg(not(x64_asm_optimizations))]
-			fn overflowing_sub(self, other: $name) -> ($name, bool) {
-				let res = overflowing!((!other).overflowing_add(From::from(1u64)));
-				let res = overflowing!(self.overflowing_add(res));
-				(res, self < other)
+				uint_overflowing_sub!($name, $n_words, self, other)
 			}
 
 			fn overflowing_mul(self, other: $name) -> ($name, bool) {
-				let mut res = $name::from(0u64);
-				let mut overflow = false;
-				// TODO: be more efficient about this
-				for i in 0..(2 * $n_words) {
-					let v = overflowing!(self.overflowing_mul_u32((other >> (32 * i)).low_u32()), overflow);
-					let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow);
-					res = overflowing!(res.overflowing_add(res2), overflow);
-				}
-				(res, overflow)
+				uint_overflowing_mul!($name, $n_words, self, other)
 			}
 
 			fn overflowing_div(self, other: $name) -> ($name, bool) {

From 0794049d18708a3d258df42cd43f51ce03a9ae33 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 02:05:59 +0300
Subject: [PATCH 05/23] fix naughty macros

---
 util/src/uint.rs | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 38b4e4906..f27150199 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -53,6 +53,12 @@ macro_rules! impl_map_from {
 
 #[cfg(not(all(feature="dev", target_arch = "x86_64")))]
 macro_rules! uint_overflowing_add {
+	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
+		uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
+	})
+}
+
+macro_rules! uint_overflowing_add_reg {
 	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
 		let $name(ref me) = $self_expr;
 		let $name(ref you) = $other;
@@ -82,6 +88,7 @@ macro_rules! uint_overflowing_add {
 	})
 }
 
+
 #[cfg(all(feature="dev", target_arch = "x86_64"))]
 macro_rules! uint_overflowing_add {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
@@ -106,7 +113,7 @@ macro_rules! uint_overflowing_add {
 		(U256(result), overflow != 0)
 	});
 	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
-		overflowing_add_regular!($name, $n_words, $self_expr, $other)
+		uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
 	)
 }
 
@@ -142,9 +149,11 @@ macro_rules! uint_overflowing_sub {
 		}
 		(U256(result), overflow != 0)
 	});
-	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
-		overflowing_add_regular!($name, $n_words, $self_expr, $other)
-	)
+	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
+		let res = overflowing!($self_expr.overflowing_add(res));
+		(res, $self_expr < $other)
+	})
 }
 
 #[cfg(all(feature="dev", target_arch = "x86_64"))]
@@ -241,12 +250,18 @@ macro_rules! uint_overflowing_mul {
 		(U256(result), overflow > 0)
 	});
 	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
-		overflowing_mul_regular!($name, $n_words, $self_expr, $other)
+		uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
 	)
 }
 
 #[cfg(not(all(feature="dev", target_arch = "x86_64")))]
 macro_rules! uint_overflowing_mul {
+	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+		uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
+	})
+}
+
+macro_rules! uint_overflowing_mul_reg {
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut res = $name::from(0u64);
 		let mut overflow = false;

From da69ea51fe2c5da3b540fb07066588a7a4e27d1e Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 03:09:33 +0300
Subject: [PATCH 06/23] inline

---
 util/benches/bigint.rs | 12 ++++++++++--
 util/src/uint.rs       | 27 ++++++++++++++++-----------
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
index 38ce10a4a..3a9c6d118 100644
--- a/util/benches/bigint.rs
+++ b/util/benches/bigint.rs
@@ -33,7 +33,15 @@ use ethcore_util::uint::*;
 fn u256_add(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
-		(0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 })
+		(0..n).fold(U256::from(1234599u64), |old, new| { old.overflowing_add(U256::from(new)).0 })
+	});
+}
+
+#[bench]
+fn u256_uber_add(b: &mut Bencher) {
+	b.iter(|| {
+		let n = black_box(10000);
+		(0..n).fold(U256::from(1234599u64), |old, new| { old.uber_add(U256::from(new)).0 })
 	});
 }
 
@@ -41,7 +49,7 @@ fn u256_add(b: &mut Bencher) {
 fn u256_sub(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
-		(0..n).fold(U256::zero(), |old, new| { old.overflowing_sub(U256::from(new)).0 })
+		(0..n).fold(U256::from(::std::u64::MAX), |old, new| { old.overflowing_sub(U256::from(new)).0 })
 	});
 }
 
diff --git a/util/src/uint.rs b/util/src/uint.rs
index f27150199..8dd7d8638 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -97,17 +97,19 @@ macro_rules! uint_overflowing_add {
 		let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
 
 		let overflow: u8;
-		unsafe {
-			asm!("
-                adc $9, $0
-                adc $10, $1
-                adc $11, $2
-                adc $12, $3
-                setc %al"
-             	: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
-				: "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
-				:
-				:
+        unsafe {
+            asm!("
+                adc $9, %r8
+                adc $10, %r9
+                adc $11, %r10
+                adc $12, %r11
+                setc %al
+                "
+            : "={r8}"(result[0]), "={r9}"(result[1]), "={r10}"(result[2]), "={r11}"(result[3]), "={al}"(overflow)
+            : "{r8}"(self_t[0]), "{r9}"(self_t[1]), "{r10}"(self_t[2]), "{r11}"(self_t[3]),
+			  "m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3])
+            :
+            :
 			);
 		}
 		(U256(result), overflow != 0)
@@ -522,14 +524,17 @@ macro_rules! construct_uint {
 			}
 
 			/// Optimized instructions
+			#[inline(always)]
 			fn overflowing_add(self, other: $name) -> ($name, bool) {
 				uint_overflowing_add!($name, $n_words, self, other)
 			}
 
+			#[inline(always)]
 			fn overflowing_sub(self, other: $name) -> ($name, bool) {
 				uint_overflowing_sub!($name, $n_words, self, other)
 			}
 
+			#[inline(always)]
 			fn overflowing_mul(self, other: $name) -> ($name, bool) {
 				uint_overflowing_mul!($name, $n_words, self, other)
 			}

From ae76a509dcc956ea329781bbc9cb6b6cc373580e Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 03:10:02 +0300
Subject: [PATCH 07/23] inline test

---
 util/benches/bigint.rs | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
index 3a9c6d118..826d5023e 100644
--- a/util/benches/bigint.rs
+++ b/util/benches/bigint.rs
@@ -37,13 +37,6 @@ fn u256_add(b: &mut Bencher) {
 	});
 }
 
-#[bench]
-fn u256_uber_add(b: &mut Bencher) {
-	b.iter(|| {
-		let n = black_box(10000);
-		(0..n).fold(U256::from(1234599u64), |old, new| { old.uber_add(U256::from(new)).0 })
-	});
-}
 
 #[bench]
 fn u256_sub(b: &mut Bencher) {

From f17d893f53f2551d51e590cb4ce2d296750f4093 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 16:20:57 +0300
Subject: [PATCH 08/23] fixed mul, fixed register pref

---
 util/src/uint.rs | 159 +++++++++++++++++++++++++++--------------------
 1 file changed, 93 insertions(+), 66 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 8dd7d8638..6869c3ec1 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -99,15 +99,15 @@ macro_rules! uint_overflowing_add {
 		let overflow: u8;
         unsafe {
             asm!("
-                adc $9, %r8
-                adc $10, %r9
-                adc $11, %r10
-                adc $12, %r11
+                adc $9, $0
+                adc $10, $1
+                adc $11, $2
+                adc $12, $3
                 setc %al
                 "
-            : "={r8}"(result[0]), "={r9}"(result[1]), "={r10}"(result[2]), "={r11}"(result[3]), "={al}"(overflow)
-            : "{r8}"(self_t[0]), "{r9}"(self_t[1]), "{r10}"(self_t[2]), "{r11}"(self_t[3]),
-			  "m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3])
+            : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
+            : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]),
+			  "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
             :
             :
 			);
@@ -138,10 +138,10 @@ macro_rules! uint_overflowing_sub {
 		let overflow: u8;
 		unsafe {
 			asm!("
-                sbb $9, %r8
-                sbb $10, %r9
-                sbb $11, %r10
-                sbb $12, %r11
+                sbb $9, $0
+                sbb $10, $1
+                sbb $11, $2
+                sbb $12, $3
                 setb %al"
              	: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
 				: "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
@@ -168,76 +168,103 @@ macro_rules! uint_overflowing_mul {
 		let overflow: u8;
 		unsafe {
 			asm!("
+                clc
 				mov $5, %rax
 				mulq $9
-				mov %rax, %r8
-				adc $6, %rdx
-				pushf
+				mov %rax, $0
+				mov %rdx, $1
 
-				mov %rdx, %rax
+				mov $6, %rax
+				mulq $9
+				clc
+				adc %rax, $1
+				mov %rdx, $2
+
+				mov $5, %rax
+				pushf
+				mulq $10
+				popf
+				adc %rax, $1
+				adc %rdx, $2
+
+				mov $6, %rax
+				mulq $10
+				clc
+				adc %rax, $2
+				mov %rdx, $3
+
+				mov $7, %rax
+				mulq $9
+				clc
+				adc %rax, $2
+				adc %rdx, $3
+
+				mov $5, %rax
+				mulq $11
+				clc
+    			adc %rax, $2
+				adc %rdx, $3
+
+				mov $8, %rax
+				pushf
 				mulq $9
 				popf
-				adc $$0, %rax
-				adc $7, %rdx
-				pushf
-				mov %rax, %r9
-
-
-				mov %rdx, %rax
-				mulq $9
-				popf
-				adc $$0, %rax
-				adc $8, %rdx
-				pushf
-				mov %rax, %r10
-
-				mov %rdx, %rax
-				mulq $9
-				popf
-				adc $$0, %rax
-				mov %rax, %r11
+				adc %rax, $3
+				adc $$0, %rdx
 				mov %rdx, %rcx
+				clc
 
-				mov $5, %rax
-				mulq $10
-				adc %rax, %r9
-				adc $6, %rdx
+				mov $7, %rax
 				pushf
-
-				mov %rdx, %rax
 				mulq $10
 				popf
-				adc %rax, %r10
-				adc $7, %rdx
-				pushf
-
-				mov %rdx, %rax
-				mulq $10
-				popf
-				adc %rax, %r11
-				pushf
-				or %rax, %rcx
-
-				mov $5, %rax
-				mulq $11
-				popf
-				adc %rax, %r10
-				adc $6, %rdx
-				pushf
-
-				mov %rdx, %rax
-				mulq $11
-				popf
-				adc %rax, %r11
-				pushf
+				adc %rax, $3
+				adc $$0, %rdx
 				or %rdx, %rcx
+				clc
+
+				mov $6, %rax
+				pushf
+				mulq $11
+				popf
+				adc %rax, $3
+				adc $$0, %rdx
+				or %rdx, %rcx
+				clc
 
 				mov $5, %rax
+				pushf
 				mulq $12
 				popf
-				adc %rax, %r11
-			    or %rdx, %rcx
-                "
+				adc %rax, $3
+				adc $$0, %rdx
+				or %rdx, %rcx
+				clc
+
+				cmpq $$0, %rcx
+				jne 2f
+
+				mov $8, %rax
+				cmpq $$0, %rax
+				setz %cl
+
+				mov $7, %rax
+				cmpq $$0, %rax
+				sete %dl
+				or %dl, %cl
+
+				mov $3, %rax
+				cmpq $$0, %rax
+				sete %dl
+
+				mov $2, %rax
+				cmpq $$0, %rax
+			    sete %bl
+			    or %bl, %dl
+
+			    and %dl, %cl
+
+			    2:              "
 				: /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]),
 				  /* $3 */ "={r11}"(result[3]), /* $4 */  "={rcx}"(overflow)
 

From 5467b06c4f845bd8fde8adf216dec75deb4dbea6 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 16:40:36 +0300
Subject: [PATCH 09/23] fix bench iter

---
 util/benches/bigint.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
index 826d5023e..a22edcfbc 100644
--- a/util/benches/bigint.rs
+++ b/util/benches/bigint.rs
@@ -33,7 +33,7 @@ use ethcore_util::uint::*;
 fn u256_add(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
-		(0..n).fold(U256::from(1234599u64), |old, new| { old.overflowing_add(U256::from(new)).0 })
+		(0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_add(U256::from(new)).0 })
 	});
 }
 
@@ -42,7 +42,7 @@ fn u256_add(b: &mut Bencher) {
 fn u256_sub(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
-		(0..n).fold(U256::from(::std::u64::MAX), |old, new| { old.overflowing_sub(U256::from(new)).0 })
+		(0..n).fold(U256([::std::u64::MAX, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_sub(U256::from(new)).0 })
 	});
 }
 

From fb5779a00eeeef78228ec61e09331fbed503293a Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 16:55:03 +0300
Subject: [PATCH 10/23] specific feature for asm opt

---
 util/Cargo.toml  |  1 +
 util/src/lib.rs  |  2 +-
 util/src/uint.rs | 12 ++++++------
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/util/Cargo.toml b/util/Cargo.toml
index 6d2ebcd9b..e2e91eb4b 100644
--- a/util/Cargo.toml
+++ b/util/Cargo.toml
@@ -39,6 +39,7 @@ target_info = "0.1"
 [features]
 default = []
 dev = ["clippy"]
+x64asm = []
 
 [build-dependencies]
 vergen = "*"
diff --git a/util/src/lib.rs b/util/src/lib.rs
index 1f04240dc..d0c74af10 100644
--- a/util/src/lib.rs
+++ b/util/src/lib.rs
@@ -16,7 +16,7 @@
 
 #![warn(missing_docs)]
 #![cfg_attr(feature="dev", feature(plugin))]
-#![cfg_attr(feature="dev", feature(asm))]
+#![cfg_attr(feature="x64asm", feature(asm))]
 #![cfg_attr(feature="dev", plugin(clippy))]
 
 // Clippy settings
diff --git a/util/src/uint.rs b/util/src/uint.rs
index 6869c3ec1..98c16ab90 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -51,7 +51,7 @@ macro_rules! impl_map_from {
 	}
 }
 
-#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
+#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))]
 macro_rules! uint_overflowing_add {
 	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
 		uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
@@ -89,7 +89,7 @@ macro_rules! uint_overflowing_add_reg {
 }
 
 
-#[cfg(all(feature="dev", target_arch = "x86_64"))]
+#[cfg(all(feature="x64asm", target_arch = "x86_64"))]
 macro_rules! uint_overflowing_add {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
@@ -119,7 +119,7 @@ macro_rules! uint_overflowing_add {
 	)
 }
 
-#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
+#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))]
 macro_rules! uint_overflowing_sub {
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
@@ -128,7 +128,7 @@ macro_rules! uint_overflowing_sub {
 	})
 }
 
-#[cfg(all(feature="dev", target_arch = "x86_64"))]
+#[cfg(all(feature="x64asm", target_arch = "x86_64"))]
 macro_rules! uint_overflowing_sub {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
@@ -158,7 +158,7 @@ macro_rules! uint_overflowing_sub {
 	})
 }
 
-#[cfg(all(feature="dev", target_arch = "x86_64"))]
+#[cfg(all(feature="x64asm", target_arch = "x86_64"))]
 macro_rules! uint_overflowing_mul {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
@@ -283,7 +283,7 @@ macro_rules! uint_overflowing_mul {
 	)
 }
 
-#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
+#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))]
 macro_rules! uint_overflowing_mul {
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)

From 7525ff23cf1802cbeb6e4d51394eac3974de4c70 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 17:59:08 +0300
Subject: [PATCH 11/23] removed artefact clc/pushf/popf

---
 util/src/uint.rs | 40 +++++++++++-----------------------------
 1 file changed, 11 insertions(+), 29 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 98c16ab90..bebaade22 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -168,7 +168,6 @@ macro_rules! uint_overflowing_mul {
 		let overflow: u8;
 		unsafe {
 			asm!("
-                clc
 				mov $5, %rax
 				mulq $9
 				mov %rax, $0
@@ -176,77 +175,59 @@ macro_rules! uint_overflowing_mul {
 
 				mov $6, %rax
 				mulq $9
-				clc
-				adc %rax, $1
+				add %rax, $1
 				mov %rdx, $2
 
 				mov $5, %rax
-				pushf
 				mulq $10
-				popf
-				adc %rax, $1
+				add %rax, $1
 				adc %rdx, $2
 
 				mov $6, %rax
 				mulq $10
-				clc
-				adc %rax, $2
+				add %rax, $2
 				mov %rdx, $3
 
 				mov $7, %rax
 				mulq $9
-				clc
-				adc %rax, $2
+				add %rax, $2
 				adc %rdx, $3
 
 				mov $5, %rax
 				mulq $11
-				clc
-    			adc %rax, $2
+    			add %rax, $2
 				adc %rdx, $3
 
 				mov $8, %rax
-				pushf
 				mulq $9
-				popf
 				adc %rax, $3
 				adc $$0, %rdx
 				mov %rdx, %rcx
-				clc
 
 				mov $7, %rax
-				pushf
 				mulq $10
-				popf
-				adc %rax, $3
+				add %rax, $3
 				adc $$0, %rdx
 				or %rdx, %rcx
-				clc
 
 				mov $6, %rax
-				pushf
 				mulq $11
-				popf
-				adc %rax, $3
+				add %rax, $3
 				adc $$0, %rdx
 				or %rdx, %rcx
-				clc
 
 				mov $5, %rax
-				pushf
 				mulq $12
-				popf
-				adc %rax, $3
+				add %rax, $3
 				adc $$0, %rdx
 				or %rdx, %rcx
-				clc
 
 				cmpq $$0, %rcx
 				jne 2f
 
 				mov $8, %rax
 				cmpq $$0, %rax
-				setz %cl
+				sete %cl
 
 				mov $7, %rax
 				cmpq $$0, %rax
@@ -264,7 +245,8 @@ macro_rules! uint_overflowing_mul {
 
 			    and %dl, %cl
 
-			    2:              "
+			    2:
+			    "
 				: /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]),
 				  /* $3 */ "={r11}"(result[3]), /* $4 */  "={rcx}"(overflow)
 

From 864e7540742ebc1c408e9e2de57f96eee28d7c5b Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 18:02:08 +0300
Subject: [PATCH 12/23] overflowing_sub in sub

---
 util/src/uint.rs | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index bebaade22..6793376a0 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -765,9 +765,9 @@ macro_rules! construct_uint {
 
 			#[inline]
 			fn sub(self, other: $name) -> $name {
-				panic_on_overflow!(self < other);
-				let res = overflowing!((!other).overflowing_add(From::from(1u64)));
-				overflowing!(self.overflowing_add(res))
+				let (result, overflow) = self.overflowing_sub(other);
+				panic_on_overflow!(overflow);
+				result
 			}
 		}
 

From 5d22ad3fc8abe4617684213833550207ddca2c6b Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 18:10:33 +0300
Subject: [PATCH 13/23] counter jump better

---
 util/src/uint.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 6793376a0..f4d5b5b76 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -222,8 +222,7 @@ macro_rules! uint_overflowing_mul {
 				adc $$0, %rdx
 				or %rdx, %rcx
 
-				cmpq $$0, %rcx
-				jne 2f
+				jrcxz 2f
 
 				mov $8, %rax
 				cmpq $$0, %rax
@@ -234,6 +233,8 @@ macro_rules! uint_overflowing_mul {
 				sete %dl
 				or %dl, %cl
 
+				jrcxz 2f
+
 				mov $3, %rax
 				cmpq $$0, %rax
 				sete %dl

From 2ee4a0c8c6ff29d6bbbc15752778590364d9bdb3 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 18:16:08 +0300
Subject: [PATCH 14/23] mistake of jne/jrcxz

---
 util/src/uint.rs | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index f4d5b5b76..6793376a0 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -222,7 +222,8 @@ macro_rules! uint_overflowing_mul {
 				adc $$0, %rdx
 				or %rdx, %rcx
 
-				jrcxz 2f
+				cmpq $$0, %rcx
+				jne 2f
 
 				mov $8, %rax
 				cmpq $$0, %rax
@@ -233,8 +234,6 @@ macro_rules! uint_overflowing_mul {
 				sete %dl
 				or %dl, %cl
 
-				jrcxz 2f
-
 				mov $3, %rax
 				cmpq $$0, %rax
 				sete %dl

From 600859ed04acd3650868f35bf4a1add4f983702d Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 19:58:09 +0300
Subject: [PATCH 15/23] [ci skip] flush

---
 util/src/uint.rs | 48 +++++++++++++-----------------------------------
 1 file changed, 13 insertions(+), 35 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 6793376a0..8e9172a04 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -165,7 +165,7 @@ macro_rules! uint_overflowing_mul {
 		let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
 		let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
 
-		let overflow: u8;
+		let overflow: u64;
 		unsafe {
 			asm!("
 				mov $5, %rax
@@ -222,25 +222,25 @@ macro_rules! uint_overflowing_mul {
 				adc $$0, %rdx
 				or %rdx, %rcx
 
-				cmpq $$0, %rcx
+                cmpq $$0, %rcx
 				jne 2f
 
 				mov $8, %rax
 				cmpq $$0, %rax
-				sete %cl
+				setne %cl
 
 				mov $7, %rax
 				cmpq $$0, %rax
-				sete %dl
+				setne %dl
 				or %dl, %cl
 
 				mov $3, %rax
 				cmpq $$0, %rax
-				sete %dl
+				setne %dl
 
 				mov $2, %rax
 				cmpq $$0, %rax
-			    sete %bl
+			    setne %bl
 			    or %bl, %dl
 
 			    and %dl, %cl
@@ -253,7 +253,7 @@ macro_rules! uint_overflowing_mul {
 				: /* $5 */ "m"(self_t[0]), /* $6 */ "m"(self_t[1]), /* $7 */  "m"(self_t[2]),
 				  /* $8 */ "m"(self_t[3]), /* $9 */ "m"(other_t[0]), /* $10 */ "m"(other_t[1]),
 				  /* $11 */ "m"(other_t[2]), /* $12 */ "m"(other_t[3])
-				: "rax", "rdx"
+           		: "rax", "rdx", "rbx"
 				:
 
 			);
@@ -740,23 +740,8 @@ macro_rules! construct_uint {
 			type Output = $name;
 
 			fn add(self, other: $name) -> $name {
-				let $name(ref me) = self;
-				let $name(ref you) = other;
-				let mut ret = [0u64; $n_words];
-				let mut carry = [0u64; $n_words];
-				let mut b_carry = false;
-				for i in 0..$n_words {
-					if i < $n_words - 1 {
-						ret[i] = me[i].wrapping_add(you[i]);
-						if ret[i] < me[i] {
-							carry[i + 1] = 1;
-							b_carry = true;
-						}
-					} else {
-						ret[i] = me[i] + you[i];
-					}
-				}
-				if b_carry { $name(ret) + $name(carry) } else { $name(ret) }
+				let (result, _) = self.overflowing_add(other);
+				result
 			}
 		}
 
@@ -765,8 +750,7 @@ macro_rules! construct_uint {
 
 			#[inline]
 			fn sub(self, other: $name) -> $name {
-				let (result, overflow) = self.overflowing_sub(other);
-				panic_on_overflow!(overflow);
+				let (result, _) = self.overflowing_sub(other);
 				result
 			}
 		}
@@ -775,15 +759,9 @@ macro_rules! construct_uint {
 			type Output = $name;
 
 			fn mul(self, other: $name) -> $name {
-				let mut res = $name::from(0u64);
-				// TODO: be more efficient about this
-				for i in 0..(2 * $n_words) {
-					let v = self.mul_u32((other >> (32 * i)).low_u32());
-					let (r, overflow) = v.overflowing_shl(32 * i as u32);
-					panic_on_overflow!(overflow);
-					res = res + r;
-				}
-				res
+				let (result, overflow) = self.overflowing_mul(other);
+				panic_on_overflow!(overflow);
+				result
 			}
 		}
 

From e946e2ab183f8c1c60d88769602e654911244396 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 22:27:22 +0300
Subject: [PATCH 16/23] epic mul overflow bug

---
 util/src/uint.rs | 32 +++++++++++++-------------------
 1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 8e9172a04..f9d9b4af8 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -225,25 +225,17 @@ macro_rules! uint_overflowing_mul {
                 cmpq $$0, %rcx
 				jne 2f
 
-				mov $8, %rax
-				cmpq $$0, %rax
-				setne %cl
+				popcnt $8, %rcx
+				popcnt $7, %rax
+				add %rax, %rcx
+				jrcxz 2f
 
-				mov $7, %rax
-				cmpq $$0, %rax
-				setne %dl
-				or %dl, %cl
+				popcnt $12, %rcx
+				popcnt $11, %rax
+				add %rax, %rcx
+				jrcxz 2f
 
-				mov $3, %rax
-				cmpq $$0, %rax
-				setne %dl
-
-				mov $2, %rax
-				cmpq $$0, %rax
-			    setne %bl
-			    or %bl, %dl
-
-			    and %dl, %cl
+				mov $$1, %rcx
 
 			    2:
 			    "
@@ -740,7 +732,8 @@ macro_rules! construct_uint {
 			type Output = $name;
 
 			fn add(self, other: $name) -> $name {
-				let (result, _) = self.overflowing_add(other);
+				let (result, overflow) = self.overflowing_add(other);
+				panic_on_overflow!(overflow);
 				result
 			}
 		}
@@ -750,7 +743,8 @@ macro_rules! construct_uint {
 
 			#[inline]
 			fn sub(self, other: $name) -> $name {
-				let (result, _) = self.overflowing_sub(other);
+				let (result, overflow) = self.overflowing_sub(other);
+				panic_on_overflow!(overflow);
 				result
 			}
 		}

From 4b0ec642995206fcef7cca18b8a3cec733813be3 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Thu, 25 Feb 2016 22:48:34 +0300
Subject: [PATCH 17/23] random init for benches

---
 util/benches/bigint.rs | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
index a22edcfbc..3b2012bc7 100644
--- a/util/benches/bigint.rs
+++ b/util/benches/bigint.rs
@@ -25,6 +25,7 @@
 
 extern crate test;
 extern crate ethcore_util;
+extern crate rand;
 
 use test::{Bencher, black_box};
 use ethcore_util::uint::*;
@@ -33,7 +34,7 @@ use ethcore_util::uint::*;
 fn u256_add(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
-		(0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_add(U256::from(new)).0 })
+		(0..n).fold(U256([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]), |old, new| { old.overflowing_add(U256::from(new)).0 })
 	});
 }
 
@@ -42,7 +43,7 @@ fn u256_add(b: &mut Bencher) {
 fn u256_sub(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
-		(0..n).fold(U256([::std::u64::MAX, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_sub(U256::from(new)).0 })
+		(0..n).fold(U256([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]), |old, new| { old.overflowing_sub(U256::from(new)).0 })
 	});
 }
 
@@ -50,7 +51,7 @@ fn u256_sub(b: &mut Bencher) {
 fn u256_mul(b: &mut Bencher) {
 	b.iter(|| {
 		let n = black_box(10000);
-		(0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_mul(U256::from(new)).0 })
+		(0..n).fold(U256([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]), |old, new| { old.overflowing_mul(U256::from(new)).0 })
 	});
 }
 

From f29417eea91f689fe57d3d8b06e1921b93235291 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Fri, 26 Feb 2016 14:50:55 +0300
Subject: [PATCH 18/23] allow dead code for macros expansion

---
 util/src/uint.rs | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index f9d9b4af8..ca727190b 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -588,6 +588,7 @@ macro_rules! construct_uint {
 		}
 
 		impl $name {
+			#[allow(dead_code)] // not used when multiplied with inline assembly
 			/// Multiplication by u32
 			fn mul_u32(self, other: u32) -> Self {
 				let $name(ref arr) = self;
@@ -609,6 +610,7 @@ macro_rules! construct_uint {
 				$name(ret) + $name(carry)
 			}
 
+			#[allow(dead_code)] // not used when multiplied with inline assembly
 			/// Overflowing multiplication by u32
 			fn overflowing_mul_u32(self, other: u32) -> (Self, bool) {
 				let $name(ref arr) = self;

From e95538f3ec716af0e051b6ed88761105b598defd Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Fri, 26 Feb 2016 15:56:55 +0300
Subject: [PATCH 19/23] [ci skip] style fixes, multipart add test

---
 util/src/uint.rs | 35 ++++++++++++++++++++++++++++-------
 1 file changed, 28 insertions(+), 7 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index ca727190b..82d3afe97 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -51,7 +51,7 @@ macro_rules! impl_map_from {
 	}
 }
 
-#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))]
+#[cfg(not(all(feature="x64asm", target_arch="x86_64")))]
 macro_rules! uint_overflowing_add {
 	($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
 		uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
@@ -89,7 +89,7 @@ macro_rules! uint_overflowing_add_reg {
 }
 
 
-#[cfg(all(feature="x64asm", target_arch = "x86_64"))]
+#[cfg(all(feature="x64asm", target_arch="x86_64"))]
 macro_rules! uint_overflowing_add {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
@@ -119,7 +119,7 @@ macro_rules! uint_overflowing_add {
 	)
 }
 
-#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))]
+#[cfg(not(all(feature="x64asm", target_arch="x86_64")))]
 macro_rules! uint_overflowing_sub {
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
@@ -128,7 +128,7 @@ macro_rules! uint_overflowing_sub {
 	})
 }
 
-#[cfg(all(feature="x64asm", target_arch = "x86_64"))]
+#[cfg(all(feature="x64asm", target_arch="x86_64"))]
 macro_rules! uint_overflowing_sub {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
@@ -158,7 +158,7 @@ macro_rules! uint_overflowing_sub {
 	})
 }
 
-#[cfg(all(feature="x64asm", target_arch = "x86_64"))]
+#[cfg(all(feature="x64asm", target_arch="x86_64"))]
 macro_rules! uint_overflowing_mul {
 	(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		let mut result: [u64; 4] = unsafe { mem::uninitialized() };
@@ -222,7 +222,7 @@ macro_rules! uint_overflowing_mul {
 				adc $$0, %rdx
 				or %rdx, %rcx
 
-                cmpq $$0, %rcx
+				cmpq $$0, %rcx
 				jne 2f
 
 				popcnt $8, %rcx
@@ -257,7 +257,7 @@ macro_rules! uint_overflowing_mul {
 	)
 }
 
-#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))]
+#[cfg(not(all(feature="x64asm", target_arch="x86_64")))]
 macro_rules! uint_overflowing_mul {
 	($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
 		uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
@@ -1468,5 +1468,26 @@ mod tests {
 	fn display_uint_zero() {
 		assert_eq!(format!("{}", U256::from(0)), "0");
 	}
+
+
+    #[test]
+    fn u256_multi_adds() {
+        let (result, _) = U256([0, 0, 0, 0]).overflowing_add(U256([0, 0, 0, 0]));
+        assert_eq!(result, U256([0, 0, 0, 0]));
+
+        let (result, _) = U256([0, 0, 0, 1]).overflowing_add(U256([0, 0, 0, 1]));
+        assert_eq!(result, U256([0, 0, 0, 2]));
+
+        let (result, overflow) = U256([0, 0, 2, 1]).overflowing_add(U256([0, 0, 3, 1]));
+        assert_eq!(result, U256([0, 0, 5, 2]));
+        assert!(!overflow);
+
+        let (_, overflow) = U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])
+			.overflowing_add(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX]));
+        assert!(overflow);
+
+        let (_, overflow) = U256([0, 0, 0, ::std::u64::MAX]).overflowing_add(U256([0, 0, 0, ::std::u64::MAX]));
+        assert!(overflow);
+    }
 }
 

From 228e3fefe02445b39ce227d182331483beb90dd5 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Fri, 26 Feb 2016 16:03:04 +0300
Subject: [PATCH 20/23] [ci skip] multipart sub test

---
 util/src/uint.rs | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 82d3afe97..245381b4b 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -1489,5 +1489,32 @@ mod tests {
         let (_, overflow) = U256([0, 0, 0, ::std::u64::MAX]).overflowing_add(U256([0, 0, 0, ::std::u64::MAX]));
         assert!(overflow);
     }
+
+
+    #[test]
+    fn u256_multi_subs() {
+        let (result, _) = U256([0, 0, 0, 0]).overflowing_sub(U256([0, 0, 0, 0]));
+        assert_eq!(result, U256([0, 0, 0, 0]));
+
+        let (result, _) = U256([0, 0, 0, 1]).overflowing_sub(U256([0, 0, 0, 1]));
+        assert_eq!(result, U256([0, 0, 0, 0]));
+
+        let (_, overflow) = U256([0, 0, 2, 1]).overflowing_sub(U256([0, 0, 3, 1]));
+        assert!(overflow);
+
+        let (result, overflow) = U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])
+                                .overflowing_sub(U256([::std::u64::MAX/2, ::std::u64::MAX/2, ::std::u64::MAX/2, ::std::u64::MAX/2]));
+        assert!(!overflow);
+        assert_eq!(U256([::std::u64::MAX/2+1, ::std::u64::MAX/2+1, ::std::u64::MAX/2+1, ::std::u64::MAX/2+1]), result);
+
+        let (result, overflow) = U256([0, 0, 0, 1]).overflowing_sub(U256([0, 0, 1, 0]));
+        assert!(!overflow);
+        assert_eq!(U256([0, 0, ::std::u64::MAX, 0]), result);
+
+        let (result, overflow) = U256([0, 0, 0, 1]).overflowing_sub(U256([1, 0, 0, 0]));
+        assert!(!overflow);
+        assert_eq!(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, 0]), result);
+    }
+
 }
 

From 3858a2011fb765dbb785f15773f21bb4578548f6 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Fri, 26 Feb 2016 16:12:47 +0300
Subject: [PATCH 21/23] [ci skip] mul multipart tests

---
 util/src/uint.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 245381b4b..a34742bd9 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -1516,5 +1516,51 @@ mod tests {
         assert_eq!(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, 0]), result);
     }
 
+
+	#[test]
+	fn u256_multi_muls() {
+        let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0]));
+        assert_eq!(U256([0, 0, 0, 0]), result);
+
+        let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([1, 0, 0, 0]));
+        assert_eq!(U256([1, 0, 0, 0]), result);
+
+        let (result, _) = U256([5, 0, 0, 0]).overflowing_mul(U256([5, 0, 0, 0]));
+        assert_eq!(U256([25, 0, 0, 0]), result);
+
+        let (result, _) = U256([0, 5, 0, 0]).overflowing_mul(U256([0, 5, 0, 0]));
+        assert_eq!(U256([0, 0, 25, 0]), result);
+
+        let (result, _) = U256([0, 0, 0, 1]).overflowing_mul(U256([1, 0, 0, 0]));
+        assert_eq!(U256([0, 0, 0, 1]), result);
+
+        let (result, _) = U256([0, 0, 0, 5]).overflowing_mul(U256([2, 0, 0, 0]));
+        assert_eq!(U256([0, 0, 0, 10]), result);
+
+        let (result, _) = U256([0, 0, 1, 0]).overflowing_mul(U256([0, 5, 0, 0]));
+        assert_eq!(U256([0, 0, 0, 5]), result);
+
+        let (result, _) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0]));
+        assert_eq!(U256([0, 0, 0, 0]), result);
+
+        let (result, _) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 5, 0, 0]));
+        assert_eq!(U256([0, 10, 0, 0]), result);
+
+        let (result, _) = U256([::std::u64::MAX, 0, 0, 0]).overflowing_mul(U256([::std::u64::MAX, 0, 0, 0]));
+        assert_eq!(U256([1, ::std::u64::MAX-1, 0, 0]), result);
+
+        let (result, _) = U256([0, 0, 0, ::std::u64::MAX]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
+        assert_eq!(U256([0, 0, 0, 0]), result);
+
+        let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
+        assert_eq!(U256([0, 0, 0, ::std::u64::MAX]), result);
+
+        let (result, _) = U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])
+			.overflowing_mul(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX]));
+        assert_eq!(U256([1, 0, 0, 0]), result);
+	}
+
+
+
 }
 

From 023c6236500b82f89110c0f482e15b23a559cbb5 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Fri, 26 Feb 2016 16:19:55 +0300
Subject: [PATCH 22/23] mul overflow multipart test

---
 util/src/uint.rs | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index a34742bd9..98541fe33 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -1560,7 +1560,34 @@ mod tests {
         assert_eq!(U256([1, 0, 0, 0]), result);
 	}
 
+    #[test]
+    fn u256_multi_muls_overflow() {
+        let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0]));
+        assert!(!overflow);
 
+        let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
+        assert!(!overflow);
 
+        let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
+        assert!(!overflow);
+
+        let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 1, 0, 0]));
+        assert!(!overflow);
+
+        let (_, overflow) = U256([0, 1, 0, ::std::u64::MAX]).overflowing_mul(U256([0, 1, 0, ::std::u64::MAX]));
+        assert!(overflow);
+
+        let (_, overflow) = U256([0, ::std::u64::MAX, 0, 0]).overflowing_mul(U256([0, ::std::u64::MAX, 0, 0]));
+        assert!(!overflow);
+
+        let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([10, 0, 0, 0]));
+        assert!(!overflow);
+
+        let (_, overflow) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX / 2]));
+        assert!(!overflow);
+
+        let (_, overflow) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0]));
+        assert!(overflow);
+    }
 }
 

From 5013c4d1f1efc38e2237ed691e48f9e3f5e54aa0 Mon Sep 17 00:00:00 2001
From: Nikolay Volf <nikvolf@gmail.com>
Date: Fri, 26 Feb 2016 16:50:12 +0300
Subject: [PATCH 23/23] naughty overflow bug fixed

---
 util/src/uint.rs | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/util/src/uint.rs b/util/src/uint.rs
index 98541fe33..b4940cfb8 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -226,16 +226,30 @@ macro_rules! uint_overflowing_mul {
 				jne 2f
 
 				popcnt $8, %rcx
-				popcnt $7, %rax
-				add %rax, %rcx
-				jrcxz 2f
+				jrcxz 12f
 
 				popcnt $12, %rcx
 				popcnt $11, %rax
 				add %rax, %rcx
-				jrcxz 2f
+				popcnt $10, %rax
+				add %rax, %rcx
+				jmp 2f
 
-				mov $$1, %rcx
+				12:
+				popcnt $12, %rcx
+				jrcxz 11f
+
+				popcnt $7, %rcx
+				popcnt $6, %rax
+				add %rax, %rcx
+
+				cmpq $$0, %rcx
+				jne 2f
+
+				11:
+				popcnt $11, %rcx
+				jrcxz 2f
+				popcnt $7, %rcx
 
 			    2:
 			    "
@@ -1569,7 +1583,7 @@ mod tests {
         assert!(!overflow);
 
         let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
-        assert!(!overflow);
+        assert!(overflow);
 
         let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 1, 0, 0]));
         assert!(!overflow);