diff --git a/util/Cargo.toml b/util/Cargo.toml
index 96ad8a5c2..3c742715e 100644
--- a/util/Cargo.toml
+++ b/util/Cargo.toml
@@ -39,6 +39,7 @@ target_info = "0.1"
[features]
default = []
dev = ["clippy"]
+x64asm = []
[build-dependencies]
vergen = "*"
diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs
new file mode 100644
index 000000000..3b2012bc7
--- /dev/null
+++ b/util/benches/bigint.rs
@@ -0,0 +1,66 @@
+// Copyright 2015, 2016 Ethcore (UK) Ltd.
+// This file is part of Parity.
+
+// Parity is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+
+// Parity is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License
+// along with Parity. If not, see .
+
+//! benchmarking for rlp
+//! should be started with:
+//! ```bash
+//! multirust run nightly cargo bench
+//! ```
+
+#![feature(test)]
+#![feature(asm)]
+
+extern crate test;
+extern crate ethcore_util;
+extern crate rand;
+
+use test::{Bencher, black_box};
+use ethcore_util::uint::*;
+
+#[bench]
+fn u256_add(b: &mut Bencher) {
+ b.iter(|| {
+ let n = black_box(10000);
+ (0..n).fold(U256([rand::random::(), rand::random::(), rand::random::(), rand::random::()]), |old, new| { old.overflowing_add(U256::from(new)).0 })
+ });
+}
+
+
+#[bench]
+fn u256_sub(b: &mut Bencher) {
+ b.iter(|| {
+ let n = black_box(10000);
+ (0..n).fold(U256([rand::random::(), rand::random::(), rand::random::(), rand::random::()]), |old, new| { old.overflowing_sub(U256::from(new)).0 })
+ });
+}
+
+#[bench]
+fn u256_mul(b: &mut Bencher) {
+ b.iter(|| {
+ let n = black_box(10000);
+ (0..n).fold(U256([rand::random::(), rand::random::(), rand::random::(), rand::random::()]), |old, new| { old.overflowing_mul(U256::from(new)).0 })
+ });
+}
+
+
+#[bench]
+fn u128_mul(b: &mut Bencher) {
+ b.iter(|| {
+ let n = black_box(10000);
+ (0..n).fold(U128([12345u64, 0u64]), |old, new| { old.overflowing_mul(U128::from(new)).0 })
+ });
+}
+
diff --git a/util/src/lib.rs b/util/src/lib.rs
index 0d074df8c..be1e788c9 100644
--- a/util/src/lib.rs
+++ b/util/src/lib.rs
@@ -16,6 +16,7 @@
#![warn(missing_docs)]
#![cfg_attr(feature="dev", feature(plugin))]
+#![cfg_attr(feature="x64asm", feature(asm))]
#![cfg_attr(feature="dev", plugin(clippy))]
// Clippy settings
diff --git a/util/src/uint.rs b/util/src/uint.rs
index 6490cbd9b..b4940cfb8 100644
--- a/util/src/uint.rs
+++ b/util/src/uint.rs
@@ -51,6 +51,247 @@ macro_rules! impl_map_from {
}
}
+#[cfg(not(all(feature="x64asm", target_arch="x86_64")))]
+macro_rules! uint_overflowing_add {
+ ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
+ uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
+ })
+}
+
+macro_rules! uint_overflowing_add_reg {
+ ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
+ let $name(ref me) = $self_expr;
+ let $name(ref you) = $other;
+ let mut ret = [0u64; $n_words];
+ let mut carry = [0u64; $n_words];
+ let mut b_carry = false;
+ let mut overflow = false;
+
+ for i in 0..$n_words {
+ ret[i] = me[i].wrapping_add(you[i]);
+
+ if ret[i] < me[i] {
+ if i < $n_words - 1 {
+ carry[i + 1] = 1;
+ b_carry = true;
+ } else {
+ overflow = true;
+ }
+ }
+ }
+ if b_carry {
+ let ret = overflowing!($name(ret).overflowing_add($name(carry)), overflow);
+ (ret, overflow)
+ } else {
+ ($name(ret), overflow)
+ }
+ })
+}
+
+
+#[cfg(all(feature="x64asm", target_arch="x86_64"))]
+macro_rules! uint_overflowing_add {
+ (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
+ let mut result: [u64; 4] = unsafe { mem::uninitialized() };
+ let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
+ let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
+
+ let overflow: u8;
+ unsafe {
+ asm!("
+ adc $9, $0
+ adc $10, $1
+ adc $11, $2
+ adc $12, $3
+ setc %al
+ "
+ : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
+ : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]),
+ "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
+ :
+ :
+ );
+ }
+ (U256(result), overflow != 0)
+ });
+ ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
+ uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
+ )
+}
+
+#[cfg(not(all(feature="x64asm", target_arch="x86_64")))]
+macro_rules! uint_overflowing_sub {
+ ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+ let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
+ let res = overflowing!($self_expr.overflowing_add(res));
+ (res, $self_expr < $other)
+ })
+}
+
+#[cfg(all(feature="x64asm", target_arch="x86_64"))]
+macro_rules! uint_overflowing_sub {
+ (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
+ let mut result: [u64; 4] = unsafe { mem::uninitialized() };
+ let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
+ let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
+
+ let overflow: u8;
+ unsafe {
+ asm!("
+ sbb $9, $0
+ sbb $10, $1
+ sbb $11, $2
+ sbb $12, $3
+ setb %al"
+ : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
+ : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
+ :
+ :
+ );
+ }
+ (U256(result), overflow != 0)
+ });
+ ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+ let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
+ let res = overflowing!($self_expr.overflowing_add(res));
+ (res, $self_expr < $other)
+ })
+}
+
+#[cfg(all(feature="x64asm", target_arch="x86_64"))]
+macro_rules! uint_overflowing_mul {
+ (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
+ let mut result: [u64; 4] = unsafe { mem::uninitialized() };
+ let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
+ let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
+
+ let overflow: u64;
+ unsafe {
+ asm!("
+ mov $5, %rax
+ mulq $9
+ mov %rax, $0
+ mov %rdx, $1
+
+ mov $6, %rax
+ mulq $9
+ add %rax, $1
+ mov %rdx, $2
+
+ mov $5, %rax
+ mulq $10
+ add %rax, $1
+ adc %rdx, $2
+
+ mov $6, %rax
+ mulq $10
+ add %rax, $2
+ mov %rdx, $3
+
+ mov $7, %rax
+ mulq $9
+ add %rax, $2
+ adc %rdx, $3
+
+ mov $5, %rax
+ mulq $11
+ add %rax, $2
+ adc %rdx, $3
+
+ mov $8, %rax
+ mulq $9
+ adc %rax, $3
+ adc $$0, %rdx
+ mov %rdx, %rcx
+
+ mov $7, %rax
+ mulq $10
+ add %rax, $3
+ adc $$0, %rdx
+ or %rdx, %rcx
+
+ mov $6, %rax
+ mulq $11
+ add %rax, $3
+ adc $$0, %rdx
+ or %rdx, %rcx
+
+ mov $5, %rax
+ mulq $12
+ add %rax, $3
+ adc $$0, %rdx
+ or %rdx, %rcx
+
+ cmpq $$0, %rcx
+ jne 2f
+
+ popcnt $8, %rcx
+ jrcxz 12f
+
+ popcnt $12, %rcx
+ popcnt $11, %rax
+ add %rax, %rcx
+ popcnt $10, %rax
+ add %rax, %rcx
+ jmp 2f
+
+ 12:
+ popcnt $12, %rcx
+ jrcxz 11f
+
+ popcnt $7, %rcx
+ popcnt $6, %rax
+ add %rax, %rcx
+
+ cmpq $$0, %rcx
+ jne 2f
+
+ 11:
+ popcnt $11, %rcx
+ jrcxz 2f
+ popcnt $7, %rcx
+
+ 2:
+ "
+ : /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]),
+ /* $3 */ "={r11}"(result[3]), /* $4 */ "={rcx}"(overflow)
+
+ : /* $5 */ "m"(self_t[0]), /* $6 */ "m"(self_t[1]), /* $7 */ "m"(self_t[2]),
+ /* $8 */ "m"(self_t[3]), /* $9 */ "m"(other_t[0]), /* $10 */ "m"(other_t[1]),
+ /* $11 */ "m"(other_t[2]), /* $12 */ "m"(other_t[3])
+ : "rax", "rdx", "rbx"
+ :
+
+ );
+ }
+ (U256(result), overflow > 0)
+ });
+ ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
+ uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
+ )
+}
+
+#[cfg(not(all(feature="x64asm", target_arch="x86_64")))]
+macro_rules! uint_overflowing_mul {
+ ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+ uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other)
+ })
+}
+
+macro_rules! uint_overflowing_mul_reg {
+ ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
+ let mut res = $name::from(0u64);
+ let mut overflow = false;
+ // TODO: be more efficient about this
+ for i in 0..(2 * $n_words) {
+ let v = overflowing!($self_expr.overflowing_mul_u32(($other >> (32 * i)).low_u32()), overflow);
+ let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow);
+ res = overflowing!(res.overflowing_add(res2), overflow);
+ }
+ (res, overflow)
+ })
+}
+
macro_rules! overflowing {
($op: expr, $overflow: expr) => (
{
@@ -297,50 +538,20 @@ macro_rules! construct_uint {
(res, overflow)
}
+ /// Optimized instructions
+ #[inline(always)]
fn overflowing_add(self, other: $name) -> ($name, bool) {
- let $name(ref me) = self;
- let $name(ref you) = other;
- let mut ret = [0u64; $n_words];
- let mut carry = [0u64; $n_words];
- let mut b_carry = false;
- let mut overflow = false;
-
- for i in 0..$n_words {
- ret[i] = me[i].wrapping_add(you[i]);
-
- if ret[i] < me[i] {
- if i < $n_words - 1 {
- carry[i + 1] = 1;
- b_carry = true;
- } else {
- overflow = true;
- }
- }
- }
- if b_carry {
- let ret = overflowing!($name(ret).overflowing_add($name(carry)), overflow);
- (ret, overflow)
- } else {
- ($name(ret), overflow)
- }
+ uint_overflowing_add!($name, $n_words, self, other)
}
+ #[inline(always)]
fn overflowing_sub(self, other: $name) -> ($name, bool) {
- let res = overflowing!((!other).overflowing_add(From::from(1u64)));
- let res = overflowing!(self.overflowing_add(res));
- (res, self < other)
+ uint_overflowing_sub!($name, $n_words, self, other)
}
+ #[inline(always)]
fn overflowing_mul(self, other: $name) -> ($name, bool) {
- let mut res = $name::from(0u64);
- let mut overflow = false;
- // TODO: be more efficient about this
- for i in 0..(2 * $n_words) {
- let v = overflowing!(self.overflowing_mul_u32((other >> (32 * i)).low_u32()), overflow);
- let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow);
- res = overflowing!(res.overflowing_add(res2), overflow);
- }
- (res, overflow)
+ uint_overflowing_mul!($name, $n_words, self, other)
}
fn overflowing_div(self, other: $name) -> ($name, bool) {
@@ -391,6 +602,7 @@ macro_rules! construct_uint {
}
impl $name {
+ #[allow(dead_code)] // not used when multiplied with inline assembly
/// Multiplication by u32
fn mul_u32(self, other: u32) -> Self {
let $name(ref arr) = self;
@@ -412,6 +624,7 @@ macro_rules! construct_uint {
$name(ret) + $name(carry)
}
+ #[allow(dead_code)] // not used when multiplied with inline assembly
/// Overflowing multiplication by u32
fn overflowing_mul_u32(self, other: u32) -> (Self, bool) {
let $name(ref arr) = self;
@@ -535,23 +748,9 @@ macro_rules! construct_uint {
type Output = $name;
fn add(self, other: $name) -> $name {
- let $name(ref me) = self;
- let $name(ref you) = other;
- let mut ret = [0u64; $n_words];
- let mut carry = [0u64; $n_words];
- let mut b_carry = false;
- for i in 0..$n_words {
- if i < $n_words - 1 {
- ret[i] = me[i].wrapping_add(you[i]);
- if ret[i] < me[i] {
- carry[i + 1] = 1;
- b_carry = true;
- }
- } else {
- ret[i] = me[i] + you[i];
- }
- }
- if b_carry { $name(ret) + $name(carry) } else { $name(ret) }
+ let (result, overflow) = self.overflowing_add(other);
+ panic_on_overflow!(overflow);
+ result
}
}
@@ -560,9 +759,9 @@ macro_rules! construct_uint {
#[inline]
fn sub(self, other: $name) -> $name {
- panic_on_overflow!(self < other);
- let res = overflowing!((!other).overflowing_add(From::from(1u64)));
- overflowing!(self.overflowing_add(res))
+ let (result, overflow) = self.overflowing_sub(other);
+ panic_on_overflow!(overflow);
+ result
}
}
@@ -570,15 +769,9 @@ macro_rules! construct_uint {
type Output = $name;
fn mul(self, other: $name) -> $name {
- let mut res = $name::from(0u64);
- // TODO: be more efficient about this
- for i in 0..(2 * $n_words) {
- let v = self.mul_u32((other >> (32 * i)).low_u32());
- let (r, overflow) = v.overflowing_shl(32 * i as u32);
- panic_on_overflow!(overflow);
- res = res + r;
- }
- res
+ let (result, overflow) = self.overflowing_mul(other);
+ panic_on_overflow!(overflow);
+ result
}
}
@@ -1171,8 +1364,6 @@ mod tests {
);
}
-
-
#[test]
#[should_panic]
pub fn uint256_mul_overflow_panic() {
@@ -1291,5 +1482,126 @@ mod tests {
fn display_uint_zero() {
assert_eq!(format!("{}", U256::from(0)), "0");
}
+
+
+ #[test]
+ fn u256_multi_adds() {
+ let (result, _) = U256([0, 0, 0, 0]).overflowing_add(U256([0, 0, 0, 0]));
+ assert_eq!(result, U256([0, 0, 0, 0]));
+
+ let (result, _) = U256([0, 0, 0, 1]).overflowing_add(U256([0, 0, 0, 1]));
+ assert_eq!(result, U256([0, 0, 0, 2]));
+
+ let (result, overflow) = U256([0, 0, 2, 1]).overflowing_add(U256([0, 0, 3, 1]));
+ assert_eq!(result, U256([0, 0, 5, 2]));
+ assert!(!overflow);
+
+ let (_, overflow) = U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])
+ .overflowing_add(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX]));
+ assert!(overflow);
+
+ let (_, overflow) = U256([0, 0, 0, ::std::u64::MAX]).overflowing_add(U256([0, 0, 0, ::std::u64::MAX]));
+ assert!(overflow);
+ }
+
+
+ #[test]
+ fn u256_multi_subs() {
+ let (result, _) = U256([0, 0, 0, 0]).overflowing_sub(U256([0, 0, 0, 0]));
+ assert_eq!(result, U256([0, 0, 0, 0]));
+
+ let (result, _) = U256([0, 0, 0, 1]).overflowing_sub(U256([0, 0, 0, 1]));
+ assert_eq!(result, U256([0, 0, 0, 0]));
+
+ let (_, overflow) = U256([0, 0, 2, 1]).overflowing_sub(U256([0, 0, 3, 1]));
+ assert!(overflow);
+
+ let (result, overflow) = U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])
+ .overflowing_sub(U256([::std::u64::MAX/2, ::std::u64::MAX/2, ::std::u64::MAX/2, ::std::u64::MAX/2]));
+ assert!(!overflow);
+ assert_eq!(U256([::std::u64::MAX/2+1, ::std::u64::MAX/2+1, ::std::u64::MAX/2+1, ::std::u64::MAX/2+1]), result);
+
+ let (result, overflow) = U256([0, 0, 0, 1]).overflowing_sub(U256([0, 0, 1, 0]));
+ assert!(!overflow);
+ assert_eq!(U256([0, 0, ::std::u64::MAX, 0]), result);
+
+ let (result, overflow) = U256([0, 0, 0, 1]).overflowing_sub(U256([1, 0, 0, 0]));
+ assert!(!overflow);
+ assert_eq!(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, 0]), result);
+ }
+
+
+ #[test]
+ fn u256_multi_muls() {
+ let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0]));
+ assert_eq!(U256([0, 0, 0, 0]), result);
+
+ let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([1, 0, 0, 0]));
+ assert_eq!(U256([1, 0, 0, 0]), result);
+
+ let (result, _) = U256([5, 0, 0, 0]).overflowing_mul(U256([5, 0, 0, 0]));
+ assert_eq!(U256([25, 0, 0, 0]), result);
+
+ let (result, _) = U256([0, 5, 0, 0]).overflowing_mul(U256([0, 5, 0, 0]));
+ assert_eq!(U256([0, 0, 25, 0]), result);
+
+ let (result, _) = U256([0, 0, 0, 1]).overflowing_mul(U256([1, 0, 0, 0]));
+ assert_eq!(U256([0, 0, 0, 1]), result);
+
+ let (result, _) = U256([0, 0, 0, 5]).overflowing_mul(U256([2, 0, 0, 0]));
+ assert_eq!(U256([0, 0, 0, 10]), result);
+
+ let (result, _) = U256([0, 0, 1, 0]).overflowing_mul(U256([0, 5, 0, 0]));
+ assert_eq!(U256([0, 0, 0, 5]), result);
+
+ let (result, _) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0]));
+ assert_eq!(U256([0, 0, 0, 0]), result);
+
+ let (result, _) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 5, 0, 0]));
+ assert_eq!(U256([0, 10, 0, 0]), result);
+
+ let (result, _) = U256([::std::u64::MAX, 0, 0, 0]).overflowing_mul(U256([::std::u64::MAX, 0, 0, 0]));
+ assert_eq!(U256([1, ::std::u64::MAX-1, 0, 0]), result);
+
+ let (result, _) = U256([0, 0, 0, ::std::u64::MAX]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
+ assert_eq!(U256([0, 0, 0, 0]), result);
+
+ let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
+ assert_eq!(U256([0, 0, 0, ::std::u64::MAX]), result);
+
+ let (result, _) = U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])
+ .overflowing_mul(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX]));
+ assert_eq!(U256([1, 0, 0, 0]), result);
+ }
+
+ #[test]
+ fn u256_multi_muls_overflow() {
+ let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0]));
+ assert!(!overflow);
+
+ let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
+ assert!(!overflow);
+
+ let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX]));
+ assert!(overflow);
+
+ let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 1, 0, 0]));
+ assert!(!overflow);
+
+ let (_, overflow) = U256([0, 1, 0, ::std::u64::MAX]).overflowing_mul(U256([0, 1, 0, ::std::u64::MAX]));
+ assert!(overflow);
+
+ let (_, overflow) = U256([0, ::std::u64::MAX, 0, 0]).overflowing_mul(U256([0, ::std::u64::MAX, 0, 0]));
+ assert!(!overflow);
+
+ let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([10, 0, 0, 0]));
+ assert!(!overflow);
+
+ let (_, overflow) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX / 2]));
+ assert!(!overflow);
+
+ let (_, overflow) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0]));
+ assert!(overflow);
+ }
}