From dd8652dbf41715dcb44e7fa4c835634b99c1d771 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Wed, 24 Feb 2016 21:17:29 +0300 Subject: [PATCH 01/23] u256 to inline assembly opt --- util/benches/bigint.rs | 37 +++++++++++++++++ util/src/lib.rs | 1 + util/src/uint.rs | 90 ++++++++++++++++++++++++++++++------------ 3 files changed, 102 insertions(+), 26 deletions(-) create mode 100644 util/benches/bigint.rs diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs new file mode 100644 index 000000000..6ba30d88c --- /dev/null +++ b/util/benches/bigint.rs @@ -0,0 +1,37 @@ +// Copyright 2015, 2016 Ethcore (UK) Ltd. +// This file is part of Parity. + +// Parity is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. + +// Parity is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. + +// You should have received a copy of the GNU General Public License +// along with Parity. If not, see <http://www.gnu.org/licenses/>. + +//! benchmarking for bigint +//! should be started with: +//! ```bash +//! multirust run nightly cargo bench +//! 
``` + +#![feature(test)] + +extern crate test; +extern crate ethcore_util; + +use test::{Bencher, black_box}; +use ethcore_util::uint::*; + +#[bench] +fn u256_first_degree(b: &mut test::Bencher) { + b.iter(|| { + let n = black_box(10000); + (0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 }) + }); +} diff --git a/util/src/lib.rs b/util/src/lib.rs index 2b7438cf3..1f04240dc 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -16,6 +16,7 @@ #![warn(missing_docs)] #![cfg_attr(feature="dev", feature(plugin))] +#![cfg_attr(feature="dev", feature(asm))] #![cfg_attr(feature="dev", plugin(clippy))] // Clippy settings diff --git a/util/src/uint.rs b/util/src/uint.rs index 6490cbd9b..8266aff42 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -51,6 +51,64 @@ macro_rules! impl_map_from { } } +macro_rules! overflowing_add_regular { + ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({ + let $name(ref me) = $self_expr; + let $name(ref you) = $other; + let mut ret = [0u64; $n_words]; + let mut carry = [0u64; $n_words]; + let mut b_carry = false; + let mut overflow = false; + + for i in 0..$n_words { + ret[i] = me[i].wrapping_add(you[i]); + + if ret[i] < me[i] { + if i < $n_words - 1 { + carry[i + 1] = 1; + b_carry = true; + } else { + overflow = true; + } + } + } + if b_carry { + let ret = overflowing!($name(ret).overflowing_add($name(carry)), overflow); + (ret, overflow) + } else { + ($name(ret), overflow) + } + }) +} + +macro_rules! 
overflowing_add_u256_asm { + (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ + let mut result: [u64; 4] = unsafe { mem::uninitialized() }; + let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) }; + let other_t: &[u64; 4] = unsafe { &mem::transmute($other) }; + + let overflow: u8; + unsafe { + asm!(" + xor %al, %al + adc $9, %r8 + adc $10, %r9 + adc $11, %r10 + adc $12, %r11 + adc $$0, %al" + : "={r8}"(result[0]), "={r9}"(result[1]), "={r10}"(result[2]), "={r11}"(result[3]), "={al}"(overflow) + : "{r8}"(self_t[0]), "{r9}"(self_t[1]), "{r10}"(self_t[2]), "{r11}"(self_t[3]), "m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3]) + : + : + ); + } + (U256(result), overflow != 0) + }); + ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ( + overflowing_add_regular!($name, $n_words, $self_expr, $other) + ) +} + macro_rules! overflowing { ($op: expr, $overflow: expr) => ( { @@ -297,32 +355,14 @@ macro_rules! construct_uint { (res, overflow) } + #[cfg(all(feature = "dev", target_arch = "x86_64"))] fn overflowing_add(self, other: $name) -> ($name, bool) { - let $name(ref me) = self; - let $name(ref you) = other; - let mut ret = [0u64; $n_words]; - let mut carry = [0u64; $n_words]; - let mut b_carry = false; - let mut overflow = false; + overflowing_add_u256_asm!($name, $n_words, self, other) + } - for i in 0..$n_words { - ret[i] = me[i].wrapping_add(you[i]); - - if ret[i] < me[i] { - if i < $n_words - 1 { - carry[i + 1] = 1; - b_carry = true; - } else { - overflow = true; - } - } - } - if b_carry { - let ret = overflowing!($name(ret).overflowing_add($name(carry)), overflow); - (ret, overflow) - } else { - ($name(ret), overflow) - } + #[cfg(not(all(feature = "dev", target_arch = "x86_64")))] + fn overflowing_add(self, other: $name) -> ($name, bool) { + overflowing_add_regular!($name, $n_words, self, other) } fn overflowing_sub(self, other: $name) -> ($name, bool) { @@ -1171,8 +1211,6 @@ mod tests { ); } - - #[test] 
#[should_panic] pub fn uint256_mul_overflow_panic() { From 476bb85d414ab5ae0ee1d67435757a517a3ff430 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Wed, 24 Feb 2016 21:36:31 +0300 Subject: [PATCH 02/23] r m/r + setc/xor --- util/src/uint.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index 8266aff42..db8792592 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -91,13 +91,13 @@ macro_rules! overflowing_add_u256_asm { unsafe { asm!(" xor %al, %al - adc $9, %r8 - adc $10, %r9 - adc $11, %r10 - adc $12, %r11 - adc $$0, %al" - : "={r8}"(result[0]), "={r9}"(result[1]), "={r10}"(result[2]), "={r11}"(result[3]), "={al}"(overflow) - : "{r8}"(self_t[0]), "{r9}"(self_t[1]), "{r10}"(self_t[2]), "{r11}"(self_t[3]), "m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3]) + adc $9, $0 + adc $10, $1 + adc $11, $2 + adc $12, $3 + adc $$0, %al" + : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) + : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]) : : ); From 7821505139c32d8326dae60efe9b6b63e8e7530f Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Wed, 24 Feb 2016 23:08:21 +0300 Subject: [PATCH 03/23] sub x64 optimize --- util/benches/bigint.rs | 11 +++++++++- util/src/uint.rs | 49 ++++++++++++++++++++++++++++++++++++------ 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs index 6ba30d88c..524d31508 100644 --- a/util/benches/bigint.rs +++ b/util/benches/bigint.rs @@ -29,9 +29,18 @@ use test::{Bencher, black_box}; use ethcore_util::uint::*; #[bench] -fn u256_first_degree(b: &mut test::Bencher) { +fn u256_add(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); (0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 }) }); } + +#[bench] +fn u256_sub(b: &mut Bencher) { + b.iter(|| { + let 
n = black_box(10000); + (0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 }) + }); +} + diff --git a/util/src/uint.rs b/util/src/uint.rs index db8792592..147b83e42 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -41,6 +41,8 @@ use from_json::*; use rustc_serialize::hex::ToHex; use serde; +#[cfg_attr(x64_asm_optimizations, all(feature = "dev", target_arch = "x86_64"))] + macro_rules! impl_map_from { ($thing:ident, $from:ty, $to:ty) => { impl From<$from> for $thing { @@ -81,7 +83,7 @@ macro_rules! overflowing_add_regular { }) } -macro_rules! overflowing_add_u256_asm { +macro_rules! add_64x_optimized { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut result: [u64; 4] = unsafe { mem::uninitialized() }; let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) }; @@ -90,12 +92,38 @@ macro_rules! overflowing_add_u256_asm { let overflow: u8; unsafe { asm!(" - xor %al, %al adc $9, $0 adc $10, $1 adc $11, $2 adc $12, $3 - adc $$0, %al" + setc %al" + : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) + : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]) + : + : + ); + } + (U256(result), overflow != 0) + }); + ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ( + overflowing_add_regular!($name, $n_words, $self_expr, $other) + ) +} + +macro_rules! 
sub_64x_optimized { + (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ + let mut result: [u64; 4] = unsafe { mem::uninitialized() }; + let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) }; + let other_t: &[u64; 4] = unsafe { &mem::transmute($other) }; + + let overflow: u8; + unsafe { + asm!(" + sbb $9, %r8 + sbb $10, %r9 + sbb $11, %r10 + sbb $12, %r11 + setb %al" : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]) : @@ -355,16 +383,23 @@ macro_rules! construct_uint { (res, overflow) } - #[cfg(all(feature = "dev", target_arch = "x86_64"))] + /// Optimized instructions + #[cfg(x64_asm_optimizations)] + #[inline] fn overflowing_add(self, other: $name) -> ($name, bool) { - overflowing_add_u256_asm!($name, $n_words, self, other) + add_64x_optimized!($name, $n_words, self, other) } - - #[cfg(not(all(feature = "dev", target_arch = "x86_64")))] + #[cfg(not(x64_asm_optimizations))] fn overflowing_add(self, other: $name) -> ($name, bool) { overflowing_add_regular!($name, $n_words, self, other) } + #[cfg(x64_asm_optimizations)] + #[inline] + fn overflowing_sub(self, other: $name) -> ($name, bool) { + sub_64x_optimized!($name, $n_words, self, other) + } + #[cfg(not(x64_asm_optimizations))] fn overflowing_sub(self, other: $name) -> ($name, bool) { let res = overflowing!((!other).overflowing_add(From::from(1u64))); let res = overflowing!(self.overflowing_add(res)); From ccaa1946810e5566304f5e6e5cc2f33883f2a99b Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 02:00:34 +0300 Subject: [PATCH 04/23] mul, bench showtime --- util/benches/bigint.rs | 20 ++++- util/src/uint.rs | 161 +++++++++++++++++++++++++++++++++-------- 2 files changed, 150 insertions(+), 31 deletions(-) diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs index 524d31508..38ce10a4a 100644 --- 
a/util/benches/bigint.rs +++ b/util/benches/bigint.rs @@ -21,6 +21,7 @@ //! ``` #![feature(test)] +#![feature(asm)] extern crate test; extern crate ethcore_util; @@ -40,7 +41,24 @@ fn u256_add(b: &mut Bencher) { fn u256_sub(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); - (0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 }) + (0..n).fold(U256::zero(), |old, new| { old.overflowing_sub(U256::from(new)).0 }) + }); +} + +#[bench] +fn u256_mul(b: &mut Bencher) { + b.iter(|| { + let n = black_box(10000); + (0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_mul(U256::from(new)).0 }) + }); +} + + +#[bench] +fn u128_mul(b: &mut Bencher) { + b.iter(|| { + let n = black_box(10000); + (0..n).fold(U128([12345u64, 0u64]), |old, new| { old.overflowing_mul(U128::from(new)).0 }) }); } diff --git a/util/src/uint.rs b/util/src/uint.rs index 147b83e42..38b4e4906 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -41,8 +41,6 @@ use from_json::*; use rustc_serialize::hex::ToHex; use serde; -#[cfg_attr(x64_asm_optimizations, all(feature = "dev", target_arch = "x86_64"))] - macro_rules! impl_map_from { ($thing:ident, $from:ty, $to:ty) => { impl From<$from> for $thing { @@ -53,7 +51,8 @@ macro_rules! impl_map_from { } } -macro_rules! overflowing_add_regular { +#[cfg(not(all(feature="dev", target_arch = "x86_64")))] +macro_rules! uint_overflowing_add { ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({ let $name(ref me) = $self_expr; let $name(ref you) = $other; @@ -83,7 +82,8 @@ macro_rules! overflowing_add_regular { }) } -macro_rules! add_64x_optimized { +#[cfg(all(feature="dev", target_arch = "x86_64"))] +macro_rules! uint_overflowing_add { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut result: [u64; 4] = unsafe { mem::uninitialized() }; let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) }; @@ -110,7 +110,17 @@ macro_rules! add_64x_optimized { ) } -macro_rules! 
sub_64x_optimized { +#[cfg(not(all(feature="dev", target_arch = "x86_64")))] +macro_rules! uint_overflowing_sub { + ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ + let res = overflowing!((!$other).overflowing_add(From::from(1u64))); + let res = overflowing!($self_expr.overflowing_add(res)); + (res, $self_expr < $other) + }) +} + +#[cfg(all(feature="dev", target_arch = "x86_64"))] +macro_rules! uint_overflowing_sub { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut result: [u64; 4] = unsafe { mem::uninitialized() }; let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) }; @@ -137,6 +147,119 @@ macro_rules! sub_64x_optimized { ) } +#[cfg(all(feature="dev", target_arch = "x86_64"))] +macro_rules! uint_overflowing_mul { + (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ + let mut result: [u64; 4] = unsafe { mem::uninitialized() }; + let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) }; + let other_t: &[u64; 4] = unsafe { &mem::transmute($other) }; + + let overflow: u8; + unsafe { + asm!(" + mov $5, %rax + mulq $9 + mov %rax, %r8 + adc $6, %rdx + pushf + + mov %rdx, %rax + mulq $9 + popf + adc $$0, %rax + adc $7, %rdx + pushf + mov %rax, %r9 + + + mov %rdx, %rax + mulq $9 + popf + adc $$0, %rax + adc $8, %rdx + pushf + mov %rax, %r10 + + mov %rdx, %rax + mulq $9 + popf + adc $$0, %rax + mov %rax, %r11 + mov %rdx, %rcx + + mov $5, %rax + mulq $10 + adc %rax, %r9 + adc $6, %rdx + pushf + + mov %rdx, %rax + mulq $10 + popf + adc %rax, %r10 + adc $7, %rdx + pushf + + mov %rdx, %rax + mulq $10 + popf + adc %rax, %r11 + pushf + or %rax, %rcx + + mov $5, %rax + mulq $11 + popf + adc %rax, %r10 + adc $6, %rdx + pushf + + mov %rdx, %rax + mulq $11 + popf + adc %rax, %r11 + pushf + or %rdx, %rcx + + mov $5, %rax + mulq $12 + popf + adc %rax, %r11 + or %rdx, %rcx + " + : /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]), + /* $3 */ "={r11}"(result[3]), /* $4 */ 
"={rcx}"(overflow) + + : /* $5 */ "m"(self_t[0]), /* $6 */ "m"(self_t[1]), /* $7 */ "m"(self_t[2]), + /* $8 */ "m"(self_t[3]), /* $9 */ "m"(other_t[0]), /* $10 */ "m"(other_t[1]), + /* $11 */ "m"(other_t[2]), /* $12 */ "m"(other_t[3]) + : "rax", "rdx" + : + + ); + } + (U256(result), overflow > 0) + }); + ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ( + overflowing_mul_regular!($name, $n_words, $self_expr, $other) + ) +} + +#[cfg(not(all(feature="dev", target_arch = "x86_64")))] +macro_rules! uint_overflowing_mul { + ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ + let mut res = $name::from(0u64); + let mut overflow = false; + // TODO: be more efficient about this + for i in 0..(2 * $n_words) { + let v = overflowing!($self_expr.overflowing_mul_u32(($other >> (32 * i)).low_u32()), overflow); + let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow); + res = overflowing!(res.overflowing_add(res2), overflow); + } + (res, overflow) + }) +} + macro_rules! overflowing { ($op: expr, $overflow: expr) => ( { @@ -384,38 +507,16 @@ macro_rules! 
construct_uint { } /// Optimized instructions - #[cfg(x64_asm_optimizations)] - #[inline] fn overflowing_add(self, other: $name) -> ($name, bool) { - add_64x_optimized!($name, $n_words, self, other) - } - #[cfg(not(x64_asm_optimizations))] - fn overflowing_add(self, other: $name) -> ($name, bool) { - overflowing_add_regular!($name, $n_words, self, other) + uint_overflowing_add!($name, $n_words, self, other) } - #[cfg(x64_asm_optimizations)] - #[inline] fn overflowing_sub(self, other: $name) -> ($name, bool) { - sub_64x_optimized!($name, $n_words, self, other) - } - #[cfg(not(x64_asm_optimizations))] - fn overflowing_sub(self, other: $name) -> ($name, bool) { - let res = overflowing!((!other).overflowing_add(From::from(1u64))); - let res = overflowing!(self.overflowing_add(res)); - (res, self < other) + uint_overflowing_sub!($name, $n_words, self, other) } fn overflowing_mul(self, other: $name) -> ($name, bool) { - let mut res = $name::from(0u64); - let mut overflow = false; - // TODO: be more efficient about this - for i in 0..(2 * $n_words) { - let v = overflowing!(self.overflowing_mul_u32((other >> (32 * i)).low_u32()), overflow); - let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow); - res = overflowing!(res.overflowing_add(res2), overflow); - } - (res, overflow) + uint_overflowing_mul!($name, $n_words, self, other) } fn overflowing_div(self, other: $name) -> ($name, bool) { From 0794049d18708a3d258df42cd43f51ce03a9ae33 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 02:05:59 +0300 Subject: [PATCH 05/23] fix naughty macros --- util/src/uint.rs | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index 38b4e4906..f27150199 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -53,6 +53,12 @@ macro_rules! impl_map_from { #[cfg(not(all(feature="dev", target_arch = "x86_64")))] macro_rules! 
uint_overflowing_add { + ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({ + uint_overflowing_add_reg!($name, $n_words, $self_expr, $other) + }) +} + +macro_rules! uint_overflowing_add_reg { ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({ let $name(ref me) = $self_expr; let $name(ref you) = $other; @@ -82,6 +88,7 @@ macro_rules! uint_overflowing_add { }) } + #[cfg(all(feature="dev", target_arch = "x86_64"))] macro_rules! uint_overflowing_add { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ @@ -106,7 +113,7 @@ macro_rules! uint_overflowing_add { (U256(result), overflow != 0) }); ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ( - overflowing_add_regular!($name, $n_words, $self_expr, $other) + uint_overflowing_add_reg!($name, $n_words, $self_expr, $other) ) } @@ -142,9 +149,11 @@ macro_rules! uint_overflowing_sub { } (U256(result), overflow != 0) }); - ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ( - overflowing_add_regular!($name, $n_words, $self_expr, $other) - ) + ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ + let res = overflowing!((!$other).overflowing_add(From::from(1u64))); + let res = overflowing!($self_expr.overflowing_add(res)); + (res, $self_expr < $other) + }) } #[cfg(all(feature="dev", target_arch = "x86_64"))] @@ -241,12 +250,18 @@ macro_rules! uint_overflowing_mul { (U256(result), overflow > 0) }); ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ( - overflowing_mul_regular!($name, $n_words, $self_expr, $other) + uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other) ) } #[cfg(not(all(feature="dev", target_arch = "x86_64")))] macro_rules! uint_overflowing_mul { + ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ + uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other) + }) +} + +macro_rules! 
uint_overflowing_mul_reg { ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut res = $name::from(0u64); let mut overflow = false; From da69ea51fe2c5da3b540fb07066588a7a4e27d1e Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 03:09:33 +0300 Subject: [PATCH 06/23] inline --- util/benches/bigint.rs | 12 ++++++++++-- util/src/uint.rs | 27 ++++++++++++++++----------- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs index 38ce10a4a..3a9c6d118 100644 --- a/util/benches/bigint.rs +++ b/util/benches/bigint.rs @@ -33,7 +33,15 @@ use ethcore_util::uint::*; fn u256_add(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); - (0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 }) + (0..n).fold(U256::from(1234599u64), |old, new| { old.overflowing_add(U256::from(new)).0 }) + }); +} + +#[bench] +fn u256_uber_add(b: &mut Bencher) { + b.iter(|| { + let n = black_box(10000); + (0..n).fold(U256::from(1234599u64), |old, new| { old.uber_add(U256::from(new)).0 }) }); } @@ -41,7 +49,7 @@ fn u256_add(b: &mut Bencher) { fn u256_sub(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); - (0..n).fold(U256::zero(), |old, new| { old.overflowing_sub(U256::from(new)).0 }) + (0..n).fold(U256::from(::std::u64::MAX), |old, new| { old.overflowing_sub(U256::from(new)).0 }) }); } diff --git a/util/src/uint.rs b/util/src/uint.rs index f27150199..8dd7d8638 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -97,17 +97,19 @@ macro_rules! 
uint_overflowing_add { let other_t: &[u64; 4] = unsafe { &mem::transmute($other) }; let overflow: u8; - unsafe { - asm!(" - adc $9, $0 - adc $10, $1 - adc $11, $2 - adc $12, $3 - setc %al" - : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) - : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]) - : - : + unsafe { + asm!(" + adc $9, %r8 + adc $10, %r9 + adc $11, %r10 + adc $12, %r11 + setc %al + " + : "={r8}"(result[0]), "={r9}"(result[1]), "={r10}"(result[2]), "={r11}"(result[3]), "={al}"(overflow) + : "{r8}"(self_t[0]), "{r9}"(self_t[1]), "{r10}"(self_t[2]), "{r11}"(self_t[3]), + "m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3]) + : + : ); } (U256(result), overflow != 0) @@ -522,14 +524,17 @@ macro_rules! construct_uint { } /// Optimized instructions + #[inline(always)] fn overflowing_add(self, other: $name) -> ($name, bool) { uint_overflowing_add!($name, $n_words, self, other) } + #[inline(always)] fn overflowing_sub(self, other: $name) -> ($name, bool) { uint_overflowing_sub!($name, $n_words, self, other) } + #[inline(always)] fn overflowing_mul(self, other: $name) -> ($name, bool) { uint_overflowing_mul!($name, $n_words, self, other) } From ae76a509dcc956ea329781bbc9cb6b6cc373580e Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 03:10:02 +0300 Subject: [PATCH 07/23] inline test --- util/benches/bigint.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs index 3a9c6d118..826d5023e 100644 --- a/util/benches/bigint.rs +++ b/util/benches/bigint.rs @@ -37,13 +37,6 @@ fn u256_add(b: &mut Bencher) { }); } -#[bench] -fn u256_uber_add(b: &mut Bencher) { - b.iter(|| { - let n = black_box(10000); - (0..n).fold(U256::from(1234599u64), |old, new| { old.uber_add(U256::from(new)).0 }) - }); -} #[bench] fn u256_sub(b: &mut Bencher) { From 
f17d893f53f2551d51e590cb4ce2d296750f4093 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 16:20:57 +0300 Subject: [PATCH 08/23] fixed mul, fixed register pref --- util/src/uint.rs | 159 +++++++++++++++++++++++++++-------------------- 1 file changed, 93 insertions(+), 66 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index 8dd7d8638..6869c3ec1 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -99,15 +99,15 @@ macro_rules! uint_overflowing_add { let overflow: u8; unsafe { asm!(" - adc $9, %r8 - adc $10, %r9 - adc $11, %r10 - adc $12, %r11 + adc $9, $0 + adc $10, $1 + adc $11, $2 + adc $12, $3 setc %al " - : "={r8}"(result[0]), "={r9}"(result[1]), "={r10}"(result[2]), "={r11}"(result[3]), "={al}"(overflow) - : "{r8}"(self_t[0]), "{r9}"(self_t[1]), "{r10}"(self_t[2]), "{r11}"(self_t[3]), - "m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3]) + : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) + : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), + "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]) : : ); @@ -138,10 +138,10 @@ macro_rules! uint_overflowing_sub { let overflow: u8; unsafe { asm!(" - sbb $9, %r8 - sbb $10, %r9 - sbb $11, %r10 - sbb $12, %r11 + sbb $9, $0 + sbb $10, $1 + sbb $11, $2 + sbb $12, $3 setb %al" : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]) @@ -168,76 +168,103 @@ macro_rules! 
uint_overflowing_mul { let overflow: u8; unsafe { asm!(" + clc mov $5, %rax mulq $9 - mov %rax, %r8 - adc $6, %rdx - pushf + mov %rax, $0 + mov %rdx, $1 - mov %rdx, %rax + mov $6, %rax + mulq $9 + clc + adc %rax, $1 + mov %rdx, $2 + + mov $5, %rax + pushf + mulq $10 + popf + adc %rax, $1 + adc %rdx, $2 + + mov $6, %rax + mulq $10 + clc + adc %rax, $2 + mov %rdx, $3 + + mov $7, %rax + mulq $9 + clc + adc %rax, $2 + adc %rdx, $3 + + mov $5, %rax + mulq $11 + clc + adc %rax, $2 + adc %rdx, $3 + + mov $8, %rax + pushf mulq $9 popf - adc $$0, %rax - adc $7, %rdx - pushf - mov %rax, %r9 - - - mov %rdx, %rax - mulq $9 - popf - adc $$0, %rax - adc $8, %rdx - pushf - mov %rax, %r10 - - mov %rdx, %rax - mulq $9 - popf - adc $$0, %rax - mov %rax, %r11 + adc %rax, $3 + adc $$0, %rdx mov %rdx, %rcx + clc - mov $5, %rax - mulq $10 - adc %rax, %r9 - adc $6, %rdx + mov $7, %rax pushf - - mov %rdx, %rax mulq $10 popf - adc %rax, %r10 - adc $7, %rdx - pushf - - mov %rdx, %rax - mulq $10 - popf - adc %rax, %r11 - pushf - or %rax, %rcx - - mov $5, %rax - mulq $11 - popf - adc %rax, %r10 - adc $6, %rdx - pushf - - mov %rdx, %rax - mulq $11 - popf - adc %rax, %r11 - pushf + adc %rax, $3 + adc $$0, %rdx or %rdx, %rcx + clc + + mov $6, %rax + pushf + mulq $11 + popf + adc %rax, $3 + adc $$0, %rdx + or %rdx, %rcx + clc mov $5, %rax + pushf mulq $12 popf - adc %rax, %r11 - or %rdx, %rcx - " + adc %rax, $3 + adc $$0, %rdx + or %rdx, %rcx + clc + + cmpq $$0, %rcx + jne 2f + + mov $8, %rax + cmpq $$0, %rax + setz %cl + + mov $7, %rax + cmpq $$0, %rax + sete %dl + or %dl, %cl + + mov $3, %rax + cmpq $$0, %rax + sete %dl + + mov $2, %rax + cmpq $$0, %rax + sete %bl + or %bl, %dl + + and %dl, %cl + + 2: " : /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]), /* $3 */ "={r11}"(result[3]), /* $4 */ "={rcx}"(overflow) From 5467b06c4f845bd8fde8adf216dec75deb4dbea6 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 16:40:36 +0300 Subject: [PATCH 
09/23] fix bench iter --- util/benches/bigint.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs index 826d5023e..a22edcfbc 100644 --- a/util/benches/bigint.rs +++ b/util/benches/bigint.rs @@ -33,7 +33,7 @@ use ethcore_util::uint::*; fn u256_add(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); - (0..n).fold(U256::from(1234599u64), |old, new| { old.overflowing_add(U256::from(new)).0 }) + (0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_add(U256::from(new)).0 }) }); } @@ -42,7 +42,7 @@ fn u256_add(b: &mut Bencher) { fn u256_sub(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); - (0..n).fold(U256::from(::std::u64::MAX), |old, new| { old.overflowing_sub(U256::from(new)).0 }) + (0..n).fold(U256([::std::u64::MAX, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_sub(U256::from(new)).0 }) }); } From fb5779a00eeeef78228ec61e09331fbed503293a Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 16:55:03 +0300 Subject: [PATCH 10/23] specific feature for asm opt --- util/Cargo.toml | 1 + util/src/lib.rs | 2 +- util/src/uint.rs | 12 ++++++------ 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/util/Cargo.toml b/util/Cargo.toml index 6d2ebcd9b..e2e91eb4b 100644 --- a/util/Cargo.toml +++ b/util/Cargo.toml @@ -39,6 +39,7 @@ target_info = "0.1" [features] default = [] dev = ["clippy"] +x64asm = [] [build-dependencies] vergen = "*" diff --git a/util/src/lib.rs b/util/src/lib.rs index 1f04240dc..d0c74af10 100644 --- a/util/src/lib.rs +++ b/util/src/lib.rs @@ -16,7 +16,7 @@ #![warn(missing_docs)] #![cfg_attr(feature="dev", feature(plugin))] -#![cfg_attr(feature="dev", feature(asm))] +#![cfg_attr(feature="x64asm", feature(asm))] #![cfg_attr(feature="dev", plugin(clippy))] // Clippy settings diff --git a/util/src/uint.rs b/util/src/uint.rs index 6869c3ec1..98c16ab90 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -51,7 +51,7 @@ macro_rules! 
impl_map_from { } } -#[cfg(not(all(feature="dev", target_arch = "x86_64")))] +#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))] macro_rules! uint_overflowing_add { ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({ uint_overflowing_add_reg!($name, $n_words, $self_expr, $other) @@ -89,7 +89,7 @@ macro_rules! uint_overflowing_add_reg { } -#[cfg(all(feature="dev", target_arch = "x86_64"))] +#[cfg(all(feature="x64asm", target_arch = "x86_64"))] macro_rules! uint_overflowing_add { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut result: [u64; 4] = unsafe { mem::uninitialized() }; @@ -119,7 +119,7 @@ macro_rules! uint_overflowing_add { ) } -#[cfg(not(all(feature="dev", target_arch = "x86_64")))] +#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))] macro_rules! uint_overflowing_sub { ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ let res = overflowing!((!$other).overflowing_add(From::from(1u64))); @@ -128,7 +128,7 @@ macro_rules! uint_overflowing_sub { }) } -#[cfg(all(feature="dev", target_arch = "x86_64"))] +#[cfg(all(feature="x64asm", target_arch = "x86_64"))] macro_rules! uint_overflowing_sub { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut result: [u64; 4] = unsafe { mem::uninitialized() }; @@ -158,7 +158,7 @@ macro_rules! uint_overflowing_sub { }) } -#[cfg(all(feature="dev", target_arch = "x86_64"))] +#[cfg(all(feature="x64asm", target_arch = "x86_64"))] macro_rules! uint_overflowing_mul { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut result: [u64; 4] = unsafe { mem::uninitialized() }; @@ -283,7 +283,7 @@ macro_rules! uint_overflowing_mul { ) } -#[cfg(not(all(feature="dev", target_arch = "x86_64")))] +#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))] macro_rules! 
uint_overflowing_mul { ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other) From 7525ff23cf1802cbeb6e4d51394eac3974de4c70 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 17:59:08 +0300 Subject: [PATCH 11/23] removed artefact cls/pushf/popf --- util/src/uint.rs | 40 +++++++++++----------------------------- 1 file changed, 11 insertions(+), 29 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index 98c16ab90..bebaade22 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -168,7 +168,6 @@ macro_rules! uint_overflowing_mul { let overflow: u8; unsafe { asm!(" - clc mov $5, %rax mulq $9 mov %rax, $0 @@ -176,77 +175,59 @@ macro_rules! uint_overflowing_mul { mov $6, %rax mulq $9 - clc - adc %rax, $1 + add %rax, $1 mov %rdx, $2 mov $5, %rax - pushf mulq $10 - popf - adc %rax, $1 + add %rax, $1 adc %rdx, $2 mov $6, %rax mulq $10 - clc - adc %rax, $2 + add %rax, $2 mov %rdx, $3 mov $7, %rax mulq $9 - clc - adc %rax, $2 + add %rax, $2 adc %rdx, $3 mov $5, %rax mulq $11 - clc - adc %rax, $2 + add %rax, $2 adc %rdx, $3 mov $8, %rax - pushf mulq $9 - popf adc %rax, $3 adc $$0, %rdx mov %rdx, %rcx - clc mov $7, %rax - pushf mulq $10 - popf - adc %rax, $3 + add %rax, $3 adc $$0, %rdx or %rdx, %rcx - clc mov $6, %rax - pushf mulq $11 - popf - adc %rax, $3 + add %rax, $3 adc $$0, %rdx or %rdx, %rcx - clc mov $5, %rax - pushf mulq $12 - popf - adc %rax, $3 + add %rax, $3 adc $$0, %rdx or %rdx, %rcx - clc cmpq $$0, %rcx jne 2f mov $8, %rax cmpq $$0, %rax - setz %cl + sete %cl mov $7, %rax cmpq $$0, %rax @@ -264,7 +245,8 @@ macro_rules! 
uint_overflowing_mul { and %dl, %cl - 2: " + 2: + " : /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]), /* $3 */ "={r11}"(result[3]), /* $4 */ "={rcx}"(overflow) From 864e7540742ebc1c408e9e2de57f96eee28d7c5b Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 18:02:08 +0300 Subject: [PATCH 12/23] overflowing_sub in sub --- util/src/uint.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index bebaade22..6793376a0 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -765,9 +765,9 @@ macro_rules! construct_uint { #[inline] fn sub(self, other: $name) -> $name { - panic_on_overflow!(self < other); - let res = overflowing!((!other).overflowing_add(From::from(1u64))); - overflowing!(self.overflowing_add(res)) + let (result, overflow) = self.overflowing_sub(other); + panic_on_overflow!(overflow); + result } } From 5d22ad3fc8abe4617684213833550207ddca2c6b Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 18:10:33 +0300 Subject: [PATCH 13/23] counter jump better --- util/src/uint.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index 6793376a0..f4d5b5b76 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -222,8 +222,7 @@ macro_rules! uint_overflowing_mul { adc $$0, %rdx or %rdx, %rcx - cmpq $$0, %rcx - jne 2f + jrcxz 2f mov $8, %rax cmpq $$0, %rax @@ -234,6 +233,8 @@ macro_rules! uint_overflowing_mul { sete %dl or %dl, %cl + jrcxz 2f + mov $3, %rax cmpq $$0, %rax sete %dl From 2ee4a0c8c6ff29d6bbbc15752778590364d9bdb3 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 18:16:08 +0300 Subject: [PATCH 14/23] mistake of ne/jcxz --- util/src/uint.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index f4d5b5b76..6793376a0 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -222,7 +222,8 @@ macro_rules! 
uint_overflowing_mul { adc $$0, %rdx or %rdx, %rcx - jrcxz 2f + cmpq $$0, %rcx + jne 2f mov $8, %rax cmpq $$0, %rax @@ -233,8 +234,6 @@ macro_rules! uint_overflowing_mul { sete %dl or %dl, %cl - jrcxz 2f - mov $3, %rax cmpq $$0, %rax sete %dl From 600859ed04acd3650868f35bf4a1add4f983702d Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 19:58:09 +0300 Subject: [PATCH 15/23] [ci skip] flush --- util/src/uint.rs | 48 +++++++++++++----------------------------------- 1 file changed, 13 insertions(+), 35 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index 6793376a0..8e9172a04 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -165,7 +165,7 @@ macro_rules! uint_overflowing_mul { let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) }; let other_t: &[u64; 4] = unsafe { &mem::transmute($other) }; - let overflow: u8; + let overflow: u64; unsafe { asm!(" mov $5, %rax @@ -222,25 +222,25 @@ macro_rules! uint_overflowing_mul { adc $$0, %rdx or %rdx, %rcx - cmpq $$0, %rcx + cmpq $$0, %rcx jne 2f mov $8, %rax cmpq $$0, %rax - sete %cl + setne %cl mov $7, %rax cmpq $$0, %rax - sete %dl + setne %dl or %dl, %cl mov $3, %rax cmpq $$0, %rax - sete %dl + setne %dl mov $2, %rax cmpq $$0, %rax - sete %bl + setne %bl or %bl, %dl and %dl, %cl @@ -253,7 +253,7 @@ macro_rules! uint_overflowing_mul { : /* $5 */ "m"(self_t[0]), /* $6 */ "m"(self_t[1]), /* $7 */ "m"(self_t[2]), /* $8 */ "m"(self_t[3]), /* $9 */ "m"(other_t[0]), /* $10 */ "m"(other_t[1]), /* $11 */ "m"(other_t[2]), /* $12 */ "m"(other_t[3]) - : "rax", "rdx" + : "rax", "rdx", "rbx" : ); @@ -740,23 +740,8 @@ macro_rules! 
construct_uint { type Output = $name; fn add(self, other: $name) -> $name { - let $name(ref me) = self; - let $name(ref you) = other; - let mut ret = [0u64; $n_words]; - let mut carry = [0u64; $n_words]; - let mut b_carry = false; - for i in 0..$n_words { - if i < $n_words - 1 { - ret[i] = me[i].wrapping_add(you[i]); - if ret[i] < me[i] { - carry[i + 1] = 1; - b_carry = true; - } - } else { - ret[i] = me[i] + you[i]; - } - } - if b_carry { $name(ret) + $name(carry) } else { $name(ret) } + let (result, _) = self.overflowing_add(other); + result } } @@ -765,8 +750,7 @@ macro_rules! construct_uint { #[inline] fn sub(self, other: $name) -> $name { - let (result, overflow) = self.overflowing_sub(other); - panic_on_overflow!(overflow); + let (result, _) = self.overflowing_sub(other); result } } @@ -775,15 +759,9 @@ macro_rules! construct_uint { type Output = $name; fn mul(self, other: $name) -> $name { - let mut res = $name::from(0u64); - // TODO: be more efficient about this - for i in 0..(2 * $n_words) { - let v = self.mul_u32((other >> (32 * i)).low_u32()); - let (r, overflow) = v.overflowing_shl(32 * i as u32); - panic_on_overflow!(overflow); - res = res + r; - } - res + let (result, overflow) = self.overflowing_mul(other); + panic_on_overflow!(overflow); + result } } From e946e2ab183f8c1c60d88769602e654911244396 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 22:27:22 +0300 Subject: [PATCH 16/23] epic mul overflow bug --- util/src/uint.rs | 32 +++++++++++++------------------- 1 file changed, 13 insertions(+), 19 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index 8e9172a04..f9d9b4af8 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -225,25 +225,17 @@ macro_rules! 
uint_overflowing_mul { cmpq $$0, %rcx jne 2f - mov $8, %rax - cmpq $$0, %rax - setne %cl + popcnt $8, %rcx + popcnt $7, %rax + add %rax, %rcx + jrcxz 2f - mov $7, %rax - cmpq $$0, %rax - setne %dl - or %dl, %cl + popcnt $12, %rcx + popcnt $11, %rax + add %rax, %rcx + jrcxz 2f - mov $3, %rax - cmpq $$0, %rax - setne %dl - - mov $2, %rax - cmpq $$0, %rax - setne %bl - or %bl, %dl - - and %dl, %cl + mov $$1, %rcx 2: " @@ -740,7 +732,8 @@ macro_rules! construct_uint { type Output = $name; fn add(self, other: $name) -> $name { - let (result, _) = self.overflowing_add(other); + let (result, overflow) = self.overflowing_add(other); + panic_on_overflow!(overflow); result } } @@ -750,7 +743,8 @@ macro_rules! construct_uint { #[inline] fn sub(self, other: $name) -> $name { - let (result, _) = self.overflowing_sub(other); + let (result, overflow) = self.overflowing_sub(other); + panic_on_overflow!(overflow); result } } From 4b0ec642995206fcef7cca18b8a3cec733813be3 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Thu, 25 Feb 2016 22:48:34 +0300 Subject: [PATCH 17/23] random init for benches --- util/benches/bigint.rs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs index a22edcfbc..3b2012bc7 100644 --- a/util/benches/bigint.rs +++ b/util/benches/bigint.rs @@ -25,6 +25,7 @@ extern crate test; extern crate ethcore_util; +extern crate rand; use test::{Bencher, black_box}; use ethcore_util::uint::*; @@ -33,7 +34,7 @@ use ethcore_util::uint::*; fn u256_add(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); - (0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_add(U256::from(new)).0 }) + (0..n).fold(U256([rand::random::(), rand::random::(), rand::random::(), rand::random::()]), |old, new| { old.overflowing_add(U256::from(new)).0 }) }); } @@ -42,7 +43,7 @@ fn u256_add(b: &mut Bencher) { fn u256_sub(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); - 
(0..n).fold(U256([::std::u64::MAX, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_sub(U256::from(new)).0 }) + (0..n).fold(U256([rand::random::(), rand::random::(), rand::random::(), rand::random::()]), |old, new| { old.overflowing_sub(U256::from(new)).0 }) }); } @@ -50,7 +51,7 @@ fn u256_sub(b: &mut Bencher) { fn u256_mul(b: &mut Bencher) { b.iter(|| { let n = black_box(10000); - (0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_mul(U256::from(new)).0 }) + (0..n).fold(U256([rand::random::(), rand::random::(), rand::random::(), rand::random::()]), |old, new| { old.overflowing_mul(U256::from(new)).0 }) }); } From f29417eea91f689fe57d3d8b06e1921b93235291 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Fri, 26 Feb 2016 14:50:55 +0300 Subject: [PATCH 18/23] allow dead code for macros expansion --- util/src/uint.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/util/src/uint.rs b/util/src/uint.rs index f9d9b4af8..ca727190b 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -588,6 +588,7 @@ macro_rules! construct_uint { } impl $name { + #[allow(dead_code)] // not used when multiplied with inline assembly /// Multiplication by u32 fn mul_u32(self, other: u32) -> Self { let $name(ref arr) = self; @@ -609,6 +610,7 @@ macro_rules! construct_uint { $name(ret) + $name(carry) } + #[allow(dead_code)] // not used when multiplied with inline assembly /// Overflowing multiplication by u32 fn overflowing_mul_u32(self, other: u32) -> (Self, bool) { let $name(ref arr) = self; From e95538f3ec716af0e051b6ed88761105b598defd Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Fri, 26 Feb 2016 15:56:55 +0300 Subject: [PATCH 19/23] [ci skip] style fixes, multipart add test --- util/src/uint.rs | 35 ++++++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index ca727190b..82d3afe97 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -51,7 +51,7 @@ macro_rules! 
impl_map_from { } } -#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))] +#[cfg(not(all(feature="x64asm", target_arch="x86_64")))] macro_rules! uint_overflowing_add { ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({ uint_overflowing_add_reg!($name, $n_words, $self_expr, $other) @@ -89,7 +89,7 @@ macro_rules! uint_overflowing_add_reg { } -#[cfg(all(feature="x64asm", target_arch = "x86_64"))] +#[cfg(all(feature="x64asm", target_arch="x86_64"))] macro_rules! uint_overflowing_add { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut result: [u64; 4] = unsafe { mem::uninitialized() }; @@ -119,7 +119,7 @@ macro_rules! uint_overflowing_add { ) } -#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))] +#[cfg(not(all(feature="x64asm", target_arch="x86_64")))] macro_rules! uint_overflowing_sub { ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ let res = overflowing!((!$other).overflowing_add(From::from(1u64))); @@ -128,7 +128,7 @@ macro_rules! uint_overflowing_sub { }) } -#[cfg(all(feature="x64asm", target_arch = "x86_64"))] +#[cfg(all(feature="x64asm", target_arch="x86_64"))] macro_rules! uint_overflowing_sub { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut result: [u64; 4] = unsafe { mem::uninitialized() }; @@ -158,7 +158,7 @@ macro_rules! uint_overflowing_sub { }) } -#[cfg(all(feature="x64asm", target_arch = "x86_64"))] +#[cfg(all(feature="x64asm", target_arch="x86_64"))] macro_rules! uint_overflowing_mul { (U256, $n_words: expr, $self_expr: expr, $other: expr) => ({ let mut result: [u64; 4] = unsafe { mem::uninitialized() }; @@ -222,7 +222,7 @@ macro_rules! uint_overflowing_mul { adc $$0, %rdx or %rdx, %rcx - cmpq $$0, %rcx + cmpq $$0, %rcx jne 2f popcnt $8, %rcx @@ -257,7 +257,7 @@ macro_rules! uint_overflowing_mul { ) } -#[cfg(not(all(feature="x64asm", target_arch = "x86_64")))] +#[cfg(not(all(feature="x64asm", target_arch="x86_64")))] macro_rules! 
uint_overflowing_mul { ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ uint_overflowing_mul_reg!($name, $n_words, $self_expr, $other) @@ -1468,5 +1468,26 @@ mod tests { fn display_uint_zero() { assert_eq!(format!("{}", U256::from(0)), "0"); } + + + #[test] + fn u256_multi_adds() { + let (result, _) = U256([0, 0, 0, 0]).overflowing_add(U256([0, 0, 0, 0])); + assert_eq!(result, U256([0, 0, 0, 0])); + + let (result, _) = U256([0, 0, 0, 1]).overflowing_add(U256([0, 0, 0, 1])); + assert_eq!(result, U256([0, 0, 0, 2])); + + let (result, overflow) = U256([0, 0, 2, 1]).overflowing_add(U256([0, 0, 3, 1])); + assert_eq!(result, U256([0, 0, 5, 2])); + assert!(!overflow); + + let (_, overflow) = U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX]) + .overflowing_add(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])); + assert!(overflow); + + let (_, overflow) = U256([0, 0, 0, ::std::u64::MAX]).overflowing_add(U256([0, 0, 0, ::std::u64::MAX])); + assert!(overflow); + } } From 228e3fefe02445b39ce227d182331483beb90dd5 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Fri, 26 Feb 2016 16:03:04 +0300 Subject: [PATCH 20/23] [ci skip] multipart sub test --- util/src/uint.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/util/src/uint.rs b/util/src/uint.rs index 82d3afe97..245381b4b 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -1489,5 +1489,32 @@ mod tests { let (_, overflow) = U256([0, 0, 0, ::std::u64::MAX]).overflowing_add(U256([0, 0, 0, ::std::u64::MAX])); assert!(overflow); } + + + #[test] + fn u256_multi_subs() { + let (result, _) = U256([0, 0, 0, 0]).overflowing_sub(U256([0, 0, 0, 0])); + assert_eq!(result, U256([0, 0, 0, 0])); + + let (result, _) = U256([0, 0, 0, 1]).overflowing_sub(U256([0, 0, 0, 1])); + assert_eq!(result, U256([0, 0, 0, 0])); + + let (_, overflow) = U256([0, 0, 2, 1]).overflowing_sub(U256([0, 0, 3, 1])); + assert!(overflow); + + let (result, 
overflow) = U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX]) + .overflowing_sub(U256([::std::u64::MAX/2, ::std::u64::MAX/2, ::std::u64::MAX/2, ::std::u64::MAX/2])); + assert!(!overflow); + assert_eq!(U256([::std::u64::MAX/2+1, ::std::u64::MAX/2+1, ::std::u64::MAX/2+1, ::std::u64::MAX/2+1]), result); + + let (result, overflow) = U256([0, 0, 0, 1]).overflowing_sub(U256([0, 0, 1, 0])); + assert!(!overflow); + assert_eq!(U256([0, 0, ::std::u64::MAX, 0]), result); + + let (result, overflow) = U256([0, 0, 0, 1]).overflowing_sub(U256([1, 0, 0, 0])); + assert!(!overflow); + assert_eq!(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, 0]), result); + } + } From 3858a2011fb765dbb785f15773f21bb4578548f6 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Fri, 26 Feb 2016 16:12:47 +0300 Subject: [PATCH 21/23] [ci skip] mul multipart tests --- util/src/uint.rs | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/util/src/uint.rs b/util/src/uint.rs index 245381b4b..a34742bd9 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -1516,5 +1516,51 @@ mod tests { assert_eq!(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, 0]), result); } + + #[test] + fn u256_multi_muls() { + let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0])); + assert_eq!(U256([0, 0, 0, 0]), result); + + let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([1, 0, 0, 0])); + assert_eq!(U256([1, 0, 0, 0]), result); + + let (result, _) = U256([5, 0, 0, 0]).overflowing_mul(U256([5, 0, 0, 0])); + assert_eq!(U256([25, 0, 0, 0]), result); + + let (result, _) = U256([0, 5, 0, 0]).overflowing_mul(U256([0, 5, 0, 0])); + assert_eq!(U256([0, 0, 25, 0]), result); + + let (result, _) = U256([0, 0, 0, 1]).overflowing_mul(U256([1, 0, 0, 0])); + assert_eq!(U256([0, 0, 0, 1]), result); + + let (result, _) = U256([0, 0, 0, 5]).overflowing_mul(U256([2, 0, 0, 0])); + assert_eq!(U256([0, 0, 0, 10]), result); + + let (result, 
_) = U256([0, 0, 1, 0]).overflowing_mul(U256([0, 5, 0, 0])); + assert_eq!(U256([0, 0, 0, 5]), result); + + let (result, _) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0])); + assert_eq!(U256([0, 0, 0, 0]), result); + + let (result, _) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 5, 0, 0])); + assert_eq!(U256([0, 10, 0, 0]), result); + + let (result, _) = U256([::std::u64::MAX, 0, 0, 0]).overflowing_mul(U256([::std::u64::MAX, 0, 0, 0])); + assert_eq!(U256([1, ::std::u64::MAX-1, 0, 0]), result); + + let (result, _) = U256([0, 0, 0, ::std::u64::MAX]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX])); + assert_eq!(U256([0, 0, 0, 0]), result); + + let (result, _) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX])); + assert_eq!(U256([0, 0, 0, ::std::u64::MAX]), result); + + let (result, _) = U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX]) + .overflowing_mul(U256([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])); + assert_eq!(U256([1, 0, 0, 0]), result); + } + + + } From 023c6236500b82f89110c0f482e15b23a559cbb5 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Fri, 26 Feb 2016 16:19:55 +0300 Subject: [PATCH 22/23] mul overflow multipart test --- util/src/uint.rs | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/util/src/uint.rs b/util/src/uint.rs index a34742bd9..98541fe33 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -1560,7 +1560,34 @@ mod tests { assert_eq!(U256([1, 0, 0, 0]), result); } + #[test] + fn u256_multi_muls_overflow() { + let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0])); + assert!(!overflow); + let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX])); + assert!(!overflow); + let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX])); + assert!(!overflow); + + let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 1, 0, 0])); + assert!(!overflow); + 
+ let (_, overflow) = U256([0, 1, 0, ::std::u64::MAX]).overflowing_mul(U256([0, 1, 0, ::std::u64::MAX])); + assert!(overflow); + + let (_, overflow) = U256([0, ::std::u64::MAX, 0, 0]).overflowing_mul(U256([0, ::std::u64::MAX, 0, 0])); + assert!(!overflow); + + let (_, overflow) = U256([1, 0, 0, 0]).overflowing_mul(U256([10, 0, 0, 0])); + assert!(!overflow); + + let (_, overflow) = U256([2, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX / 2])); + assert!(!overflow); + + let (_, overflow) = U256([0, 0, 8, 0]).overflowing_mul(U256([0, 0, 7, 0])); + assert!(overflow); + } } From 5013c4d1f1efc38e2237ed691e48f9e3f5e54aa0 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Fri, 26 Feb 2016 16:50:12 +0300 Subject: [PATCH 23/23] naughty overflow bug fixed --- util/src/uint.rs | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index 98541fe33..b4940cfb8 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -226,16 +226,30 @@ macro_rules! uint_overflowing_mul { jne 2f popcnt $8, %rcx - popcnt $7, %rax - add %rax, %rcx - jrcxz 2f + jrcxz 12f popcnt $12, %rcx popcnt $11, %rax add %rax, %rcx - jrcxz 2f + popcnt $10, %rax + add %rax, %rcx + jmp 2f - mov $$1, %rcx + 12: + popcnt $12, %rcx + jrcxz 11f + + popcnt $7, %rcx + popcnt $6, %rax + add %rax, %rcx + + cmpq $$0, %rcx + jne 2f + + 11: + popcnt $11, %rcx + jrcxz 2f + popcnt $7, %rcx 2: " @@ -1569,7 +1583,7 @@ mod tests { assert!(!overflow); let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 0, 0, ::std::u64::MAX])); - assert!(!overflow); + assert!(overflow); let (_, overflow) = U256([0, 1, 0, 0]).overflowing_mul(U256([0, 1, 0, 0])); assert!(!overflow);