mul, bench showtime
This commit is contained in:
parent
7821505139
commit
ccaa194681
@ -21,6 +21,7 @@
|
|||||||
//! ```
|
//! ```
|
||||||
|
|
||||||
#![feature(test)]
|
#![feature(test)]
|
||||||
|
#![feature(asm)]
|
||||||
|
|
||||||
extern crate test;
|
extern crate test;
|
||||||
extern crate ethcore_util;
|
extern crate ethcore_util;
|
||||||
@ -40,7 +41,24 @@ fn u256_add(b: &mut Bencher) {
|
|||||||
fn u256_sub(b: &mut Bencher) {
|
fn u256_sub(b: &mut Bencher) {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
let n = black_box(10000);
|
let n = black_box(10000);
|
||||||
(0..n).fold(U256::zero(), |old, new| { old.overflowing_add(U256::from(new)).0 })
|
(0..n).fold(U256::zero(), |old, new| { old.overflowing_sub(U256::from(new)).0 })
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn u256_mul(b: &mut Bencher) {
|
||||||
|
b.iter(|| {
|
||||||
|
let n = black_box(10000);
|
||||||
|
(0..n).fold(U256([12345u64, 0u64, 0u64, 0u64]), |old, new| { old.overflowing_mul(U256::from(new)).0 })
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn u128_mul(b: &mut Bencher) {
|
||||||
|
b.iter(|| {
|
||||||
|
let n = black_box(10000);
|
||||||
|
(0..n).fold(U128([12345u64, 0u64]), |old, new| { old.overflowing_mul(U128::from(new)).0 })
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
161
util/src/uint.rs
161
util/src/uint.rs
@ -41,8 +41,6 @@ use from_json::*;
|
|||||||
use rustc_serialize::hex::ToHex;
|
use rustc_serialize::hex::ToHex;
|
||||||
use serde;
|
use serde;
|
||||||
|
|
||||||
#[cfg_attr(x64_asm_optimizations, all(feature = "dev", target_arch = "x86_64"))]
|
|
||||||
|
|
||||||
macro_rules! impl_map_from {
|
macro_rules! impl_map_from {
|
||||||
($thing:ident, $from:ty, $to:ty) => {
|
($thing:ident, $from:ty, $to:ty) => {
|
||||||
impl From<$from> for $thing {
|
impl From<$from> for $thing {
|
||||||
@ -53,7 +51,8 @@ macro_rules! impl_map_from {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! overflowing_add_regular {
|
#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
|
||||||
|
macro_rules! uint_overflowing_add {
|
||||||
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
|
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ({
|
||||||
let $name(ref me) = $self_expr;
|
let $name(ref me) = $self_expr;
|
||||||
let $name(ref you) = $other;
|
let $name(ref you) = $other;
|
||||||
@ -83,7 +82,8 @@ macro_rules! overflowing_add_regular {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! add_64x_optimized {
|
#[cfg(all(feature="dev", target_arch = "x86_64"))]
|
||||||
|
macro_rules! uint_overflowing_add {
|
||||||
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||||
let mut result: [u64; 4] = unsafe { mem::uninitialized() };
|
let mut result: [u64; 4] = unsafe { mem::uninitialized() };
|
||||||
let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
|
let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
|
||||||
@ -110,7 +110,17 @@ macro_rules! add_64x_optimized {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! sub_64x_optimized {
|
#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
|
||||||
|
macro_rules! uint_overflowing_sub {
|
||||||
|
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||||
|
let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
|
||||||
|
let res = overflowing!($self_expr.overflowing_add(res));
|
||||||
|
(res, $self_expr < $other)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(feature="dev", target_arch = "x86_64"))]
|
||||||
|
macro_rules! uint_overflowing_sub {
|
||||||
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||||
let mut result: [u64; 4] = unsafe { mem::uninitialized() };
|
let mut result: [u64; 4] = unsafe { mem::uninitialized() };
|
||||||
let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
|
let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
|
||||||
@ -137,6 +147,119 @@ macro_rules! sub_64x_optimized {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(all(feature="dev", target_arch = "x86_64"))]
|
||||||
|
macro_rules! uint_overflowing_mul {
|
||||||
|
(U256, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||||
|
let mut result: [u64; 4] = unsafe { mem::uninitialized() };
|
||||||
|
let self_t: &[u64; 4] = unsafe { &mem::transmute($self_expr) };
|
||||||
|
let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
|
||||||
|
|
||||||
|
let overflow: u8;
|
||||||
|
unsafe {
|
||||||
|
asm!("
|
||||||
|
mov $5, %rax
|
||||||
|
mulq $9
|
||||||
|
mov %rax, %r8
|
||||||
|
adc $6, %rdx
|
||||||
|
pushf
|
||||||
|
|
||||||
|
mov %rdx, %rax
|
||||||
|
mulq $9
|
||||||
|
popf
|
||||||
|
adc $$0, %rax
|
||||||
|
adc $7, %rdx
|
||||||
|
pushf
|
||||||
|
mov %rax, %r9
|
||||||
|
|
||||||
|
|
||||||
|
mov %rdx, %rax
|
||||||
|
mulq $9
|
||||||
|
popf
|
||||||
|
adc $$0, %rax
|
||||||
|
adc $8, %rdx
|
||||||
|
pushf
|
||||||
|
mov %rax, %r10
|
||||||
|
|
||||||
|
mov %rdx, %rax
|
||||||
|
mulq $9
|
||||||
|
popf
|
||||||
|
adc $$0, %rax
|
||||||
|
mov %rax, %r11
|
||||||
|
mov %rdx, %rcx
|
||||||
|
|
||||||
|
mov $5, %rax
|
||||||
|
mulq $10
|
||||||
|
adc %rax, %r9
|
||||||
|
adc $6, %rdx
|
||||||
|
pushf
|
||||||
|
|
||||||
|
mov %rdx, %rax
|
||||||
|
mulq $10
|
||||||
|
popf
|
||||||
|
adc %rax, %r10
|
||||||
|
adc $7, %rdx
|
||||||
|
pushf
|
||||||
|
|
||||||
|
mov %rdx, %rax
|
||||||
|
mulq $10
|
||||||
|
popf
|
||||||
|
adc %rax, %r11
|
||||||
|
pushf
|
||||||
|
or %rax, %rcx
|
||||||
|
|
||||||
|
mov $5, %rax
|
||||||
|
mulq $11
|
||||||
|
popf
|
||||||
|
adc %rax, %r10
|
||||||
|
adc $6, %rdx
|
||||||
|
pushf
|
||||||
|
|
||||||
|
mov %rdx, %rax
|
||||||
|
mulq $11
|
||||||
|
popf
|
||||||
|
adc %rax, %r11
|
||||||
|
pushf
|
||||||
|
or %rdx, %rcx
|
||||||
|
|
||||||
|
mov $5, %rax
|
||||||
|
mulq $12
|
||||||
|
popf
|
||||||
|
adc %rax, %r11
|
||||||
|
or %rdx, %rcx
|
||||||
|
"
|
||||||
|
: /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]),
|
||||||
|
/* $3 */ "={r11}"(result[3]), /* $4 */ "={rcx}"(overflow)
|
||||||
|
|
||||||
|
: /* $5 */ "m"(self_t[0]), /* $6 */ "m"(self_t[1]), /* $7 */ "m"(self_t[2]),
|
||||||
|
/* $8 */ "m"(self_t[3]), /* $9 */ "m"(other_t[0]), /* $10 */ "m"(other_t[1]),
|
||||||
|
/* $11 */ "m"(other_t[2]), /* $12 */ "m"(other_t[3])
|
||||||
|
: "rax", "rdx"
|
||||||
|
:
|
||||||
|
|
||||||
|
);
|
||||||
|
}
|
||||||
|
(U256(result), overflow > 0)
|
||||||
|
});
|
||||||
|
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
|
||||||
|
overflowing_mul_regular!($name, $n_words, $self_expr, $other)
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(not(all(feature="dev", target_arch = "x86_64")))]
|
||||||
|
macro_rules! uint_overflowing_mul {
|
||||||
|
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||||
|
let mut res = $name::from(0u64);
|
||||||
|
let mut overflow = false;
|
||||||
|
// TODO: be more efficient about this
|
||||||
|
for i in 0..(2 * $n_words) {
|
||||||
|
let v = overflowing!($self_expr.overflowing_mul_u32(($other >> (32 * i)).low_u32()), overflow);
|
||||||
|
let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow);
|
||||||
|
res = overflowing!(res.overflowing_add(res2), overflow);
|
||||||
|
}
|
||||||
|
(res, overflow)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
macro_rules! overflowing {
|
macro_rules! overflowing {
|
||||||
($op: expr, $overflow: expr) => (
|
($op: expr, $overflow: expr) => (
|
||||||
{
|
{
|
||||||
@ -384,38 +507,16 @@ macro_rules! construct_uint {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Optimized instructions
|
/// Optimized instructions
|
||||||
#[cfg(x64_asm_optimizations)]
|
|
||||||
#[inline]
|
|
||||||
fn overflowing_add(self, other: $name) -> ($name, bool) {
|
fn overflowing_add(self, other: $name) -> ($name, bool) {
|
||||||
add_64x_optimized!($name, $n_words, self, other)
|
uint_overflowing_add!($name, $n_words, self, other)
|
||||||
}
|
|
||||||
#[cfg(not(x64_asm_optimizations))]
|
|
||||||
fn overflowing_add(self, other: $name) -> ($name, bool) {
|
|
||||||
overflowing_add_regular!($name, $n_words, self, other)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(x64_asm_optimizations)]
|
|
||||||
#[inline]
|
|
||||||
fn overflowing_sub(self, other: $name) -> ($name, bool) {
|
fn overflowing_sub(self, other: $name) -> ($name, bool) {
|
||||||
sub_64x_optimized!($name, $n_words, self, other)
|
uint_overflowing_sub!($name, $n_words, self, other)
|
||||||
}
|
|
||||||
#[cfg(not(x64_asm_optimizations))]
|
|
||||||
fn overflowing_sub(self, other: $name) -> ($name, bool) {
|
|
||||||
let res = overflowing!((!other).overflowing_add(From::from(1u64)));
|
|
||||||
let res = overflowing!(self.overflowing_add(res));
|
|
||||||
(res, self < other)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn overflowing_mul(self, other: $name) -> ($name, bool) {
|
fn overflowing_mul(self, other: $name) -> ($name, bool) {
|
||||||
let mut res = $name::from(0u64);
|
uint_overflowing_mul!($name, $n_words, self, other)
|
||||||
let mut overflow = false;
|
|
||||||
// TODO: be more efficient about this
|
|
||||||
for i in 0..(2 * $n_words) {
|
|
||||||
let v = overflowing!(self.overflowing_mul_u32((other >> (32 * i)).low_u32()), overflow);
|
|
||||||
let res2 = overflowing!(v.overflowing_shl(32 * i as u32), overflow);
|
|
||||||
res = overflowing!(res.overflowing_add(res2), overflow);
|
|
||||||
}
|
|
||||||
(res, overflow)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn overflowing_div(self, other: $name) -> ($name, bool) {
|
fn overflowing_div(self, other: $name) -> ($name, bool) {
|
||||||
|
Loading…
Reference in New Issue
Block a user