Optimizing mul_u32

This commit is contained in:
Tomasz Drwięga 2016-03-07 16:17:14 +01:00
parent 0fd52176dc
commit 4717be07d6
2 changed files with 15 additions and 36 deletions

View File

@@ -79,7 +79,7 @@ fn u256_full_mul(b: &mut Bencher) {
b.iter(|| {
let n = black_box(10000);
(0..n).fold(U256([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]),
|old, new| {
|old, _new| {
let U512(ref u512words) = old.full_mul(U256([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]));
U256([u512words[0], u512words[2], u512words[2], u512words[3]])
})

View File

@@ -711,52 +711,31 @@ macro_rules! construct_uint {
#[allow(dead_code)] // not used when multiplied with inline assembly
/// Multiplication by u32
fn mul_u32(self, other: u32) -> Self {
let $name(ref arr) = self;
let mut carry = [0u64; $n_words];
let mut ret = [0u64; $n_words];
for i in 0..$n_words {
let upper = other as u64 * (arr[i] >> 32);
let lower = other as u64 * (arr[i] & 0xFFFFFFFF);
ret[i] = lower.wrapping_add(upper << 32);
if i < $n_words - 1 {
carry[i + 1] = upper >> 32;
if ret[i] < lower {
carry[i + 1] += 1;
}
}
}
$name(ret) + $name(carry)
let (ret, overflow) = self.overflowing_mul_u32(other);
panic_on_overflow!(overflow);
ret
}
#[allow(dead_code)] // not used when multiplied with inline assembly
/// Overflowing multiplication by u32
fn overflowing_mul_u32(self, other: u32) -> (Self, bool) {
let $name(ref arr) = self;
let mut carry = [0u64; $n_words];
let o = other as u64;
let mut carry = [0u64; $n_words + 1];
let mut ret = [0u64; $n_words];
let mut overflow = false;
for i in 0..$n_words {
let upper = other as u64 * (arr[i] >> 32);
let lower = other as u64 * (arr[i] & 0xFFFFFFFF);
let upper = o * (arr[i] >> 32);
let lower = o * (arr[i] & 0xFFFFFFFF);
ret[i] = lower.wrapping_add(upper << 32);
let (res1, overflow1) = lower.overflowing_add(upper << 32);
let (res2, overflow2) = res1.overflowing_add(carry[i]);
if i < $n_words - 1 {
carry[i + 1] = upper >> 32;
if ret[i] < lower {
carry[i + 1] += 1;
}
} else if (upper >> 32) > 0 || ret[i] < lower {
overflow = true
}
ret[i] = res2;
carry[i + 1] = (upper >> 32) + overflow1 as u64 + overflow2 as u64;
}
let result = overflowing!(
$name(ret).overflowing_add($name(carry)),
overflow
);
(result, overflow)
($name(ret), carry[$n_words] > 0)
}
}