Merge pull request #521 from ethcore/bigint-u512-opt
U512 add/sub optimize
This commit is contained in:
commit
07b9f9f6f3
@ -38,7 +38,6 @@ fn u256_add(b: &mut Bencher) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn u256_sub(b: &mut Bencher) {
|
fn u256_sub(b: &mut Bencher) {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
@ -47,6 +46,25 @@ fn u256_sub(b: &mut Bencher) {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn u512_sub(b: &mut Bencher) {
|
||||||
|
b.iter(|| {
|
||||||
|
let n = black_box(10000);
|
||||||
|
(0..n).fold(U512([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(),
|
||||||
|
rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]),
|
||||||
|
|old, new| { old.overflowing_sub(U512([0, 0, 0, 0, 0, 0, 0, new])).0 })
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
#[bench]
|
||||||
|
fn u512_add(b: &mut Bencher) {
|
||||||
|
b.iter(|| {
|
||||||
|
let n = black_box(10000);
|
||||||
|
(0..n).fold(U512([0, 0, 0, 0, 0, 0, 0, 0]),
|
||||||
|
|old, new| { old.overflowing_add(U512([new, new, new, new, new, new, new, new])).0 })
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
#[bench]
|
#[bench]
|
||||||
fn u256_mul(b: &mut Bencher) {
|
fn u256_mul(b: &mut Bencher) {
|
||||||
b.iter(|| {
|
b.iter(|| {
|
||||||
|
179
util/src/uint.rs
179
util/src/uint.rs
@ -97,23 +97,69 @@ macro_rules! uint_overflowing_add {
|
|||||||
let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
|
let other_t: &[u64; 4] = unsafe { &mem::transmute($other) };
|
||||||
|
|
||||||
let overflow: u8;
|
let overflow: u8;
|
||||||
unsafe {
|
unsafe {
|
||||||
asm!("
|
asm!("
|
||||||
adc $9, $0
|
add $9, $0
|
||||||
adc $10, $1
|
adc $10, $1
|
||||||
adc $11, $2
|
adc $11, $2
|
||||||
adc $12, $3
|
adc $12, $3
|
||||||
setc %al
|
setc %al
|
||||||
"
|
"
|
||||||
: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
|
: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
|
||||||
: "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]),
|
: "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]),
|
||||||
"mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
|
"mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
|
||||||
:
|
:
|
||||||
:
|
:
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
(U256(result), overflow != 0)
|
(U256(result), overflow != 0)
|
||||||
});
|
});
|
||||||
|
(U512, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||||
|
let mut result: [u64; 8] = unsafe { mem::uninitialized() };
|
||||||
|
let self_t: &[u64; 8] = unsafe { &mem::transmute($self_expr) };
|
||||||
|
let other_t: &[u64; 8] = unsafe { &mem::transmute($other) };
|
||||||
|
|
||||||
|
let overflow: u8;
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
asm!("
|
||||||
|
add $15, $0
|
||||||
|
adc $16, $1
|
||||||
|
adc $17, $2
|
||||||
|
adc $18, $3
|
||||||
|
lodsq
|
||||||
|
adc $11, %rax
|
||||||
|
stosq
|
||||||
|
lodsq
|
||||||
|
adc $12, %rax
|
||||||
|
stosq
|
||||||
|
lodsq
|
||||||
|
adc $13, %rax
|
||||||
|
stosq
|
||||||
|
lodsq
|
||||||
|
adc $14, %rax
|
||||||
|
stosq
|
||||||
|
setc %al
|
||||||
|
|
||||||
|
": "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]),
|
||||||
|
|
||||||
|
"={al}"(overflow) /* $0 - $4 */
|
||||||
|
|
||||||
|
: "{rdi}"(&result[4] as *const u64) /* $5 */
|
||||||
|
"{rsi}"(&other_t[4] as *const u64) /* $6 */
|
||||||
|
"0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]),
|
||||||
|
"m"(self_t[4]), "m"(self_t[5]), "m"(self_t[6]), "m"(self_t[7]),
|
||||||
|
/* $7 - $14 */
|
||||||
|
|
||||||
|
"mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]),
|
||||||
|
"m"(other_t[4]), "m"(other_t[5]), "m"(other_t[6]), "m"(other_t[7]) /* $15 - $22 */
|
||||||
|
: "rdi", "rsi"
|
||||||
|
:
|
||||||
|
);
|
||||||
|
}
|
||||||
|
(U512(result), overflow != 0)
|
||||||
|
});
|
||||||
|
|
||||||
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
|
($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => (
|
||||||
uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
|
uint_overflowing_add_reg!($name, $n_words, $self_expr, $other)
|
||||||
)
|
)
|
||||||
@ -138,12 +184,13 @@ macro_rules! uint_overflowing_sub {
|
|||||||
let overflow: u8;
|
let overflow: u8;
|
||||||
unsafe {
|
unsafe {
|
||||||
asm!("
|
asm!("
|
||||||
sbb $9, $0
|
sub $9, $0
|
||||||
sbb $10, $1
|
sbb $10, $1
|
||||||
sbb $11, $2
|
sbb $11, $2
|
||||||
sbb $12, $3
|
sbb $12, $3
|
||||||
setb %al"
|
setb %al
|
||||||
: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
|
"
|
||||||
|
: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow)
|
||||||
: "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
|
: "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3])
|
||||||
:
|
:
|
||||||
:
|
:
|
||||||
@ -151,6 +198,51 @@ macro_rules! uint_overflowing_sub {
|
|||||||
}
|
}
|
||||||
(U256(result), overflow != 0)
|
(U256(result), overflow != 0)
|
||||||
});
|
});
|
||||||
|
(U512, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||||
|
let mut result: [u64; 8] = unsafe { mem::uninitialized() };
|
||||||
|
let self_t: &[u64; 8] = unsafe { &mem::transmute($self_expr) };
|
||||||
|
let other_t: &[u64; 8] = unsafe { &mem::transmute($other) };
|
||||||
|
|
||||||
|
let overflow: u8;
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
asm!("
|
||||||
|
sub $15, $0
|
||||||
|
sbb $16, $1
|
||||||
|
sbb $17, $2
|
||||||
|
sbb $18, $3
|
||||||
|
lodsq
|
||||||
|
sbb $19, %rax
|
||||||
|
stosq
|
||||||
|
lodsq
|
||||||
|
sbb $20, %rax
|
||||||
|
stosq
|
||||||
|
lodsq
|
||||||
|
sbb $21, %rax
|
||||||
|
stosq
|
||||||
|
lodsq
|
||||||
|
sbb $22, %rax
|
||||||
|
stosq
|
||||||
|
setb %al
|
||||||
|
"
|
||||||
|
: "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]),
|
||||||
|
|
||||||
|
"={al}"(overflow) /* $0 - $4 */
|
||||||
|
|
||||||
|
: "{rdi}"(&result[4] as *const u64) /* $5 */
|
||||||
|
"{rsi}"(&self_t[4] as *const u64) /* $6 */
|
||||||
|
"0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]),
|
||||||
|
"m"(self_t[4]), "m"(self_t[5]), "m"(self_t[6]), "m"(self_t[7]),
|
||||||
|
/* $7 - $14 */
|
||||||
|
|
||||||
|
"m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3]),
|
||||||
|
"m"(other_t[4]), "m"(other_t[5]), "m"(other_t[6]), "m"(other_t[7]) /* $15 - $22 */
|
||||||
|
: "rdi", "rsi"
|
||||||
|
:
|
||||||
|
);
|
||||||
|
}
|
||||||
|
(U512(result), overflow != 0)
|
||||||
|
});
|
||||||
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({
|
||||||
let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
|
let res = overflowing!((!$other).overflowing_add(From::from(1u64)));
|
||||||
let res = overflowing!($self_expr.overflowing_add(res));
|
let res = overflowing!($self_expr.overflowing_add(res));
|
||||||
@ -251,8 +343,8 @@ macro_rules! uint_overflowing_mul {
|
|||||||
jrcxz 2f
|
jrcxz 2f
|
||||||
popcnt $7, %rcx
|
popcnt $7, %rcx
|
||||||
|
|
||||||
2:
|
2:
|
||||||
"
|
"
|
||||||
: /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]),
|
: /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]),
|
||||||
/* $3 */ "={r11}"(result[3]), /* $4 */ "={rcx}"(overflow)
|
/* $3 */ "={r11}"(result[3]), /* $4 */ "={rcx}"(overflow)
|
||||||
|
|
||||||
@ -1483,6 +1575,38 @@ mod tests {
|
|||||||
assert_eq!(format!("{}", U256::from(0)), "0");
|
assert_eq!(format!("{}", U256::from(0)), "0");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn u512_multi_adds() {
|
||||||
|
let (result, _) = U512([0, 0, 0, 0, 0, 0, 0, 0]).overflowing_add(U512([0, 0, 0, 0, 0, 0, 0, 0]));
|
||||||
|
assert_eq!(result, U512([0, 0, 0, 0, 0, 0, 0, 0]));
|
||||||
|
|
||||||
|
let (result, _) = U512([1, 0, 0, 0, 0, 0, 0, 1]).overflowing_add(U512([1, 0, 0, 0, 0, 0, 0, 1]));
|
||||||
|
assert_eq!(result, U512([2, 0, 0, 0, 0, 0, 0, 2]));
|
||||||
|
|
||||||
|
let (result, _) = U512([0, 0, 0, 0, 0, 0, 0, 1]).overflowing_add(U512([0, 0, 0, 0, 0, 0, 0, 1]));
|
||||||
|
assert_eq!(result, U512([0, 0, 0, 0, 0, 0, 0, 2]));
|
||||||
|
|
||||||
|
let (result, _) = U512([0, 0, 0, 0, 0, 0, 2, 1]).overflowing_add(U512([0, 0, 0, 0, 0, 0, 3, 1]));
|
||||||
|
assert_eq!(result, U512([0, 0, 0, 0, 0, 0, 5, 2]));
|
||||||
|
|
||||||
|
let (result, _) = U512([1, 2, 3, 4, 5, 6, 7, 8]).overflowing_add(U512([9, 10, 11, 12, 13, 14, 15, 16]));
|
||||||
|
assert_eq!(result, U512([10, 12, 14, 16, 18, 20, 22, 24]));
|
||||||
|
|
||||||
|
let (_, overflow) = U512([0, 0, 0, 0, 0, 0, 2, 1]).overflowing_add(U512([0, 0, 0, 0, 0, 0, 3, 1]));
|
||||||
|
assert!(!overflow);
|
||||||
|
|
||||||
|
let (_, overflow) = U512([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])
|
||||||
|
.overflowing_add(U512([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX]));
|
||||||
|
assert!(overflow);
|
||||||
|
|
||||||
|
let (_, overflow) = U512([0, 0, 0, 0, 0, 0, 0, ::std::u64::MAX])
|
||||||
|
.overflowing_add(U512([0, 0, 0, 0, 0, 0, 0, ::std::u64::MAX]));
|
||||||
|
assert!(overflow);
|
||||||
|
|
||||||
|
let (_, overflow) = U512([0, 0, 0, 0, 0, 0, 0, ::std::u64::MAX])
|
||||||
|
.overflowing_add(U512([0, 0, 0, 0, 0, 0, 0, 0]));
|
||||||
|
assert!(!overflow);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn u256_multi_adds() {
|
fn u256_multi_adds() {
|
||||||
@ -1531,6 +1655,21 @@ mod tests {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn u512_multi_subs() {
|
||||||
|
let (result, _) = U512([0, 0, 0, 0, 0, 0, 0, 0]).overflowing_sub(U512([0, 0, 0, 0, 0, 0, 0, 0]));
|
||||||
|
assert_eq!(result, U512([0, 0, 0, 0, 0, 0, 0, 0]));
|
||||||
|
|
||||||
|
let (result, _) = U512([10, 9, 8, 7, 6, 5, 4, 3]).overflowing_sub(U512([9, 8, 7, 6, 5, 4, 3, 2]));
|
||||||
|
assert_eq!(result, U512([1, 1, 1, 1, 1, 1, 1, 1]));
|
||||||
|
|
||||||
|
let (_, overflow) = U512([10, 9, 8, 7, 6, 5, 4, 3]).overflowing_sub(U512([9, 8, 7, 6, 5, 4, 3, 2]));
|
||||||
|
assert!(!overflow);
|
||||||
|
|
||||||
|
let (_, overflow) = U512([9, 8, 7, 6, 5, 4, 3, 2]).overflowing_sub(U512([10, 9, 8, 7, 6, 5, 4, 3]));
|
||||||
|
assert!(overflow);
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn u256_multi_muls() {
|
fn u256_multi_muls() {
|
||||||
let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0]));
|
let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0]));
|
||||||
|
Loading…
Reference in New Issue
Block a user