From 9585138f20fdf74c327c484fa7e1a1a24f4393e2 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Fri, 26 Feb 2016 23:24:04 +0300 Subject: [PATCH 1/2] add/sub 512 --- util/src/uint.rs | 179 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 159 insertions(+), 20 deletions(-) diff --git a/util/src/uint.rs b/util/src/uint.rs index b4940cfb8..5a2730126 100644 --- a/util/src/uint.rs +++ b/util/src/uint.rs @@ -97,23 +97,69 @@ macro_rules! uint_overflowing_add { let other_t: &[u64; 4] = unsafe { &mem::transmute($other) }; let overflow: u8; - unsafe { - asm!(" - adc $9, $0 - adc $10, $1 - adc $11, $2 - adc $12, $3 - setc %al - " - : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) - : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), + unsafe { + asm!(" + add $9, $0 + adc $10, $1 + adc $11, $2 + adc $12, $3 + setc %al + " + : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) + : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]) - : - : + : + : ); } (U256(result), overflow != 0) }); + (U512, $n_words: expr, $self_expr: expr, $other: expr) => ({ + let mut result: [u64; 8] = unsafe { mem::uninitialized() }; + let self_t: &[u64; 8] = unsafe { &mem::transmute($self_expr) }; + let other_t: &[u64; 8] = unsafe { &mem::transmute($other) }; + + let overflow: u8; + + unsafe { + asm!(" + add $15, $0 + adc $16, $1 + adc $17, $2 + adc $18, $3 + lodsq + adc $11, %rax + stosq + lodsq + adc $12, %rax + stosq + lodsq + adc $13, %rax + stosq + lodsq + adc $14, %rax + stosq + setc %al + + ": "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), + + "={al}"(overflow) /* $0 - $4 */ + + : "{rdi}"(&result[4] as *const u64) /* $5 */ + "{rsi}"(&other_t[4] as *const u64) /* $6 */ + "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), + "m"(self_t[4]), "m"(self_t[5]), "m"(self_t[6]), 
"m"(self_t[7]), + /* $7 - $14 */ + + "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]), + "m"(other_t[4]), "m"(other_t[5]), "m"(other_t[6]), "m"(other_t[7]) /* $15 - $22 */ + : "rdi", "rsi" + : + ); + } + (U512(result), overflow != 0) + }); + ($name:ident, $n_words:expr, $self_expr: expr, $other: expr) => ( uint_overflowing_add_reg!($name, $n_words, $self_expr, $other) ) @@ -138,12 +184,13 @@ macro_rules! uint_overflowing_sub { let overflow: u8; unsafe { asm!(" - sbb $9, $0 - sbb $10, $1 - sbb $11, $2 - sbb $12, $3 - setb %al" - : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) + sub $9, $0 + sbb $10, $1 + sbb $11, $2 + sbb $12, $3 + setb %al + " + : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), "={al}"(overflow) : "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), "mr"(other_t[0]), "mr"(other_t[1]), "mr"(other_t[2]), "mr"(other_t[3]) : : @@ -151,6 +198,51 @@ macro_rules! uint_overflowing_sub { } (U256(result), overflow != 0) }); + (U512, $n_words: expr, $self_expr: expr, $other: expr) => ({ + let mut result: [u64; 8] = unsafe { mem::uninitialized() }; + let self_t: &[u64; 8] = unsafe { &mem::transmute($self_expr) }; + let other_t: &[u64; 8] = unsafe { &mem::transmute($other) }; + + let overflow: u8; + + unsafe { + asm!(" + sub $15, $0 + sbb $16, $1 + sbb $17, $2 + sbb $18, $3 + lodsq + sbb $19, %rax + stosq + lodsq + sbb $20, %rax + stosq + lodsq + sbb $21, %rax + stosq + lodsq + sbb $22, %rax + stosq + setb %al + " + : "=r"(result[0]), "=r"(result[1]), "=r"(result[2]), "=r"(result[3]), + + "={al}"(overflow) /* $0 - $4 */ + + : "{rdi}"(&result[4] as *const u64) /* $5 */ + "{rsi}"(&self_t[4] as *const u64) /* $6 */ + "0"(self_t[0]), "1"(self_t[1]), "2"(self_t[2]), "3"(self_t[3]), + "m"(self_t[4]), "m"(self_t[5]), "m"(self_t[6]), "m"(self_t[7]), + /* $7 - $14 */ + + "m"(other_t[0]), "m"(other_t[1]), "m"(other_t[2]), "m"(other_t[3]), + "m"(other_t[4]), "m"(other_t[5]), 
"m"(other_t[6]), "m"(other_t[7]) /* $15 - $22 */ + : "rdi", "rsi" + : + ); + } + (U512(result), overflow != 0) + }); ($name:ident, $n_words: expr, $self_expr: expr, $other: expr) => ({ let res = overflowing!((!$other).overflowing_add(From::from(1u64))); let res = overflowing!($self_expr.overflowing_add(res)); @@ -251,8 +343,8 @@ macro_rules! uint_overflowing_mul { jrcxz 2f popcnt $7, %rcx - 2: - " + 2: + " : /* $0 */ "={r8}"(result[0]), /* $1 */ "={r9}"(result[1]), /* $2 */ "={r10}"(result[2]), /* $3 */ "={r11}"(result[3]), /* $4 */ "={rcx}"(overflow) @@ -1483,6 +1575,38 @@ mod tests { assert_eq!(format!("{}", U256::from(0)), "0"); } + #[test] + fn u512_multi_adds() { + let (result, _) = U512([0, 0, 0, 0, 0, 0, 0, 0]).overflowing_add(U512([0, 0, 0, 0, 0, 0, 0, 0])); + assert_eq!(result, U512([0, 0, 0, 0, 0, 0, 0, 0])); + + let (result, _) = U512([1, 0, 0, 0, 0, 0, 0, 1]).overflowing_add(U512([1, 0, 0, 0, 0, 0, 0, 1])); + assert_eq!(result, U512([2, 0, 0, 0, 0, 0, 0, 2])); + + let (result, _) = U512([0, 0, 0, 0, 0, 0, 0, 1]).overflowing_add(U512([0, 0, 0, 0, 0, 0, 0, 1])); + assert_eq!(result, U512([0, 0, 0, 0, 0, 0, 0, 2])); + + let (result, _) = U512([0, 0, 0, 0, 0, 0, 2, 1]).overflowing_add(U512([0, 0, 0, 0, 0, 0, 3, 1])); + assert_eq!(result, U512([0, 0, 0, 0, 0, 0, 5, 2])); + + let (result, _) = U512([1, 2, 3, 4, 5, 6, 7, 8]).overflowing_add(U512([9, 10, 11, 12, 13, 14, 15, 16])); + assert_eq!(result, U512([10, 12, 14, 16, 18, 20, 22, 24])); + + let (_, overflow) = U512([0, 0, 0, 0, 0, 0, 2, 1]).overflowing_add(U512([0, 0, 0, 0, 0, 0, 3, 1])); + assert!(!overflow); + + let (_, overflow) = U512([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX]) + .overflowing_add(U512([::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX, ::std::u64::MAX])); + assert!(overflow); + + let (_, overflow) = U512([0, 0, 0, 0, 0, 0, 0, 
::std::u64::MAX]) + .overflowing_add(U512([0, 0, 0, 0, 0, 0, 0, ::std::u64::MAX])); + assert!(overflow); + + let (_, overflow) = U512([0, 0, 0, 0, 0, 0, 0, ::std::u64::MAX]) + .overflowing_add(U512([0, 0, 0, 0, 0, 0, 0, 0])); + assert!(!overflow); + } #[test] fn u256_multi_adds() { @@ -1531,6 +1655,21 @@ mod tests { } + #[test] + fn u512_multi_subs() { + let (result, _) = U512([0, 0, 0, 0, 0, 0, 0, 0]).overflowing_sub(U512([0, 0, 0, 0, 0, 0, 0, 0])); + assert_eq!(result, U512([0, 0, 0, 0, 0, 0, 0, 0])); + + let (result, _) = U512([10, 9, 8, 7, 6, 5, 4, 3]).overflowing_sub(U512([9, 8, 7, 6, 5, 4, 3, 2])); + assert_eq!(result, U512([1, 1, 1, 1, 1, 1, 1, 1])); + + let (_, overflow) = U512([10, 9, 8, 7, 6, 5, 4, 3]).overflowing_sub(U512([9, 8, 7, 6, 5, 4, 3, 2])); + assert!(!overflow); + + let (_, overflow) = U512([9, 8, 7, 6, 5, 4, 3, 2]).overflowing_sub(U512([10, 9, 8, 7, 6, 5, 4, 3])); + assert!(overflow); + } + #[test] fn u256_multi_muls() { let (result, _) = U256([0, 0, 0, 0]).overflowing_mul(U256([0, 0, 0, 0])); From 98a75d06d16c2e2924ce4d3bc6f950f87e4753b4 Mon Sep 17 00:00:00 2001 From: Nikolay Volf Date: Fri, 26 Feb 2016 23:37:13 +0300 Subject: [PATCH 2/2] benches --- util/benches/bigint.rs | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/util/benches/bigint.rs b/util/benches/bigint.rs index 3b2012bc7..da82084b8 100644 --- a/util/benches/bigint.rs +++ b/util/benches/bigint.rs @@ -38,7 +38,6 @@ fn u256_add(b: &mut Bencher) { }); } - #[bench] fn u256_sub(b: &mut Bencher) { b.iter(|| { @@ -47,6 +46,25 @@ fn u256_sub(b: &mut Bencher) { }); } +#[bench] +fn u512_sub(b: &mut Bencher) { + b.iter(|| { + let n = black_box(10000); + (0..n).fold(U512([rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), + rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>(), rand::random::<u64>()]), + |old, new| { old.overflowing_sub(U512([0, 0, 0, 0, 0, 0, 0, new])).0 }) + }); +} + +#[bench] +fn u512_add(b: &mut Bencher) { + b.iter(|| {
+ let n = black_box(10000); + (0..n).fold(U512([0, 0, 0, 0, 0, 0, 0, 0]), + |old, new| { old.overflowing_add(U512([new, new, new, new, new, new, new, new])).0 }) + }); +} + #[bench] fn u256_mul(b: &mut Bencher) { b.iter(|| {