Skip to content

Commit

Permalink
Unroll biginteger loops and reduce copies (#205)
Browse files: browse the repository at this point in the history
* Unroll biginteger loops

* Reduce field arithmetic copies

* Reduce ec arithmetic copies

* Fix

* CHANGELOG and tweaks

* Use intrinsics in `add_nocarry`/`sub_noborrow`

Co-authored-by: Jon Chuang <jon-chuang@users.noreply.github.com>

* Update CHANGELOG

* fmt

* Remove assert

* minor changes for bigint

* minor changes for bigint

* Small cleanup

Co-authored-by: Jon Chuang <jon-chuang@users.noreply.github.com>
Co-authored-by: jonch <9093549+jon-chuang@users.noreply.github.com>
  • Loading branch information
3 people authored Feb 6, 2021
1 parent 80ff5ea commit 87e25cb
Show file tree
Hide file tree
Showing 10 changed files with 160 additions and 114 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ The main features of this release are:
- #188 (ark-ec) Make Short Weierstrass random sampling result in an element with unknown discrete log
- #190 (ark-ec) Add curve cycle trait and extended pairing cycle trait for all types of ec cycles.
- #201 (ark-ec, ark-ff, ark-test-curves, ark-test-templates) Remove the dependency on `rand_xorshift`
- #205 (ark-ec, ark-ff) Unroll loops and conditionally use intrinsics in `biginteger` arithmetic, and reduce copies in `ff` and `ec` arithmetic.

### Bug fixes
- #36 (ark-ec) In Short-Weierstrass curves, include an infinity bit in `ToConstraintField`.
Expand Down
16 changes: 7 additions & 9 deletions ec/src/models/short_weierstrass_jacobian.rs
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,6 @@ impl<P: Parameters> Default for GroupAffine<P> {
#[derivative(
Copy(bound = "P: Parameters"),
Clone(bound = "P: Parameters"),
Eq(bound = "P: Parameters"),
Debug(bound = "P: Parameters"),
Hash(bound = "P: Parameters")
)]
Expand All @@ -279,6 +278,7 @@ impl<P: Parameters> Display for GroupProjective<P> {
}
}

impl<P: Parameters> Eq for GroupProjective<P> {}
impl<P: Parameters> PartialEq for GroupProjective<P> {
fn eq(&self, other: &Self) -> bool {
if self.is_zero() {
Expand Down Expand Up @@ -581,10 +581,9 @@ impl<'a, P: Parameters> Add<&'a Self> for GroupProjective<P> {
type Output = Self;

#[inline]
fn add(self, other: &'a Self) -> Self {
let mut copy = self;
copy += other;
copy
fn add(mut self, other: &'a Self) -> Self {
self += other;
self
}
}

Expand Down Expand Up @@ -657,10 +656,9 @@ impl<'a, P: Parameters> Sub<&'a Self> for GroupProjective<P> {
type Output = Self;

#[inline]
fn sub(self, other: &'a Self) -> Self {
let mut copy = self;
copy -= other;
copy
fn sub(mut self, other: &'a Self) -> Self {
self -= other;
self
}
}

Expand Down
14 changes: 6 additions & 8 deletions ec/src/models/twisted_edwards_extended.rs
Original file line number Diff line number Diff line change
Expand Up @@ -544,10 +544,9 @@ ark_ff::impl_additive_ops_from_ref!(GroupProjective, Parameters);

impl<'a, P: Parameters> Add<&'a Self> for GroupProjective<P> {
type Output = Self;
fn add(self, other: &'a Self) -> Self {
let mut copy = self;
copy += other;
copy
fn add(mut self, other: &'a Self) -> Self {
self += other;
self
}
}

Expand Down Expand Up @@ -597,10 +596,9 @@ impl<'a, P: Parameters> AddAssign<&'a Self> for GroupProjective<P> {

impl<'a, P: Parameters> Sub<&'a Self> for GroupProjective<P> {
type Output = Self;
fn sub(self, other: &'a Self) -> Self {
let mut copy = self;
copy -= other;
copy
fn sub(mut self, other: &'a Self) -> Self {
self -= other;
self
}
}

Expand Down
4 changes: 2 additions & 2 deletions ff-asm/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ pub fn x86_64_asm_mul(input: TokenStream) -> TokenStream {
let inner_ts: Expr = syn::parse_str(&impl_block).unwrap();
let ts = quote::quote! {
let a = &mut #a;
let b = #b;
let b = &#b;
#inner_ts
};
ts.into()
Expand Down Expand Up @@ -290,7 +290,7 @@ fn generate_impl(num_limbs: usize, is_mul: bool) -> String {
let mut ctx = Context::new();
ctx.add_declaration("a", "r", "a");
if is_mul {
ctx.add_declaration("b", "r", "&b");
ctx.add_declaration("b", "r", "b");
}
ctx.add_declaration("modulus", "r", "&P::MODULUS.0");
ctx.add_declaration("0", "i", "0u64");
Expand Down
122 changes: 90 additions & 32 deletions ff/src/biginteger/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,39 +13,81 @@ macro_rules! bigint_impl {
const NUM_LIMBS: usize = $num_limbs;

#[inline]
#[ark_ff_asm::unroll_for_loops]
fn add_nocarry(&mut self, other: &Self) -> bool {
let mut carry = 0;

for (a, b) in self.0.iter_mut().zip(other.0.iter()) {
*a = adc!(*a, *b, &mut carry);
for i in 0..$num_limbs {
#[cfg(all(target_arch = "x86_64", feature = "asm"))]
#[allow(unsafe_code)]
unsafe {
use core::arch::x86_64::_addcarry_u64;
carry = _addcarry_u64(carry, self.0[i], other.0[i], &mut self.0[i])
};

#[cfg(not(all(target_arch = "x86_64", feature = "asm")))]
{
self.0[i] = adc!(self.0[i], other.0[i], &mut carry);
}
}

carry != 0
}

#[inline]
#[ark_ff_asm::unroll_for_loops]
fn sub_noborrow(&mut self, other: &Self) -> bool {
let mut borrow = 0;

for (a, b) in self.0.iter_mut().zip(other.0.iter()) {
*a = sbb!(*a, *b, &mut borrow);
for i in 0..$num_limbs {
#[cfg(all(target_arch = "x86_64", feature = "asm"))]
#[allow(unsafe_code)]
unsafe {
use core::arch::x86_64::_subborrow_u64;
borrow = _subborrow_u64(borrow, self.0[i], other.0[i], &mut self.0[i])
};

#[cfg(not(all(target_arch = "x86_64", feature = "asm")))]
{
self.0[i] = sbb!(self.0[i], other.0[i], &mut borrow);
}
}

borrow != 0
}

#[inline]
#[ark_ff_asm::unroll_for_loops]
#[allow(unused)]
fn mul2(&mut self) {
let mut last = 0;
for i in &mut self.0 {
let tmp = *i >> 63;
*i <<= 1;
*i |= last;
last = tmp;
#[cfg(all(target_arch = "x86_64", feature = "asm"))]
#[allow(unsafe_code)]
{
let mut carry = 0;

for i in 0..$num_limbs {
unsafe {
use core::arch::x86_64::_addcarry_u64;
carry = _addcarry_u64(carry, self.0[i], self.0[i], &mut self.0[i])
};
}
}

#[cfg(not(all(target_arch = "x86_64", feature = "asm")))]
{
let mut last = 0;
for i in 0..$num_limbs {
let a = &mut self.0[i];
let tmp = *a >> 63;
*a <<= 1;
*a |= last;
last = tmp;
}
}
}

#[inline]
#[ark_ff_asm::unroll_for_loops]
fn muln(&mut self, mut n: u32) {
if n >= 64 * $num_limbs {
*self = Self::from(0);
Expand All @@ -54,35 +96,41 @@ macro_rules! bigint_impl {

while n >= 64 {
let mut t = 0;
for i in &mut self.0 {
core::mem::swap(&mut t, i);
for i in 0..$num_limbs {
core::mem::swap(&mut t, &mut self.0[i]);
}
n -= 64;
}

if n > 0 {
let mut t = 0;
for i in &mut self.0 {
let t2 = *i >> (64 - n);
*i <<= n;
*i |= t;
#[allow(unused)]
for i in 0..$num_limbs {
let a = &mut self.0[i];
let t2 = *a >> (64 - n);
*a <<= n;
*a |= t;
t = t2;
}
}
}

#[inline]
#[ark_ff_asm::unroll_for_loops]
#[allow(unused)]
fn div2(&mut self) {
let mut t = 0;
for i in self.0.iter_mut().rev() {
let t2 = *i << 63;
*i >>= 1;
*i |= t;
for i in 0..$num_limbs {
let a = &mut self.0[$num_limbs - i - 1];
let t2 = *a << 63;
*a >>= 1;
*a |= t;
t = t2;
}
}

#[inline]
#[ark_ff_asm::unroll_for_loops]
fn divn(&mut self, mut n: u32) {
if n >= 64 * $num_limbs {
*self = Self::from(0);
Expand All @@ -91,18 +139,20 @@ macro_rules! bigint_impl {

while n >= 64 {
let mut t = 0;
for i in self.0.iter_mut().rev() {
core::mem::swap(&mut t, i);
for i in 0..$num_limbs {
core::mem::swap(&mut t, &mut self.0[$num_limbs - i - 1]);
}
n -= 64;
}

if n > 0 {
let mut t = 0;
for i in self.0.iter_mut().rev() {
let t2 = *i << (64 - n);
*i >>= n;
*i |= t;
#[allow(unused)]
for i in 0..$num_limbs {
let a = &mut self.0[$num_limbs - i - 1];
let t2 = *a << (64 - n);
*a >>= n;
*a |= t;
t = t2;
}
}
Expand All @@ -120,7 +170,12 @@ macro_rules! bigint_impl {

#[inline]
fn is_zero(&self) -> bool {
self.0.iter().all(|&e| e == 0)
for i in 0..$num_limbs {
if self.0[i] != 0 {
return false;
}
}
true
}

#[inline]
Expand Down Expand Up @@ -270,16 +325,19 @@ macro_rules! bigint_impl {

impl Ord for $name {
#[inline]
#[ark_ff_asm::unroll_for_loops]
fn cmp(&self, other: &Self) -> ::core::cmp::Ordering {
for (a, b) in self.0.iter().rev().zip(other.0.iter().rev()) {
use core::cmp::Ordering;
for i in 0..$num_limbs {
let a = &self.0[$num_limbs - i - 1];
let b = &other.0[$num_limbs - i - 1];
if a < b {
return core::cmp::Ordering::Less;
return Ordering::Less;
} else if a > b {
return core::cmp::Ordering::Greater;
return Ordering::Greater;
}
}

core::cmp::Ordering::Equal
Ordering::Equal
}
}

Expand Down
2 changes: 0 additions & 2 deletions ff/src/fields/arithmetic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ macro_rules! impl_field_mul_assign {
{
// Tentatively avoid using assembly for `$limbs == 1`.
if $limbs <= 6 && $limbs > 1 {
assert!($limbs <= 6);
ark_ff_asm::x86_64_asm_mul!($limbs, (self.0).0, (other.0).0);
self.reduce();
return;
Expand Down Expand Up @@ -104,7 +103,6 @@ macro_rules! impl_field_square_in_place {
let _no_carry: bool = !(first_bit_set || all_bits_set);

if $limbs <= 6 && _no_carry {
assert!($limbs <= 6);
ark_ff_asm::x86_64_asm_square!($limbs, (self.0).0);
self.reduce();
return self;
Expand Down
32 changes: 14 additions & 18 deletions ff/src/fields/macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ macro_rules! impl_Fp {
}

impl<P: $FpParameters> $Fp<P> {
#[inline]
#[inline(always)]
pub(crate) fn is_valid(&self) -> bool {
self.0 < P::MODULUS
}
Expand Down Expand Up @@ -605,7 +605,7 @@ macro_rules! impl_Fp {
#[must_use]
fn neg(self) -> Self {
if !self.is_zero() {
let mut tmp = P::MODULUS.clone();
let mut tmp = P::MODULUS;
tmp.sub_noborrow(&self.0);
$Fp::<P>(tmp, PhantomData)
} else {
Expand All @@ -618,43 +618,39 @@ macro_rules! impl_Fp {
type Output = Self;

#[inline]
fn add(self, other: &Self) -> Self {
let mut result = self.clone();
result.add_assign(other);
result
fn add(mut self, other: &Self) -> Self {
self.add_assign(other);
self
}
}

impl<'a, P: $FpParameters> Sub<&'a $Fp<P>> for $Fp<P> {
type Output = Self;

#[inline]
fn sub(self, other: &Self) -> Self {
let mut result = self.clone();
result.sub_assign(other);
result
fn sub(mut self, other: &Self) -> Self {
self.sub_assign(other);
self
}
}

impl<'a, P: $FpParameters> Mul<&'a $Fp<P>> for $Fp<P> {
type Output = Self;

#[inline]
fn mul(self, other: &Self) -> Self {
let mut result = self.clone();
result.mul_assign(other);
result
fn mul(mut self, other: &Self) -> Self {
self.mul_assign(other);
self
}
}

impl<'a, P: $FpParameters> Div<&'a $Fp<P>> for $Fp<P> {
type Output = Self;

#[inline]
fn div(self, other: &Self) -> Self {
let mut result = self.clone();
result.mul_assign(&other.inverse().unwrap());
result
fn div(mut self, other: &Self) -> Self {
self.mul_assign(&other.inverse().unwrap());
self
}
}

Expand Down
Loading

0 comments on commit 87e25cb

Please sign in to comment.