From 6499e9766fdd5816007f86c93c4c5cc32037bc1d Mon Sep 17 00:00:00 2001
From: Pratyush Mishra <pratyushmishra@berkeley.edu>
Date: Thu, 4 Feb 2021 18:59:27 -0800
Subject: [PATCH 1/8] Unroll biginteger loops

---
 ff/src/biginteger/macros.rs | 47 ++++++++++++++++++++++---------------
 1 file changed, 28 insertions(+), 19 deletions(-)

diff --git a/ff/src/biginteger/macros.rs b/ff/src/biginteger/macros.rs
index b3f7fa4c3..0573a3ae8 100644
--- a/ff/src/biginteger/macros.rs
+++ b/ff/src/biginteger/macros.rs
@@ -12,35 +12,38 @@ macro_rules! bigint_impl {
         impl BigInteger for $name {
             const NUM_LIMBS: usize = $num_limbs;
 
-            #[inline]
+            #[ark_ff_asm::unroll_for_loops]
             fn add_nocarry(&mut self, other: &Self) -> bool {
                 let mut carry = 0;
 
-                for (a, b) in self.0.iter_mut().zip(other.0.iter()) {
-                    *a = adc!(*a, *b, &mut carry);
+                for i in 0..$num_limbs {
+                    self.0[i] = adc!(self.0[i], other.0[i], &mut carry);
                 }
 
                 carry != 0
             }
 
-            #[inline]
+            #[ark_ff_asm::unroll_for_loops]
             fn sub_noborrow(&mut self, other: &Self) -> bool {
                 let mut borrow = 0;
 
-                for (a, b) in self.0.iter_mut().zip(other.0.iter()) {
-                    *a = sbb!(*a, *b, &mut borrow);
+                for i in 0..$num_limbs {
+                    self.0[i] = sbb!(self.0[i], other.0[i], &mut borrow);
                 }
 
                 borrow != 0
             }
 
             #[inline]
+            #[ark_ff_asm::unroll_for_loops]
+            #[allow(unused)]
             fn mul2(&mut self) {
                 let mut last = 0;
-                for i in &mut self.0 {
-                    let tmp = *i >> 63;
-                    *i <<= 1;
-                    *i |= last;
+                for i in 0..$num_limbs {
+                    let a = &mut self.0[i];
+                    let tmp = *a >> 63;
+                    *a <<= 1;
+                    *a |= last;
                     last = tmp;
                 }
             }
@@ -72,12 +75,15 @@ macro_rules! bigint_impl {
             }
 
             #[inline]
+            #[ark_ff_asm::unroll_for_loops]
+            #[allow(unused)]
             fn div2(&mut self) {
                 let mut t = 0;
-                for i in self.0.iter_mut().rev() {
-                    let t2 = *i << 63;
-                    *i >>= 1;
-                    *i |= t;
+                for i in 0..$num_limbs {
+                    let a = &mut self.0[$num_limbs - i - 1];
+                    let t2 = *a << 63;
+                    *a >>= 1;
+                    *a |= t;
                     t = t2;
                 }
             }
@@ -270,16 +276,19 @@ macro_rules! bigint_impl {
 
         impl Ord for $name {
             #[inline]
+            #[ark_ff_asm::unroll_for_loops]
             fn cmp(&self, other: &Self) -> ::core::cmp::Ordering {
-                for (a, b) in self.0.iter().rev().zip(other.0.iter().rev()) {
+                use core::cmp::Ordering;
+                for i in 0..$num_limbs {
+                    let a = &self.0[$num_limbs - i - 1];
+                    let b = &other.0[$num_limbs - i - 1];
                     if a < b {
-                        return core::cmp::Ordering::Less;
+                        return Ordering::Less;
                     } else if a > b {
-                        return core::cmp::Ordering::Greater;
+                        return Ordering::Greater;
                     }
                 }
-
-                core::cmp::Ordering::Equal
+                Ordering::Equal
             }
         }
 

From e62bb644298b6c3ef6ef6572b85ed99dfe69add0 Mon Sep 17 00:00:00 2001
From: Pratyush Mishra <pratyushmishra@berkeley.edu>
Date: Thu, 4 Feb 2021 19:03:06 -0800
Subject: [PATCH 2/8] Reduce field arithmetic copies

---
 ff/src/fields/arithmetic.rs                 |  1 -
 ff/src/fields/macros.rs                     | 32 ++++++--------
 ff/src/fields/models/cubic_extension.rs     | 35 ++++++++-------
 ff/src/fields/models/quadratic_extension.rs | 48 ++++++++++-----------
 4 files changed, 55 insertions(+), 61 deletions(-)

diff --git a/ff/src/fields/arithmetic.rs b/ff/src/fields/arithmetic.rs
index f14732a19..cbbc5d0ee 100644
--- a/ff/src/fields/arithmetic.rs
+++ b/ff/src/fields/arithmetic.rs
@@ -104,7 +104,6 @@ macro_rules! impl_field_square_in_place {
                 let _no_carry: bool = !(first_bit_set || all_bits_set);
 
                 if $limbs <= 6 && _no_carry {
-                    assert!($limbs <= 6);
                     ark_ff_asm::x86_64_asm_square!($limbs, (self.0).0);
                     self.reduce();
                     return self;
diff --git a/ff/src/fields/macros.rs b/ff/src/fields/macros.rs
index dd85e3085..8a33dc11f 100644
--- a/ff/src/fields/macros.rs
+++ b/ff/src/fields/macros.rs
@@ -233,7 +233,7 @@ macro_rules! impl_Fp {
         }
 
         impl<P: $FpParameters> $Fp<P> {
-            #[inline]
+            #[inline(always)]
             pub(crate) fn is_valid(&self) -> bool {
                 self.0 < P::MODULUS
             }
@@ -605,7 +605,7 @@ macro_rules! impl_Fp {
             #[must_use]
             fn neg(self) -> Self {
                 if !self.is_zero() {
-                    let mut tmp = P::MODULUS.clone();
+                    let mut tmp = P::MODULUS;
                     tmp.sub_noborrow(&self.0);
                     $Fp::<P>(tmp, PhantomData)
                 } else {
@@ -618,10 +618,9 @@ macro_rules! impl_Fp {
             type Output = Self;
 
             #[inline]
-            fn add(self, other: &Self) -> Self {
-                let mut result = self.clone();
-                result.add_assign(other);
-                result
+            fn add(mut self, other: &Self) -> Self {
+                self.add_assign(other);
+                self
             }
         }
 
@@ -629,10 +628,9 @@ macro_rules! impl_Fp {
             type Output = Self;
 
             #[inline]
-            fn sub(self, other: &Self) -> Self {
-                let mut result = self.clone();
-                result.sub_assign(other);
-                result
+            fn sub(mut self, other: &Self) -> Self {
+                self.sub_assign(other);
+                self
             }
         }
 
@@ -640,10 +638,9 @@ macro_rules! impl_Fp {
             type Output = Self;
 
             #[inline]
-            fn mul(self, other: &Self) -> Self {
-                let mut result = self.clone();
-                result.mul_assign(other);
-                result
+            fn mul(mut self, other: &Self) -> Self {
+                self.mul_assign(other);
+                self
             }
         }
 
@@ -651,10 +648,9 @@ macro_rules! impl_Fp {
             type Output = Self;
 
             #[inline]
-            fn div(self, other: &Self) -> Self {
-                let mut result = self.clone();
-                result.mul_assign(&other.inverse().unwrap());
-                result
+            fn div(mut self, other: &Self) -> Self {
+                self.mul_assign(&other.inverse().unwrap());
+                self
             }
         }
 
diff --git a/ff/src/fields/models/cubic_extension.rs b/ff/src/fields/models/cubic_extension.rs
index 7edeb4568..48d33ca4a 100644
--- a/ff/src/fields/models/cubic_extension.rs
+++ b/ff/src/fields/models/cubic_extension.rs
@@ -372,8 +372,11 @@ impl<P: CubicExtParameters> FromBytes for CubicExtField<P> {
 impl<P: CubicExtParameters> Neg for CubicExtField<P> {
     type Output = Self;
     #[inline]
-    fn neg(self) -> Self {
-        Self::new(self.c0.neg(), self.c1.neg(), self.c2.neg())
+    fn neg(mut self) -> Self {
+        self.c0 = -self.c0;
+        self.c1 = -self.c1;
+        self.c2 = -self.c2;
+        self
     }
 }
 
@@ -392,10 +395,9 @@ impl<'a, P: CubicExtParameters> Add<&'a CubicExtField<P>> for CubicExtField<P> {
     type Output = Self;
 
     #[inline]
-    fn add(self, other: &Self) -> Self {
-        let mut result = self;
-        result.add_assign(other);
-        result
+    fn add(mut self, other: &Self) -> Self {
+        self.add_assign(other);
+        self
     }
 }
 
@@ -403,10 +405,9 @@ impl<'a, P: CubicExtParameters> Sub<&'a CubicExtField<P>> for CubicExtField<P> {
     type Output = Self;
 
     #[inline]
-    fn sub(self, other: &Self) -> Self {
-        let mut result = self;
-        result.sub_assign(other);
-        result
+    fn sub(mut self, other: &Self) -> Self {
+        self.sub_assign(other);
+        self
     }
 }
 
@@ -414,10 +415,9 @@ impl<'a, P: CubicExtParameters> Mul<&'a CubicExtField<P>> for CubicExtField<P> {
     type Output = Self;
 
     #[inline]
-    fn mul(self, other: &Self) -> Self {
-        let mut result = self;
-        result.mul_assign(other);
-        result
+    fn mul(mut self, other: &Self) -> Self {
+        self.mul_assign(other);
+        self
     }
 }
 
@@ -425,10 +425,9 @@ impl<'a, P: CubicExtParameters> Div<&'a CubicExtField<P>> for CubicExtField<P> {
     type Output = Self;
 
     #[inline]
-    fn div(self, other: &Self) -> Self {
-        let mut result = self;
-        result.mul_assign(&other.inverse().unwrap());
-        result
+    fn div(mut self, other: &Self) -> Self {
+        self.mul_assign(&other.inverse().unwrap());
+        self
     }
 }
 
diff --git a/ff/src/fields/models/quadratic_extension.rs b/ff/src/fields/models/quadratic_extension.rs
index 0a6c2fa47..1da379777 100644
--- a/ff/src/fields/models/quadratic_extension.rs
+++ b/ff/src/fields/models/quadratic_extension.rs
@@ -49,7 +49,9 @@ pub trait QuadExtParameters: 'static + Send + Sync + Sized {
     /// and in complex squaring.
     #[inline(always)]
     fn mul_base_field_by_nonresidue(fe: &Self::BaseField) -> Self::BaseField {
-        Self::NONRESIDUE * fe
+        let mut result = Self::NONRESIDUE;
+        result *= fe;
+        result
     }
 
     /// A specializable method for multiplying an element of the base field by
@@ -68,7 +70,7 @@ pub trait QuadExtParameters: 'static + Send + Sync + Sized {
 
         for &value in naf.iter().rev() {
             if found_nonzero {
-                res = res.square();
+                res.square_in_place();
             }
 
             if value != 0 {
@@ -420,8 +422,10 @@ impl<P: QuadExtParameters> Neg for QuadExtField<P> {
     type Output = Self;
     #[inline]
     #[must_use]
-    fn neg(self) -> Self {
-        Self::new(-self.c0, -self.c1)
+    fn neg(mut self) -> Self {
+        self.c0 = -self.c0;
+        self.c1 = -self.c1;
+        self
     }
 }
 
@@ -436,10 +440,9 @@ impl<'a, P: QuadExtParameters> Add<&'a QuadExtField<P>> for QuadExtField<P> {
     type Output = Self;
 
     #[inline]
-    fn add(self, other: &Self) -> Self {
-        let mut result = self;
-        result.add_assign(other);
-        result
+    fn add(mut self, other: &Self) -> Self {
+        self.add_assign(other);
+        self
     }
 }
 
@@ -447,10 +450,9 @@ impl<'a, P: QuadExtParameters> Sub<&'a QuadExtField<P>> for QuadExtField<P> {
     type Output = Self;
 
     #[inline]
-    fn sub(self, other: &Self) -> Self {
-        let mut result = self;
-        result.sub_assign(other);
-        result
+    fn sub(mut self, other: &Self) -> Self {
+        self.sub_assign(other);
+        self
     }
 }
 
@@ -458,10 +460,9 @@ impl<'a, P: QuadExtParameters> Mul<&'a QuadExtField<P>> for QuadExtField<P> {
     type Output = Self;
 
     #[inline]
-    fn mul(self, other: &Self) -> Self {
-        let mut result = self;
-        result.mul_assign(other);
-        result
+    fn mul(mut self, other: &Self) -> Self {
+        self.mul_assign(other);
+        self
     }
 }
 
@@ -469,26 +470,25 @@ impl<'a, P: QuadExtParameters> Div<&'a QuadExtField<P>> for QuadExtField<P> {
     type Output = Self;
 
     #[inline]
-    fn div(self, other: &Self) -> Self {
-        let mut result = self;
-        result.mul_assign(&other.inverse().unwrap());
-        result
+    fn div(mut self, other: &Self) -> Self {
+        self.mul_assign(&other.inverse().unwrap());
+        self
     }
 }
 
 impl<'a, P: QuadExtParameters> AddAssign<&'a Self> for QuadExtField<P> {
     #[inline]
     fn add_assign(&mut self, other: &Self) {
-        self.c0.add_assign(&other.c0);
-        self.c1.add_assign(&other.c1);
+        self.c0 += &other.c0;
+        self.c1 += &other.c1;
     }
 }
 
 impl<'a, P: QuadExtParameters> SubAssign<&'a Self> for QuadExtField<P> {
     #[inline]
     fn sub_assign(&mut self, other: &Self) {
-        self.c0.sub_assign(&other.c0);
-        self.c1.sub_assign(&other.c1);
+        self.c0 -= &other.c0;
+        self.c1 -= &other.c1;
     }
 }
 

From 41eb72f88a8f5f7d869e70747ce328af0ca1da40 Mon Sep 17 00:00:00 2001
From: Pratyush Mishra <pratyushmishra@berkeley.edu>
Date: Thu, 4 Feb 2021 19:04:43 -0800
Subject: [PATCH 3/8] Reduce ec arithmetic copies

---
 ec/src/models/short_weierstrass_jacobian.rs | 16 ++--
 ec/src/models/twisted_edwards_extended.rs   | 83 +++++++++++----------
 2 files changed, 49 insertions(+), 50 deletions(-)

diff --git a/ec/src/models/short_weierstrass_jacobian.rs b/ec/src/models/short_weierstrass_jacobian.rs
index 882f90ff8..1bffeb39e 100644
--- a/ec/src/models/short_weierstrass_jacobian.rs
+++ b/ec/src/models/short_weierstrass_jacobian.rs
@@ -260,7 +260,6 @@ impl<P: Parameters> Default for GroupAffine<P> {
 #[derivative(
     Copy(bound = "P: Parameters"),
     Clone(bound = "P: Parameters"),
-    Eq(bound = "P: Parameters"),
     Debug(bound = "P: Parameters"),
     Hash(bound = "P: Parameters")
 )]
@@ -279,6 +278,7 @@ impl<P: Parameters> Display for GroupProjective<P> {
     }
 }
 
+impl<P: Parameters> Eq for GroupProjective<P> {}
 impl<P: Parameters> PartialEq for GroupProjective<P> {
     fn eq(&self, other: &Self) -> bool {
         if self.is_zero() {
@@ -581,10 +581,9 @@ impl<'a, P: Parameters> Add<&'a Self> for GroupProjective<P> {
     type Output = Self;
 
     #[inline]
-    fn add(self, other: &'a Self) -> Self {
-        let mut copy = self;
-        copy += other;
-        copy
+    fn add(mut self, other: &'a Self) -> Self {
+        self += other;
+        self
     }
 }
 
@@ -657,10 +656,9 @@ impl<'a, P: Parameters> Sub<&'a Self> for GroupProjective<P> {
     type Output = Self;
 
     #[inline]
-    fn sub(self, other: &'a Self) -> Self {
-        let mut copy = self;
-        copy -= other;
-        copy
+    fn sub(mut self, other: &'a Self) -> Self {
+        self -= other;
+        self
     }
 }
 
diff --git a/ec/src/models/twisted_edwards_extended.rs b/ec/src/models/twisted_edwards_extended.rs
index 4fcd04a26..629137bc9 100644
--- a/ec/src/models/twisted_edwards_extended.rs
+++ b/ec/src/models/twisted_edwards_extended.rs
@@ -544,63 +544,64 @@ ark_ff::impl_additive_ops_from_ref!(GroupProjective, Parameters);
 
 impl<'a, P: Parameters> Add<&'a Self> for GroupProjective<P> {
     type Output = Self;
-    fn add(self, other: &'a Self) -> Self {
-        let mut copy = self;
-        copy += other;
-        copy
+    fn add(mut self, other: &'a Self) -> Self {
+        self += other;
+        self
     }
 }
 
 impl<'a, P: Parameters> AddAssign<&'a Self> for GroupProjective<P> {
     fn add_assign(&mut self, other: &'a Self) {
-        // See "Twisted Edwards Curves Revisited" (https://eprint.iacr.org/2008/522.pdf)
-        // by Huseyin Hisil, Kenneth Koon-Ho Wong, Gary Carter, and Ed Dawson
+        // See "Twisted Edwards Curves Revisited"
+        // Huseyin Hisil, Kenneth Koon-Ho Wong, Gary Carter, and Ed Dawson
         // 3.1 Unified Addition in E^e
+        // Source: https://www.hyperelliptic.org/EFD/g1p/data/twisted/extended/addition/add-2008-hwcd
 
-        // A = x1 * x2
-        let a = self.x * &other.x;
-
-        // B = y1 * y2
-        let b = self.y * &other.y;
-
-        // C = d * t1 * t2
-        let c = P::COEFF_D * &self.t * &other.t;
-
-        // D = z1 * z2
-        let d = self.z * &other.z;
-
-        // H = B - aA
-        let h = b - &P::mul_by_a(&a);
+        // A = X1*X2
+        let mut a = self.x;
+        a *= &other.x;
+        // B = Y1*Y2
+        let mut b = self.y;
+        b *= &other.y;
+        // C = T1*d*T2
+        let mut c = P::COEFF_D;
+        c *= &self.t;
+        c *= &other.t;
 
-        // E = (x1 + y1) * (x2 + y2) - A - B
-        let e = (self.x + &self.y) * &(other.x + &other.y) - &a - &b;
+        // D = Z1 * Z2
+        let mut d = self.z;
+        d *= other.z;
 
-        // F = D - C
+        // E = (X1+Y1)*(X2+Y2)-A-B
+        let mut e = (self.x + &self.y) * &(other.x + &other.y);
+        e -= &a;
+        e -= &b;
+        // F = D-C
         let f = d - &c;
-
-        // G = D + C
+        // G = D+C
         let g = d + &c;
-
-        // x3 = E * F
-        self.x = e * &f;
-
-        // y3 = G * H
-        self.y = g * &h;
-
-        // t3 = E * H
-        self.t = e * &h;
-
-        // z3 = F * G
-        self.z = f * &g;
+        // H = B-a*A
+        let h = b - &P::mul_by_a(&a);
+        // X3 = E*F
+        self.x = e;
+        self.x *= &f;
+        // Y3 = G*H
+        self.y = g;
+        self.y *= &h;
+        // T3 = E*H
+        self.t = e;
+        self.t *= &h;
+        // Z3 = F*G
+        self.z = f;
+        self.z *= &g;
     }
 }
 
 impl<'a, P: Parameters> Sub<&'a Self> for GroupProjective<P> {
     type Output = Self;
-    fn sub(self, other: &'a Self) -> Self {
-        let mut copy = self;
-        copy -= other;
-        copy
+    fn sub(mut self, other: &'a Self) -> Self {
+        self -= other;
+        self
     }
 }
 

From b7c8a5c1c495105a108dc778a4f4bd510963d48e Mon Sep 17 00:00:00 2001
From: Pratyush Mishra <pratyushmishra@berkeley.edu>
Date: Thu, 4 Feb 2021 19:05:58 -0800
Subject: [PATCH 4/8] in-place ops in quadratic extension

---
 ff/src/fields/models/quadratic_extension.rs | 47 ++++++++++++++-------
 1 file changed, 31 insertions(+), 16 deletions(-)

diff --git a/ff/src/fields/models/quadratic_extension.rs b/ff/src/fields/models/quadratic_extension.rs
index 1da379777..148bd9fcc 100644
--- a/ff/src/fields/models/quadratic_extension.rs
+++ b/ff/src/fields/models/quadratic_extension.rs
@@ -130,16 +130,19 @@ impl<P: QuadExtParameters> QuadExtField<P> {
     /// This simplifies to: `Norm(a) = a.x^2 - P::NON_RESIDUE * a.y^2`.
     /// This is alternatively expressed as `Norm(a) = a^(1 + p)`.
     pub fn norm(&self) -> P::BaseField {
-        let t0 = self.c0.square();
-        let mut t1 = self.c1.square();
+        let mut t0 = self.c0;
+        t0.square_in_place();
+
+        let mut t1 = self.c1;
+        t1.square_in_place();
         t1 = -P::mul_base_field_by_nonresidue(&t1);
-        t1.add_assign(&t0);
+        t1 += &t0;
         t1
     }
 
     pub fn mul_assign_by_basefield(&mut self, element: &P::BaseField) {
-        self.c0.mul_assign(element);
-        self.c1.mul_assign(element);
+        self.c0 *= element;
+        self.c1 *= element;
     }
 }
 
@@ -219,18 +222,25 @@ impl<P: QuadExtParameters> Field for QuadExtField<P> {
 
     fn square_in_place(&mut self) -> &mut Self {
         // v0 = c0 - c1
-        let mut v0 = self.c0 - &self.c1;
+        let mut v0 = self.c0;
+        v0 -= &self.c1;
+
         // v3 = c0 - beta * c1
-        let v3 = self.c0 - &P::mul_base_field_by_nonresidue(&self.c1);
+        let mut v3 = self.c0;
+        v3 -= &P::mul_base_field_by_nonresidue(&self.c1);
         // v2 = c0 * c1
-        let v2 = self.c0 * &self.c1;
+        let mut v2 = self.c0;
+        v2 *= &self.c1;
 
         // v0 = (v0 * v3) + v2
         v0 *= &v3;
         v0 += &v2;
 
-        self.c1 = v2.double();
-        self.c0 = v0 + &P::mul_base_field_by_nonresidue(&v2);
+        self.c1 = v2;
+        self.c1.double_in_place();
+
+        self.c0 = v0;
+        self.c0 += &P::mul_base_field_by_nonresidue(&v2);
 
         self
     }
@@ -241,9 +251,11 @@ impl<P: QuadExtParameters> Field for QuadExtField<P> {
         } else {
             // Guide to Pairing-based Cryptography, Algorithm 5.19.
             // v0 = c0.square()
-            let mut v0 = self.c0.square();
+            let mut v0 = self.c0;
+            v0.square_in_place();
             // v1 = c1.square()
-            let v1 = self.c1.square();
+            let mut v1 = self.c1;
+            v1.square_in_place();
             // v0 = v0 - beta * v1
             v0 -= &P::mul_base_field_by_nonresidue(&v1);
             v0.inverse().map(|v1| {
@@ -500,21 +512,24 @@ impl<'a, P: QuadExtParameters> MulAssign<&'a Self> for QuadExtField<P> {
     fn mul_assign(&mut self, other: &Self) {
         // Karatsuba multiplication;
         // Guide to Pairing-based cryprography, Algorithm 5.16.
-        let v0 = self.c0 * &other.c0;
-        let v1 = self.c1 * &other.c1;
+        let mut v0 = self.c0;
+        v0 *= &other.c0;
+        let mut v1 = self.c1;
+        v1 *= &other.c1;
 
         self.c1 += &self.c0;
         self.c1 *= &(other.c0 + &other.c1);
         self.c1 -= &v0;
         self.c1 -= &v1;
-        self.c0 = v0 + &P::mul_base_field_by_nonresidue(&v1);
+        self.c0 = v0;
+        self.c0 += &P::mul_base_field_by_nonresidue(&v1);
     }
 }
 
 impl<'a, P: QuadExtParameters> DivAssign<&'a Self> for QuadExtField<P> {
     #[inline]
     fn div_assign(&mut self, other: &Self) {
-        self.mul_assign(&other.inverse().unwrap());
+        *self *= &other.inverse().unwrap();
     }
 }
 

From cd1ff8bf115bd8047f739d93264401cd7260d09b Mon Sep 17 00:00:00 2001
From: Pratyush Mishra <pratyushmishra@berkeley.edu>
Date: Thu, 4 Feb 2021 19:06:11 -0800
Subject: [PATCH 5/8] in-place ops in cubic extension

---
 ff/src/fields/models/cubic_extension.rs | 96 +++++++++++++++++--------
 1 file changed, 68 insertions(+), 28 deletions(-)

diff --git a/ff/src/fields/models/cubic_extension.rs b/ff/src/fields/models/cubic_extension.rs
index 48d33ca4a..5c79d6a63 100644
--- a/ff/src/fields/models/cubic_extension.rs
+++ b/ff/src/fields/models/cubic_extension.rs
@@ -204,13 +204,18 @@ impl<P: CubicExtParameters> Field for CubicExtField<P> {
         let b = self.c1;
         let c = self.c2;
 
-        let s0 = a.square();
+        let mut s0 = a;
+        s0.square_in_place();
         let ab = a * &b;
-        let s1 = ab.double();
-        let s2 = (a - &b + &c).square();
+        let mut s1 = ab;
+        s1.double_in_place();
+        let mut s2 = a - &b + &c;
+        s2.square_in_place();
         let bc = b * &c;
-        let s3 = bc.double();
-        let s4 = c.square();
+        let mut s3 = bc;
+        s3.double_in_place();
+        let mut s4 = c;
+        s4.square_in_place();
 
         self.c0 = s0 + &P::mul_base_field_by_nonresidue(&s3);
         self.c1 = s1 + &P::mul_base_field_by_nonresidue(&s4);
@@ -225,21 +230,33 @@ impl<P: CubicExtParameters> Field for CubicExtField<P> {
             // From "High-Speed Software Implementation of the Optimal Ate AbstractPairing
             // over
             // Barreto-Naehrig Curves"; Algorithm 17
-            let t0 = self.c0.square();
-            let t1 = self.c1.square();
-            let t2 = self.c2.square();
-            let t3 = self.c0 * &self.c1;
-            let t4 = self.c0 * &self.c2;
-            let t5 = self.c1 * &self.c2;
+            let mut t0 = self.c0;
+            t0.square_in_place();
+            let mut t1 = self.c1;
+            t1.square_in_place();
+            let mut t2 = self.c2;
+            t2.square_in_place();
+            let mut t3 = self.c0;
+            t3 *= &self.c1;
+            let mut t4 = self.c0;
+            t4 *= &self.c2;
+            let mut t5 = self.c1;
+            t5 *= &self.c2;
             let n5 = P::mul_base_field_by_nonresidue(&t5);
 
-            let s0 = t0 - &n5;
-            let s1 = P::mul_base_field_by_nonresidue(&t2) - &t3;
-            let s2 = t1 - &t4; // typo in paper referenced above. should be "-" as per Scott, but is "*"
-
-            let a1 = self.c2 * &s1;
-            let a2 = self.c1 * &s2;
-            let mut a3 = a1 + &a2;
+            let mut s0 = t0;
+            s0 -= &n5;
+            let mut s1 = P::mul_base_field_by_nonresidue(&t2);
+            s1 -= &t3;
+            let mut s2 = t1;
+            s2 -= &t4; // typo in paper referenced above. should be "-" as per Scott, but is "*"
+
+            let mut a1 = self.c2;
+            a1 *= &s1;
+            let mut a2 = self.c1;
+            a2 *= &s2;
+            let mut a3 = a1;
+            a3 += &a2;
             a3 = P::mul_base_field_by_nonresidue(&a3);
             let t6 = (self.c0 * &s0 + &a3).inverse().unwrap();
 
@@ -467,16 +484,39 @@ impl<'a, P: CubicExtParameters> MulAssign<&'a Self> for CubicExtField<P> {
         let e = self.c1;
         let f = self.c2;
 
-        let ad = d * &a;
-        let be = e * &b;
-        let cf = f * &c;
-
-        let x = (e + &f) * &(b + &c) - &be - &cf;
-        let y = (d + &e) * &(a + &b) - &ad - &be;
-        let z = (d + &f) * &(a + &c) - &ad + &be - &cf;
-
-        self.c0 = ad + &P::mul_base_field_by_nonresidue(&x);
-        self.c1 = y + &P::mul_base_field_by_nonresidue(&cf);
+        let mut ad = d;
+        ad *= &a;
+        let mut be = e;
+        be *= &b;
+        let mut cf = f;
+        cf *= &c;
+
+        // x = (e + f) * (b + c) - be - cf;
+        let mut x = e;
+        x += &f;
+        x *= &(b + &c);
+        x -= &be;
+        x -= &cf;
+
+        // y = (d + e) * (a + b) - ad - be;
+        let mut y = d;
+        y += &e;
+        y *= &(a + &b);
+        y -= &ad;
+        y -= &be;
+
+        // z = (d + f) * (a + c) - ad + be - cf;
+        let mut z = d;
+        z += &f;
+        z *= &(a + &c);
+        z -= &ad;
+        z += &be;
+        z -= &cf;
+
+        self.c0 = ad;
+        self.c0 += &P::mul_base_field_by_nonresidue(&x);
+        self.c1 = y;
+        self.c1 += &P::mul_base_field_by_nonresidue(&cf);
         self.c2 = z;
     }
 }

From 66937163b74919652eb4becbeab4403db341f0d8 Mon Sep 17 00:00:00 2001
From: Pratyush Mishra <pratyushmishra@berkeley.edu>
Date: Thu, 4 Feb 2021 19:06:33 -0800
Subject: [PATCH 6/8] in-place ops in sw

---
 ec/src/models/short_weierstrass_jacobian.rs | 194 +++++++++++++++-----
 1 file changed, 143 insertions(+), 51 deletions(-)

diff --git a/ec/src/models/short_weierstrass_jacobian.rs b/ec/src/models/short_weierstrass_jacobian.rs
index 1bffeb39e..154aa2f58 100644
--- a/ec/src/models/short_weierstrass_jacobian.rs
+++ b/ec/src/models/short_weierstrass_jacobian.rs
@@ -121,8 +121,15 @@ impl<P: Parameters> GroupAffine<P> {
             true
         } else {
             // Check that the point is on the curve
-            let y2 = self.y.square();
-            let x3b = P::add_b(&((self.x.square() * &self.x) + &P::mul_by_a(&self.x)));
+            // y2 = y^2
+            let mut y2 = self.y;
+            y2.square_in_place();
+            // x3b = x^3 + Ax + b
+            let mut x3b = self.x;
+            x3b.square_in_place();
+            x3b *= &self.x;
+            x3b += P::mul_by_a(&self.x);
+            x3b = P::add_b(&x3b);
             y2 == x3b
         }
     }
@@ -292,13 +299,22 @@ impl<P: Parameters> PartialEq for GroupProjective<P> {
         // The points (X, Y, Z) and (X', Y', Z')
         // are equal when (X * Z^2) = (X' * Z'^2)
         // and (Y * Z^3) = (Y' * Z'^3).
-        let z1z1 = self.z.square();
-        let z2z2 = other.z.square();
+        let mut z1z1 = self.z;
+        z1z1.square_in_place();
+        let mut z2z2 = other.z;
+        z2z2.square_in_place();
 
         if self.x * &z2z2 != other.x * &z1z1 {
             false
         } else {
-            self.y * &(z2z2 * &other.z) == other.y * &(z1z1 * &self.z)
+            let mut lhs = self.y;
+            lhs *= z2z2;
+            lhs *= &other.z;
+            let mut rhs = other.y;
+            rhs *= z1z1;
+            rhs *= &self.z;
+
+            lhs == rhs
         }
     }
 }
@@ -430,63 +446,98 @@ impl<P: Parameters> ProjectiveCurve for GroupProjective<P> {
 
         if P::COEFF_A.is_zero() {
             // A = X1^2
-            let mut a = self.x.square();
+            let mut a = self.x;
+            a.square_in_place();
 
             // B = Y1^2
-            let b = self.y.square();
+            let mut b = self.y;
+            b.square_in_place();
 
             // C = B^2
-            let mut c = b.square();
+            let mut c = b;
+            c.square_in_place();
 
             // D = 2*((X1+B)2-A-C)
-            let d = ((self.x + &b).square() - &a - &c).double();
+            let mut d = self.x;
+            d += &b;
+            d.square_in_place();
+            d -= &a;
+            d -= &c;
+            d.double_in_place();
 
             // E = 3*A
-            let e = a + &*a.double_in_place();
+            let mut e = a;
+            e += &*a.double_in_place();
 
             // F = E^2
-            let f = e.square();
+            let mut f = e;
+            f.square_in_place();
 
             // Z3 = 2*Y1*Z1
             self.z *= &self.y;
             self.z.double_in_place();
 
             // X3 = F-2*D
-            self.x = f - &d - &d;
+            self.x = -d;
+            self.x.double_in_place();
+            self.x += f;
 
             // Y3 = E*(D-X3)-8*C
-            self.y = (d - &self.x) * &e - &*c.double_in_place().double_in_place().double_in_place();
+            self.y = d;
+            self.y -= &self.x;
+            self.y *= e;
+            self.y -= &*c.double_in_place().double_in_place().double_in_place();
             self
         } else {
             // http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian-0.html#doubling-dbl-2009-l
             // XX = X1^2
-            let xx = self.x.square();
+            let mut xx = self.x;
+            xx.square_in_place();
 
             // YY = Y1^2
-            let yy = self.y.square();
+            let mut yy = self.y;
+            yy.square_in_place();
 
             // YYYY = YY^2
-            let mut yyyy = yy.square();
+            let mut yyyy = yy;
+            yyyy.square_in_place();
 
             // ZZ = Z1^2
-            let zz = self.z.square();
+            let mut zz = self.z;
+            zz.square_in_place();
 
             // S = 2*((X1+YY)^2-XX-YYYY)
-            let s = ((self.x + &yy).square() - &xx - &yyyy).double();
+            let mut s = self.x;
+            s += &yy;
+            s.square_in_place();
+            s -= &xx;
+            s -= &yyyy;
+            s.double_in_place();
 
             // M = 3*XX+a*ZZ^2
-            let m = xx + &xx + &xx + &P::mul_by_a(&zz.square());
+            let mut m = xx;
+            m.double_in_place();
+            m += xx;
+            m += &P::mul_by_a(&zz.square());
 
             // T = M^2-2*S
-            let t = m.square() - &s.double();
+            let mut t = m;
+            t.square_in_place();
+            t -= &s.double();
 
             // X3 = T
             self.x = t;
-            // Y3 = M*(S-T)-8*YYYY
-            let old_y = self.y;
-            self.y = m * &(s - &t) - &*yyyy.double_in_place().double_in_place().double_in_place();
             // Z3 = (Y1+Z1)^2-YY-ZZ
-            self.z = (old_y + &self.z).square() - &yy - &zz;
+            self.z += &self.y;
+            self.z.square_in_place();
+            self.z -= &yy;
+            self.z -= &zz;
+            // Y3 = M*(S-T)-8*YYYY
+            self.y = s;
+            self.y -= &t;
+            self.y *= m;
+            self.y -= &*yyyy.double_in_place().double_in_place().double_in_place();
+
             self
         }
     }
@@ -507,13 +558,17 @@ impl<P: Parameters> ProjectiveCurve for GroupProjective<P> {
         // Works for all curves.
 
         // Z1Z1 = Z1^2
-        let z1z1 = self.z.square();
+        let mut z1z1 = self.z;
+        z1z1.square_in_place();
 
         // U2 = X2*Z1Z1
-        let u2 = other.x * &z1z1;
+        let mut u2 = other.x;
+        u2 *= &z1z1;
 
         // S2 = Y2*Z1*Z1Z1
-        let s2 = (other.y * &self.z) * &z1z1;
+        let mut s2 = other.y;
+        s2 *= &self.z;
+        s2 *= &z1z1;
 
         if self.x == u2 && self.y == s2 {
             // The two points are equal, so we double.
@@ -522,26 +577,33 @@ impl<P: Parameters> ProjectiveCurve for GroupProjective<P> {
             // If we're adding -a and a together, self.z becomes zero as H becomes zero.
 
             // H = U2-X1
-            let h = u2 - &self.x;
+            let mut h = u2;
+            h -= &self.x;
 
             // HH = H^2
-            let hh = h.square();
+            let mut hh = h;
+            hh.square_in_place();
 
             // I = 4*HH
             let mut i = hh;
             i.double_in_place().double_in_place();
 
             // J = H*I
-            let mut j = h * &i;
+            let mut j = h;
+            j *= &i;
 
             // r = 2*(S2-Y1)
-            let r = (s2 - &self.y).double();
+            let mut r = s2;
+            r -= &self.y;
+            r.double_in_place();
 
             // V = X1*I
-            let v = self.x * &i;
+            let mut v = self.x;
+            v *= &i;
 
             // X3 = r^2 - J - 2*V
-            self.x = r.square();
+            self.x = r;
+            self.x.square_in_place();
             self.x -= &j;
             self.x -= &v;
             self.x -= &v;
@@ -549,7 +611,8 @@ impl<P: Parameters> ProjectiveCurve for GroupProjective<P> {
             // Y3 = r*(V-X3)-2*Y1*J
             j *= &self.y; // J = 2*Y1*J
             j.double_in_place();
-            self.y = v - &self.x;
+            self.y = v;
+            self.y -= &self.x;
             self.y *= &r;
             self.y -= &j;
 
@@ -602,22 +665,30 @@ impl<'a, P: Parameters> AddAssign<&'a Self> for GroupProjective<P> {
         // Works for all curves.
 
         // Z1Z1 = Z1^2
-        let z1z1 = self.z.square();
+        let mut z1z1 = self.z;
+        z1z1.square_in_place();
 
         // Z2Z2 = Z2^2
-        let z2z2 = other.z.square();
+        let mut z2z2 = other.z;
+        z2z2.square_in_place();
 
         // U1 = X1*Z2Z2
-        let u1 = self.x * &z2z2;
+        let mut u1 = self.x;
+        u1 *= &z2z2;
 
         // U2 = X2*Z1Z1
-        let u2 = other.x * &z1z1;
+        let mut u2 = other.x;
+        u2 *= &z1z1;
 
         // S1 = Y1*Z2*Z2Z2
-        let s1 = self.y * &other.z * &z2z2;
+        let mut s1 = self.y;
+        s1 *= &other.z;
+        s1 *= &z2z2;
 
         // S2 = Y2*Z1*Z1Z1
-        let s2 = other.y * &self.z * &z1z1;
+        let mut s2 = other.y;
+        s2 *= &self.z;
+        s2 *= &z1z1;
 
         if u1 == u2 && s1 == s2 {
             // The two points are equal, so we double.
@@ -626,28 +697,45 @@ impl<'a, P: Parameters> AddAssign<&'a Self> for GroupProjective<P> {
             // If we're adding -a and a together, self.z becomes zero as H becomes zero.
 
             // H = U2-U1
-            let h = u2 - &u1;
+            let mut h = u2;
+            h -= &u1;
 
             // I = (2*H)^2
-            let i = (h.double()).square();
+            let mut i = h;
+            i.double_in_place();
+            i.square_in_place();
 
             // J = H*I
-            let j = h * &i;
+            let mut j = h;
+            j *= &i;
 
             // r = 2*(S2-S1)
-            let r = (s2 - &s1).double();
+            let mut r = s2;
+            r -= &s1;
+            r.double_in_place();
 
             // V = U1*I
-            let v = u1 * &i;
+            let mut v = u1;
+            v *= &i;
 
             // X3 = r^2 - J - 2*V
-            self.x = r.square() - &j - &(v.double());
+            self.x = r;
+            self.x.square_in_place();
+            self.x -= &j;
+            self.x -= &v.double();
 
             // Y3 = r*(V - X3) - 2*S1*J
-            self.y = r * &(v - &self.x) - &*(s1 * &j).double_in_place();
+            self.y = v;
+            self.y -= &self.x; // self.x already holds X3 here
+            self.y *= &r;
+            self.y -= &*(s1 * &j).double_in_place();
 
             // Z3 = ((Z1+Z2)^2 - Z1Z1 - Z2Z2)*H
-            self.z = ((self.z + &other.z).square() - &z1z1 - &z2z2) * &h;
+            self.z += &other.z;
+            self.z.square_in_place();
+            self.z -= &z1z1;
+            self.z -= &z2z2;
+            self.z *= &h;
         }
     }
 }
@@ -700,13 +788,17 @@ impl<P: Parameters> From<GroupProjective<P>> for GroupAffine<P> {
         } else {
             // Z is nonzero, so it must have an inverse in a field.
             let zinv = p.z.inverse().unwrap();
-            let zinv_squared = zinv.square();
+            let mut zinv_squared = zinv;
+            zinv_squared.square_in_place();
 
             // X/Z^2
-            let x = p.x * &zinv_squared;
+            let mut x = p.x;
+            x *= &zinv_squared;
 
             // Y/Z^3
-            let y = p.y * &(zinv_squared * &zinv);
+            let mut y = p.y;
+            y *= &zinv_squared;
+            y *= &zinv;
 
             GroupAffine::new(x, y, false)
         }

From e40ea6080639de9c1f47adc527f34ef85776a38f Mon Sep 17 00:00:00 2001
From: Pratyush Mishra <pratyushmishra@berkeley.edu>
Date: Thu, 4 Feb 2021 19:06:39 -0800
Subject: [PATCH 7/8] Use in-place ops in twisted Edwards arithmetic

---
 ec/src/models/twisted_edwards_extended.rs | 48 +++++++++++++++--------
 1 file changed, 32 insertions(+), 16 deletions(-)

diff --git a/ec/src/models/twisted_edwards_extended.rs b/ec/src/models/twisted_edwards_extended.rs
index 629137bc9..1c3fbf061 100644
--- a/ec/src/models/twisted_edwards_extended.rs
+++ b/ec/src/models/twisted_edwards_extended.rs
@@ -470,15 +470,22 @@ impl<P: Parameters> ProjectiveCurve for GroupProjective<P> {
         // Source: https://www.hyperelliptic.org/EFD/g1p/data/twisted/extended/doubling/dbl-2008-hwcd
 
         // A = X1^2
-        let a = self.x.square();
+        let mut a = self.x;
+        a.square_in_place();
         // B = Y1^2
-        let b = self.y.square();
+        let mut b = self.y;
+        b.square_in_place();
         // C = 2 * Z1^2
-        let c = self.z.square().double();
+        let mut c = self.z;
+        c.square_in_place();
+        c.double_in_place();
         // D = a * A
         let d = P::mul_by_a(&a);
         // E = (X1 + Y1)^2 - A - B
-        let e = (self.x + &self.y).square() - &a - &b;
+        let mut e = self.x + &self.y;
+        e.square_in_place();
+        e -= &a;
+        e -= &b;
         // G = D + B
         let g = d + &b;
         // F = G - C
@@ -504,30 +511,39 @@ impl<P: Parameters> ProjectiveCurve for GroupProjective<P> {
         // Source: https://www.hyperelliptic.org/EFD/g1p/data/twisted/extended/addition/madd-2008-hwcd
 
         // A = X1*X2
-        let a = self.x * &other.x;
+        let mut a = self.x;
+        a *= &other.x;
         // B = Y1*Y2
-        let b = self.y * &other.y;
+        let mut b = self.y;
+        b *= &other.y;
         // C = T1*d*T2
-        let c = P::COEFF_D * &self.t * &other.x * &other.y;
+        let mut c = P::COEFF_D;
+        c *= &self.t;
+        c *= &other.x;
+        c *= &other.y;
 
-        // D = Z1
-        let d = self.z;
         // E = (X1+Y1)*(X2+Y2)-A-B
-        let e = (self.x + &self.y) * &(other.x + &other.y) - &a - &b;
+        let mut e = (self.x + &self.y) * &(other.x + &other.y);
+        e -= &a;
+        e -= &b;
         // F = D-C
-        let f = d - &c;
+        let f = self.z - &c; // D = Z1, read directly from self.z
         // G = D+C
-        let g = d + &c;
+        let g = self.z + &c; // D = Z1, read directly from self.z
         // H = B-a*A
         let h = b - &P::mul_by_a(&a);
         // X3 = E*F
-        self.x = e * &f;
+        self.x = e;
+        self.x *= &f;
         // Y3 = G*H
-        self.y = g * &h;
+        self.y = g;
+        self.y *= &h;
         // T3 = E*H
-        self.t = e * &h;
+        self.t = e;
+        self.t *= &h;
         // Z3 = F*G
-        self.z = f * &g;
+        self.z = f;
+        self.z *= &g;
     }
 }
 

From 63a56ef9548020996da10c543bf2c0c55e667de3 Mon Sep 17 00:00:00 2001
From: Pratyush Mishra <pratyushmishra@berkeley.edu>
Date: Thu, 4 Feb 2021 19:06:53 -0800
Subject: [PATCH 8/8] Update CHANGELOG.md

---
 CHANGELOG.md | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 633cfaa6a..aa703a7f0 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -80,8 +80,9 @@ The main features of this release are:
 - #166 (ark-ff) Add a `to_bytes_be()` and `to_bytes_le` methods to `BigInt`.
 - #169 (ark-poly) Improve radix-2 FFTs by moving to a faster algorithm by Riad S. Wahby.
 - #171, #173, #176 (ark-poly) Apply significant further speedups to the new radix-2 FFT.
-- #188 (ark-ec) Make Short Weierstrass random sampling result in an element with unknown discrete log
+- #188 (ark-ec) Make Short Weierstrass random sampling result in an element with unknown discrete log.
 - #190 (ark-ec) Add curve cycle trait and extended pairing cycle trait for all types of ec cycles.
+- #199 (ark-ff, ark-ec) Unroll some loops in biginteger arithmetic, and prefer in-place operations in field and curve arithmetic.
 - #201 (ark-ec, ark-ff, ark-test-curves, ark-test-templates) Remove the dependency on `rand_xorshift`
 
 ### Bug fixes