From baa75da59d7cae906ab31bf55e7c27e9a08366f2 Mon Sep 17 00:00:00 2001
From: Andrew Poelstra <apoelstra@wpsoftware.net>
Date: Wed, 13 May 2015 21:16:13 -0500
Subject: [PATCH 1/7] tests: add a couple tests

  - Add zero/one sanity check tests for ecmult

  - Add unit test for secp256k1_scalar_split_lambda_var

  - Typo fix in `ge_equals_ge`; was comparing b->y to itself, should
    have been comparing a->y to b->y

  - Normalize y-coordinate in `random_group_element_test`; this is
    needed to pass random group elements as the first argument to
    `ge_equals_ge`, which I will do in a future commit.
---
 src/tests.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/tests.c b/src/tests.c
index db99704967..b4ca476c08 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -56,6 +56,7 @@ void random_group_element_test(secp256k1_ge_t *ge) {
     do {
         random_field_element_test(&fe);
         if (secp256k1_ge_set_xo_var(ge, &fe, secp256k1_rand32() & 1)) {
+            secp256k1_fe_normalize(&ge->y);
             break;
         }
     } while(1);
@@ -932,7 +933,7 @@ void ge_equals_ge(const secp256k1_ge_t *a, const secp256k1_ge_t *b) {
         return;
     }
     CHECK(secp256k1_fe_equal_var(&a->x, &b->x));
-    CHECK(secp256k1_fe_equal_var(&b->y, &b->y));
+    CHECK(secp256k1_fe_equal_var(&a->y, &b->y));
 }
 
 /* This compares jacobian points including their Z, not just their geometric meaning. */
@@ -1323,6 +1324,8 @@ void test_point_times_order(const secp256k1_gej_t *point) {
     /* X * (point + G) + (order-X) * (pointer + G) = 0 */
     secp256k1_scalar_t x;
     secp256k1_scalar_t nx;
+    secp256k1_scalar_t zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
+    secp256k1_scalar_t one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1);
     secp256k1_gej_t res1, res2;
     secp256k1_ge_t res3;
     unsigned char pub[65];
@@ -1340,6 +1343,16 @@ void test_point_times_order(const secp256k1_gej_t *point) {
     CHECK(secp256k1_eckey_pubkey_serialize(&res3, pub, &psize, 0) == 0);
     psize = 65;
     CHECK(secp256k1_eckey_pubkey_serialize(&res3, pub, &psize, 1) == 0);
+    /* check zero/one edge cases */
+    secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &zero, &zero);
+    secp256k1_ge_set_gej(&res3, &res1);
+    CHECK(secp256k1_ge_is_infinity(&res3));
+    secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &one, &zero);
+    secp256k1_ge_set_gej(&res3, &res1);
+    ge_equals_gej(&res3, point);
+    secp256k1_ecmult(&ctx->ecmult_ctx, &res1, point, &zero, &one);
+    secp256k1_ge_set_gej(&res3, &res1);
+    ge_equals_ge(&res3, &secp256k1_ge_const_g);
 }
 
 void run_point_times_order(void) {
@@ -1487,6 +1500,33 @@ void run_ecmult_gen_blind(void) {
     }
 }
 
+#ifdef USE_ENDOMORPHISM
+/***** ENDOMORPHISH TESTS *****/
+void test_scalar_split(void) {
+    secp256k1_scalar_t full;
+    secp256k1_scalar_t s1, slam;
+    const unsigned char zero[32] = {0};
+    unsigned char tmp[32];
+
+    random_scalar_order_test(&full);
+    secp256k1_scalar_split_lambda_var(&s1, &slam, &full);
+
+    /* check that both are <= 128 bits in size */
+    if (secp256k1_scalar_is_high(&s1))
+        secp256k1_scalar_negate(&s1, &s1);
+    if (secp256k1_scalar_is_high(&slam))
+        secp256k1_scalar_negate(&slam, &slam);
+
+    secp256k1_scalar_get_b32(tmp, &s1);
+    CHECK(memcmp(zero, tmp, 16) == 0);
+    secp256k1_scalar_get_b32(tmp, &slam);
+    CHECK(memcmp(zero, tmp, 16) == 0);
+}
+
+void run_endomorphism_tests(void) {
+    test_scalar_split();
+}
+#endif
 
 void random_sign(secp256k1_scalar_t *sigr, secp256k1_scalar_t *sigs, const secp256k1_scalar_t *key, const secp256k1_scalar_t *msg, int *recid) {
     secp256k1_scalar_t nonce;
@@ -2227,6 +2267,11 @@ int main(int argc, char **argv) {
     run_ecmult_constants();
     run_ecmult_gen_blind();
 
+    /* endomorphism tests */
+#ifdef USE_ENDOMORPHISM
+    run_endomorphism_tests();
+#endif
+
     /* ecdsa tests */
     run_random_pubkeys();
     run_ecdsa_sign_verify();

From 440150006064295038d4d7fcb7fe2796bf1672df Mon Sep 17 00:00:00 2001
From: Andrew Poelstra <apoelstra@wpsoftware.net>
Date: Wed, 13 May 2015 17:31:47 -0500
Subject: [PATCH 2/7] Add constant-time multiply `secp256k1_ecmult_const` for
 ECDH

Designed with clear separation of the wNAF conversion, precomputation
and exponentiation (since the precomp at least we will probably want
to separate in the API for users who reuse points a lot.

Future work:
  - actually separate precomp in the API
  - do multiexp rather than single exponentiation
---
 Makefile.am             |   2 +
 src/ecmult_const.h      |  15 ++++
 src/ecmult_const_impl.h | 139 ++++++++++++++++++++++++++++++++
 src/group.h             |   4 +
 src/group_impl.h        |   5 ++
 src/scalar.h            |  11 +++
 src/scalar_4x64_impl.h  |  28 +++++++
 src/scalar_8x32_impl.h  |  41 ++++++++++
 src/scalar_impl.h       |   5 ++
 src/secp256k1.c         |   1 +
 src/tests.c             | 174 +++++++++++++++++++++++++++++++++++++++-
 11 files changed, 424 insertions(+), 1 deletion(-)
 create mode 100644 src/ecmult_const.h
 create mode 100644 src/ecmult_const_impl.h

diff --git a/Makefile.am b/Makefile.am
index 2609c7feed..8c8bd77367 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -19,6 +19,8 @@ noinst_HEADERS += src/eckey.h
 noinst_HEADERS += src/eckey_impl.h
 noinst_HEADERS += src/ecmult.h
 noinst_HEADERS += src/ecmult_impl.h
+noinst_HEADERS += src/ecmult_const.h
+noinst_HEADERS += src/ecmult_const_impl.h
 noinst_HEADERS += src/ecmult_gen.h
 noinst_HEADERS += src/ecmult_gen_impl.h
 noinst_HEADERS += src/num.h
diff --git a/src/ecmult_const.h b/src/ecmult_const.h
new file mode 100644
index 0000000000..e8ff1feb8c
--- /dev/null
+++ b/src/ecmult_const.h
@@ -0,0 +1,15 @@
+/**********************************************************************
+ * Copyright (c) 2015 Andrew Poelstra                                 *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECMULT_CONST_
+#define _SECP256K1_ECMULT_CONST_
+
+#include "scalar.h"
+#include "group.h"
+
+static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a, const secp256k1_scalar_t *q);
+
+#endif
diff --git a/src/ecmult_const_impl.h b/src/ecmult_const_impl.h
new file mode 100644
index 0000000000..956d5e4c0f
--- /dev/null
+++ b/src/ecmult_const_impl.h
@@ -0,0 +1,139 @@
+/**********************************************************************
+ * Copyright (c) 2015 Pieter Wuille, Andrew Poelstra                  *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_ECMULT_CONST_IMPL_
+#define _SECP256K1_ECMULT_CONST_IMPL_
+
+#include "scalar.h"
+#include "group.h"
+#include "ecmult_const.h"
+#include "ecmult_impl.h"
+
+#define WNAF_BITS 256
+#define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w))
+
+/* This is like `ECMULT_TABLE_GET_GE` but is constant time */
+#define ECMULT_CONST_TABLE_GET_GE(r,pre,n,w) do { \
+    int m; \
+    int abs_n = (n) * (((n) > 0) * 2 - 1); \
+    secp256k1_fe_t neg_y; \
+    VERIFY_CHECK(((n) & 1) == 1); \
+    VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
+    VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
+    for (m = 1; m < (1 << ((w) - 1)); m += 2) { \
+        /* This loop is used to avoid secret data in array indices. See
+         * the comment in ecmult_gen_impl.h for rationale. */ \
+        secp256k1_fe_cmov(&(r)->x, &(pre)[(m - 1) / 2].x, m == abs_n); \
+        secp256k1_fe_cmov(&(r)->y, &(pre)[(m - 1) / 2].y, m == abs_n); \
+    } \
+    (r)->infinity = 0; \
+    secp256k1_fe_normalize_weak(&(r)->x); \
+    secp256k1_fe_normalize_weak(&(r)->y); \
+    secp256k1_fe_negate(&neg_y, &(r)->y, 1); \
+    secp256k1_fe_cmov(&(r)->y, &neg_y, (n) != abs_n); \
+} while(0)
+
+
+/** Convert a number to WNAF notation. The number becomes represented by sum(2^{wi} * wnaf[i], i=0..return_val)
+ *  with the following guarantees:
+ *  - each wnaf[i] an odd integer between -(1 << w) and (1 << w)
+ *  - each wnaf[i] is nonzero
+ *  - the number of words set is returned; this is always (WNAF_BITS + w - 1) / w
+ *
+ *  Adapted from `The Width-w NAF Method Provides Small Memory and Fast Elliptic Scalar
+ *  Multiplications Secure against Side Channel Attacks`, Okeya and Tagaki. M. Joye (Ed.)
+ *  CT-RSA 2003, LNCS 2612, pp. 328-443, 2003. Springer-Verlagy Berlin Heidelberg 2003
+ *
+ *  Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 335
+ */
+static void secp256k1_wnaf_const(int *wnaf, const secp256k1_scalar_t *a, int w) {
+    secp256k1_scalar_t s = *a;
+    /* Negate to force oddness */
+    int is_even = secp256k1_scalar_is_even(&s);
+    int global_sign = secp256k1_scalar_cond_negate(&s, is_even);
+
+    int word = 0;
+    /* 1 2 3 */
+    int u_last = secp256k1_scalar_shr_int(&s, w);
+    int u;
+    /* 4 */
+    while (word * w < WNAF_BITS) {
+        int sign;
+        int even;
+
+        /* 4.1 4.4 */
+        u = secp256k1_scalar_shr_int(&s, w);
+        /* 4.2 */
+        even = ((u & 1) == 0);
+        sign = 2 * (u_last > 0) - 1;
+        u += sign * even;
+        u_last -= sign * even * (1 << w);
+
+        /* 4.3, adapted for global sign change */
+        wnaf[word++] = u_last * global_sign;
+
+        u_last = u;
+    }
+    wnaf[word] = u * global_sign;
+
+    VERIFY_CHECK(secp256k1_scalar_is_zero(&s));
+    VERIFY_CHECK(word == WNAF_SIZE(w));
+}
+
+
+static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a, const secp256k1_scalar_t *scalar) {
+    secp256k1_ge_t pre_a[ECMULT_TABLE_SIZE(WINDOW_A)];
+    secp256k1_ge_t tmpa;
+    secp256k1_fe_t Z;
+
+    int wnaf[1 + WNAF_SIZE(WINDOW_A - 1)];
+
+    int i;
+    int is_zero = secp256k1_scalar_is_zero(scalar);
+    secp256k1_scalar_t sc = *scalar;
+    /* the wNAF ladder cannot handle zero, so bump this to one .. we will
+     * correct the result after the fact */
+    sc.d[0] += is_zero;
+
+    /* build wnaf representation for q. */
+    secp256k1_wnaf_const(wnaf, &sc, WINDOW_A - 1);
+
+    /* Calculate odd multiples of a.
+     * All multiples are brought to the same Z 'denominator', which is stored
+     * in Z. Due to secp256k1' isomorphism we can do all operations pretending
+     * that the Z coordinate was 1, use affine addition formulae, and correct
+     * the Z coordinate of the result once at the end.
+     */
+    secp256k1_gej_set_ge(r, a);
+    secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, r);
+
+    /* first loop iteration (separated out so we can directly set r, rather
+     * than having it start at infinity, get doubled several times, then have
+     * its new value added to it) */
+    i = wnaf[WNAF_SIZE(WINDOW_A - 1)];
+    VERIFY_CHECK(i != 0);
+    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
+    secp256k1_gej_set_ge(r, &tmpa);
+    /* remaining loop iterations */
+    for (i = WNAF_SIZE(WINDOW_A - 1) - 1; i >= 0; i--) {
+        int n;
+        int j;
+        for (j = 0; j < WINDOW_A - 1; ++j) {
+            secp256k1_gej_double_nonzero(r, r, NULL);
+        }
+        n = wnaf[i];
+        VERIFY_CHECK(n != 0);
+        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
+        secp256k1_gej_add_ge(r, r, &tmpa);
+    }
+
+    secp256k1_fe_mul(&r->z, &r->z, &Z);
+
+    /* correct for zero */
+    r->infinity |= is_zero;
+}
+
+#endif
diff --git a/src/group.h b/src/group.h
index 9203bdf7eb..48398918e2 100644
--- a/src/group.h
+++ b/src/group.h
@@ -94,6 +94,10 @@ static void secp256k1_gej_neg(secp256k1_gej_t *r, const secp256k1_gej_t *a);
 /** Check whether a group element is the point at infinity. */
 static int secp256k1_gej_is_infinity(const secp256k1_gej_t *a);
 
+/** Set r equal to the double of a. If rzr is not-NULL, r->z = a->z * *rzr (where infinity means an implicit z = 0).
+ * a may not be zero. Constant time. */
+static void secp256k1_gej_double_nonzero(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t *rzr);
+
 /** Set r equal to the double of a. If rzr is not-NULL, r->z = a->z * *rzr (where infinity means an implicit z = 0). */
 static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t *rzr);
 
diff --git a/src/group_impl.h b/src/group_impl.h
index 008bbac315..165eba2a52 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -301,6 +301,11 @@ static void secp256k1_gej_double_var(secp256k1_gej_t *r, const secp256k1_gej_t *
     secp256k1_fe_add(&r->y, &t2);         /* Y' = 36*X^3*Y^2 - 27*X^6 - 8*Y^4 (4) */
 }
 
+static SECP256K1_INLINE void secp256k1_gej_double_nonzero(secp256k1_gej_t *r, const secp256k1_gej_t *a, secp256k1_fe_t *rzr) {
+    VERIFY_CHECK(!secp256k1_gej_is_infinity(a));
+    secp256k1_gej_double_var(r, a, rzr);
+}
+
 static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_gej_t *b, secp256k1_fe_t *rzr) {
     /* Operations: 12 mul, 4 sqr, 2 normalize, 12 mul_int/add/negate */
     secp256k1_fe_t z22, z12, u1, u2, s1, s2, h, i, i2, h2, h3, t;
diff --git a/src/scalar.h b/src/scalar.h
index f5d09f8d47..f33e72a7c9 100644
--- a/src/scalar.h
+++ b/src/scalar.h
@@ -48,6 +48,10 @@ static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit);
 /** Multiply two scalars (modulo the group order). */
 static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b);
 
+/** Shift a scalar right by some amount strictly between 0 and 16, returning
+ *  the low bits that were shifted off */
+static int secp256k1_scalar_shr_int(secp256k1_scalar_t *r, int n);
+
 /** Compute the square of a scalar (modulo the group order). */
 static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a);
 
@@ -66,9 +70,16 @@ static int secp256k1_scalar_is_zero(const secp256k1_scalar_t *a);
 /** Check whether a scalar equals one. */
 static int secp256k1_scalar_is_one(const secp256k1_scalar_t *a);
 
+/** Check whether a scalar, considered as an nonnegative integer, is even. */
+static int secp256k1_scalar_is_even(const secp256k1_scalar_t *a);
+
 /** Check whether a scalar is higher than the group order divided by 2. */
 static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a);
 
+/** Conditionally negate a number, in constant time.
+ * Returns -1 if the number was negated, 1 otherwise */
+static int secp256k1_scalar_cond_negate(secp256k1_scalar_t *a, int flag);
+
 #ifndef USE_NUM_NONE
 /** Convert a scalar to a number. */
 static void secp256k1_scalar_get_num(secp256k1_num_t *r, const secp256k1_scalar_t *a);
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index ff365292f8..147229dab9 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -164,6 +164,22 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
     return yes;
 }
 
+static int secp256k1_scalar_cond_negate(secp256k1_scalar_t *r, int flag) {
+    /* If we are flag = 0, mask = 00...00 and this is a no-op;
+     * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */
+    uint64_t mask = !flag - 1;
+    uint64_t nonzero = (secp256k1_scalar_is_zero(r) != 0) - 1;
+    uint128_t t = (uint128_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask);
+    r->d[0] = t & nonzero; t >>= 64;
+    t += (uint128_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask);
+    r->d[1] = t & nonzero; t >>= 64;
+    t += (uint128_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask);
+    r->d[2] = t & nonzero; t >>= 64;
+    t += (uint128_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask);
+    r->d[3] = t & nonzero;
+    return 2 * (mask == 0) - 1;
+}
+
 /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
 
 /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
@@ -877,6 +893,18 @@ static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t
     secp256k1_scalar_reduce_512(r, l);
 }
 
+static int secp256k1_scalar_shr_int(secp256k1_scalar_t *r, int n) {
+    int ret;
+    VERIFY_CHECK(n > 0);
+    VERIFY_CHECK(n < 16);
+    ret = r->d[0] & ((1 << n) - 1);
+    r->d[0] = (r->d[0] >> n) + (r->d[1] << (64 - n));
+    r->d[1] = (r->d[1] >> n) + (r->d[2] << (64 - n));
+    r->d[2] = (r->d[2] >> n) + (r->d[3] << (64 - n));
+    r->d[3] = (r->d[3] >> n);
+    return ret;
+}
+
 static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
     uint64_t l[8];
     secp256k1_scalar_sqr_512(l, a);
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 22b31d4112..0ad2423db0 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -234,6 +234,31 @@ static int secp256k1_scalar_is_high(const secp256k1_scalar_t *a) {
     return yes;
 }
 
+static int secp256k1_scalar_cond_negate(secp256k1_scalar_t *r, int flag) {
+    /* If we are flag = 0, mask = 00...00 and this is a no-op;
+     * if we are flag = 1, mask = 11...11 and this is identical to secp256k1_scalar_negate */
+    uint32_t mask = !flag - 1;
+    uint32_t nonzero = 0xFFFFFFFFUL * (secp256k1_scalar_is_zero(r) == 0);
+    uint64_t t = (uint64_t)(r->d[0] ^ mask) + ((SECP256K1_N_0 + 1) & mask);
+    r->d[0] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[1] ^ mask) + (SECP256K1_N_1 & mask);
+    r->d[1] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[2] ^ mask) + (SECP256K1_N_2 & mask);
+    r->d[2] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[3] ^ mask) + (SECP256K1_N_3 & mask);
+    r->d[3] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[4] ^ mask) + (SECP256K1_N_4 & mask);
+    r->d[4] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[5] ^ mask) + (SECP256K1_N_5 & mask);
+    r->d[5] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[6] ^ mask) + (SECP256K1_N_6 & mask);
+    r->d[6] = t & nonzero; t >>= 32;
+    t += (uint64_t)(r->d[7] ^ mask) + (SECP256K1_N_7 & mask);
+    r->d[7] = t & nonzero;
+    return 2 * (mask == 0) - 1;
+}
+
+
 /* Inspired by the macros in OpenSSL's crypto/bn/asm/x86_64-gcc.c. */
 
 /** Add a*b to the number defined by (c0,c1,c2). c2 must never overflow. */
@@ -624,6 +649,22 @@ static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t
     secp256k1_scalar_reduce_512(r, l);
 }
 
+static int secp256k1_scalar_shr_int(secp256k1_scalar_t *r, int n) {
+    int ret;
+    VERIFY_CHECK(n > 0);
+    VERIFY_CHECK(n < 16);
+    ret = r->d[0] & ((1 << n) - 1);
+    r->d[0] = (r->d[0] >> n) + (r->d[1] << (32 - n));
+    r->d[1] = (r->d[1] >> n) + (r->d[2] << (32 - n));
+    r->d[2] = (r->d[2] >> n) + (r->d[3] << (32 - n));
+    r->d[3] = (r->d[3] >> n) + (r->d[4] << (32 - n));
+    r->d[4] = (r->d[4] >> n) + (r->d[5] << (32 - n));
+    r->d[5] = (r->d[5] >> n) + (r->d[6] << (32 - n));
+    r->d[6] = (r->d[6] >> n) + (r->d[7] << (32 - n));
+    r->d[7] = (r->d[7] >> n);
+    return ret;
+}
+
 static void secp256k1_scalar_sqr(secp256k1_scalar_t *r, const secp256k1_scalar_t *a) {
     uint32_t l[16];
     secp256k1_scalar_sqr_512(l, a);
diff --git a/src/scalar_impl.h b/src/scalar_impl.h
index abd777bd2c..d5426a8c37 100644
--- a/src/scalar_impl.h
+++ b/src/scalar_impl.h
@@ -234,6 +234,11 @@ static void secp256k1_scalar_inverse(secp256k1_scalar_t *r, const secp256k1_scal
     secp256k1_scalar_mul(r, t, &x6); /* 111111 */
 }
 
+SECP256K1_INLINE static int secp256k1_scalar_is_even(const secp256k1_scalar_t *a) {
+    /* d[0] is present and is the lowest word for all representations */
+    return !(a->d[0] & 1);
+}
+
 static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_scalar_t *x) {
 #if defined(USE_SCALAR_INV_BUILTIN)
     secp256k1_scalar_inverse(r, x);
diff --git a/src/secp256k1.c b/src/secp256k1.c
index 8f5b652019..6904108e2a 100644
--- a/src/secp256k1.c
+++ b/src/secp256k1.c
@@ -14,6 +14,7 @@
 #include "scalar_impl.h"
 #include "group_impl.h"
 #include "ecmult_impl.h"
+#include "ecmult_const_impl.h"
 #include "ecmult_gen_impl.h"
 #include "ecdsa_impl.h"
 #include "eckey_impl.h"
diff --git a/src/tests.c b/src/tests.c
index b4ca476c08..7dc9805cb2 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -496,6 +496,20 @@ void scalar_test(void) {
         secp256k1_scalar_get_num(&rnum2, &r);
         CHECK(secp256k1_num_eq(&rnum, &rnum2));
     }
+
+    {
+        /* test secp256k1_scalar_shr_int */
+        secp256k1_scalar_t r;
+        int i;
+        int low;
+        random_scalar_order_test(&r);
+        for (i = 0; i < 100; ++i) {
+            int shift = 1 + (secp256k1_rand32() % 15);
+            int expected = r.d[0] % (1 << shift);
+            low = secp256k1_scalar_shr_int(&r, shift);
+            CHECK(expected == low);
+        }
+    }
 #endif
 
     {
@@ -1377,6 +1391,109 @@ void run_point_times_order(void) {
     CHECK(secp256k1_fe_equal_var(&x, &xr));
 }
 
+void ecmult_const_random_mult(void) {
+    /* random starting point A (on the curve) */
+    secp256k1_ge_t a = SECP256K1_GE_CONST(
+        0x6d986544, 0x57ff52b8, 0xcf1b8126, 0x5b802a5b,
+        0xa97f9263, 0xb1e88044, 0x93351325, 0x91bc450a,
+        0x535c59f7, 0x325e5d2b, 0xc391fbe8, 0x3c12787c,
+        0x337e4a98, 0xe82a9011, 0x0123ba37, 0xdd769c7d
+    );
+    /* random initial factor xn */
+    secp256k1_scalar_t xn = SECP256K1_SCALAR_CONST(
+        0x649d4f77, 0xc4242df7, 0x7f2079c9, 0x14530327,
+        0xa31b876a, 0xd2d8ce2a, 0x2236d5c6, 0xd7b2029b
+    );
+    /* expected xn * A (from sage) */
+    secp256k1_ge_t expected_b = SECP256K1_GE_CONST(
+        0x23773684, 0x4d209dc7, 0x098a786f, 0x20d06fcd,
+        0x070a38bf, 0xc11ac651, 0x03004319, 0x1e2a8786,
+        0xed8c3b8e, 0xc06dd57b, 0xd06ea66e, 0x45492b0f,
+        0xb84e4e1b, 0xfb77e21f, 0x96baae2a, 0x63dec956
+    );
+    secp256k1_gej_t b;
+    secp256k1_ecmult_const(&b, &a, &xn);
+
+    CHECK(secp256k1_ge_is_valid_var(&a));
+    ge_equals_gej(&expected_b, &b);
+}
+
+void ecmult_const_commutativity(void) {
+    secp256k1_scalar_t a;
+    secp256k1_scalar_t b;
+    secp256k1_gej_t res1;
+    secp256k1_gej_t res2;
+    secp256k1_ge_t mid1;
+    secp256k1_ge_t mid2;
+    random_scalar_order_test(&a);
+    random_scalar_order_test(&b);
+
+    secp256k1_ecmult_const(&res1, &secp256k1_ge_const_g, &a);
+    secp256k1_ecmult_const(&res2, &secp256k1_ge_const_g, &b);
+    secp256k1_ge_set_gej(&mid1, &res1);
+    secp256k1_ge_set_gej(&mid2, &res2);
+    secp256k1_ecmult_const(&res1, &mid1, &b);
+    secp256k1_ecmult_const(&res2, &mid2, &a);
+    secp256k1_ge_set_gej(&mid1, &res1);
+    secp256k1_ge_set_gej(&mid2, &res2);
+    ge_equals_ge(&mid1, &mid2);
+}
+
+void ecmult_const_mult_zero_one(void) {
+    secp256k1_scalar_t zero = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 0);
+    secp256k1_scalar_t one = SECP256K1_SCALAR_CONST(0, 0, 0, 0, 0, 0, 0, 1);
+    secp256k1_scalar_t negone;
+    secp256k1_gej_t res1;
+    secp256k1_ge_t res2;
+    secp256k1_ge_t point;
+    secp256k1_scalar_negate(&negone, &one);
+
+    random_group_element_test(&point);
+    secp256k1_ecmult_const(&res1, &point, &zero);
+    secp256k1_ge_set_gej(&res2, &res1);
+    CHECK(secp256k1_ge_is_infinity(&res2));
+    secp256k1_ecmult_const(&res1, &point, &one);
+    secp256k1_ge_set_gej(&res2, &res1);
+    ge_equals_ge(&res2, &point);
+    secp256k1_ecmult_const(&res1, &point, &negone);
+    secp256k1_gej_neg(&res1, &res1);
+    secp256k1_ge_set_gej(&res2, &res1);
+    ge_equals_ge(&res2, &point);
+}
+
+void ecmult_const_chain_multiply(void) {
+    /* Check known result (randomly generated test problem from sage) */
+    const secp256k1_scalar_t scalar = SECP256K1_SCALAR_CONST(
+        0x4968d524, 0x2abf9b7a, 0x466abbcf, 0x34b11b6d,
+        0xcd83d307, 0x827bed62, 0x05fad0ce, 0x18fae63b
+    );
+    const secp256k1_gej_t expected_point = SECP256K1_GEJ_CONST(
+        0x5494c15d, 0x32099706, 0xc2395f94, 0x348745fd,
+        0x757ce30e, 0x4e8c90fb, 0xa2bad184, 0xf883c69f,
+        0x5d195d20, 0xe191bf7f, 0x1be3e55f, 0x56a80196,
+        0x6071ad01, 0xf1462f66, 0xc997fa94, 0xdb858435
+    );
+    secp256k1_gej_t point;
+    secp256k1_ge_t res;
+    int i;
+
+    secp256k1_gej_set_ge(&point, &secp256k1_ge_const_g);
+    for (i = 0; i < 100; ++i) {
+        secp256k1_ge_t tmp;
+        secp256k1_ge_set_gej(&tmp, &point);
+        secp256k1_ecmult_const(&point, &tmp, &scalar);
+    }
+    secp256k1_ge_set_gej(&res, &point);
+    ge_equals_gej(&res, &expected_point);
+}
+
+void run_ecmult_const_tests(void) {
+    ecmult_const_mult_zero_one();
+    ecmult_const_random_mult();
+    ecmult_const_commutativity();
+    ecmult_const_chain_multiply();
+}
+
 void test_wnaf(const secp256k1_scalar_t *number, int w) {
     secp256k1_scalar_t x, two, t;
     int wnaf[256];
@@ -1411,12 +1528,66 @@ void test_wnaf(const secp256k1_scalar_t *number, int w) {
     CHECK(secp256k1_scalar_eq(&x, number)); /* check that wnaf represents number */
 }
 
+void test_constant_wnaf_negate(const secp256k1_scalar_t *number) {
+    secp256k1_scalar_t neg1 = *number;
+    secp256k1_scalar_t neg2 = *number;
+    int sign1 = 1;
+    int sign2 = 1;
+
+    if (!secp256k1_scalar_get_bits(&neg1, 0, 1)) {
+        secp256k1_scalar_negate(&neg1, &neg1);
+        sign1 = -1;
+    }
+    sign2 = secp256k1_scalar_cond_negate(&neg2, secp256k1_scalar_is_even(&neg2));
+    CHECK(sign1 == sign2);
+    CHECK(secp256k1_scalar_eq(&neg1, &neg2));
+}
+
+void test_constant_wnaf(const secp256k1_scalar_t *number, int w) {
+    secp256k1_scalar_t x, shift;
+    int wnaf[256] = {0};
+    int i;
+
+    secp256k1_scalar_set_int(&x, 0);
+    secp256k1_scalar_set_int(&shift, 1 << w);
+    secp256k1_wnaf_const(wnaf, number, w);
+
+    for (i = WNAF_SIZE(w); i >= 0; --i) {
+        secp256k1_scalar_t t;
+        int v = wnaf[i];
+        CHECK(v != 0); /* check nonzero */
+        CHECK(v & 1);  /* check parity */
+        CHECK(v > -(1 << w)); /* check range above */
+        CHECK(v < (1 << w));  /* check range below */
+
+        secp256k1_scalar_mul(&x, &x, &shift);
+        if (v >= 0) {
+            secp256k1_scalar_set_int(&t, v);
+        } else {
+            secp256k1_scalar_set_int(&t, -v);
+            secp256k1_scalar_negate(&t, &t);
+        }
+        secp256k1_scalar_add(&x, &x, &t);
+    }
+    CHECK(secp256k1_scalar_eq(&x, number));
+}
+
 void run_wnaf(void) {
     int i;
-    secp256k1_scalar_t n;
+    secp256k1_scalar_t n = {{0}};
+
+    /* Sanity check: 1 and 2 are the smallest odd and even numbers and should
+     *               have easier-to-diagnose failure modes  */
+    n.d[0] = 1;
+    test_constant_wnaf(&n, 4);
+    n.d[0] = 2;
+    test_constant_wnaf(&n, 4);
+    /* Random tests */
     for (i = 0; i < count; i++) {
         random_scalar_order(&n);
         test_wnaf(&n, 4+(i%10));
+        test_constant_wnaf_negate(&n);
+        test_constant_wnaf(&n, 4 + (i % 10));
     }
 }
 
@@ -2266,6 +2437,7 @@ int main(int argc, char **argv) {
     run_ecmult_chain();
     run_ecmult_constants();
     run_ecmult_gen_blind();
+    run_ecmult_const_tests();
 
     /* endomorphism tests */
 #ifdef USE_ENDOMORPHISM

From 0739bbb6f056b26bb84006012033333a73170d42 Mon Sep 17 00:00:00 2001
From: Andrew Poelstra <apoelstra@wpsoftware.net>
Date: Mon, 29 Jun 2015 15:06:28 -0500
Subject: [PATCH 3/7] Add ECDH module which works by hashing the output of
 ecmult_const

---
 .travis.yml                          |  6 ++-
 Makefile.am                          |  4 ++
 configure.ac                         | 11 ++++
 include/secp256k1_ecdh.h             | 30 +++++++++++
 src/modules/ecdh/Makefile.am.include |  3 ++
 src/modules/ecdh/main_impl.h         | 53 ++++++++++++++++++++
 src/modules/ecdh/tests_impl.h        | 75 ++++++++++++++++++++++++++++
 src/secp256k1.c                      |  4 ++
 src/tests.c                          |  9 ++++
 9 files changed, 193 insertions(+), 2 deletions(-)
 create mode 100644 include/secp256k1_ecdh.h
 create mode 100644 src/modules/ecdh/Makefile.am.include
 create mode 100644 src/modules/ecdh/main_impl.h
 create mode 100644 src/modules/ecdh/tests_impl.h

diff --git a/.travis.yml b/.travis.yml
index 8d6c1f0b75..282c8b8d5e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -8,12 +8,14 @@ compiler:
   - gcc
 env:
   global:
-    - FIELD=auto  BIGNUM=auto  SCALAR=auto  ENDOMORPHISM=no  STATICPRECOMPUTATION=no ASM=no  BUILD=check  EXTRAFLAGS= HOST=
+    - FIELD=auto  BIGNUM=auto  SCALAR=auto  ENDOMORPHISM=no  STATICPRECOMPUTATION=no ASM=no  BUILD=check  EXTRAFLAGS= HOST= ECDH=no
   matrix:
     - SCALAR=32bit
+    - SCALAR=32bit    FIELD=32bit       ECDH=yes
     - SCALAR=64bit
     - FIELD=64bit
     - FIELD=64bit     ENDOMORPHISM=yes
+    - FIELD=64bit     ENDOMORPHISM=yes  ECDH=yes
     - FIELD=64bit                       ASM=x86_64
     - FIELD=64bit     ENDOMORPHISM=yes  ASM=x86_64
     - FIELD=32bit
@@ -56,5 +58,5 @@ before_script: ./autogen.sh
 script:
  - if [ -n "$HOST" ]; then export USE_HOST="--host=$HOST"; fi
  - if [ "x$HOST" = "xi686-linux-gnu" ]; then export CC="$CC -m32"; fi
- - ./configure --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR --enable-ecmult-static-precomputation=$STATICPRECOMPUTATION $EXTRAFLAGS $USE_HOST && make -j2 $BUILD
+ - ./configure --enable-endomorphism=$ENDOMORPHISM --with-field=$FIELD --with-bignum=$BIGNUM --with-scalar=$SCALAR --enable-ecmult-static-precomputation=$STATICPRECOMPUTATION --enable-module-ecdh=$ECDH $EXTRAFLAGS $USE_HOST && make -j2 $BUILD
 os: linux
diff --git a/Makefile.am b/Makefile.am
index 8c8bd77367..8f0ffdb456 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -97,3 +97,7 @@ CLEANFILES = gen_context src/ecmult_static_context.h
 endif
 
 EXTRA_DIST = autogen.sh src/gen_context.c src/basic-config.h
+
+if ENABLE_MODULE_ECDH
+include src/modules/ecdh/Makefile.am.include
+endif
diff --git a/configure.ac b/configure.ac
index 70c9e593f6..e3126dbe26 100644
--- a/configure.ac
+++ b/configure.ac
@@ -102,6 +102,11 @@ AC_ARG_ENABLE(ecmult_static_precomputation,
     [use_ecmult_static_precomputation=$enableval],
     [use_ecmult_static_precomputation=yes])
 
+AC_ARG_ENABLE(module_ecdh,
+    AS_HELP_STRING([--enable-module-ecdh],[enable ECDH shared secret computation (default is no)]),
+    [enable_module_ecdh=$enableval],
+    [enable_module_ecdh=no])
+
 AC_ARG_WITH([field], [AS_HELP_STRING([--with-field=64bit|32bit|auto],
 [Specify Field Implementation. Default is auto])],[req_field=$withval], [req_field=auto])
 
@@ -315,6 +320,10 @@ if test x"$use_ecmult_static_precomputation" = x"yes"; then
   AC_DEFINE(USE_ECMULT_STATIC_PRECOMPUTATION, 1, [Define this symbol to use a statically generated ecmult table])
 fi
 
+if test x"$enable_module_ecdh" = x"yes"; then
+  AC_DEFINE(ENABLE_MODULE_ECDH, 1, [Define this symbol to enable the ECDH module])
+fi
+
 AC_C_BIGENDIAN()
 
 AC_MSG_NOTICE([Using assembly optimizations: $set_asm])
@@ -322,6 +331,7 @@ AC_MSG_NOTICE([Using field implementation: $set_field])
 AC_MSG_NOTICE([Using bignum implementation: $set_bignum])
 AC_MSG_NOTICE([Using scalar implementation: $set_scalar])
 AC_MSG_NOTICE([Using endomorphism optimizations: $use_endomorphism])
+AC_MSG_NOTICE([Building ECDH module: $enable_module_ecdh])
 
 AC_CONFIG_HEADERS([src/libsecp256k1-config.h])
 AC_CONFIG_FILES([Makefile libsecp256k1.pc])
@@ -332,6 +342,7 @@ AC_SUBST(SECP_TEST_INCLUDES)
 AM_CONDITIONAL([USE_TESTS], [test x"$use_tests" != x"no"])
 AM_CONDITIONAL([USE_BENCHMARK], [test x"$use_benchmark" = x"yes"])
 AM_CONDITIONAL([USE_ECMULT_STATIC_PRECOMPUTATION], [test x"$use_ecmult_static_precomputation" = x"yes"])
+AM_CONDITIONAL([ENABLE_MODULE_ECDH], [test x"$enable_module_ecdh" = x"yes"])
 
 dnl make sure nothing new is exported so that we don't break the cache
 PKGCONFIG_PATH_TEMP="$PKG_CONFIG_PATH"
diff --git a/include/secp256k1_ecdh.h b/include/secp256k1_ecdh.h
new file mode 100644
index 0000000000..671c393fae
--- /dev/null
+++ b/include/secp256k1_ecdh.h
@@ -0,0 +1,30 @@
+#ifndef _SECP256K1_ECDH_
+# define _SECP256K1_ECDH_
+
+# include "secp256k1.h"
+
+# ifdef __cplusplus
+extern "C" {
+# endif
+
+/** Compute an EC Diffie-Hellman secret in constant time
+ *  Returns: 1: exponentiation was successful
+ *           0: scalar was invalid (zero or overflow)
+ *  In:      ctx:      pointer to a context object (cannot be NULL)
+ *           point:    pointer to a public point
+ *           scalar:   a 32-byte scalar with which to multiply the point
+ *  Out:     result:   a 32-byte array which will be populated by an ECDH
+ *                     secret computed from the point and scalar
+ */
+SECP256K1_WARN_UNUSED_RESULT int secp256k1_ecdh(
+  const secp256k1_context_t* ctx,
+  unsigned char *result,
+  const secp256k1_pubkey_t *point,
+  const unsigned char *scalar
+) SECP256K1_ARG_NONNULL(1) SECP256K1_ARG_NONNULL(2) SECP256K1_ARG_NONNULL(3) SECP256K1_ARG_NONNULL(4);
+
+# ifdef __cplusplus
+}
+# endif
+
+#endif
diff --git a/src/modules/ecdh/Makefile.am.include b/src/modules/ecdh/Makefile.am.include
new file mode 100644
index 0000000000..0367e6a110
--- /dev/null
+++ b/src/modules/ecdh/Makefile.am.include
@@ -0,0 +1,3 @@
+include_HEADERS += include/secp256k1_ecdh.h
+noinst_HEADERS += src/modules/ecdh/main_impl.h
+noinst_HEADERS += src/modules/ecdh/tests_impl.h
diff --git a/src/modules/ecdh/main_impl.h b/src/modules/ecdh/main_impl.h
new file mode 100644
index 0000000000..064cf6e577
--- /dev/null
+++ b/src/modules/ecdh/main_impl.h
@@ -0,0 +1,53 @@
+/**********************************************************************
+ * Copyright (c) 2015 Andrew Poelstra                                 *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_MODULE_ECDH_MAIN_
+#define _SECP256K1_MODULE_ECDH_MAIN_
+
+#include "ecmult_const_impl.h"
+
+int secp256k1_ecdh(const secp256k1_context_t* ctx, unsigned char *result, const secp256k1_pubkey_t *point, const unsigned char *scalar) {
+    int ret = 0;
+    int overflow = 0;
+    secp256k1_gej_t res;
+    secp256k1_ge_t pt;
+    secp256k1_scalar_t s;
+    ARG_CHECK(result != NULL);
+    ARG_CHECK(point != NULL);
+    ARG_CHECK(scalar != NULL);
+    (void)ctx;
+
+    secp256k1_pubkey_load(ctx, &pt, point);
+    secp256k1_scalar_set_b32(&s, scalar, &overflow);
+    if (overflow || secp256k1_scalar_is_zero(&s)) {
+        ret = 0;
+    } else {
+        unsigned char x[32];
+        unsigned char y[1];
+        secp256k1_sha256_t sha;
+
+        secp256k1_ecmult_const(&res, &pt, &s);
+        secp256k1_ge_set_gej(&pt, &res);
+        /* Compute a hash of the point in compressed form
+         * Note we cannot use secp256k1_eckey_pubkey_serialize here since it does not
+         * expect its output to be secret and has a timing sidechannel. */
+        secp256k1_fe_normalize(&pt.x);
+        secp256k1_fe_normalize(&pt.y);
+        secp256k1_fe_get_b32(x, &pt.x);
+        y[0] = 0x02 | secp256k1_fe_is_odd(&pt.y);
+
+        secp256k1_sha256_initialize(&sha);
+        secp256k1_sha256_write(&sha, y, sizeof(y));
+        secp256k1_sha256_write(&sha, x, sizeof(x));
+        secp256k1_sha256_finalize(&sha, result);
+        ret = 1;
+    }
+
+    secp256k1_scalar_clear(&s);
+    return ret;
+}
+
+#endif
diff --git a/src/modules/ecdh/tests_impl.h b/src/modules/ecdh/tests_impl.h
new file mode 100644
index 0000000000..271eb28aaa
--- /dev/null
+++ b/src/modules/ecdh/tests_impl.h
@@ -0,0 +1,75 @@
+/**********************************************************************
+ * Copyright (c) 2015 Andrew Poelstra                                 *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#ifndef _SECP256K1_MODULE_ECDH_TESTS_
+#define _SECP256K1_MODULE_ECDH_TESTS_
+
+void test_ecdh_generator_basepoint(void) {
+    unsigned char s_one[32] = { 0 };
+    secp256k1_pubkey_t point[2];
+    int i;
+
+    s_one[31] = 1;
+    /* Check against pubkey creation when the basepoint is the generator */
+    for (i = 0; i < 100; ++i) {
+        secp256k1_sha256_t sha;
+        unsigned char s_b32[32];
+        unsigned char output_ecdh[32];
+        unsigned char output_ser[32];
+        unsigned char point_ser[33];
+        int point_ser_len = sizeof(point_ser);
+        secp256k1_scalar_t s;
+
+        random_scalar_order(&s);
+        secp256k1_scalar_get_b32(s_b32, &s);
+
+        /* compute using ECDH function */
+        CHECK(secp256k1_ec_pubkey_create(ctx, &point[0], s_one) == 1);
+        CHECK(secp256k1_ecdh(ctx, output_ecdh, &point[0], s_b32) == 1);
+        /* compute "explicitly" */
+        CHECK(secp256k1_ec_pubkey_create(ctx, &point[1], s_b32) == 1);
+        CHECK(secp256k1_ec_pubkey_serialize(ctx, point_ser, &point_ser_len, &point[1], 1) == 1);
+        CHECK(point_ser_len == sizeof(point_ser));
+        secp256k1_sha256_initialize(&sha);
+        secp256k1_sha256_write(&sha, point_ser, point_ser_len);
+        secp256k1_sha256_finalize(&sha, output_ser);
+        /* compare */
+        CHECK(memcmp(output_ecdh, output_ser, sizeof(output_ser)) == 0);
+    }
+}
+
+void test_bad_scalar(void) {
+    unsigned char s_zero[32] = { 0 };
+    unsigned char s_overflow[32] = {
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+        0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
+        0xba, 0xae, 0xdc, 0xe6, 0xaf, 0x48, 0xa0, 0x3b,
+        0xbf, 0xd2, 0x5e, 0x8c, 0xd0, 0x36, 0x41, 0x41
+    };
+    unsigned char s_rand[32] = { 0 };
+    unsigned char output[32];
+    secp256k1_scalar_t rand;
+    secp256k1_pubkey_t point;
+
+    /* Create random point */
+    random_scalar_order(&rand);
+    secp256k1_scalar_get_b32(s_rand, &rand);
+    CHECK(secp256k1_ec_pubkey_create(ctx, &point, s_rand) == 1);
+
+    /* Try to multiply it by bad values */
+    CHECK(secp256k1_ecdh(ctx, output, &point, s_zero) == 0);
+    CHECK(secp256k1_ecdh(ctx, output, &point, s_overflow) == 0);
+    /* ...and a good one */
+    s_overflow[31] -= 1;
+    CHECK(secp256k1_ecdh(ctx, output, &point, s_overflow) == 1);
+}
+
+void run_ecdh_tests(void) {
+    test_ecdh_generator_basepoint();
+    test_bad_scalar();
+}
+
+#endif
diff --git a/src/secp256k1.c b/src/secp256k1.c
index 6904108e2a..9d6dee671d 100644
--- a/src/secp256k1.c
+++ b/src/secp256k1.c
@@ -526,3 +526,7 @@ int secp256k1_context_randomize(secp256k1_context_t* ctx, const unsigned char *s
     secp256k1_ecmult_gen_blind(&ctx->ecmult_gen_ctx, seed32);
     return 1;
 }
+
+#ifdef ENABLE_MODULE_ECDH
+# include "modules/ecdh/main_impl.h"
+#endif
diff --git a/src/tests.c b/src/tests.c
index 7dc9805cb2..c074c5fecb 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -2356,6 +2356,10 @@ void run_ecdsa_openssl(void) {
 }
 #endif
 
+#ifdef ENABLE_MODULE_ECDH
+# include "modules/ecdh/tests_impl.h"
+#endif
+
 int main(int argc, char **argv) {
     unsigned char seed16[16] = {0};
     unsigned char run32[32] = {0};
@@ -2444,6 +2448,11 @@ int main(int argc, char **argv) {
     run_endomorphism_tests();
 #endif
 
+#ifdef ENABLE_MODULE_ECDH
+    /* ecdh tests */
+    run_ecdh_tests();
+#endif
+
     /* ecdsa tests */
     run_random_pubkeys();
     run_ecdsa_sign_verify();

From 91c0ce95cabda52d03edfe52453a1004fbb01d1e Mon Sep 17 00:00:00 2001
From: Andrew Poelstra <apoelstra@wpsoftware.net>
Date: Fri, 15 May 2015 14:46:08 -0500
Subject: [PATCH 4/7] Add benchmarks for ECDH and const-time multiplication

---
 .gitignore                           |  1 +
 src/bench_ecdh.c                     | 51 ++++++++++++++++++++++++++++
 src/bench_internal.c                 | 12 +++++++
 src/modules/ecdh/Makefile.am.include |  6 ++++
 4 files changed, 70 insertions(+)
 create mode 100644 src/bench_ecdh.c

diff --git a/.gitignore b/.gitignore
index 6cbc0b2713..46114e4be6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 bench_inv
+bench_ecdh
 bench_sign
 bench_verify
 bench_recover
diff --git a/src/bench_ecdh.c b/src/bench_ecdh.c
new file mode 100644
index 0000000000..e28035a26a
--- /dev/null
+++ b/src/bench_ecdh.c
@@ -0,0 +1,51 @@
+/**********************************************************************
+ * Copyright (c) 2015 Pieter Wuille, Andrew Poelstra                  *
+ * Distributed under the MIT software license, see the accompanying   *
+ * file COPYING or http://www.opensource.org/licenses/mit-license.php.*
+ **********************************************************************/
+
+#include <string.h>
+
+#include "include/secp256k1.h"
+#include "include/secp256k1_ecdh.h"
+#include "util.h"
+#include "bench.h"
+
+typedef struct {
+    secp256k1_context_t *ctx;
+    secp256k1_pubkey_t point;
+    unsigned char scalar[32];
+} bench_ecdh_t;
+
+static void bench_ecdh_setup(void* arg) {
+    int i;
+    bench_ecdh_t *data = (bench_ecdh_t*)arg;
+    const unsigned char point[] = {
+        0x03,
+        0x54, 0x94, 0xc1, 0x5d, 0x32, 0x09, 0x97, 0x06,
+        0xc2, 0x39, 0x5f, 0x94, 0x34, 0x87, 0x45, 0xfd,
+        0x75, 0x7c, 0xe3, 0x0e, 0x4e, 0x8c, 0x90, 0xfb,
+        0xa2, 0xba, 0xd1, 0x84, 0xf8, 0x83, 0xc6, 0x9f
+    };
+
+    data->ctx = secp256k1_context_create(0);
+    for (i = 0; i < 32; i++) data->scalar[i] = i + 1;
+    CHECK(secp256k1_ec_pubkey_parse(data->ctx, &data->point, point, sizeof(point)) == 1);
+}
+
+static void bench_ecdh(void* arg) {
+    int i;
+    unsigned char res[32];
+    bench_ecdh_t *data = (bench_ecdh_t*)arg;
+
+    for (i = 0; i < 20000; i++) {
+        CHECK(secp256k1_ecdh(data->ctx, res, &data->point, data->scalar) == 1);
+    }
+}
+
+int main(void) {
+    bench_ecdh_t data;
+
+    run_benchmark("ecdh", bench_ecdh, bench_ecdh_setup, NULL, &data, 10, 20000);
+    return 0;
+}
diff --git a/src/bench_internal.c b/src/bench_internal.c
index 196224257b..00fa0559fb 100644
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@@ -13,6 +13,7 @@
 #include "field_impl.h"
 #include "group_impl.h"
 #include "scalar_impl.h"
+#include "ecmult_const_impl.h"
 #include "ecmult_impl.h"
 #include "bench.h"
 
@@ -235,6 +236,16 @@ void bench_ecmult_wnaf(void* arg) {
     }
 }
 
+void bench_wnaf_const(void* arg) {
+    int i;
+    bench_inv_t *data = (bench_inv_t*)arg;
+
+    for (i = 0; i < 20000; i++) {
+        secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A);
+        secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
+    }
+}
+
 
 void bench_sha256(void* arg) {
     int i;
@@ -310,6 +321,7 @@ int main(int argc, char **argv) {
     if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine", bench_group_add_affine, bench_setup, NULL, &data, 10, 200000);
     if (have_flag(argc, argv, "group") || have_flag(argc, argv, "add")) run_benchmark("group_add_affine_var", bench_group_add_affine_var, bench_setup, NULL, &data, 10, 200000);
 
+    if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("wnaf_const", bench_wnaf_const, bench_setup, NULL, &data, 10, 20000);
     if (have_flag(argc, argv, "ecmult") || have_flag(argc, argv, "wnaf")) run_benchmark("ecmult_wnaf", bench_ecmult_wnaf, bench_setup, NULL, &data, 10, 20000);
 
     if (have_flag(argc, argv, "hash") || have_flag(argc, argv, "sha256")) run_benchmark("hash_sha256", bench_sha256, bench_setup, NULL, &data, 10, 20000);
diff --git a/src/modules/ecdh/Makefile.am.include b/src/modules/ecdh/Makefile.am.include
index 0367e6a110..8ef3aff92f 100644
--- a/src/modules/ecdh/Makefile.am.include
+++ b/src/modules/ecdh/Makefile.am.include
@@ -1,3 +1,9 @@
 include_HEADERS += include/secp256k1_ecdh.h
 noinst_HEADERS += src/modules/ecdh/main_impl.h
 noinst_HEADERS += src/modules/ecdh/tests_impl.h
+if USE_BENCHMARK
+noinst_PROGRAMS += bench_ecdh
+bench_ecdh_SOURCES = src/bench_ecdh.c
+bench_ecdh_LDADD = libsecp256k1.la $(SECP_LIBS)
+bench_ecdh_LDFLAGS = -static
+endif

From ed35d43a0cd2994b91650065c083c4e937c78600 Mon Sep 17 00:00:00 2001
From: Andrew Poelstra <apoelstra@wpsoftware.net>
Date: Fri, 22 May 2015 11:51:51 -0500
Subject: [PATCH 5/7] Make `secp256k1_scalar_add_bit` conditional; make
 `secp256k1_scalar_split_lambda_var` constant time

This has the effect of making `secp256k1_scalar_mul_shift_var` constant
time in both input scalars. Keep the _var name because it is NOT constant
time in the shift amount.

As used in `secp256k1_scalar_split_lambda_var`, the shift is always
the constant 272, so this function becomes constant time, and it
loses the `_var` suffix.
---
 src/bench_internal.c   | 2 +-
 src/ecmult_impl.h      | 2 +-
 src/scalar.h           | 6 +++---
 src/scalar_4x64_impl.h | 7 +++----
 src/scalar_8x32_impl.h | 7 +++----
 src/scalar_impl.h      | 3 ++-
 src/tests.c            | 7 +++++--
 7 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/src/bench_internal.c b/src/bench_internal.c
index 00fa0559fb..827a389938 100644
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@@ -98,7 +98,7 @@ void bench_scalar_split(void* arg) {
 
     for (i = 0; i < 20000; i++) {
         secp256k1_scalar_t l, r;
-        secp256k1_scalar_split_lambda_var(&l, &r, &data->scalar_x);
+        secp256k1_scalar_split_lambda(&l, &r, &data->scalar_x);
         secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
     }
 }
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index 43b3262539..d6da22073c 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -294,7 +294,7 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_ge
 
 #ifdef USE_ENDOMORPHISM
     /* split na into na_1 and na_lam (where na = na_1 + na_lam*lambda, and na_1 and na_lam are ~128 bit) */
-    secp256k1_scalar_split_lambda_var(&na_1, &na_lam, na);
+    secp256k1_scalar_split_lambda(&na_1, &na_lam, na);
 
     /* build wnaf representation for na_1 and na_lam. */
     bits_na_1   = secp256k1_ecmult_wnaf(wnaf_na_1,   130, &na_1,   WINDOW_A);
diff --git a/src/scalar.h b/src/scalar.h
index f33e72a7c9..72e12b86fc 100644
--- a/src/scalar.h
+++ b/src/scalar.h
@@ -42,8 +42,8 @@ static void secp256k1_scalar_get_b32(unsigned char *bin, const secp256k1_scalar_
 /** Add two scalars together (modulo the group order). Returns whether it overflowed. */
 static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b);
 
-/** Add a power of two to a scalar. The result is not allowed to overflow. */
-static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit);
+/** Conditionally add a power of two to a scalar. The result is not allowed to overflow. */
+static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag);
 
 /** Multiply two scalars (modulo the group order). */
 static void secp256k1_scalar_mul(secp256k1_scalar_t *r, const secp256k1_scalar_t *a, const secp256k1_scalar_t *b);
@@ -95,7 +95,7 @@ static int secp256k1_scalar_eq(const secp256k1_scalar_t *a, const secp256k1_scal
 /** Find r1 and r2 such that r1+r2*2^128 = a. */
 static void secp256k1_scalar_split_128(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a);
 /** Find r1 and r2 such that r1+r2*lambda = a, and r1 and r2 are maximum 128 bits long (see secp256k1_gej_mul_lambda). */
-static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a);
+static void secp256k1_scalar_split_lambda(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a);
 #endif
 
 /** Multiply a and b (without taking the modulus!), divide by 2**shift, and round to the nearest integer. Shift must be at least 256. */
diff --git a/src/scalar_4x64_impl.h b/src/scalar_4x64_impl.h
index 147229dab9..09431e0df7 100644
--- a/src/scalar_4x64_impl.h
+++ b/src/scalar_4x64_impl.h
@@ -96,9 +96,10 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t
     return overflow;
 }
 
-static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
+static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag) {
     uint128_t t;
     VERIFY_CHECK(bit < 256);
+    bit += ((uint32_t) flag - 1) & 0x100;  /* forcing (bit >> 6) > 3 makes this a noop */
     t = (uint128_t)r->d[0] + (((uint64_t)((bit >> 6) == 0)) << (bit & 0x3F));
     r->d[0] = t & 0xFFFFFFFFFFFFFFFFULL; t >>= 64;
     t += (uint128_t)r->d[1] + (((uint64_t)((bit >> 6) == 1)) << (bit & 0x3F));
@@ -940,9 +941,7 @@ SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *
     r->d[1] = shift < 448 ? (l[1 + shiftlimbs] >> shiftlow | (shift < 384 && shiftlow ? (l[2 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[2] = shift < 384 ? (l[2 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[3 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[3] = shift < 320 ? (l[3 + shiftlimbs] >> shiftlow) : 0;
-    if ((l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1) {
-        secp256k1_scalar_add_bit(r, 0);
-    }
+    secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 6] >> ((shift - 1) & 0x3f)) & 1);
 }
 
 #endif
diff --git a/src/scalar_8x32_impl.h b/src/scalar_8x32_impl.h
index 0ad2423db0..54923eb45f 100644
--- a/src/scalar_8x32_impl.h
+++ b/src/scalar_8x32_impl.h
@@ -136,9 +136,10 @@ static int secp256k1_scalar_add(secp256k1_scalar_t *r, const secp256k1_scalar_t
     return overflow;
 }
 
-static void secp256k1_scalar_add_bit(secp256k1_scalar_t *r, unsigned int bit) {
+static void secp256k1_scalar_cadd_bit(secp256k1_scalar_t *r, unsigned int bit, int flag) {
     uint64_t t;
     VERIFY_CHECK(bit < 256);
+    bit += ((uint32_t) flag - 1) & 0x100;  /* forcing (bit >> 5) > 7 makes this a noop */
     t = (uint64_t)r->d[0] + (((uint32_t)((bit >> 5) == 0)) << (bit & 0x1F));
     r->d[0] = t & 0xFFFFFFFFULL; t >>= 32;
     t += (uint64_t)r->d[1] + (((uint32_t)((bit >> 5) == 1)) << (bit & 0x1F));
@@ -714,9 +715,7 @@ SECP256K1_INLINE static void secp256k1_scalar_mul_shift_var(secp256k1_scalar_t *
     r->d[5] = shift < 352 ? (l[5 + shiftlimbs] >> shiftlow | (shift < 320 && shiftlow ? (l[6 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[6] = shift < 320 ? (l[6 + shiftlimbs] >> shiftlow | (shift < 288 && shiftlow ? (l[7 + shiftlimbs] << shifthigh) : 0)) : 0;
     r->d[7] = shift < 288 ? (l[7 + shiftlimbs] >> shiftlow)  : 0;
-    if ((l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1) {
-        secp256k1_scalar_add_bit(r, 0);
-    }
+    secp256k1_scalar_cadd_bit(r, 0, (l[(shift - 1) >> 5] >> ((shift - 1) & 0x1f)) & 1);
 }
 
 #endif
diff --git a/src/scalar_impl.h b/src/scalar_impl.h
index d5426a8c37..425c0bfbda 100644
--- a/src/scalar_impl.h
+++ b/src/scalar_impl.h
@@ -299,7 +299,7 @@ static void secp256k1_scalar_inverse_var(secp256k1_scalar_t *r, const secp256k1_
  * The function below splits a in r1 and r2, such that r1 + lambda * r2 == a (mod order).
  */
 
-static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) {
+static void secp256k1_scalar_split_lambda(secp256k1_scalar_t *r1, secp256k1_scalar_t *r2, const secp256k1_scalar_t *a) {
     secp256k1_scalar_t c1, c2;
     static const secp256k1_scalar_t minus_lambda = SECP256K1_SCALAR_CONST(
         0xAC9C52B3UL, 0x3FA3CF1FUL, 0x5AD9E3FDUL, 0x77ED9BA4UL,
@@ -323,6 +323,7 @@ static void secp256k1_scalar_split_lambda_var(secp256k1_scalar_t *r1, secp256k1_
     );
     VERIFY_CHECK(r1 != a);
     VERIFY_CHECK(r2 != a);
+    /* these _var calls are constant time since the shift amount is constant */
     secp256k1_scalar_mul_shift_var(&c1, a, &g1, 272);
     secp256k1_scalar_mul_shift_var(&c2, a, &g2, 272);
     secp256k1_scalar_mul(&c1, &c1, &minus_b1);
diff --git a/src/tests.c b/src/tests.c
index c074c5fecb..bfcc8bce11 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -558,7 +558,10 @@ void scalar_test(void) {
         r2 = s1;
         if (!secp256k1_scalar_add(&r1, &r1, &b)) {
             /* No overflow happened. */
-            secp256k1_scalar_add_bit(&r2, bit);
+            secp256k1_scalar_cadd_bit(&r2, bit, 1);
+            CHECK(secp256k1_scalar_eq(&r1, &r2));
+            /* cadd is a noop when flag is zero */
+            secp256k1_scalar_cadd_bit(&r2, bit, 0);
             CHECK(secp256k1_scalar_eq(&r1, &r2));
         }
     }
@@ -1680,7 +1683,7 @@ void test_scalar_split(void) {
     unsigned char tmp[32];
 
     random_scalar_order_test(&full);
-    secp256k1_scalar_split_lambda_var(&s1, &slam, &full);
+    secp256k1_scalar_split_lambda(&s1, &slam, &full);
 
     /* check that both are <= 128 bits in size */
     if (secp256k1_scalar_is_high(&s1))

From 92e53fc4c8080546b4a85aef8315d1a545fbb3ab Mon Sep 17 00:00:00 2001
From: Andrew Poelstra <apoelstra@wpsoftware.net>
Date: Fri, 22 May 2015 12:09:36 -0500
Subject: [PATCH 6/7] Implement endomorphism optimization for
 secp256k1_ecmult_const

---
 src/bench_internal.c    |   2 +-
 src/ecmult_const_impl.h | 137 ++++++++++++++++++++++++++++++++++++----
 src/tests.c             |  19 +++++-
 3 files changed, 144 insertions(+), 14 deletions(-)

diff --git a/src/bench_internal.c b/src/bench_internal.c
index 827a389938..1a06e9441a 100644
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@@ -241,7 +241,7 @@ void bench_wnaf_const(void* arg) {
     bench_inv_t *data = (bench_inv_t*)arg;
 
     for (i = 0; i < 20000; i++) {
-        secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A);
+        secp256k1_wnaf_const(data->wnaf, data->scalar_x, WINDOW_A);
         secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
     }
 }
diff --git a/src/ecmult_const_impl.h b/src/ecmult_const_impl.h
index 956d5e4c0f..3632d2ecb5 100644
--- a/src/ecmult_const_impl.h
+++ b/src/ecmult_const_impl.h
@@ -12,7 +12,11 @@
 #include "ecmult_const.h"
 #include "ecmult_impl.h"
 
-#define WNAF_BITS 256
+#ifdef USE_ENDOMORPHISM
+    #define WNAF_BITS 128
+#else
+    #define WNAF_BITS 256
+#endif
 #define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w))
 
 /* This is like `ECMULT_TABLE_GET_GE` but is constant time */
@@ -49,17 +53,47 @@
  *
  *  Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 335
  */
-static void secp256k1_wnaf_const(int *wnaf, const secp256k1_scalar_t *a, int w) {
-    secp256k1_scalar_t s = *a;
-    /* Negate to force oddness */
-    int is_even = secp256k1_scalar_is_even(&s);
-    int global_sign = secp256k1_scalar_cond_negate(&s, is_even);
-
+static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar_t s, int w) {
+    int global_sign = 1;
+    int skew = 0;
     int word = 0;
     /* 1 2 3 */
-    int u_last = secp256k1_scalar_shr_int(&s, w);
+    int u_last;
     int u;
+
+#ifdef USE_ENDOMORPHISM
+    /* If we are using the endomorphism, we cannot handle even numbers by negating
+     * them, since we are working with 128-bit numbers whose negations would be 256
+     * bits, eliminating the performance advantage. Instead we use a technique from
+     * Section 4.2 of the Okeya/Tagaki paper, which is to add either 1 (for even)
+     * or 2 (for odd) to the number we are encoding, then compensating after the
+     * multiplication. */
+    /* Negative 128-bit numbers will be negated, since otherwise they are 256-bit */
+    int flip = secp256k1_scalar_is_high(&s);
+    /* We add 1 to even numbers, 2 to odd ones, noting that negation flips parity */
+    int bit = flip ^ (s.d[0] & 1);
+    /* We check for negative one, since adding 2 to it will cause an overflow */
+    secp256k1_scalar_t neg_s;
+    int not_neg_one;
+    secp256k1_scalar_negate(&neg_s, &s);
+    not_neg_one = !secp256k1_scalar_is_one(&neg_s);
+    secp256k1_scalar_cadd_bit(&s, bit, not_neg_one);
+    /* If we had negative one, flip == 1, s.d[0] == 0, bit == 1, so caller expects
+     * that we added two to it and flipped it. In fact for -1 these operations are
+     * identical. We only flipped, but since skewing is required (in the sense that
+     * the skew must be 1 or 2, never zero) and flipping is not, we need to change
+     * our flags to claim that we only skewed. */
+    global_sign = secp256k1_scalar_cond_negate(&s, flip);
+    global_sign *= not_neg_one * 2 - 1;
+    skew = 1 << bit;
+#else
+    /* Otherwise, we just negate to force oddness */
+    int is_even = secp256k1_scalar_is_even(&s);
+    global_sign = secp256k1_scalar_cond_negate(&s, is_even);
+#endif
+
     /* 4 */
+    u_last = secp256k1_scalar_shr_int(&s, w);
     while (word * w < WNAF_BITS) {
         int sign;
         int even;
@@ -81,6 +115,7 @@ static void secp256k1_wnaf_const(int *wnaf, const secp256k1_scalar_t *a, int w)
 
     VERIFY_CHECK(secp256k1_scalar_is_zero(&s));
     VERIFY_CHECK(word == WNAF_SIZE(w));
+    return skew;
 }
 
 
@@ -89,17 +124,37 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
     secp256k1_ge_t tmpa;
     secp256k1_fe_t Z;
 
+#ifdef USE_ENDOMORPHISM
+    secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
+    int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)];
+    int wnaf_lam[1 + WNAF_SIZE(WINDOW_A - 1)];
+    int skew_1;
+    int skew_lam;
+    secp256k1_scalar_t q_1, q_lam;
+#else
     int wnaf[1 + WNAF_SIZE(WINDOW_A - 1)];
+#endif
 
     int i;
-    int is_zero = secp256k1_scalar_is_zero(scalar);
     secp256k1_scalar_t sc = *scalar;
+
+    /* build wnaf representation for q. */
+#ifdef USE_ENDOMORPHISM
+    /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */
+    secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc);
+    /* no need for zero correction when using endomorphism since even
+     * numbers have one added to them anyway */
+    skew_1   = secp256k1_wnaf_const(wnaf_1,   q_1,   WINDOW_A - 1);
+    skew_lam = secp256k1_wnaf_const(wnaf_lam, q_lam, WINDOW_A - 1);
+#else
+    int is_zero = secp256k1_scalar_is_zero(scalar);
     /* the wNAF ladder cannot handle zero, so bump this to one .. we will
      * correct the result after the fact */
     sc.d[0] += is_zero;
+    VERIFY_CHECK(!secp256k1_scalar_is_zero(&sc));
 
-    /* build wnaf representation for q. */
-    secp256k1_wnaf_const(wnaf, &sc, WINDOW_A - 1);
+    secp256k1_wnaf_const(wnaf, sc, WINDOW_A - 1);
+#endif
 
     /* Calculate odd multiples of a.
      * All multiples are brought to the same Z 'denominator', which is stored
@@ -109,14 +164,31 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
      */
     secp256k1_gej_set_ge(r, a);
     secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, r);
+#ifdef USE_ENDOMORPHISM
+    for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
+        secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+    }
+#endif
 
     /* first loop iteration (separated out so we can directly set r, rather
      * than having it start at infinity, get doubled several times, then have
      * its new value added to it) */
+#ifdef USE_ENDOMORPHISM
+    i = wnaf_1[WNAF_SIZE(WINDOW_A - 1)];
+    VERIFY_CHECK(i != 0);
+    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
+    secp256k1_gej_set_ge(r, &tmpa);
+
+    i = wnaf_lam[WNAF_SIZE(WINDOW_A - 1)];
+    VERIFY_CHECK(i != 0);
+    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, i, WINDOW_A);
+    secp256k1_gej_add_ge(r, r, &tmpa);
+#else
     i = wnaf[WNAF_SIZE(WINDOW_A - 1)];
     VERIFY_CHECK(i != 0);
     ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
     secp256k1_gej_set_ge(r, &tmpa);
+#endif
     /* remaining loop iterations */
     for (i = WNAF_SIZE(WINDOW_A - 1) - 1; i >= 0; i--) {
         int n;
@@ -124,16 +196,59 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
         for (j = 0; j < WINDOW_A - 1; ++j) {
             secp256k1_gej_double_nonzero(r, r, NULL);
         }
+#ifdef USE_ENDOMORPHISM
+        n = wnaf_1[i];
+        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
+        VERIFY_CHECK(n != 0);
+        secp256k1_gej_add_ge(r, r, &tmpa);
+
+        n = wnaf_lam[i];
+        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
+        VERIFY_CHECK(n != 0);
+        secp256k1_gej_add_ge(r, r, &tmpa);
+#else
         n = wnaf[i];
         VERIFY_CHECK(n != 0);
         ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
         secp256k1_gej_add_ge(r, r, &tmpa);
+#endif
     }
 
     secp256k1_fe_mul(&r->z, &r->z, &Z);
 
+#ifdef USE_ENDOMORPHISM
+    {
+        /* Correct for wNAF skew */
+        secp256k1_ge_t correction = *a;
+        secp256k1_ge_storage_t correction_1_stor;
+        secp256k1_ge_storage_t correction_lam_stor;
+        secp256k1_ge_storage_t a2_stor;
+        secp256k1_gej_t tmpj;
+        secp256k1_gej_set_ge(&tmpj, &correction);
+        secp256k1_gej_double_var(&tmpj, &tmpj, NULL);
+        secp256k1_ge_set_gej(&correction, &tmpj);
+        secp256k1_ge_to_storage(&correction_1_stor, a);
+        secp256k1_ge_to_storage(&correction_lam_stor, a);
+        secp256k1_ge_to_storage(&a2_stor, &correction);
+
+        /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */
+        secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2);
+        secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2);
+
+        /* Apply the correction */
+        secp256k1_ge_from_storage(&correction, &correction_1_stor);
+        secp256k1_ge_neg(&correction, &correction);
+        secp256k1_gej_add_ge(r, r, &correction);
+
+        secp256k1_ge_from_storage(&correction, &correction_lam_stor);
+        secp256k1_ge_neg(&correction, &correction);
+        secp256k1_ge_mul_lambda(&correction, &correction);
+        secp256k1_gej_add_ge(r, r, &correction);
+    }
+#else
     /* correct for zero */
     r->infinity |= is_zero;
+#endif
 }
 
 #endif
diff --git a/src/tests.c b/src/tests.c
index bfcc8bce11..feb9576330 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -1550,10 +1550,21 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) {
     secp256k1_scalar_t x, shift;
     int wnaf[256] = {0};
     int i;
+#ifdef USE_ENDOMORPHISM
+    int skew;
+#endif
+    secp256k1_scalar_t num = *number;
 
     secp256k1_scalar_set_int(&x, 0);
     secp256k1_scalar_set_int(&shift, 1 << w);
-    secp256k1_wnaf_const(wnaf, number, w);
+    /* With USE_ENDOMORPHISM on we only consider 128-bit numbers */
+#ifdef USE_ENDOMORPHISM
+    for (i = 0; i < 16; ++i)
+        secp256k1_scalar_shr_int(&num, 8);
+    skew = secp256k1_wnaf_const(wnaf, num, w);
+#else
+    secp256k1_wnaf_const(wnaf, num, w);
+#endif
 
     for (i = WNAF_SIZE(w); i >= 0; --i) {
         secp256k1_scalar_t t;
@@ -1572,7 +1583,11 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) {
         }
         secp256k1_scalar_add(&x, &x, &t);
     }
-    CHECK(secp256k1_scalar_eq(&x, number));
+#ifdef USE_ENDOMORPHISM
+    /* Skew num because when encoding 128-bit numbers as odd we use an offset */
+    secp256k1_scalar_cadd_bit(&num, skew == 2, 1);
+#endif
+    CHECK(secp256k1_scalar_eq(&x, &num));
 }
 
 void run_wnaf(void) {

From 72ae443afb536221a8523646ea4a88162341c3df Mon Sep 17 00:00:00 2001
From: Peter Dettman <peter.dettman@gmail.com>
Date: Wed, 15 Jul 2015 20:33:35 +0700
Subject: [PATCH 7/7] Improve perf. of cmov-based table lookup

---
 src/ecmult_const_impl.h | 14 +++++++++-----
 src/util.h              |  2 ++
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/ecmult_const_impl.h b/src/ecmult_const_impl.h
index 3632d2ecb5..62406a242b 100644
--- a/src/ecmult_const_impl.h
+++ b/src/ecmult_const_impl.h
@@ -23,19 +23,20 @@
 #define ECMULT_CONST_TABLE_GET_GE(r,pre,n,w) do { \
     int m; \
     int abs_n = (n) * (((n) > 0) * 2 - 1); \
+    int idx_n = abs_n / 2; \
     secp256k1_fe_t neg_y; \
     VERIFY_CHECK(((n) & 1) == 1); \
     VERIFY_CHECK((n) >= -((1 << ((w)-1)) - 1)); \
     VERIFY_CHECK((n) <=  ((1 << ((w)-1)) - 1)); \
-    for (m = 1; m < (1 << ((w) - 1)); m += 2) { \
+    VERIFY_SETUP(secp256k1_fe_clear(&(r)->x)); \
+    VERIFY_SETUP(secp256k1_fe_clear(&(r)->y)); \
+    for (m = 0; m < ECMULT_TABLE_SIZE(w); m++) { \
         /* This loop is used to avoid secret data in array indices. See
          * the comment in ecmult_gen_impl.h for rationale. */ \
-        secp256k1_fe_cmov(&(r)->x, &(pre)[(m - 1) / 2].x, m == abs_n); \
-        secp256k1_fe_cmov(&(r)->y, &(pre)[(m - 1) / 2].y, m == abs_n); \
+        secp256k1_fe_cmov(&(r)->x, &(pre)[m].x, m == idx_n); \
+        secp256k1_fe_cmov(&(r)->y, &(pre)[m].y, m == idx_n); \
     } \
     (r)->infinity = 0; \
-    secp256k1_fe_normalize_weak(&(r)->x); \
-    secp256k1_fe_normalize_weak(&(r)->y); \
     secp256k1_fe_negate(&neg_y, &(r)->y, 1); \
     secp256k1_fe_cmov(&(r)->y, &neg_y, (n) != abs_n); \
 } while(0)
@@ -164,6 +165,9 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
      */
     secp256k1_gej_set_ge(r, a);
     secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, r);
+    for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
+        secp256k1_fe_normalize_weak(&pre_a[i].y);
+    }
 #ifdef USE_ENDOMORPHISM
     for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
         secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
diff --git a/src/util.h b/src/util.h
index 00a309c979..ce21c42b34 100644
--- a/src/util.h
+++ b/src/util.h
@@ -55,8 +55,10 @@ typedef struct {
 /* Like assert(), but when VERIFY is defined, and side-effect safe. */
 #ifdef VERIFY
 #define VERIFY_CHECK CHECK
+#define VERIFY_SETUP(stmt) do { stmt; } while(0)
 #else
 #define VERIFY_CHECK(cond) do { (void)(cond); } while(0)
+#define VERIFY_SETUP(stmt)
 #endif
 
 static SECP256K1_INLINE void *checked_malloc(const callback_t* cb, size_t size) {