diff --git a/include/decaf.h b/include/decaf.h index 7609f34..9ed3a53 100644 --- a/include/decaf.h +++ b/include/decaf.h @@ -396,9 +396,6 @@ void decaf_448_precomputed_scalarmul ( * @param [in] scalar1 A first scalar to multiply by. * @param [in] base2 A second point to be scaled. * @param [in] scalar2 A second scalar to multiply by. - * - * @TODO: test - * @TODO: define vartime/precomp version of this for performance?? */ void decaf_448_point_double_scalarmul ( decaf_448_point_t combo, @@ -408,6 +405,30 @@ void decaf_448_point_double_scalarmul ( const decaf_448_scalar_t scalar2 ) API_VIS NONNULL5 NOINLINE; +/** + * @brief Multiply two base points by two scalars: + * combo = scalar1*base1 + scalar2*base2. + * + * Otherwise equivalent to two calls to decaf_448_point_scalarmul, but may be + * faster. + * + * @param [out] combo The linear combination scalar1*base1 + scalar2*base2. + * @param [in] base1 A precomputed first point to be scaled. + * @param [in] scalar1 A first scalar to multiply by. + * @param [in] base2 A second point to be scaled. + * @param [in] scalar2 A second scalar to multiply by. + * + * @warning: This function takes variable time, and may leak the scalars + * used. It is designed for signature verification. + */ +void decaf_448_precomputed_double_scalarmul_non_secret ( + decaf_448_point_t combo, + const decaf_448_precomputed_s *base1, + const decaf_448_scalar_t scalar1, + const decaf_448_point_t base2, + const decaf_448_scalar_t scalar2 +) API_VIS NONNULL5 NOINLINE; + /** * @brief Test that a point is valid, for debugging purposes. * diff --git a/include/decaf_crypto.h b/include/decaf_crypto.h index ae370b8..a86bfc7 100644 --- a/include/decaf_crypto.h +++ b/include/decaf_crypto.h @@ -85,6 +85,9 @@ void decaf_448_private_to_public ( * * @retval DECAF_SUCCESS Key exchange was successful. * @retval DECAF_FAILURE Key exchange failed. + * + * @warning This is a pretty silly shared secret computation + * and will almost definitely change in the future. 
*/ decaf_bool_t decaf_448_shared_secret ( diff --git a/src/decaf.c b/src/decaf.c index d134447..106fa00 100644 --- a/src/decaf.c +++ b/src/decaf.c @@ -804,3 +804,13 @@ void decaf_448_precomputed_scalarmul ( ) { decaf_448_point_scalarmul(a,b->p[0],scalar); } + +void decaf_448_precomputed_double_scalarmul_non_secret ( + decaf_448_point_t combo, + const decaf_448_precomputed_s *base1, + const decaf_448_scalar_t scalar1, + const decaf_448_point_t base2, + const decaf_448_scalar_t scalar2 +) { + decaf_448_point_double_scalarmul(combo, base1->p[0], scalar1, base2, scalar2); +} diff --git a/src/decaf_crypto.c b/src/decaf_crypto.c index 886f287..7b16f0c 100644 --- a/src/decaf_crypto.c +++ b/src/decaf_crypto.c @@ -186,9 +186,9 @@ decaf_448_verify_shake ( ret &= decaf_448_point_decode(pubpoint, pub, DECAF_FALSE); ret &= decaf_448_scalar_decode(response, &sig[DECAF_448_SER_BYTES]); - decaf_448_point_double_scalarmul ( + decaf_448_precomputed_double_scalarmul_non_secret ( pubpoint, - decaf_448_point_base, response, + decaf_448_precomputed_base, response, pubpoint, challenge ); diff --git a/src/decaf_fast.c b/src/decaf_fast.c index 3809e8a..b5e7d35 100644 --- a/src/decaf_fast.c +++ b/src/decaf_fast.c @@ -13,10 +13,6 @@ #include #include "field.h" - /* TODO REMOVE */ -#include "constant_time.h" -#include - #define WBITS DECAF_WORD_BITS #if WBITS == 64 @@ -184,9 +180,17 @@ siv gf_add_nr ( gf c, const gf a, const gf b ) { } /** Constant time, x = is_z ? 
z : y */ -sv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { - //FOR_LIMB(i, x[i] = (y[i] & ~is_z) | (z[i] & is_z) ); +siv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { + big_register_t br_mask = br_set_to_mask(is_z); + big_register_t *out = (big_register_t *)x; + const big_register_t *y_ = (const big_register_t *)y, *z_ = (const big_register_t *)z; + word_t k; + for (k=0; klimb[i] ^= s; y->limb[i] ^= s; } - /* - constant_time_cond_swap(x,y,sizeof(gf),swap); - */ } /** @@ -791,6 +792,27 @@ snv add_niels_to_pt ( if (!before_double) gf_mul ( d->t, b, c ); } +snv sub_niels_from_pt ( + decaf_448_point_t d, + const niels_t e, + decaf_bool_t before_double +) { + gf a, b, c; + gf_sub_nr ( b, d->y, d->x ); + gf_mul ( a, e->b, b ); + gf_add_nr ( b, d->x, d->y ); + gf_mul ( d->y, e->a, b ); + gf_mul ( d->x, e->c, d->t ); + gf_add_nr ( c, a, d->y ); + gf_sub_nr ( b, d->y, a ); + gf_add_nr ( d->y, d->z, d->x ); + gf_sub_nr ( a, d->z, d->x ); + gf_mul ( d->z, a, d->y ); + gf_mul ( d->x, d->y, b ); + gf_mul ( d->y, a, c ); + if (!before_double) gf_mul ( d->t, b, c ); +} + sv add_pniels_to_pt ( decaf_448_point_t p, const pniels_t pn, @@ -802,8 +824,42 @@ sv add_pniels_to_pt ( add_niels_to_pt( p, pn->n, before_double ); } +sv sub_pniels_from_pt ( + decaf_448_point_t p, + const pniels_t pn, + decaf_bool_t before_double +) { + gf L0; + gf_mul ( L0, p->z, pn->z ); + gf_cpy ( p->z, L0 ); + sub_niels_from_pt( p, pn->n, before_double ); +} + extern const decaf_448_scalar_t decaf_448_point_scalarmul_adjustment; +/* TODO: get rid of big_register_t dependencies? 
*/ +siv constant_time_lookup_xx ( + void *__restrict__ out_, + const void *table_, + word_t elem_bytes, + word_t n_table, + word_t idx +) { + big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx); + big_register_t *out = (big_register_t *)out_; + const unsigned char *table = (const unsigned char *)table_; + word_t j,k; + + for (k=0; k>(WINDOW-1))-1; bits ^= inv; - constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK); + constant_time_lookup_xx(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK); cond_neg_niels(pn->n, inv); pniels_to_pt(tmp, pn); @@ -861,7 +917,7 @@ void decaf_448_point_scalarmul ( bits ^= inv; /* Add in from table. Compute t only on last iteration. */ - constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK); + constant_time_lookup_xx(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK); cond_neg_niels(pn->n, inv); add_pniels_to_pt(tmp, pn, i ? -1 : 0); } @@ -1104,7 +1160,7 @@ siv constant_time_lookup_niels ( int nelts, int idx ) { - constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx); + constant_time_lookup_xx(ni, table, sizeof(niels_s), nelts, idx); } void decaf_448_precomputed_scalarmul ( @@ -1112,25 +1168,26 @@ void decaf_448_precomputed_scalarmul ( const decaf_448_precomputed_s *table, const decaf_448_scalar_t scalar ) { - unsigned int i,j,k; + int i; + unsigned j,k; const unsigned int n = 5, t = 5, s = 18; // TODO MAGIC - decaf_448_scalar_t scalar2; - decaf_448_scalar_add(scalar2, scalar, decaf_448_precomputed_scalarmul_adjustment); - decaf_448_halve(scalar2,scalar2,decaf_448_scalar_p); + decaf_448_scalar_t scalar1x; + decaf_448_scalar_add(scalar1x, scalar, decaf_448_precomputed_scalarmul_adjustment); + decaf_448_halve(scalar1x,scalar1x,decaf_448_scalar_p); niels_t ni; - for (i=0; i=0; i--) { + if (i != (int)s-1) decaf_448_point_double(out,out); for (j=0; jlimb[bit/WBITS] >> (bit%WBITS) & 1) << k; + tab |= (scalar1x->limb[bit/WBITS] >> (bit%WBITS) & 1) << k; } } 
@@ -1141,8 +1198,8 @@ void decaf_448_precomputed_scalarmul ( constant_time_lookup_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab); cond_neg_niels(ni, invert); - if (i||j) { - add_niels_to_pt(out, ni, j==n-1 && i= 0; i--) { + int bit = (scalar->limb[i/WORD_BITS] >> (i%WORD_BITS)) & 1; + current = 2*current + bit; + + /* + * Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0 + * So current loses (tableBits+1) bits every time. It otherwise gains + * 1 bit per iteration. The number of iterations is + * (nbits + 2 + tableBits), and an additional control word is added at + * the end. So the total number of control words is at most + * ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2. + * There's also the stopper with power -1, for a total of +3. + */ + if (current >= (2<> 1; /* |delta| < 2^tablebits */ + current = -(current & 1); + + for (j=i; (delta & 1) == 0; j++) { + delta >>= 1; + } + control[position].power = j+1; + control[position].addend = delta; + position++; + assert(position <= DECAF_448_SCALAR_BITS/(tableBits+1) + 2); + } + } + + if (current) { + for (j=0; (current & 1) == 0; j++) { + current >>= 1; + } + control[position].power = j; + control[position].addend = current; + position++; + assert(position <= DECAF_448_SCALAR_BITS/(tableBits+1) + 2); + } + + + control[position].power = -1; + control[position].addend = 0; + return position; +} + +sv prepare_wnaf_table( + pniels_t *output, + decaf_448_point_t working, + unsigned int tbits +) { + int i; + pt_to_pniels(output[0], working); + + if (tbits == 0) return; + + decaf_448_point_double(working,working); + pniels_t twop; + pt_to_pniels(twop, working); + + add_pniels_to_pt(working, output[0],0); + pt_to_pniels(output[1], working); + + for (i=2; i < 1<= 0; i--) { + + if (i == control[conti].power) { + decaf_448_point_double_internal(combo,combo,0); + assert(control[conti].addend); + + if (control[conti].addend > 0) { + add_pniels_to_pt(combo, precmp[control[conti].addend >> 1], 
i>=s); // TODO PERF: internal + } else { + sub_pniels_from_pt(combo, precmp[(-control[conti].addend) >> 1], i>=s); // TODO PERF: internal + } + conti++; + assert(conti <= control_bits); + } else { + decaf_448_point_double_internal(combo,combo,i>=s); + } + + if (i < s) { + /* comb component */ + for (j=0; jlimb[bit/WBITS] >> (bit%WBITS) & 1) << k; + } + } + + decaf_bool_t invert = (tab>>(t-1))-1; + tab ^= invert; + tab &= (1<<(t-1)) - 1; + + if (invert) { + sub_niels_from_pt(combo, base1->table[(j<<(t-1)) + tab], j==n-1 && i); + } else { + add_niels_to_pt(combo, base1->table[(j<<(t-1)) + tab], j==n-1 && i); + } + } + } + } +}