diff --git a/src/decaf_fast.c b/src/decaf_fast.c index 1874c87..45a1e61 100644 --- a/src/decaf_fast.c +++ b/src/decaf_fast.c @@ -548,7 +548,11 @@ void decaf_448_point_add ( gf_mul ( p->t, b, c ); } -void decaf_448_point_double(decaf_448_point_t p, const decaf_448_point_t q) { +static void decaf_448_point_double_internal ( + decaf_448_point_t p, + const decaf_448_point_t q, + decaf_bool_t before_double +) { gf a, b, c, d; gf_sqr ( c, q->x ); gf_sqr ( a, q->y ); @@ -563,8 +567,11 @@ void decaf_448_point_double(decaf_448_point_t p, const decaf_448_point_t q) { gf_mul ( p->x, a, b ); gf_mul ( p->z, p->t, a ); gf_mul ( p->y, p->t, d ); - /* TODO: conditional? */ - gf_mul ( p->t, b, d ); + if (!before_double) gf_mul ( p->t, b, d ); +} + +void decaf_448_point_double(decaf_448_point_t p, const decaf_448_point_t q) { + decaf_448_point_double_internal(p,q,0); } void decaf_448_point_copy ( @@ -734,7 +741,8 @@ static void pniels_to_pt ( static void add_niels_to_pt ( decaf_448_point_t d, - const niels_t e + const niels_t e, + decaf_bool_t before_double ) { gf a, b, c; gf_sub_nr ( b, d->y, d->x ); @@ -749,18 +757,18 @@ static void add_niels_to_pt ( gf_mul ( d->z, a, d->y ); gf_mul ( d->x, d->y, b ); gf_mul ( d->y, a, c ); - /* TODO: if... */ - gf_mul ( d->t, b, c ); + if (!before_double) gf_mul ( d->t, b, c ); } static void add_pniels_to_pt ( decaf_448_point_t p, - const pniels_t pn + const pniels_t pn, + decaf_bool_t before_double ) { gf L0; gf_mul ( L0, p->z, pn->z ); gf_cpy ( p->z, L0 ); - add_niels_to_pt( p, pn->n ); + add_niels_to_pt( p, pn->n, before_double ); } void decaf_448_point_scalarmul ( @@ -768,64 +776,73 @@ void decaf_448_point_scalarmul ( const decaf_448_point_t b, const decaf_448_scalar_t scalar ) { - const int WINDOW = 5, + const int WINDOW = 5, /* PERF: Make 4 on non hugevector platforms? */ WINDOW_MASK = (1<> 1, NTABLE = 1<<(WINDOW-1); - decaf_448_scalar_t scalar2, onehalf = {{{0}}}, arrr; + /* Adjust the scalar to SABS window. TODO: optimize, subroutinize */ + decaf_448_scalar_t scalar2, onehalf = {{{0}}}, two = {{{2}}}, arrr; onehalf->limb[SCALAR_WORDS-1] = 1ull<<(WBITS-1); + + /* FIXME PERF MAGIC precompute 2^449-1/2 mod q. Could instead use 2^446-1/2 mod q though. */ + decaf_448_montmul(arrr,two,decaf_448_scalar_r2,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR); /* PERF dedicated halve */ decaf_448_scalar_sub(scalar2, scalar, decaf_448_scalar_one); decaf_448_montmul(scalar2,scalar2,onehalf,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR); - - /* FIXME PERF precompute 2^447-1/2 mod q. Could instead use 2^446-1/2 mod q though. */ - decaf_448_montmul(arrr,decaf_448_scalar_one,decaf_448_scalar_r2,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR); - decaf_448_montmul(arrr,arrr,onehalf,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR); - decaf_448_scalar_add(scalar2, scalar2, arrr); - + + /* Set up a precomputed table with odd multiples of b. */ pniels_t pn, multiples[NTABLE]; + decaf_448_point_t tmp; + decaf_448_point_double(tmp, b); + pt_to_pniels(pn, tmp); pt_to_pniels(multiples[0], b); - decaf_448_point_double(a, b); - pt_to_pniels(pn, a); + decaf_448_point_copy(tmp, b); int i,j; for (i=1; ilimb[i/WBITS] >> (i%WBITS), - inv = (bits>>((448 % WINDOW)-1))-1; + /* Initialize. */ + i = DECAF_448_SCALAR_BITS - ((DECAF_448_SCALAR_BITS-1) % WINDOW) - 1; + int bits = scalar2->limb[i/WBITS] >> (i%WBITS) & WINDOW_MASK, + inv = (bits>>(WINDOW-1))-1; bits ^= inv; - constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_U_MASK); + constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK); cond_neg_pniels(pn, inv); - pniels_to_pt(a, pn); + pniels_to_pt(tmp, pn); for (i-=WINDOW; i>=0; i-=WINDOW) { - for (j=0; jlimb[i/WBITS] >> (i%WBITS); - if (i%WBITS >= WBITS-WINDOW) { bits ^= scalar2->limb[i/WBITS+1] << (WBITS - (i%WBITS)); } - bits &= WINDOW_MASK; inv = (bits>>(WINDOW-1))-1; bits ^= inv; + /* Add in from table. Compute t only on last iteration. */ constant_time_lookup(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK); cond_neg_pniels(pn, inv); - add_pniels_to_pt(a, pn); + add_pniels_to_pt(tmp, pn, i ? -1 : 0); } + + /* Write out the answer */ + decaf_448_point_copy(a,tmp); } void decaf_448_point_double_scalarmul (