diff --git a/include/decaf.h b/include/decaf.h index 3be5183..214cc24 100644 --- a/include/decaf.h +++ b/include/decaf.h @@ -410,6 +410,8 @@ void decaf_448_precompute ( * @param [out] scaled The scaled point base*scalar * @param [in] base The point to be scaled. * @param [in] scalar The scalar to multiply by. + * + * @TODO: precomputed dsmul? const or variable time? */ void decaf_448_precomputed_scalarmul ( decaf_448_point_t scaled, @@ -429,6 +431,7 @@ void decaf_448_precomputed_scalarmul ( * @param [in] scalar1 A first scalar to multiply by. * @param [in] base2 A second point to be scaled. * @param [in] scalar2 A second scalar to multiply by. + * @fixme This function isn't tested! */ void decaf_448_point_double_scalarmul ( decaf_448_point_t combo, @@ -485,7 +488,6 @@ decaf_bool_t decaf_448_point_valid ( * Unlike Elligator, this function may be up to 4:1 on [0,(p-1)/2]: * A factor of 2 due to the isogeny. * A factor of 2 because we quotient out the 2-torsion. - * // TODO: check that it isn't more, especially for the identity point. * * Negating the input (mod q) results in the same point. Inverting the input * (mod q) results in the negative point. This is the same as Elligator. 
diff --git a/src/decaf_fast.c b/src/decaf_fast.c index 2cce6db..67812fa 100644 --- a/src/decaf_fast.c +++ b/src/decaf_fast.c @@ -12,6 +12,7 @@ #include "decaf.h" #include #include "field.h" +#include "decaf_448_config.h" #define WBITS DECAF_WORD_BITS @@ -100,7 +101,9 @@ const decaf_448_point_t decaf_448_point_base = {{ /* Projective Niels coordinates */ typedef struct { gf a, b, c; } niels_s, niels_t[1]; typedef struct { niels_t n; gf z; } pniels_s, pniels_t[1]; -struct decaf_448_precomputed_s { niels_t table [5<<4]; /* MAGIC */ }; + +/* Precomputed base */ +struct decaf_448_precomputed_s { niels_t table [DECAF_COMBS_N<<(DECAF_COMBS_T-1)]; }; extern const decaf_word_t decaf_448_precomputed_base_as_words[]; const decaf_448_precomputed_s *decaf_448_precomputed_base = @@ -109,7 +112,6 @@ const decaf_448_precomputed_s *decaf_448_precomputed_base = const size_t sizeof_decaf_448_precomputed_s = sizeof(decaf_448_precomputed_s); const size_t alignof_decaf_448_precomputed_s = 32; - #ifdef __clang__ #if 100*__clang_major__ + __clang_minor__ > 305 #define VECTORIZE _Pragma("clang loop unroll(disable) vectorize(enable) vectorize_width(8)") @@ -245,25 +247,6 @@ static decaf_word_t hibit(const gf x) { return -(y->limb[0]&1); } -/** Return high bit of x/2 = low bit of x mod p */ -static inline decaf_word_t lobit(gf x) { - gf_canon(x); - return -(x->limb[0]&1); -} - -/* a = use_c ? c : b */ -sv decaf_448_cond_sel ( - decaf_448_point_t a, - const decaf_448_point_t b, - const decaf_448_point_t c, - decaf_bool_t use_c -) { - cond_sel(a->x, b->x, c->x, use_c); - cond_sel(a->y, b->y, c->y, use_c); - cond_sel(a->z, b->z, c->z, use_c); - cond_sel(a->t, b->t, c->t, use_c); -} - /** {extra,accum} - sub +? 
p * Must have extra <= 1 */ @@ -529,37 +512,6 @@ static decaf_bool_t gf_deser(gf s, const unsigned char ser[DECAF_448_SER_BYTES]) return accum; } -/* Constant-time add or subtract */ -sv decaf_448_point_add_sub ( - decaf_448_point_t p, - const decaf_448_point_t q, - const decaf_448_point_t r, - decaf_bool_t do_sub -) { - /* Twisted Edward formulas, complete when 4-torsion isn't involved */ - gf a, b, c, d; - gf_sub_nr ( b, q->y, q->x ); - gf_sub_nr ( c, r->y, r->x ); - gf_add_nr ( d, r->y, r->x ); - cond_swap(c,d,do_sub); - gf_mul ( a, c, b ); - gf_add_nr ( b, q->y, q->x ); - gf_mul ( p->y, d, b ); - gf_mul ( b, r->t, q->t ); - gf_mlw ( p->x, b, 2-2*EDWARDS_D ); - gf_add_nr ( b, a, p->y ); - gf_sub_nr ( c, p->y, a ); - gf_mul ( a, q->z, r->z ); - gf_add_nr ( a, a, a ); - gf_add_nr ( p->y, a, p->x ); - gf_sub_nr ( a, a, p->x ); - cond_swap(a,p->y,do_sub); - gf_mul ( p->z, a, p->y ); - gf_mul ( p->x, p->y, c ); - gf_mul ( p->y, a, b ); - gf_mul ( p->t, b, c ); -} - decaf_bool_t decaf_448_point_decode ( decaf_448_point_t p, const unsigned char ser[DECAF_448_SER_BYTES], @@ -931,66 +883,74 @@ siv constant_time_lookup_xx ( } } +snv prepare_fixed_window( + pniels_t *multiples, + const decaf_448_point_t b, + int ntable +) { + decaf_448_point_t tmp; + pniels_t pn; + int i; + + decaf_448_point_double(tmp, b); + pt_to_pniels(pn, tmp); + pt_to_pniels(multiples[0], b); + decaf_448_point_copy(tmp, b); + for (i=1; i> 1, NTABLE = 1<<(WINDOW-1); - decaf_448_scalar_t scalar2; - decaf_448_scalar_add(scalar2, scalar, decaf_448_point_scalarmul_adjustment); - decaf_448_halve(scalar2,scalar2,decaf_448_scalar_p); + decaf_448_scalar_t scalar1x; + decaf_448_scalar_add(scalar1x, scalar, decaf_448_point_scalarmul_adjustment); + decaf_448_halve(scalar1x,scalar1x,decaf_448_scalar_p); /* Set up a precomputed table with odd multiples of b. 
*/ pniels_t pn, multiples[NTABLE]; decaf_448_point_t tmp; - decaf_448_point_double(tmp, b); - pt_to_pniels(pn, tmp); - pt_to_pniels(multiples[0], b); - decaf_448_point_copy(tmp, b); - - int i,j; - for (i=1; ilimb[i/WBITS] >> (i%WBITS) & WINDOW_MASK, - inv = (bits>>(WINDOW-1))-1; - bits ^= inv; - - constant_time_lookup_xx(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK); - cond_neg_niels(pn->n, inv); - pniels_to_pt(tmp, pn); - - for (i-=WINDOW; i>=0; i-=WINDOW) { - /* Using Hisil et al's lookahead method instead of extensible here - * for no particular reason. Double WINDOW times, but only compute t on - * the last one. - */ - for (j=0; j=0; i-=WINDOW) { /* Fetch another block of bits */ - bits = scalar2->limb[i/WBITS] >> (i%WBITS); - if (i%WBITS >= WBITS-WINDOW) { - bits ^= scalar2->limb[i/WBITS+1] << (WBITS - (i%WBITS)); + decaf_word_t bits = scalar1x->limb[i/WBITS] >> (i%WBITS); + if (i%WBITS >= WBITS-WINDOW && i/WBITSlimb[i/WBITS+1] << (WBITS - (i%WBITS)); } bits &= WINDOW_MASK; - inv = (bits>>(WINDOW-1))-1; + decaf_word_t inv = (bits>>(WINDOW-1))-1; bits ^= inv; /* Add in from table. Compute t only on last iteration. */ constant_time_lookup_xx(pn, multiples, sizeof(pn), NTABLE, bits & WINDOW_T_MASK); cond_neg_niels(pn->n, inv); - add_pniels_to_pt(tmp, pn, i ? -1 : 0); + if (first) { + pniels_to_pt(tmp, pn); + first = 0; + } else { + /* Using Hisil et al's lookahead method instead of extensible here + * for no particular reason. Double WINDOW times, but only compute t on + * the last one. 
+ */ for (j=0; j0; i-=2) { - decaf_448_point_double(w,w); - decaf_word_t bits = scalarb->limb[i/WBITS]>>(i%WBITS); - decaf_448_cond_sel(tmp,b,b3,((bits^(bits>>1))&1)-1); - decaf_448_point_add_sub(w,w,tmp,((bits>>1)&1)-1); - bits = scalarc->limb[i/WBITS]>>(i%WBITS); - decaf_448_cond_sel(tmp,c,c3,((bits^(bits>>1))&1)-1); - decaf_448_point_add_sub(w,w,tmp,((bits>>1)&1)-1); - decaf_448_point_double(w,w); + const int WINDOW = DECAF_WINDOW_BITS, + WINDOW_MASK = (1<> 1, + NTABLE = 1<<(WINDOW-1); + + decaf_448_scalar_t scalar1x, scalar2x; + decaf_448_scalar_add(scalar1x, scalarb, decaf_448_point_scalarmul_adjustment); + decaf_448_halve(scalar1x,scalar1x,decaf_448_scalar_p); + decaf_448_scalar_add(scalar2x, scalarc, decaf_448_point_scalarmul_adjustment); + decaf_448_halve(scalar2x,scalar2x,decaf_448_scalar_p); + + /* Set up precomputed tables with odd multiples of b and of c. */ + pniels_t pn, multiples1[NTABLE], multiples2[NTABLE]; + decaf_448_point_t tmp; + prepare_fixed_window(multiples1, b, NTABLE); + prepare_fixed_window(multiples2, c, NTABLE); + + /* Initialize. */ + int i,j,first=1; + i = DECAF_448_SCALAR_BITS - ((DECAF_448_SCALAR_BITS-1) % WINDOW) - 1; + + for (; i>=0; i-=WINDOW) { + /* Fetch another block of bits */ + decaf_word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS), + bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS); + if (i%WBITS >= WBITS-WINDOW && i/WBITSlimb[i/WBITS+1] << (WBITS - (i%WBITS)); + bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS)); + } + bits1 &= WINDOW_MASK; + bits2 &= WINDOW_MASK; + decaf_word_t inv1 = (bits1>>(WINDOW-1))-1; /* all-ones when the top bit of window 1 is clear */ + decaf_word_t inv2 = (bits2>>(WINDOW-1))-1; /* same mask for window 2; must be taken from bits2 */ + bits1 ^= inv1; + bits2 ^= inv2; + + /* Add in from table. Compute t only on last iteration. */ + constant_time_lookup_xx(pn, multiples1, sizeof(pn), NTABLE, bits1 & WINDOW_T_MASK); + cond_neg_niels(pn->n, inv1); + if (first) { + pniels_to_pt(tmp, pn); + first = 0; + } else { + /* Using Hisil et al's lookahead method instead of extensible here + * for no particular reason. 
Double WINDOW times, but only compute t on + * the last one. + */ + for (j=0; jn, inv2); + add_pniels_to_pt(tmp, pn, i?-1:0); } - decaf_448_point_add_sub(w,w,b,((scalarb->limb[0]>>1)&1)-1); - decaf_448_point_add_sub(w,w,c,((scalarc->limb[0]>>1)&1)-1); - /* low bit is special because of signed window */ - decaf_448_cond_sel(tmp,b,decaf_448_point_identity,-(scalarb->limb[0]&1)); - decaf_448_point_sub(w,w,tmp); - decaf_448_cond_sel(tmp,c,decaf_448_point_identity,-(scalarc->limb[0]&1)); - decaf_448_point_sub(a,w,tmp); + + /* Write out the answer */ + decaf_448_point_copy(a,tmp); } decaf_bool_t decaf_448_point_eq ( const decaf_448_point_t p, const decaf_448_point_t q ) { @@ -1179,7 +1168,7 @@ decaf_448_precompute ( decaf_448_precomputed_s *table, const decaf_448_point_t base ) { - const unsigned int n = 5, t = 5, s = 18; // TODO MAGIC + const unsigned int n = DECAF_COMBS_N, t = DECAF_COMBS_T, s = DECAF_COMBS_S; assert(n*t*s >= DECAF_448_SCALAR_BITS); decaf_448_point_t working, start, doubles[t-1]; @@ -1251,7 +1240,7 @@ void decaf_448_precomputed_scalarmul ( ) { int i; unsigned j,k; - const unsigned int n = 5, t = 5, s = 18; // TODO MAGIC + const unsigned int n = DECAF_COMBS_N, t = DECAF_COMBS_T, s = DECAF_COMBS_S; decaf_448_scalar_t scalar1x; decaf_448_scalar_add(scalar1x, scalar, decaf_448_precomputed_scalarmul_adjustment); @@ -1288,6 +1277,13 @@ void decaf_448_precomputed_scalarmul ( } } +#if DECAF_USE_MONTGOMERY_LADDER +/** Return high bit of x/2 = low bit of x mod p */ +static inline decaf_word_t lobit(gf x) { + gf_canon(x); + return -(x->limb[0]&1); +} + decaf_bool_t decaf_448_direct_scalarmul ( uint8_t scaled[DECAF_448_SER_BYTES], const uint8_t base[DECAF_448_SER_BYTES], @@ -1417,6 +1413,22 @@ decaf_bool_t decaf_448_direct_scalarmul ( return succ; } +#else /* DECAF_USE_MONTGOMERY_LADDER */ +decaf_bool_t decaf_448_direct_scalarmul ( + uint8_t scaled[DECAF_448_SER_BYTES], + const uint8_t base[DECAF_448_SER_BYTES], + const decaf_448_scalar_t scalar, + decaf_bool_t 
allow_identity, + decaf_bool_t short_circuit +) { + decaf_448_point_t basep; + decaf_bool_t succ = decaf_448_point_decode(basep, base, allow_identity); + if (short_circuit & ~succ) return succ; + decaf_448_point_scalarmul(basep, basep, scalar); + decaf_448_point_encode(scaled, basep); + return succ; +} +#endif /* DECAF_USE_MONTGOMERY_LADDER */ /** * @cond internal @@ -1506,28 +1518,27 @@ sv prepare_wnaf_table( extern const decaf_word_t decaf_448_precomputed_wnaf_as_words[]; static const niels_t *decaf_448_wnaf_base = (const niels_t *)decaf_448_precomputed_wnaf_as_words; - -const size_t sizeof_decaf_448_precomputed_wnafs __attribute((visibility("hidden"))) = sizeof(niels_t)<<5; +const size_t sizeof_decaf_448_precomputed_wnafs __attribute((visibility("hidden"))) + = sizeof(niels_t)<n, sizeof(niels_t)); gf_cpy(zs[i], tmp[i]->z); } - batch_normalize_niels(out, zs, zis, 1<<5); + batch_normalize_niels(out, zs, zis, 1< #include #include "decaf.h" +#include "decaf_448_config.h" /* To satisfy linker. */ const decaf_word_t decaf_448_precomputed_base_as_words[1]; @@ -82,16 +83,15 @@ int main(int argc, char **argv) { decaf_448_scalar_t smadj; decaf_448_scalar_copy(smadj,decaf_448_scalar_one); - const unsigned int n = 5, t = 5, s = 18; // TODO MAGIC - for (i=0; i