diff --git a/Makefile b/Makefile index d7edb0e..dbb8568 100644 --- a/Makefile +++ b/Makefile @@ -16,7 +16,7 @@ LD = $(CC) LDXX = $(CXX) ASM ?= $(CC) -DECAF ?= decaf +DECAF ?= decaf_fast ifneq (,$(findstring x86_64,$(MACHINE))) ARCH ?= arch_x86_64 diff --git a/include/decaf.h b/include/decaf.h index 7465832..789041e 100644 --- a/include/decaf.h +++ b/include/decaf.h @@ -96,11 +96,6 @@ static const decaf_bool_t DECAF_TRUE = -(decaf_bool_t)1, DECAF_FALSE = 0; static const decaf_bool_t DECAF_SUCCESS = -(decaf_bool_t)1 /*DECAF_TRUE*/, DECAF_FAILURE = 0 /*DECAF_FALSE*/; -/** The prime p, for debugging purposes. - * TODO: prevent this scalar from actually being used for non-debugging purposes? - */ -extern const decaf_448_scalar_t decaf_448_scalar_p API_VIS; - /** A scalar equal to 1. */ extern const decaf_448_scalar_t decaf_448_scalar_one API_VIS; diff --git a/src/decaf_fast.c b/src/decaf_fast.c index 6e3c012..d4d816f 100644 --- a/src/decaf_fast.c +++ b/src/decaf_fast.c @@ -17,15 +17,25 @@ #define WBITS DECAF_WORD_BITS +/* Rename table for eventual factoring into .c.inc, MSR ECC style */ +#define SCALAR_LIMBS DECAF_448_SCALAR_LIMBS +#define SCALAR_BITS DECAF_448_SCALAR_BITS +#define NLIMBS DECAF_448_LIMBS +#define API_NS(_id) decaf_448_##_id +#define API_NS2(_pref,_id) _pref##_decaf_448_##_id +#define scalar_t decaf_448_scalar_t +#define point_t decaf_448_point_t +#define precomputed_s decaf_448_precomputed_s +#define SER_BYTES DECAF_448_SER_BYTES + #if WBITS == 64 -#define LBITS 56 typedef __int128_t decaf_sdword_t; #define LIMB(x) (x##ull) #define SC_LIMB(x) (x##ull) #elif WBITS == 32 typedef int64_t decaf_sdword_t; -#define LBITS 28 -#define LIMB(x) (x##ull)&((1ull<>LBITS +/* 28 is MAGIC */ +#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 #define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32 #else #error "Only supporting 32- and 64-bit platforms right now" @@ -38,16 +48,9 @@ typedef int64_t decaf_sdword_t; #define siv static inline void __attribute__((always_inline)) static const gf ZERO = {{{0}}}, ONE = {{{1}}}, TWO = {{{2}}}; -#define LMASK ((((decaf_word_t)1)< 305 @@ -129,11 +133,11 @@ const size_t alignof_decaf_448_precomputed_s = 32; #define VECTORIZE #endif -#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; ilimb[i] = y->limb[i]); } +siv gf_cpy(gf x, const gf y) { x[0] = y[0]; } /** Mostly-unoptimized multiply, but at least it's unrolled. */ siv gf_mul (gf c, const gf a, const gf b) { @@ -150,6 +154,16 @@ siv gf_isqrt(gf y, const gf x) { field_isr((field_t *)y, (const field_t *)x); } +/** Inverse. TODO: adapt to 5-mod-8 fields? */ +sv gf_invert(gf y, const gf x) { + gf t1, t2; + gf_sqr(t1, x); // o^2 + gf_isqrt(t2, t1); // +-1/sqrt(o^2) = +-1/o + gf_sqr(t1, t2); + gf_mul(t2, t1, x); // not direct to y in case of alias. + gf_cpy(y, t2); +} + /** Add mod p. Conservatively always weak-reduce. */ snv gf_add ( gf_s *__restrict__ c, const gf a, const gf b ) { field_add((field_t *)c, (const field_t *)a, (const field_t *)b); @@ -248,6 +262,15 @@ static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) { return ((decaf_dword_t)ret - 1) >> WBITS; } +/** Inverse square root using addition chain. */ +static decaf_bool_t gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) { + gf tmp0, tmp1; + field_isr((field_t *)y, (const field_t *)x); + gf_sqr(tmp0,y); + gf_mul(tmp1,tmp0,x); + return gf_eq(tmp1,ONE) | (allow_zero & gf_eq(tmp1,ZERO)); +} + /** Return high bit of x = low bit of 2x mod p */ static decaf_word_t hibit(const gf x) { gf y; @@ -259,16 +282,16 @@ static decaf_word_t hibit(const gf x) { /** {extra,accum} - sub +? p * Must have extra <= 1 */ -snv decaf_448_subx( - decaf_448_scalar_t out, - const decaf_word_t accum[DECAF_448_SCALAR_LIMBS], - const decaf_448_scalar_t sub, - const decaf_448_scalar_t p, +snv sc_subx( + scalar_t out, + const decaf_word_t accum[SCALAR_LIMBS], + const scalar_t sub, + const scalar_t p, decaf_word_t extra ) { decaf_sdword_t chain = 0; unsigned int i; - for (i=0; ilimb[i]; out->limb[i] = chain; chain >>= WBITS; @@ -276,38 +299,38 @@ snv decaf_448_subx( decaf_bool_t borrow = chain+extra; /* = 0 or -1 */ chain = 0; - for (i=0; ilimb[i]) + (p->limb[i] & borrow); out->limb[i] = chain; chain >>= WBITS; } } -snv decaf_448_montmul ( - decaf_448_scalar_t out, - const decaf_448_scalar_t a, - const decaf_448_scalar_t b +snv sc_montmul ( + scalar_t out, + const scalar_t a, + const scalar_t b ) { unsigned int i,j; - decaf_word_t accum[DECAF_448_SCALAR_LIMBS+1] = {0}; + decaf_word_t accum[SCALAR_LIMBS+1] = {0}; decaf_word_t hi_carry = 0; - for (i=0; ilimb[i]; const decaf_word_t *mier = b->limb; decaf_dword_t chain = 0; - for (j=0; j>= WBITS; } accum[j] = chain; - mand = accum[0] * DECAF_MONTGOMERY_FACTOR; + mand = accum[0] * MONTGOMERY_FACTOR; chain = 0; - mier = decaf_448_scalar_p->limb; - for (j=0; jlimb; + for (j=0; j>= WBITS; @@ -318,32 +341,33 @@ snv decaf_448_montmul ( hi_carry = chain >> WBITS; } - decaf_448_subx(out, accum, decaf_448_scalar_p, decaf_448_scalar_p, hi_carry); + sc_subx(out, accum, sc_p, sc_p, hi_carry); } -void decaf_448_scalar_mul ( - decaf_448_scalar_t out, - const decaf_448_scalar_t a, - const decaf_448_scalar_t b +void API_NS(scalar_mul) ( + scalar_t out, + const scalar_t a, + const scalar_t b ) { - decaf_448_montmul(out,a,b); - decaf_448_montmul(out,out,decaf_448_scalar_r2); + sc_montmul(out,a,b); + sc_montmul(out,out,sc_r2); } /* PERF: could implement this */ -siv decaf_448_montsqr ( - decaf_448_scalar_t out, - const decaf_448_scalar_t a +siv sc_montsqr ( + scalar_t out, + const scalar_t a ) { - decaf_448_montmul(out,a,a); + sc_montmul(out,a,a); } -decaf_bool_t decaf_448_scalar_invert ( - decaf_448_scalar_t out, - const decaf_448_scalar_t a +decaf_bool_t API_NS(scalar_invert) ( + scalar_t out, + const scalar_t a ) { - decaf_448_scalar_t chain[7], tmp; - decaf_448_montmul(chain[0],a,decaf_448_scalar_r2); + /* FIELD MAGIC */ + scalar_t chain[7], tmp; + sc_montmul(chain[0],a,sc_r2); unsigned int i,j; /* Addition chain generated by a not-too-clever SAGE script. First part: compute a^(2^222-1) */ @@ -368,85 +392,85 @@ decaf_bool_t decaf_448_scalar_invert ( }; for (i=0; ilimb, b, decaf_448_scalar_p, 0); + sc_subx(out, a->limb, b, sc_p, 0); } -void decaf_448_scalar_add ( - decaf_448_scalar_t out, - const decaf_448_scalar_t a, - const decaf_448_scalar_t b +void API_NS(scalar_add) ( + scalar_t out, + const scalar_t a, + const scalar_t b ) { decaf_dword_t chain = 0; unsigned int i; - for (i=0; ilimb[i]) + b->limb[i]; out->limb[i] = chain; chain >>= WBITS; } - decaf_448_subx(out, out->limb, decaf_448_scalar_p, decaf_448_scalar_p, chain); + sc_subx(out, out->limb, sc_p, sc_p, chain); } -snv decaf_448_scalar_halve ( - decaf_448_scalar_t out, - const decaf_448_scalar_t a, - const decaf_448_scalar_t p +snv sc_halve ( + scalar_t out, + const scalar_t a, + const scalar_t p ) { decaf_word_t mask = -(a->limb[0] & 1); decaf_dword_t chain = 0; unsigned int i; - for (i=0; ilimb[i]) + (p->limb[i] & mask); out->limb[i] = chain; chain >>= WBITS; } - for (i=0; ilimb[i] = out->limb[i]>>1 | out->limb[i+1]<<(WBITS-1); } out->limb[i] = out->limb[i]>>1 | chain<<(WBITS-1); } -void decaf_448_scalar_set ( - decaf_448_scalar_t out, +void API_NS(scalar_set) ( + scalar_t out, decaf_word_t w ) { - memset(out,0,sizeof(decaf_448_scalar_t)); + memset(out,0,sizeof(scalar_t)); out->limb[0] = w; } -decaf_bool_t decaf_448_scalar_eq ( - const decaf_448_scalar_t a, - const decaf_448_scalar_t b +decaf_bool_t API_NS(scalar_eq) ( + const scalar_t a, + const scalar_t b ) { decaf_word_t diff = 0; unsigned int i; - for (i=0; ilimb[i] ^ b->limb[i]; } return (((decaf_dword_t)diff)-1)>>WBITS; @@ -455,24 +479,13 @@ decaf_bool_t decaf_448_scalar_eq ( /* *** API begins here *** */ /** identity = (0,1) */ -const decaf_448_point_t decaf_448_point_identity = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}}; +const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}}; -static void gf_encode ( unsigned char ser[DECAF_448_SER_BYTES], gf a ) { - /* - gf_canon(a); - int i, k=0, bits=0; - decaf_dword_t buf=0; - for (i=0; ilimb[i]<=8 || i==DECAF_448_LIMBS-1) && k>=8) { - ser[k++]=buf; - } - } - */ +static void gf_encode ( unsigned char ser[SER_BYTES], gf a ) { field_serialize(ser, (field_t *)a); } -void decaf_448_point_encode( unsigned char ser[DECAF_448_SER_BYTES], const decaf_448_point_t p ) { +void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) { /* Can shave off one mul here; not important but makes consistent with paper */ gf a, b, c, d; gf_mlw ( a, p->y, 1-EDWARDS_D ); @@ -499,30 +512,16 @@ void decaf_448_point_encode( unsigned char ser[DECAF_448_SER_BYTES], const decaf /** * Deserialize a bool, return TRUE if < p. */ -static decaf_bool_t gf_deser(gf s, const unsigned char ser[DECAF_448_SER_BYTES]) { - /* - unsigned int i, k=0, bits=0; - decaf_dword_t buf=0; - for (i=0; i=LBITS || i==DECAF_448_SER_BYTES-1) && k>=LBITS) { - s->limb[k++] = buf & LMASK; - } - } - - decaf_sdword_t accum = 0; - FOR_LIMB(i, accum = (accum + s->limb[i] - P->limb[i]) >> WBITS ); - return accum; - */ +static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) { return field_deserialize((field_t *)s, ser); } -decaf_bool_t decaf_448_point_decode ( - decaf_448_point_t p, - const unsigned char ser[DECAF_448_SER_BYTES], +decaf_bool_t API_NS(point_decode) ( + point_t p, + const unsigned char ser[SER_BYTES], decaf_bool_t allow_identity ) { - gf s, a, b, c, d, e; + gf s, a, b, c, d; decaf_bool_t succ = gf_deser(s, ser), zero = gf_eq(s, ZERO); succ &= allow_identity | ~zero; succ &= ~hibit(s); @@ -532,11 +531,7 @@ decaf_bool_t decaf_448_point_decode ( gf_mlw ( c, a, 4-4*EDWARDS_D ); gf_add ( c, c, b ); gf_mul ( b, c, a ); - gf_isqrt ( d, b ); - gf_sqr ( e, d ); - gf_mul ( a, e, b ); - gf_add ( a, a, ONE ); - succ &= ~gf_eq ( a, ZERO ); + succ &= gf_isqrt_chk ( d, b, DECAF_TRUE ); gf_mul ( b, c, d ); cond_neg ( d, hibit(b) ); gf_add ( p->x, s, s ); @@ -550,10 +545,10 @@ decaf_bool_t decaf_448_point_decode ( return succ; } -void decaf_448_point_sub ( - decaf_448_point_t p, - const decaf_448_point_t q, - const decaf_448_point_t r +void API_NS(point_sub) ( + point_t p, + const point_t q, + const point_t r ) { gf a, b, c, d; gf_sub_nr ( b, q->y, q->x ); @@ -576,10 +571,10 @@ void decaf_448_point_sub ( gf_mul ( p->t, b, c ); } -void decaf_448_point_add ( - decaf_448_point_t p, - const decaf_448_point_t q, - const decaf_448_point_t r +void API_NS(point_add) ( + point_t p, + const point_t q, + const point_t r ) { gf a, b, c, d; gf_sub_nr ( b, q->y, q->x ); @@ -602,9 +597,9 @@ void decaf_448_point_add ( gf_mul ( p->t, b, c ); } -snv decaf_448_point_double_internal ( - decaf_448_point_t p, - const decaf_448_point_t q, +snv point_double_internal ( + point_t p, + const point_t q, decaf_bool_t before_double ) { gf a, b, c, d; @@ -624,13 +619,13 @@ snv decaf_448_point_double_internal ( if (!before_double) gf_mul ( p->t, b, d ); } -void decaf_448_point_double(decaf_448_point_t p, const decaf_448_point_t q) { - decaf_448_point_double_internal(p,q,0); +void API_NS(point_double)(point_t p, const point_t q) { + point_double_internal(p,q,0); } -void decaf_448_point_negate ( - decaf_448_point_t nega, - const decaf_448_point_t a +void API_NS(point_negate) ( + point_t nega, + const point_t a ) { gf_sub(nega->x, ZERO, a->x); gf_cpy(nega->y, a->y); @@ -638,13 +633,13 @@ void decaf_448_point_negate ( gf_sub(nega->t, ZERO, a->t); } -siv decaf_448_scalar_decode_short ( - decaf_448_scalar_t s, - const unsigned char ser[DECAF_448_SER_BYTES], +siv scalar_decode_short ( + scalar_t s, + const unsigned char ser[SER_BYTES], unsigned int nbytes ) { unsigned int i,j,k=0; - for (i=0; ilimb[i] - decaf_448_scalar_p->limb[i]) >> WBITS; + for (i=0; ilimb[i] - sc_p->limb[i]) >> WBITS; } - decaf_448_montmul(s,s,decaf_448_scalar_r1); /* ham-handed reduce */ + sc_montmul(s,s,sc_r1); /* ham-handed reduce */ return accum; } @@ -688,60 +683,60 @@ void decaf_bzero ( } -void decaf_448_scalar_destroy ( - decaf_448_scalar_t scalar +void API_NS(scalar_destroy) ( + scalar_t scalar ) { - decaf_bzero(scalar, sizeof(decaf_448_scalar_t)); + decaf_bzero(scalar, sizeof(scalar_t)); } static inline void ignore_result ( decaf_bool_t boo ) { (void)boo; } -void decaf_448_scalar_decode_long( - decaf_448_scalar_t s, +void API_NS(scalar_decode_long)( + scalar_t s, const unsigned char *ser, size_t ser_len ) { if (ser_len == 0) { - decaf_448_scalar_copy(s, decaf_448_scalar_zero); + API_NS(scalar_copy)(s, API_NS(scalar_zero)); return; } size_t i; - decaf_448_scalar_t t1, t2; + scalar_t t1, t2; - i = ser_len - (ser_len%DECAF_448_SER_BYTES); - if (i==ser_len) i -= DECAF_448_SER_BYTES; + i = ser_len - (ser_len%SER_BYTES); + if (i==ser_len) i -= SER_BYTES; - decaf_448_scalar_decode_short(t1, &ser[i], ser_len-i); + scalar_decode_short(t1, &ser[i], ser_len-i); - if (ser_len == sizeof(decaf_448_scalar_t)) { + if (ser_len == sizeof(scalar_t)) { assert(i==0); /* ham-handed reduce */ - decaf_448_montmul(s,t1,decaf_448_scalar_r1); - decaf_448_scalar_destroy(t1); + sc_montmul(s,t1,sc_r1); + API_NS(scalar_destroy)(t1); return; } while (i) { - i -= DECAF_448_SER_BYTES; - decaf_448_montmul(t1,t1,decaf_448_scalar_r2); - ignore_result( decaf_448_scalar_decode(t2, ser+i) ); - decaf_448_scalar_add(t1, t1, t2); + i -= SER_BYTES; + sc_montmul(t1,t1,sc_r2); + ignore_result( API_NS(scalar_decode)(t2, ser+i) ); + API_NS(scalar_add)(t1, t1, t2); } - decaf_448_scalar_copy(s, t1); - decaf_448_scalar_destroy(t1); - decaf_448_scalar_destroy(t2); + API_NS(scalar_copy)(s, t1); + API_NS(scalar_destroy)(t1); + API_NS(scalar_destroy)(t2); } -void decaf_448_scalar_encode( - unsigned char ser[DECAF_448_SER_BYTES], - const decaf_448_scalar_t s +void API_NS(scalar_encode)( + unsigned char ser[SER_BYTES], + const scalar_t s ) { unsigned int i,j,k=0; - for (i=0; ilimb[i] >> (8*j); } @@ -759,7 +754,7 @@ siv cond_neg_niels ( static void pt_to_pniels ( pniels_t b, - const decaf_448_point_t a + const point_t a ) { gf_sub ( b->n->a, a->y, a->x ); gf_add ( b->n->b, a->x, a->y ); @@ -768,7 +763,7 @@ static void pt_to_pniels ( } static void pniels_to_pt ( - decaf_448_point_t e, + point_t e, const pniels_t d ) { gf eu; @@ -781,7 +776,7 @@ static void pniels_to_pt ( } snv niels_to_pt ( - decaf_448_point_t e, + point_t e, const niels_t n ) { gf_add ( e->y, n->b, n->a ); @@ -791,7 +786,7 @@ snv niels_to_pt ( } snv add_niels_to_pt ( - decaf_448_point_t d, + point_t d, const niels_t e, decaf_bool_t before_double ) { @@ -812,7 +807,7 @@ snv add_niels_to_pt ( } snv sub_niels_from_pt ( - decaf_448_point_t d, + point_t d, const niels_t e, decaf_bool_t before_double ) { @@ -833,7 +828,7 @@ snv sub_niels_from_pt ( } sv add_pniels_to_pt ( - decaf_448_point_t p, + point_t p, const pniels_t pn, decaf_bool_t before_double ) { @@ -844,7 +839,7 @@ sv add_pniels_to_pt ( } sv sub_pniels_from_pt ( - decaf_448_point_t p, + point_t p, const pniels_t pn, decaf_bool_t before_double ) { @@ -854,7 +849,7 @@ sv sub_pniels_from_pt ( sub_niels_from_pt( p, pn->n, before_double ); } -extern const decaf_448_scalar_t decaf_448_point_scalarmul_adjustment; +extern const scalar_t API_NS(point_scalarmul_adjustment); /* TODO: get rid of big_register_t dependencies? */ siv constant_time_lookup_xx ( @@ -883,50 +878,50 @@ siv constant_time_lookup_xx ( snv prepare_fixed_window( pniels_t *multiples, - const decaf_448_point_t b, + const point_t b, int ntable ) { - decaf_448_point_t tmp; + point_t tmp; pniels_t pn; int i; - decaf_448_point_double(tmp, b); + point_double_internal(tmp, b, 0); pt_to_pniels(pn, tmp); pt_to_pniels(multiples[0], b); - decaf_448_point_copy(tmp, b); + API_NS(point_copy)(tmp, b); for (i=1; i> 1, NTABLE = 1<<(WINDOW-1); - decaf_448_scalar_t scalar1x; - decaf_448_scalar_add(scalar1x, scalar, decaf_448_point_scalarmul_adjustment); - decaf_448_scalar_halve(scalar1x,scalar1x,decaf_448_scalar_p); + scalar_t scalar1x; + API_NS(scalar_add)(scalar1x, scalar, API_NS(point_scalarmul_adjustment)); + sc_halve(scalar1x,scalar1x,sc_p); /* Set up a precomputed table with odd multiples of b. */ pniels_t pn, multiples[NTABLE]; - decaf_448_point_t tmp; + point_t tmp; prepare_fixed_window(multiples, b, NTABLE); /* Initialize. */ int i,j,first=1; - i = DECAF_448_SCALAR_BITS - ((DECAF_448_SCALAR_BITS-1) % WINDOW) - 1; + i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1; for (; i>=0; i-=WINDOW) { /* Fetch another block of bits */ decaf_word_t bits = scalar1x->limb[i/WBITS] >> (i%WBITS); - if (i%WBITS >= WBITS-WINDOW && i/WBITS= WBITS-WINDOW && i/WBITSlimb[i/WBITS+1] << (WBITS - (i%WBITS)); } bits &= WINDOW_MASK; @@ -945,49 +940,49 @@ void decaf_448_point_scalarmul ( * the last one. */ for (j=0; j> 1, NTABLE = 1<<(WINDOW-1); - decaf_448_scalar_t scalar1x, scalar2x; - decaf_448_scalar_add(scalar1x, scalarb, decaf_448_point_scalarmul_adjustment); - decaf_448_scalar_halve(scalar1x,scalar1x,decaf_448_scalar_p); - decaf_448_scalar_add(scalar2x, scalarc, decaf_448_point_scalarmul_adjustment); - decaf_448_scalar_halve(scalar2x,scalar2x,decaf_448_scalar_p); + scalar_t scalar1x, scalar2x; + API_NS(scalar_add)(scalar1x, scalarb, API_NS(point_scalarmul_adjustment)); + sc_halve(scalar1x,scalar1x,sc_p); + API_NS(scalar_add)(scalar2x, scalarc, API_NS(point_scalarmul_adjustment)); + sc_halve(scalar2x,scalar2x,sc_p); /* Set up a precomputed table with odd multiples of b. */ pniels_t pn, multiples1[NTABLE], multiples2[NTABLE]; - decaf_448_point_t tmp; + point_t tmp; prepare_fixed_window(multiples1, b, NTABLE); prepare_fixed_window(multiples2, c, NTABLE); /* Initialize. */ int i,j,first=1; - i = DECAF_448_SCALAR_BITS - ((DECAF_448_SCALAR_BITS-1) % WINDOW) - 1; + i = SCALAR_BITS - ((SCALAR_BITS-1) % WINDOW) - 1; for (; i>=0; i-=WINDOW) { /* Fetch another block of bits */ decaf_word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS), bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS); - if (i%WBITS >= WBITS-WINDOW && i/WBITS= WBITS-WINDOW && i/WBITSlimb[i/WBITS+1] << (WBITS - (i%WBITS)); bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS)); } @@ -1010,8 +1005,8 @@ void decaf_448_point_double_scalarmul ( * the last one. */ for (j=0; jy, q->x ); @@ -1031,9 +1026,9 @@ decaf_bool_t decaf_448_point_eq ( const decaf_448_point_t p, const decaf_448_poi return gf_eq(a,b); } -void decaf_448_point_from_hash_nonuniform ( - decaf_448_point_t p, - const unsigned char ser[DECAF_448_SER_BYTES] +void API_NS(point_from_hash_nonuniform) ( + point_t p, + const unsigned char ser[SER_BYTES] ) { gf r,urr,a,b,c,dee,e,ur2_d,udr2_1; (void)gf_deser(r,ser); @@ -1049,10 +1044,8 @@ void decaf_448_point_from_hash_nonuniform ( gf_add(udr2_1,b,ONE); gf_mul(a,c,udr2_1); gf_mlw(c,a,EDWARDS_D+1); - gf_isqrt(b,c); /* FIELD: if 5 mod 8, multiply result by u. */ - gf_sqr(a,b); - gf_mul(e,a,c); - decaf_bool_t square = gf_eq(e,ONE); + decaf_bool_t square = gf_isqrt_chk(b,c,DECAF_FALSE); + /* FIELD: if 5 mod 8, multiply result by u. */ gf_mul(a,b,r); cond_sel(b,a,b,square); gf_mlw(a,b,EDWARDS_D+1); @@ -1072,18 +1065,18 @@ void decaf_448_point_from_hash_nonuniform ( gf_mul(p->t,b,e); } -void decaf_448_point_from_hash_uniform ( - decaf_448_point_t pt, - const unsigned char hashed_data[2*DECAF_448_SER_BYTES] +void API_NS(point_from_hash_uniform) ( + point_t pt, + const unsigned char hashed_data[2*SER_BYTES] ) { - decaf_448_point_t pt2; - decaf_448_point_from_hash_nonuniform(pt,hashed_data); - decaf_448_point_from_hash_nonuniform(pt2,&hashed_data[DECAF_448_SER_BYTES]); - decaf_448_point_add(pt,pt,pt2); + point_t pt2; + API_NS(point_from_hash_nonuniform)(pt,hashed_data); + API_NS(point_from_hash_nonuniform)(pt2,&hashed_data[SER_BYTES]); + API_NS(point_add)(pt,pt,pt2); } -decaf_bool_t decaf_448_point_valid ( - const decaf_448_point_t p +decaf_bool_t API_NS(point_valid) ( + const point_t p ) { gf a,b,c; gf_mul(a,p->x,p->y); @@ -1106,6 +1099,7 @@ static void gf_batch_invert ( /* const */ gf *in, unsigned int n ) { + gf t1; assert(n>1); gf_cpy(out[1], in[0]); @@ -1115,12 +1109,7 @@ static void gf_batch_invert ( } gf_mul(out[0], out[n-1], in[n-1]); - gf t1, t2; - gf_isqrt(t1, out[0]); - gf_sqr(t2, t1); - gf_sqr(t1, t2); - gf_mul(t2, t1, out[0]); - gf_cpy(out[0], t2); + gf_invert(out[0], out[0]); for (i=n-1; i>0; i--) { gf_mul(t1, out[i], out[0]); @@ -1155,16 +1144,15 @@ static void batch_normalize_niels ( } } -void -decaf_448_precompute ( - decaf_448_precomputed_s *table, - const decaf_448_point_t base +void API_NS(precompute) ( + precomputed_s *table, + const point_t base ) { const unsigned int n = DECAF_COMBS_N, t = DECAF_COMBS_T, s = DECAF_COMBS_S; - assert(n*t*s >= DECAF_448_SCALAR_BITS); + assert(n*t*s >= SCALAR_BITS); - decaf_448_point_t working, start, doubles[t-1]; - decaf_448_point_copy(working, base); + point_t working, start, doubles[t-1]; + API_NS(point_copy)(working, base); pniels_t pn_tmp; gf zs[n<<(t-1)], zis[n<<(t-1)]; @@ -1176,16 +1164,16 @@ decaf_448_precompute ( /* Doubling phase */ for (j=0; j>=1; if (gray & (1<table,zs,zis,n<<(t-1)); } -extern const decaf_448_scalar_t decaf_448_precomputed_scalarmul_adjustment; +extern const scalar_t API_NS(precomputed_scalarmul_adjustment); -siv constant_time_lookup_niels ( +siv constant_time_lookup_xx_niels ( niels_s *__restrict__ ni, const niels_t *table, int nelts, @@ -1225,30 +1213,30 @@ siv constant_time_lookup_niels ( constant_time_lookup_xx(ni, table, sizeof(niels_s), nelts, idx); } -void decaf_448_precomputed_scalarmul ( - decaf_448_point_t out, - const decaf_448_precomputed_s *table, - const decaf_448_scalar_t scalar +void API_NS(precomputed_scalarmul) ( + point_t out, + const precomputed_s *table, + const scalar_t scalar ) { int i; unsigned j,k; const unsigned int n = DECAF_COMBS_N, t = DECAF_COMBS_T, s = DECAF_COMBS_S; - decaf_448_scalar_t scalar1x; - decaf_448_scalar_add(scalar1x, scalar, decaf_448_precomputed_scalarmul_adjustment); - decaf_448_scalar_halve(scalar1x,scalar1x,decaf_448_scalar_p); + scalar_t scalar1x; + API_NS(scalar_add)(scalar1x, scalar, API_NS(precomputed_scalarmul_adjustment)); + sc_halve(scalar1x,scalar1x,sc_p); niels_t ni; for (i=s-1; i>=0; i--) { - if (i != (int)s-1) decaf_448_point_double(out,out); + if (i != (int)s-1) point_double_internal(out,out,0); for (j=0; jlimb[bit/WBITS] >> (bit%WBITS) & 1) << k; } } @@ -1257,7 +1245,7 @@ void decaf_448_precomputed_scalarmul ( tab ^= invert; tab &= (1<<(t-1)) - 1; - constant_time_lookup_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab); + constant_time_lookup_xx_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab); cond_neg_niels(ni, invert); if ((i!=s-1)||j) { @@ -1276,10 +1264,10 @@ static inline decaf_word_t lobit(gf x) { return -(x->limb[0]&1); } -decaf_bool_t decaf_448_direct_scalarmul ( - uint8_t scaled[DECAF_448_SER_BYTES], - const uint8_t base[DECAF_448_SER_BYTES], - const decaf_448_scalar_t scalar, +decaf_bool_t API_NS(direct_scalarmul) ( + uint8_t scaled[SER_BYTES], + const uint8_t base[SER_BYTES], + const scalar_t scalar, decaf_bool_t allow_identity, decaf_bool_t short_circuit ) { @@ -1301,7 +1289,7 @@ decaf_bool_t decaf_448_direct_scalarmul ( int j; decaf_bool_t pflip = 0; - for (j=DECAF_448_SCALAR_BITS+1; j>=0; j--) { + for (j=SCALAR_BITS+1; j>=0; j--) { /* FIXME: -1, but the test cases use too many bits */ /* Augmented Montgomery ladder */ @@ -1406,18 +1394,18 @@ decaf_bool_t decaf_448_direct_scalarmul ( return succ; } #else /* DECAF_USE_MONTGOMERY_LADDER */ -decaf_bool_t decaf_448_direct_scalarmul ( - uint8_t scaled[DECAF_448_SER_BYTES], - const uint8_t base[DECAF_448_SER_BYTES], - const decaf_448_scalar_t scalar, +decaf_bool_t API_NS(direct_scalarmul) ( + uint8_t scaled[SER_BYTES], + const uint8_t base[SER_BYTES], + const scalar_t scalar, decaf_bool_t allow_identity, decaf_bool_t short_circuit ) { - decaf_448_point_t basep; - decaf_bool_t succ = decaf_448_point_decode(basep, base, allow_identity); + point_t basep; + decaf_bool_t succ = API_NS(point_decode)(basep, base, allow_identity); if (short_circuit & ~succ) return succ; - decaf_448_point_scalarmul(basep, basep, scalar); - decaf_448_point_encode(scaled, basep); + API_NS(point_scalarmul)(basep, basep, scalar); + API_NS(point_encode)(scaled, basep); return succ; } #endif /* DECAF_USE_MONTGOMERY_LADDER */ @@ -1432,7 +1420,7 @@ struct smvt_control { static int recode_wnaf ( struct smvt_control *control, /* [nbits/(tableBits+1) + 3] */ - const decaf_448_scalar_t scalar, + const scalar_t scalar, unsigned int tableBits ) { int current = 0, i, j; @@ -1440,8 +1428,9 @@ static int recode_wnaf ( /* PERF: negate scalar if it's large * PERF: this is a pretty simplistic algorithm. I'm sure there's a faster one... + * PERF MINOR: not technically WNAF, since last digits can be adjacent. Could be rtl. */ - for (i=DECAF_448_SCALAR_BITS-1; i >= 0; i--) { + for (i=SCALAR_BITS-1; i >= 0; i--) { int bit = (scalar->limb[i/WORD_BITS] >> (i%WORD_BITS)) & 1; current = 2*current + bit; @@ -1464,7 +1453,7 @@ static int recode_wnaf ( control[position].power = j+1; control[position].addend = delta; position++; - assert(position <= DECAF_448_SCALAR_BITS/(tableBits+1) + 2); + assert(position <= SCALAR_BITS/(tableBits+1) + 2); } } @@ -1475,7 +1464,7 @@ static int recode_wnaf ( control[position].power = j; control[position].addend = current; position++; - assert(position <= DECAF_448_SCALAR_BITS/(tableBits+1) + 2); + assert(position <= SCALAR_BITS/(tableBits+1) + 2); } @@ -1486,16 +1475,16 @@ static int recode_wnaf ( sv prepare_wnaf_table( pniels_t *output, - const decaf_448_point_t working, + const point_t working, unsigned int tbits ) { - decaf_448_point_t tmp; + point_t tmp; int i; pt_to_pniels(output[0], working); if (tbits == 0) return; - decaf_448_point_double(tmp,working); + API_NS(point_double)(tmp,working); pniels_t twop; pt_to_pniels(twop, tmp); @@ -1508,19 +1497,19 @@ sv prepare_wnaf_table( } } -extern const decaf_word_t decaf_448_precomputed_wnaf_as_words[]; -static const niels_t *decaf_448_wnaf_base = (const niels_t *)decaf_448_precomputed_wnaf_as_words; -const size_t sizeof_decaf_448_precomputed_wnafs __attribute((visibility("hidden"))) +extern const decaf_word_t API_NS(precomputed_wnaf_as_words)[]; +static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_words); +const size_t API_NS2(sizeof,precomputed_wnafs) __attribute((visibility("hidden"))) = sizeof(niels_t)< control_pre[0].power) { pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]); contv++; } else if (i == control_pre[0].power && i >=0 ) { pniels_to_pt(combo, precmp_var[control_var[0].addend >> 1]); - add_niels_to_pt(combo, decaf_448_wnaf_base[control_pre[0].addend >> 1], i); + add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1], i); contv++; contp++; } else { i = control_pre[0].power; - niels_to_pt(combo, decaf_448_wnaf_base[control_pre[0].addend >> 1]); + niels_to_pt(combo, API_NS(wnaf_base)[control_pre[0].addend >> 1]); contp++; } for (i--; i >= 0; i--) { int cv = (i==control_var[contv].power), cp = (i==control_pre[contp].power); - decaf_448_point_double_internal(combo,combo,i && !(cv||cp)); + point_double_internal(combo,combo,i && !(cv||cp)); if (cv) { assert(control_var[contv].addend); @@ -1587,9 +1576,9 @@ void decaf_448_base_double_scalarmul_non_secret ( assert(control_pre[contp].addend); if (control_pre[contp].addend > 0) { - add_niels_to_pt(combo, decaf_448_wnaf_base[control_pre[contp].addend >> 1], i); + add_niels_to_pt(combo, API_NS(wnaf_base)[control_pre[contp].addend >> 1], i); } else { - sub_niels_from_pt(combo, decaf_448_wnaf_base[(-control_pre[contp].addend) >> 1], i); + sub_niels_from_pt(combo, API_NS(wnaf_base)[(-control_pre[contp].addend) >> 1], i); } contp++; } @@ -1599,14 +1588,14 @@ void decaf_448_base_double_scalarmul_non_secret ( assert(contp == ncb_pre); (void)ncb_pre; } -void decaf_448_point_destroy ( - decaf_448_point_t point +void API_NS(point_destroy) ( + point_t point ) { - decaf_bzero(point, sizeof(decaf_448_point_t)); + decaf_bzero(point, sizeof(point_t)); } -void decaf_448_precomputed_destroy ( - decaf_448_precomputed_s *pre +void API_NS(precomputed_destroy) ( + precomputed_s *pre ) { - decaf_bzero(pre, sizeof_decaf_448_precomputed_s); + decaf_bzero(pre, API_NS2(sizeof,precomputed_s)); } diff --git a/src/decaf_gen_tables.c b/src/decaf_gen_tables.c index b4af912..07ce73d 100644 --- a/src/decaf_gen_tables.c +++ b/src/decaf_gen_tables.c @@ -12,26 +12,29 @@ #include #include #include "decaf.h" -#include "decaf_448_config.h" +#include "decaf_448_config.h" /* MAGIC */ + +#define API_NS(_id) decaf_448_##_id +#define API_NS2(_pref,_id) _pref##_decaf_448_##_id /* To satisfy linker. */ -const decaf_word_t decaf_448_precomputed_base_as_words[1]; -const decaf_448_scalar_t decaf_448_precomputed_scalarmul_adjustment; -const decaf_448_scalar_t decaf_448_point_scalarmul_adjustment; +const decaf_word_t API_NS(precomputed_base_as_words)[1]; +const API_NS(scalar_t) API_NS(precomputed_scalarmul_adjustment); +const API_NS(scalar_t) API_NS(point_scalarmul_adjustment); struct niels_s; -const decaf_word_t *decaf_448_precomputed_wnaf_as_words; -extern const size_t sizeof_decaf_448_precomputed_wnafs; +const decaf_word_t *API_NS(precomputed_wnaf_as_words); +extern const size_t API_NS2(sizeof,precomputed_wnafs); -void decaf_448_precompute_wnafs ( +void API_NS(precompute_wnafs) ( struct niels_s *out, - const decaf_448_point_t base + const API_NS(point_t) base ); -static void scalar_print(const char *name, const decaf_448_scalar_t sc) { - printf("const decaf_448_scalar_t %s = {{{\n", name); +static void scalar_print(const char *name, const API_NS(scalar_t) sc) { + printf("const API_NS(scalar_t) %s = {{{\n", name); unsigned i; - for (i=0; ilimb[i] ); } @@ -41,26 +44,28 @@ static void scalar_print(const char *name, const decaf_448_scalar_t sc) { int main(int argc, char **argv) { (void)argc; (void)argv; - decaf_448_precomputed_s *pre; - int ret = posix_memalign((void**)&pre, alignof_decaf_448_precomputed_s, sizeof_decaf_448_precomputed_s); + API_NS(precomputed_s) *pre; + int ret = posix_memalign((void**)&pre, API_NS2(alignof,precomputed_s), API_NS2(sizeof,precomputed_s)); if (ret || !pre) return 1; - decaf_448_precompute(pre, decaf_448_point_base); + API_NS(precompute)(pre, API_NS(point_base)); struct niels_s *preWnaf; - ret = posix_memalign((void**)&preWnaf, alignof_decaf_448_precomputed_s, sizeof_decaf_448_precomputed_wnafs); + ret = posix_memalign((void**)&preWnaf, API_NS2(alignof,precomputed_s), API_NS2(sizeof,precomputed_wnafs)); if (ret || !preWnaf) return 1; - decaf_448_precompute_wnafs(preWnaf, decaf_448_point_base); + API_NS(precompute_wnafs)(preWnaf, API_NS(point_base)); const decaf_word_t *output = (const decaf_word_t *)pre; unsigned i; printf("/** @warning: this file was automatically generated. */\n"); printf("#include \"decaf.h\"\n\n"); - printf("const decaf_word_t decaf_448_precomputed_base_as_words[%d]\n", - (int)(sizeof_decaf_448_precomputed_s / sizeof(decaf_word_t))); - printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)alignof_decaf_448_precomputed_s); + printf("#define API_NS(_id) decaf_448_##_id\n"); + printf("#define API_NS2(_pref,_id) _pref##_decaf_448_##_id\n"); + printf("const decaf_word_t API_NS(precomputed_base_as_words)[%d]\n", + (int)(API_NS2(sizeof,precomputed_s) / sizeof(decaf_word_t))); + printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s)); - for (i=0; i < sizeof_decaf_448_precomputed_s; i+=sizeof(decaf_word_t)) { + for (i=0; i < API_NS2(sizeof,precomputed_s); i+=sizeof(decaf_word_t)) { if (i && (i%8==0)) printf(",\n "); else if (i) printf(", "); printf("0x%0*llxull", (int)sizeof(decaf_word_t)*2, (unsigned long long)*output ); @@ -69,10 +74,10 @@ int main(int argc, char **argv) { printf("\n};\n"); output = (const decaf_word_t *)preWnaf; - printf("const decaf_word_t decaf_448_precomputed_wnaf_as_words[%d]\n", - (int)(sizeof_decaf_448_precomputed_wnafs / sizeof(decaf_word_t))); - printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)alignof_decaf_448_precomputed_s); - for (i=0; i < sizeof_decaf_448_precomputed_wnafs; i+=sizeof(decaf_word_t)) { + printf("const decaf_word_t API_NS(precomputed_wnaf_as_words)[%d]\n", + (int)(API_NS2(sizeof,precomputed_wnafs) / sizeof(decaf_word_t))); + printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s)); + for (i=0; i < API_NS2(sizeof,precomputed_wnafs); i+=sizeof(decaf_word_t)) { if (i && (i%8==0)) printf(",\n "); else if (i) printf(", "); printf("0x%0*llxull", (int)sizeof(decaf_word_t)*2, (unsigned long long)*output ); @@ -80,22 +85,22 @@ int main(int argc, char **argv) { } printf("\n};\n"); - decaf_448_scalar_t smadj; - decaf_448_scalar_copy(smadj,decaf_448_scalar_one); + API_NS(scalar_t) smadj; + API_NS(scalar_copy)(smadj,API_NS(scalar_one)); for (i=0; i shake; shake.update(hashed_password); @@ -212,7 +219,7 @@ static void spake2ee(const Block &hashed_password, SpongeRng &rng, bool aug) { client.send_plaintext(gx); server.recv_plaintext(gx); - Scalar y(rng); + Scalar y(serverRng); SecureBuffer gy(Precomputed::base() * y + hs); server.send_plaintext(gy); client.recv_plaintext(gy); @@ -332,27 +339,28 @@ int main(int argc, char **argv) { } printf("\nProtocol benchmarks:\n"); - SpongeRng rng(Block("my rng seed")); + SpongeRng clientRng(Block("client rng seed")); + SpongeRng serverRng(Block("server rng seed")); SecureBuffer hashedPassword("hello world"); for (Benchmark b("Spake2ee c+s",0.1); b.iter(); ) { - spake2ee(hashedPassword,rng,false); + spake2ee(clientRng, serverRng, hashedPassword,false); } for (Benchmark b("Spake2ee c+s aug",0.1); b.iter(); ) { - spake2ee(hashedPassword,rng,true); + spake2ee(clientRng, serverRng, hashedPassword,true); } - Scalar x(rng); + Scalar x(clientRng); SecureBuffer gx(Precomputed::base() * x); - Scalar y(rng); + Scalar y(serverRng); SecureBuffer gy(Precomputed::base() * y); for (Benchmark b("FHMQV c+s",0.1); b.iter(); ) { - fhmqv(rng,x,gx,y,gy); + fhmqv(clientRng, serverRng,x,gx,y,gy); } for (Benchmark b("TripleDH anon c+s",0.1); b.iter(); ) { - tdh(rng,x,gx,y,gy); + tdh(clientRng, serverRng, x,gx,y,gy); } printf("\n"); diff --git a/test/test_decaf.cxx b/test/test_decaf.cxx index 0e32a56..9cae71c 100644 --- a/test/test_decaf.cxx +++ b/test/test_decaf.cxx @@ -13,33 +13,9 @@ #include "shake.hxx" #include -typedef decaf::decaf<448>::Scalar Scalar; -typedef decaf::decaf<448>::Point Point; -typedef decaf::decaf<448>::Precomputed Precomputed; - -static const long NTESTS = 10000; - -static void print(const char *name, const Scalar &x) { - unsigned char buffer[DECAF_448_SCALAR_BYTES]; - x.encode(buffer); - printf(" %s = 0x", name); - for (int i=sizeof(buffer)-1; i>=0; i--) { - printf("%02x", buffer[i]); - } - printf("\n"); -} - -static void print(const char *name, const Point &x) { - unsigned char buffer[DECAF_448_SER_BYTES]; - x.encode(buffer); - printf(" %s = 0x", name); - for (int i=sizeof(buffer)-1; i>=0; i--) { - printf("%02x", buffer[i]); - } - printf("\n"); -} static bool passing = true; +static const long NTESTS = 10000; class Test { public: @@ -64,6 +40,32 @@ public: } }; +template struct Tests { + +typedef typename decaf::decaf::Scalar Scalar; +typedef typename decaf::decaf::Point Point; +typedef typename decaf::decaf::Precomputed Precomputed; + +static void print(const char *name, const Scalar &x) { + unsigned char buffer[DECAF_448_SCALAR_BYTES]; + x.encode(buffer); + printf(" %s = 0x", name); + for (int i=sizeof(buffer)-1; i>=0; i--) { + printf("%02x", buffer[i]); + } + printf("\n"); +} + +static void print(const char *name, const Point &x) { + unsigned char buffer[DECAF_448_SER_BYTES]; + x.encode(buffer); + printf(" %s = 0x", name); + for (int i=sizeof(buffer)-1; i>=0; i--) { + printf("%02x", buffer[i]); + } + printf("\n"); +} + static bool arith_check( Test &test, const Scalar &x, @@ -191,11 +193,13 @@ static void test_ec() { } } +}; // template + int main(int argc, char **argv) { (void) argc; (void) argv; - test_arithmetic(); - test_ec(); + Tests<448>::test_arithmetic(); + Tests<448>::test_ec(); if (passing) printf("Passed all tests.\n");