diff --git a/include/decaf.h b/include/decaf.h index 3e73787..1e5ab4c 100644 --- a/include/decaf.h +++ b/include/decaf.h @@ -23,12 +23,17 @@ #include /* Goldilocks' build flags default to hidden and stripping executables. */ +/** @cond internal */ +#if defined(DOXYGEN) && !defined(__attribute__) +#define __attribute__((x)) +#endif #define API_VIS __attribute__((visibility("default"))) #define WARN_UNUSED __attribute__((warn_unused_result)) #define NONNULL1 __attribute__((nonnull(1))) #define NONNULL2 __attribute__((nonnull(1,2))) #define NONNULL3 __attribute__((nonnull(1,2,3))) #define NONNULL5 __attribute__((nonnull(1,2,3,4,5))) +/** @endcond */ /** Types of internal words. TODO: ARCH: make 32-bit clean */ typedef uint64_t decaf_word_t, decaf_bool_t; @@ -37,8 +42,9 @@ typedef uint64_t decaf_word_t, decaf_bool_t; /* TODO: perfield, so when 25519 hits this will change */ #define DECAF_FIELD_BITS 448 -#define DECAF_LIMBS (1 + (512-1)/8/sizeof(decaf_word_t)) -#define DECAF_SCALAR_LIMBS (1 + (448-3)/8/sizeof(decaf_word_t)) +#define DECAF_LIMBS 8 +#define DECAF_SCALAR_BITS 446 +#define DECAF_SCALAR_LIMBS (1 + (DECAF_SCALAR_BITS-1)/8/sizeof(decaf_word_t)) /** Number of bytes in a serialized point. One less bit than you'd think. */ #define DECAF_SER_BYTES ((DECAF_FIELD_BITS+6)/8) diff --git a/src/decaf.c b/src/decaf.c index 8dd7d87..5083d89 100644 --- a/src/decaf.c +++ b/src/decaf.c @@ -6,34 +6,95 @@ * @file decaf.c * @author Mike Hamburg * @brief Decaf high-level functions. - */ + */ #include "decaf.h" -typedef __uint128_t decaf_dword_t; -typedef __int128_t decaf_sdword_t; +/* TODO arch */ #define WBITS 64 + +#if WBITS == 64 #define LBITS 56 +typedef __uint128_t decaf_dword_t; +typedef __int128_t decaf_sdword_t; +#define LIMB(x) (x##ull) +#define SC_LIMB(x) (x##ull) +#elif WBITS == 32 +typedef uint64_t decaf_dword_t; +typedef int64_t decaf_sdword_t; +#define LBITS 28 +#define LIMB(x) (x##ull)&((1ull<>LBITS +#define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32 +#else +#error "Only supporting 32- and 64-bit platforms right now" +#endif -#define sv static void -#define NLIMBS 8 +static const int QUADRATIC_NONRESIDUE = -1; -typedef decaf_word_t gf[NLIMBS]; +#define sv static void +typedef decaf_word_t gf[DECAF_LIMBS]; static const gf ZERO = {0}, ONE = {1}, TWO = {2}; -#define LMASK ((1ull<> LBITS; - accum[NLIMBS-2] &= LMASK; - accum[NLIMBS/2] += accum[NLIMBS-1] >> LBITS; + accum[DECAF_LIMBS-1] += accum[DECAF_LIMBS-2] >> LBITS; + accum[DECAF_LIMBS-2] &= LMASK; + accum[DECAF_LIMBS/2] += accum[DECAF_LIMBS-1] >> LBITS; FOR_LIMB(j,{ - accum[j] += accum[(j-1)%NLIMBS] >> LBITS; - accum[(j-1)%NLIMBS] &= LMASK; + accum[j] += accum[(j-1)%DECAF_LIMBS] >> LBITS; + accum[(j-1)%DECAF_LIMBS] &= LMASK; }); FOR_LIMB(j, c[j] = accum[j] ); } @@ -83,10 +144,10 @@ sv gf_isqrt(gf y, const gf x) { /** Weak reduce mod p. */ sv gf_reduce(gf x) { - x[NLIMBS/2] += x[NLIMBS-1] >> LBITS; + x[DECAF_LIMBS/2] += x[DECAF_LIMBS-1] >> LBITS; FOR_LIMB(j,{ - x[j] += x[(j-1)%NLIMBS] >> LBITS; - x[(j-1)%NLIMBS] &= LMASK; + x[j] += x[(j-1)%DECAF_LIMBS] >> LBITS; + x[(j-1)%DECAF_LIMBS] &= LMASK; }); } @@ -220,30 +281,6 @@ sv decaf_subx( } } -const decaf_scalar_t decaf_scalar_p = {{{ - 0x2378c292ab5844f3ull, - 0x216cc2728dc58f55ull, - 0xc44edb49aed63690ull, - 0xffffffff7cca23e9ull, - 0xffffffffffffffffull, - 0xffffffffffffffffull, - 0x3fffffffffffffffull - // TODO 32-bit clean -}}}, decaf_scalar_one = {{{1}}}, decaf_scalar_zero = {{{0}}}; - -static const decaf_scalar_t decaf_scalar_r2 = {{{ - 0xe3539257049b9b60ull, - 0x7af32c4bc1b195d9ull, - 0x0d66de2388ea1859ull, - 0xae17cf725ee4d838ull, - 0x1a9cc14ba3c47c44ull, - 0x2052bcb7e4d070afull, - 0x3402a939f823b729ull - // TODO 32-bit clean -}}}; - -static const decaf_word_t DECAF_MONTGOMERY_FACTOR = 0x3bd440fae918bc5ull; - sv decaf_montmul ( decaf_scalar_t out, const decaf_scalar_t a, @@ -343,17 +380,6 @@ decaf_bool_t decaf_scalar_eq ( /** identity = (0,1) */ const decaf_point_t decaf_point_identity = {{{0},{1},{1},{0}}}; -/** base = twist of Goldilocks base point (~,19). FIXME: ARCH */ -const decaf_point_t decaf_point_base = {{ - { 0xb39a2d57e08c7bull,0xb38639c75ff281ull,0x2ec981082b3288ull,0x99fe8607e5237cull, - 0x0e33fbb1fadd1full,0xe714f67055eb4aull,0xc9ae06d64067ddull,0xf7be45054760faull }, - { 0xbd8715f551617full,0x8c17fbeca8f5fcull,0xaae0eec209c06full,0xce41ad80cbe6b8ull, - 0xdf360b5c828c00ull,0xaf25b6bbb40e3bull,0x8ed37f0ce4ed31ull,0x72a1c3214557b9ull }, - { 1 }, - { 0x97ca9c8ed8bde9ull,0xf0b780da83304cull,0x0d79c0a7729a69ull,0xc18d3f24aebc1cull, - 0x1fbb5389b3fda5ull,0xbb24f674635948ull,0x723a55709a3983ull,0xe1c0107a823dd4ull } -}}; - void decaf_point_encode( unsigned char ser[DECAF_SER_BYTES], const decaf_point_t p ) { gf a, b, c, d; gf_mlw ( a, p->y, 1-EDWARDS_D ); @@ -375,30 +401,29 @@ void decaf_point_encode( unsigned char ser[DECAF_SER_BYTES], const decaf_point_t gf_add ( a, a, c ); cond_neg ( a, hibit(a) ); - // FIXME arch gf_canon(a); - int j; - FOR_LIMB(i,{ - for (j=0; j<7; j++) { - ser[7*i+j] = a[i]; - a[i] >>= 8; + int i, k=0, bits=0; + decaf_dword_t buf=0; + for (i=0; i=8 || i==DECAF_LIMBS-1) && k>=8) { + ser[k++]=buf; } - }); + } } /** * Deserialize a bool, return TRUE if < p. */ static decaf_bool_t gf_deser(gf s, const unsigned char ser[DECAF_SER_BYTES]) { - // FIXME arch - int j; - FOR_LIMB(i, { - decaf_word_t out = 0; - for (j=0; j<7; j++) { - out |= ((decaf_word_t)ser[7*i+j])<<(8*j); + unsigned int i, k=0, bits=0; + decaf_dword_t buf=0; + for (i=0; i=LBITS || i==DECAF_SER_BYTES-1) && k>=LBITS) { + s[k++] = buf & LMASK; } - s[i] = out; - }); + } decaf_sdword_t accum = 0; FOR_LIMB(i, accum = (accum + s[i] - P[i]) >> WBITS ); @@ -535,15 +560,13 @@ void decaf_point_scalarmul ( ) { /* w=2 signed window uses about 1.5 adds per bit. * I figured a few extra lines was worth the 25% speedup. - * NB: if adapting this function to scalarmul by a - * possibly-odd number of unmasked bits, may need to mask. */ decaf_point_t w,b3,tmp; decaf_point_double(w,b); /* b3 = b*3 */ decaf_point_add(b3,w,b); int i; - for (i=DECAF_SCALAR_LIMBS*WBITS-2; i>0; i-=2) { + for (i=DECAF_SCALAR_BITS &~ 1; i>0; i-=2) { decaf_word_t bits = scalar->limb[i/WBITS]>>(i%WBITS); decaf_cond_sel(tmp,b,b3,((bits^(bits>>1))&1)-1); decaf_point_double(w,w); @@ -576,7 +599,7 @@ void decaf_point_double_scalarmul ( decaf_point_add(c3,tmp,c); decaf_point_add(w,w,tmp); int i; - for (i=DECAF_SCALAR_LIMBS*WBITS-2; i>0; i-=2) { + for (i=DECAF_SCALAR_BITS &~ 1; i>0; i-=2) { decaf_point_double(w,w); decaf_word_t bits = scalarb->limb[i/WBITS]>>(i%WBITS); decaf_cond_sel(tmp,b,b3,((bits^(bits>>1))&1)-1); @@ -603,8 +626,6 @@ decaf_bool_t decaf_point_eq ( const decaf_point_t p, const decaf_point_t q ) { return gf_eq(a,b); } -static const int QUADRATIC_NONRESIDUE = -1; - void decaf_point_from_hash_nonuniform ( decaf_point_t p, const unsigned char ser[DECAF_SER_BYTES]