@@ -70,7 +70,8 @@ LIBCOMPONENTS= build/goldilocks.o build/barrett_field.o build/crandom.o \ | |||||
build/$(FIELD).o build/ec_point.o build/scalarmul.o build/sha512.o build/magic.o \ | build/$(FIELD).o build/ec_point.o build/scalarmul.o build/sha512.o build/magic.o \ | ||||
build/f_arithmetic.o build/arithmetic.o | build/f_arithmetic.o build/arithmetic.o | ||||
# DECAFCOMPONENTS object list. The two-line form appends build/$(FIELD).o and
# build/f_arithmetic.o, which are also listed in LIBCOMPONENTS; the source marks
# that addition as TODO.
DECAFCOMPONENTS= build/$(DECAF).o build/shake.o build/decaf_crypto.o build/decaf_tables.o | |||||
DECAFCOMPONENTS= build/$(DECAF).o build/shake.o build/decaf_crypto.o build/decaf_tables.o \ | |||||
build/$(FIELD).o build/f_arithmetic.o # TODO | |||||
TESTCOMPONENTS=build/test.o build/test_scalarmul.o build/test_sha512.o \ | TESTCOMPONENTS=build/test.o build/test_scalarmul.o build/test_sha512.o \ | ||||
build/test_pointops.o build/test_arithmetic.o build/test_goldilocks.o build/magic.o \ | build/test_pointops.o build/test_arithmetic.o build/test_goldilocks.o build/magic.o \ | ||||
@@ -60,10 +60,13 @@ typedef uint32_t decaf_word_t, decaf_bool_t; | |||||
/** Number of bytes in a serialized scalar. */ | /** Number of bytes in a serialized scalar. */ | ||||
#define DECAF_448_SCALAR_BYTES 56 | #define DECAF_448_SCALAR_BYTES 56 | ||||
/** Galois field element internal structure */ | |||||
/* Wraps DECAF_448_LIMBS words in a struct aligned to 32 bytes. `gf` is a
 * one-element array of gf_s, so a `gf` function parameter is a pointer to
 * gf_s and its limbs are reached as x->limb[i]. */
typedef struct gf_s { | |||||
decaf_word_t limb[DECAF_448_LIMBS]; | |||||
} __attribute__((aligned(32))) gf_s, gf[1]; | |||||
/** Twisted Edwards (-1,d-1) extended homogeneous coordinates */ | /** Twisted Edwards (-1,d-1) extended homogeneous coordinates */ | ||||
/* Four coordinates x, y, z, t. The multi-line form stores each as a raw array
 * of DECAF_448_LIMBS words with 32-byte alignment on the struct; the one-line
 * form stores each as a `gf`. In both, decaf_448_point_t is a one-element
 * array of the struct. */
typedef struct decaf_448_point_s { | |||||
decaf_word_t x[DECAF_448_LIMBS],y[DECAF_448_LIMBS],z[DECAF_448_LIMBS],t[DECAF_448_LIMBS]; | |||||
} __attribute__((aligned(32))) decaf_448_point_t[1]; | |||||
typedef struct decaf_448_point_s { gf x,y,z,t; } decaf_448_point_t[1]; | |||||
/** Precomputed table based on a point. Can be trivial implementation. */ | /** Precomputed table based on a point. Can be trivial implementation. */ | ||||
struct decaf_448_precomputed_s; | struct decaf_448_precomputed_s; | ||||
@@ -33,15 +33,16 @@ typedef int64_t decaf_sdword_t; | |||||
static const int QUADRATIC_NONRESIDUE = -1; | static const int QUADRATIC_NONRESIDUE = -1; | ||||
/* Function-prefix shorthands: sv = static void; snv = static void, noinline;
 * siv = static inline void, always_inline. */
#define sv static void | #define sv static void | ||||
typedef decaf_word_t gf[DECAF_448_LIMBS]; | |||||
static const gf ZERO = {0}, ONE = {1}, TWO = {2}; | |||||
#define snv static void __attribute__((noinline)) | |||||
#define siv static inline void __attribute__((always_inline)) | |||||
/* Field constants 0, 1 and 2: only the first limb is given, the remaining
 * limbs are zero-initialized. */
static const gf ZERO = {{{0}}}, ONE = {{{1}}}, TWO = {{{2}}}; | |||||
/* Mask selecting the low LBITS bits of a word. */
#define LMASK ((((decaf_word_t)1)<<LBITS)-1) | #define LMASK ((((decaf_word_t)1)<<LBITS)-1) | ||||
/* P, limb by limb: every listed limb is LMASK except the middle one (index 4
 * of 8 when WBITS == 64, index 8 of 16 otherwise), which is LMASK-1. */
#if WBITS == 64 | #if WBITS == 64 | ||||
static const gf P = { LMASK, LMASK, LMASK, LMASK, LMASK-1, LMASK, LMASK, LMASK }; | |||||
static const gf P = {{{ LMASK, LMASK, LMASK, LMASK, LMASK-1, LMASK, LMASK, LMASK }}}; | |||||
#else | #else | ||||
static const gf P = { LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, | |||||
LMASK-1, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK }; | |||||
static const gf P = {{{ LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, | |||||
LMASK-1, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK }}}; | |||||
#endif | #endif | ||||
static const int EDWARDS_D = -39081; | static const int EDWARDS_D = -39081; | ||||
@@ -70,24 +71,22 @@ static const decaf_word_t DECAF_MONTGOMERY_FACTOR = (decaf_word_t)(0x3bd440fae91 | |||||
/** base = twist of Goldilocks base point (~,19). */ | /** base = twist of Goldilocks base point (~,19). */ | ||||
/* The four initializer groups are given in the order x, y, z, t; the third
 * group (z) sets only its first limb, to 1. In the triple-brace form the
 * three brace levels are: gf (array of one) -> gf_s -> limb[]. LIMB() is
 * defined elsewhere. */
const decaf_448_point_t decaf_448_point_base = {{ | const decaf_448_point_t decaf_448_point_base = {{ | ||||
{ LIMB(0xb39a2d57e08c7b),LIMB(0xb38639c75ff281), | |||||
LIMB(0x2ec981082b3288),LIMB(0x99fe8607e5237c), | |||||
LIMB(0x0e33fbb1fadd1f),LIMB(0xe714f67055eb4a), | |||||
LIMB(0xc9ae06d64067dd),LIMB(0xf7be45054760fa) }, | |||||
{ LIMB(0xbd8715f551617f),LIMB(0x8c17fbeca8f5fc), | |||||
LIMB(0xaae0eec209c06f),LIMB(0xce41ad80cbe6b8), | |||||
LIMB(0xdf360b5c828c00),LIMB(0xaf25b6bbb40e3b), | |||||
LIMB(0x8ed37f0ce4ed31),LIMB(0x72a1c3214557b9) }, | |||||
{ 1 }, | |||||
{ LIMB(0x97ca9c8ed8bde9),LIMB(0xf0b780da83304c), | |||||
LIMB(0x0d79c0a7729a69),LIMB(0xc18d3f24aebc1c), | |||||
LIMB(0x1fbb5389b3fda5),LIMB(0xbb24f674635948), | |||||
LIMB(0x723a55709a3983),LIMB(0xe1c0107a823dd4) } | |||||
{{{ LIMB(0xb39a2d57e08c7b),LIMB(0xb38639c75ff281), | |||||
LIMB(0x2ec981082b3288),LIMB(0x99fe8607e5237c), | |||||
LIMB(0x0e33fbb1fadd1f),LIMB(0xe714f67055eb4a), | |||||
LIMB(0xc9ae06d64067dd),LIMB(0xf7be45054760fa) }}}, | |||||
{{{ LIMB(0xbd8715f551617f),LIMB(0x8c17fbeca8f5fc), | |||||
LIMB(0xaae0eec209c06f),LIMB(0xce41ad80cbe6b8), | |||||
LIMB(0xdf360b5c828c00),LIMB(0xaf25b6bbb40e3b), | |||||
LIMB(0x8ed37f0ce4ed31),LIMB(0x72a1c3214557b9) }}}, | |||||
{{{ 1 }}}, | |||||
{{{ LIMB(0x97ca9c8ed8bde9),LIMB(0xf0b780da83304c), | |||||
LIMB(0x0d79c0a7729a69),LIMB(0xc18d3f24aebc1c), | |||||
LIMB(0x1fbb5389b3fda5),LIMB(0xbb24f674635948), | |||||
LIMB(0x723a55709a3983),LIMB(0xe1c0107a823dd4) }}} | |||||
}}; | }}; | ||||
/* Precomputed-table type as defined here: it holds a single
 * decaf_448_point_t (array p of length 1). */
struct decaf_448_precomputed_s { | |||||
decaf_448_point_t p[1]; | |||||
}; | |||||
struct decaf_448_precomputed_s { decaf_448_point_t p[1]; }; | |||||
/* FIXME: restore */ | /* FIXME: restore */ | ||||
// const struct decaf_448_precomputed_s *decaf_448_precomputed_base = | // const struct decaf_448_precomputed_s *decaf_448_precomputed_base = | ||||
@@ -118,17 +117,17 @@ const size_t alignof_decaf_448_precomputed_s = 32; | |||||
#endif | #endif | ||||
/** Copy x = y */ | /** Copy x = y */ | ||||
/* x is the destination, y the source; the assignment is written per limb
 * index i inside FOR_LIMB (macro defined elsewhere; presumably it runs the
 * statement once for each limb index -- confirm). */
sv gf_cpy(gf x, const gf y) { FOR_LIMB(i, x[i] = y[i]); } | |||||
siv gf_cpy(gf x, const gf y) { FOR_LIMB(i, x->limb[i] = y->limb[i]); } | |||||
/** Mostly-unoptimized multiply (PERF), but at least it's unrolled. */ | /** Mostly-unoptimized multiply (PERF), but at least it's unrolled. */ | ||||
sv gf_mul (gf c, const gf a, const gf b) { | |||||
snv gf_mul (gf c, const gf a, const gf b) { | |||||
gf aa; | gf aa; | ||||
gf_cpy(aa,a); | gf_cpy(aa,a); | ||||
decaf_dword_t accum[DECAF_448_LIMBS] = {0}; | decaf_dword_t accum[DECAF_448_LIMBS] = {0}; | ||||
FOR_LIMB(i, { | FOR_LIMB(i, { | ||||
FOR_LIMB(j,{ accum[(i+j)%DECAF_448_LIMBS] += (decaf_dword_t)b[i] * aa[j]; }); | |||||
aa[(DECAF_448_LIMBS-1-i)^(DECAF_448_LIMBS/2)] += aa[DECAF_448_LIMBS-1-i]; | |||||
FOR_LIMB(j,{ accum[(i+j)%DECAF_448_LIMBS] += (decaf_dword_t)b->limb[i] * aa->limb[j]; }); | |||||
aa->limb[(DECAF_448_LIMBS-1-i)^(DECAF_448_LIMBS/2)] += aa->limb[DECAF_448_LIMBS-1-i]; | |||||
}); | }); | ||||
accum[DECAF_448_LIMBS-1] += accum[DECAF_448_LIMBS-2] >> LBITS; | accum[DECAF_448_LIMBS-1] += accum[DECAF_448_LIMBS-2] >> LBITS; | ||||
@@ -138,14 +137,14 @@ sv gf_mul (gf c, const gf a, const gf b) { | |||||
accum[j] += accum[(j-1)%DECAF_448_LIMBS] >> LBITS; | accum[j] += accum[(j-1)%DECAF_448_LIMBS] >> LBITS; | ||||
accum[(j-1)%DECAF_448_LIMBS] &= LMASK; | accum[(j-1)%DECAF_448_LIMBS] &= LMASK; | ||||
}); | }); | ||||
FOR_LIMB(j, c[j] = accum[j] ); | |||||
FOR_LIMB(j, c->limb[j] = accum[j] ); | |||||
} | } | ||||
/** No dedicated square (PERF) */ | /** No dedicated square (PERF) */ | ||||
/* Expands to gf_mul with the same operand passed as both factors, so the
 * macro argument `a` appears twice in the expansion. */
#define gf_sqr(c,a) gf_mul(c,a,a) | #define gf_sqr(c,a) gf_mul(c,a,a) | ||||
/** Inverse square root using addition chain. */ | /** Inverse square root using addition chain. */ | ||||
sv gf_isqrt(gf y, const gf x) { | |||||
snv gf_isqrt(gf y, const gf x) { | |||||
int i; | int i; | ||||
#define STEP(s,m,n) gf_mul(s,m,c); gf_cpy(c,s); for (i=0;i<n;i++) gf_sqr(c,c); | #define STEP(s,m,n) gf_mul(s,m,c); gf_cpy(c,s); for (i=0;i<n;i++) gf_sqr(c,c); | ||||
gf a, b, c; | gf a, b, c; | ||||
@@ -165,44 +164,44 @@ sv gf_isqrt(gf y, const gf x) { | |||||
} | } | ||||
/** Weak reduce mod p. */ | /** Weak reduce mod p. */ | ||||
/* Operates on x in place. First the bits of the top limb above LBITS are
 * added into limb DECAF_448_LIMBS/2. Then, for each limb index j, the bits
 * above LBITS of limb (j-1)%DECAF_448_LIMBS are added into limb j and that
 * previous limb is masked down to LBITS bits.
 * NOTE(review): for j == 0 the index (j-1)%DECAF_448_LIMBS presumably wraps
 * to the top limb; that depends on the index type FOR_LIMB declares --
 * confirm. */
sv gf_reduce(gf x) { | |||||
x[DECAF_448_LIMBS/2] += x[DECAF_448_LIMBS-1] >> LBITS; | |||||
siv gf_reduce(gf x) { | |||||
x->limb[DECAF_448_LIMBS/2] += x->limb[DECAF_448_LIMBS-1] >> LBITS; | |||||
FOR_LIMB(j,{ | FOR_LIMB(j,{ | ||||
x[j] += x[(j-1)%DECAF_448_LIMBS] >> LBITS; | |||||
x[(j-1)%DECAF_448_LIMBS] &= LMASK; | |||||
x->limb[j] += x->limb[(j-1)%DECAF_448_LIMBS] >> LBITS; | |||||
x->limb[(j-1)%DECAF_448_LIMBS] &= LMASK; | |||||
}); | }); | ||||
} | } | ||||
/** Add mod p. Conservatively always weak-reduce. (PERF) */ | /** Add mod p. Conservatively always weak-reduce. (PERF) */ | ||||
/* x receives the limb-wise sums y + z with no carry handling in the sum
 * itself; gf_reduce(x) is then applied to the result. y and z may be the same
 * object (hibit calls gf_add(y,x,x)). */
sv gf_add ( gf x, const gf y, const gf z ) { | sv gf_add ( gf x, const gf y, const gf z ) { | ||||
FOR_LIMB(i, x[i] = y[i] + z[i] ); | |||||
FOR_LIMB(i, x->limb[i] = y->limb[i] + z->limb[i] ); | |||||
gf_reduce(x); | gf_reduce(x); | ||||
} | } | ||||
/** Subtract mod p. Conservatively always weak-reduce. (PERF) */ | /** Subtract mod p. Conservatively always weak-reduce. (PERF) */ | ||||
/* x receives y - z + 2*P limb by limb, then gf_reduce(x) is applied.
 * NOTE(review): the 2*P term presumably keeps each limb difference from
 * going negative -- confirm against the expected limb ranges.
 * The output may alias an input: gf_mlw calls gf_sub(a,ZERO,a). */
sv gf_sub ( gf x, const gf y, const gf z ) { | sv gf_sub ( gf x, const gf y, const gf z ) { | ||||
FOR_LIMB(i, x[i] = y[i] - z[i] + 2*P[i] ); | |||||
FOR_LIMB(i, x->limb[i] = y->limb[i] - z->limb[i] + 2*P->limb[i] ); | |||||
gf_reduce(x); | gf_reduce(x); | ||||
} | } | ||||
/** Constant time, x = is_z ? z : y */ | /** Constant time, x = is_z ? z : y */ | ||||
/* is_z is applied as a bit mask to every limb: bits set in is_z come from z,
 * bits clear in is_z come from y. A whole-element select therefore assumes
 * is_z is all-ones or all-zeros (presumably the decaf_bool_t convention --
 * confirm). x may alias y: cond_neg calls cond_sel(x,x,y,neg). */
sv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { | sv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { | ||||
FOR_LIMB(i, x[i] = (y[i] & ~is_z) | (z[i] & is_z) ); | |||||
FOR_LIMB(i, x->limb[i] = (y->limb[i] & ~is_z) | (z->limb[i] & is_z) ); | |||||
} | } | ||||
/** Constant time, if (neg) x=-x; */ | /** Constant time, if (neg) x=-x; */ | ||||
/* Always computes y = ZERO - x with gf_sub, then uses cond_sel to write
 * either the original x or y back into x according to the mask neg; there is
 * no branch on neg. */
sv cond_neg(gf x, decaf_bool_t neg) { | |||||
siv cond_neg(gf x, decaf_bool_t neg) { | |||||
gf y; | gf y; | ||||
gf_sub(y,ZERO,x); | gf_sub(y,ZERO,x); | ||||
cond_sel(x,x,y,neg); | cond_sel(x,x,y,neg); | ||||
} | } | ||||
/** Constant time, if (swap) (x,y) = (y,x); */ | /** Constant time, if (swap) (x,y) = (y,x); */ | ||||
/* Masked XOR swap: for each limb, s is (x ^ y) masked by swap, and s is
 * XORed into both limbs. With swap all-ones the limbs are exchanged; with
 * swap zero, s is zero and both are left unchanged. The second signature
 * declares y as a __restrict__-qualified gf_s pointer. */
sv cond_swap(gf x, gf y, decaf_bool_t swap) { | |||||
sv cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) { | |||||
FOR_LIMB(i, { | FOR_LIMB(i, { | ||||
decaf_word_t s = (x[i] ^ y[i]) & swap; | |||||
x[i] ^= s; | |||||
y[i] ^= s; | |||||
decaf_word_t s = (x->limb[i] ^ y->limb[i]) & swap; | |||||
x->limb[i] ^= s; | |||||
y->limb[i] ^= s; | |||||
}); | }); | ||||
} | } | ||||
@@ -210,26 +209,26 @@ sv cond_swap(gf x, gf y, decaf_bool_t swap) { | |||||
* Mul by signed int. Not constant-time WRT the sign of that int. | * Mul by signed int. Not constant-time WRT the sign of that int. | ||||
* Just uses a full mul (PERF) | * Just uses a full mul (PERF) | ||||
*/ | */ | ||||
/* a = b * w. A temporary field element ww is built with |w| in its first limb
 * (other limbs zero) and multiplied with gf_mul. For w <= 0 the product is
 * then negated with gf_sub(a,ZERO,a); w == 0 takes this second branch. The
 * explicit branch on w is the reason the sign is not handled in constant
 * time. */
sv gf_mlw(gf a, const gf b, int w) { | |||||
siv gf_mlw(gf a, const gf b, int w) { | |||||
if (w>0) { | if (w>0) { | ||||
gf ww = {w}; | |||||
gf ww = {{{w}}}; | |||||
gf_mul(a,b,ww); | gf_mul(a,b,ww); | ||||
} else { | } else { | ||||
gf ww = {-w}; | |||||
gf ww = {{{-w}}}; | |||||
gf_mul(a,b,ww); | gf_mul(a,b,ww); | ||||
gf_sub(a,ZERO,a); | gf_sub(a,ZERO,a); | ||||
} | } | ||||
} | } | ||||
/** Canonicalize */ | /** Canonicalize */ | ||||
sv gf_canon ( gf a ) { | |||||
snv gf_canon ( gf a ) { | |||||
gf_reduce(a); | gf_reduce(a); | ||||
/* subtract p with borrow */ | /* subtract p with borrow */ | ||||
decaf_sdword_t carry = 0; | decaf_sdword_t carry = 0; | ||||
FOR_LIMB(i, { | FOR_LIMB(i, { | ||||
carry = carry + a[i] - P[i]; | |||||
a[i] = carry & LMASK; | |||||
carry = carry + a->limb[i] - P->limb[i]; | |||||
a->limb[i] = carry & LMASK; | |||||
carry >>= LBITS; | carry >>= LBITS; | ||||
}); | }); | ||||
@@ -238,8 +237,8 @@ sv gf_canon ( gf a ) { | |||||
/* add it back */ | /* add it back */ | ||||
FOR_LIMB(i, { | FOR_LIMB(i, { | ||||
carry = carry + a[i] + (P[i] & addback); | |||||
a[i] = carry & LMASK; | |||||
carry = carry + a->limb[i] + (P->limb[i] & addback); | |||||
a->limb[i] = carry & LMASK; | |||||
carry >>= LBITS; | carry >>= LBITS; | ||||
}); | }); | ||||
} | } | ||||
@@ -250,7 +249,7 @@ static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) { | |||||
gf_sub(c,a,b); | gf_sub(c,a,b); | ||||
gf_canon(c); | gf_canon(c); | ||||
decaf_word_t ret=0; | decaf_word_t ret=0; | ||||
FOR_LIMB(i, ret |= c[i] ); | |||||
FOR_LIMB(i, ret |= c->limb[i] ); | |||||
/* Hope the compiler is too dumb to optimize this, thus noinline */ | /* Hope the compiler is too dumb to optimize this, thus noinline */ | ||||
return ((decaf_dword_t)ret - 1) >> WBITS; | return ((decaf_dword_t)ret - 1) >> WBITS; | ||||
} | } | ||||
@@ -260,7 +259,7 @@ static decaf_word_t hibit(const gf x) { | |||||
gf y; | gf y; | ||||
gf_add(y,x,x); | gf_add(y,x,x); | ||||
gf_canon(y); | gf_canon(y); | ||||
return -(y[0]&1); | |||||
return -(y->limb[0]&1); | |||||
} | } | ||||
/* a = use_c ? c : b */ | /* a = use_c ? c : b */ | ||||
@@ -279,7 +278,7 @@ sv decaf_448_cond_sel ( | |||||
/** {extra,accum} - sub +? p | /** {extra,accum} - sub +? p | ||||
* Must have extra <= 1 | * Must have extra <= 1 | ||||
*/ | */ | ||||
sv decaf_448_subx( | |||||
snv decaf_448_subx( | |||||
decaf_448_scalar_t out, | decaf_448_scalar_t out, | ||||
const decaf_word_t accum[DECAF_448_SCALAR_LIMBS], | const decaf_word_t accum[DECAF_448_SCALAR_LIMBS], | ||||
const decaf_448_scalar_t sub, | const decaf_448_scalar_t sub, | ||||
@@ -303,7 +302,7 @@ sv decaf_448_subx( | |||||
} | } | ||||
} | } | ||||
sv decaf_448_montmul ( | |||||
snv decaf_448_montmul ( | |||||
decaf_448_scalar_t out, | decaf_448_scalar_t out, | ||||
const decaf_448_scalar_t a, | const decaf_448_scalar_t a, | ||||
const decaf_448_scalar_t b, | const decaf_448_scalar_t b, | ||||
@@ -400,7 +399,7 @@ decaf_bool_t decaf_448_scalar_eq ( | |||||
/* *** API begins here *** */ | /* *** API begins here *** */ | ||||
/** identity = (0,1) */ | /** identity = (0,1) */ | ||||
/* Initializer groups in the order x, y, z, t: x = 0, y = 1, z = 1, t = 0
 * (only the first limb of each coordinate is given). */
const decaf_448_point_t decaf_448_point_identity = {{{0},{1},{1},{0}}}; | |||||
const decaf_448_point_t decaf_448_point_identity = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}}; | |||||
void decaf_448_point_encode( unsigned char ser[DECAF_448_SER_BYTES], const decaf_448_point_t p ) { | void decaf_448_point_encode( unsigned char ser[DECAF_448_SER_BYTES], const decaf_448_point_t p ) { | ||||
/* Can shave off one mul here; not important but makes consistent with paper */ | /* Can shave off one mul here; not important but makes consistent with paper */ | ||||
@@ -428,7 +427,7 @@ void decaf_448_point_encode( unsigned char ser[DECAF_448_SER_BYTES], const decaf | |||||
int i, k=0, bits=0; | int i, k=0, bits=0; | ||||
decaf_dword_t buf=0; | decaf_dword_t buf=0; | ||||
for (i=0; i<DECAF_448_LIMBS; i++) { | for (i=0; i<DECAF_448_LIMBS; i++) { | ||||
buf |= (decaf_dword_t)a[i]<<bits; | |||||
buf |= (decaf_dword_t)a->limb[i]<<bits; | |||||
for (bits += LBITS; (bits>=8 || i==DECAF_448_LIMBS-1) && k<DECAF_448_SER_BYTES; bits-=8, buf>>=8) { | for (bits += LBITS; (bits>=8 || i==DECAF_448_LIMBS-1) && k<DECAF_448_SER_BYTES; bits-=8, buf>>=8) { | ||||
ser[k++]=buf; | ser[k++]=buf; | ||||
} | } | ||||
@@ -444,17 +443,17 @@ static decaf_bool_t gf_deser(gf s, const unsigned char ser[DECAF_448_SER_BYTES]) | |||||
for (i=0; i<DECAF_448_SER_BYTES; i++) { | for (i=0; i<DECAF_448_SER_BYTES; i++) { | ||||
buf |= (decaf_dword_t)ser[i]<<bits; | buf |= (decaf_dword_t)ser[i]<<bits; | ||||
for (bits += 8; (bits>=LBITS || i==DECAF_448_SER_BYTES-1) && k<DECAF_448_LIMBS; bits-=LBITS, buf>>=LBITS) { | for (bits += 8; (bits>=LBITS || i==DECAF_448_SER_BYTES-1) && k<DECAF_448_LIMBS; bits-=LBITS, buf>>=LBITS) { | ||||
s[k++] = buf & LMASK; | |||||
s->limb[k++] = buf & LMASK; | |||||
} | } | ||||
} | } | ||||
decaf_sdword_t accum = 0; | decaf_sdword_t accum = 0; | ||||
FOR_LIMB(i, accum = (accum + s[i] - P[i]) >> WBITS ); | |||||
FOR_LIMB(i, accum = (accum + s->limb[i] - P->limb[i]) >> WBITS ); | |||||
return accum; | return accum; | ||||
} | } | ||||
/* Constant-time add or subtract */ | /* Constant-time add or subtract */ | ||||
sv decaf_448_point_add_sub ( | |||||
snv decaf_448_point_add_sub ( | |||||
decaf_448_point_t p, | decaf_448_point_t p, | ||||
const decaf_448_point_t q, | const decaf_448_point_t q, | ||||
const decaf_448_point_t r, | const decaf_448_point_t r, | ||||
@@ -512,7 +511,7 @@ decaf_bool_t decaf_448_point_decode ( | |||||
gf_mul ( a, b, c ); | gf_mul ( a, b, c ); | ||||
gf_mul ( p->y,a,p->z ); | gf_mul ( p->y,a,p->z ); | ||||
gf_mul ( p->t,p->x,a ); | gf_mul ( p->t,p->x,a ); | ||||
p->y[0] -= zero; | |||||
p->y->limb[0] -= zero; | |||||
/* TODO: do something safe if ~succ? */ | /* TODO: do something safe if ~succ? */ | ||||
return succ; | return succ; | ||||
} | } | ||||
@@ -40,15 +40,14 @@ static const int QUADRATIC_NONRESIDUE = -1; | |||||
/* Function-prefix shorthands: sv = static void; snv = static void, noinline;
 * siv = static inline void, always_inline. */
#define sv static void | #define sv static void | ||||
#define snv static void __attribute__((noinline)) | #define snv static void __attribute__((noinline)) | ||||
#define siv static inline void __attribute__((always_inline)) | #define siv static inline void __attribute__((always_inline)) | ||||
typedef decaf_word_t gf[DECAF_448_LIMBS] __attribute__((aligned(32))); | |||||
static const gf ZERO = {0}, ONE = {1}, TWO = {2}; | |||||
/* Field constants 0, 1 and 2: only the first limb is given, the remaining
 * limbs are zero-initialized. */
static const gf ZERO = {{{0}}}, ONE = {{{1}}}, TWO = {{{2}}}; | |||||
/* Mask selecting the low LBITS bits of a word. */
#define LMASK ((((decaf_word_t)1)<<LBITS)-1) | #define LMASK ((((decaf_word_t)1)<<LBITS)-1) | ||||
/* P, limb by limb: every listed limb is LMASK except the middle one (index 4
 * of 8 when WBITS == 64, index 8 of 16 otherwise), which is LMASK-1. */
#if WBITS == 64 | #if WBITS == 64 | ||||
static const gf P = { LMASK, LMASK, LMASK, LMASK, LMASK-1, LMASK, LMASK, LMASK }; | |||||
static const gf P = {{{ LMASK, LMASK, LMASK, LMASK, LMASK-1, LMASK, LMASK, LMASK }}}; | |||||
#else | #else | ||||
static const gf P = { LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, | |||||
LMASK-1, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK }; | |||||
static const gf P = {{{ LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, | |||||
LMASK-1, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK }}}; | |||||
#endif | #endif | ||||
static const int EDWARDS_D = -39081; | static const int EDWARDS_D = -39081; | ||||
@@ -77,19 +76,19 @@ static const decaf_word_t DECAF_MONTGOMERY_FACTOR = (decaf_word_t)(0x3bd440fae91 | |||||
/** base = twist of Goldilocks base point (~,19). */ | /** base = twist of Goldilocks base point (~,19). */ | ||||
/* Four initializer groups, one per coordinate; the third group sets only its
 * first limb, to 1. In the triple-brace form each coordinate is wrapped in
 * three brace levels, one per nesting level of its field-element type. LIMB()
 * is defined elsewhere. */
const decaf_448_point_t decaf_448_point_base = {{ | const decaf_448_point_t decaf_448_point_base = {{ | ||||
{ LIMB(0xb39a2d57e08c7b),LIMB(0xb38639c75ff281), | |||||
LIMB(0x2ec981082b3288),LIMB(0x99fe8607e5237c), | |||||
LIMB(0x0e33fbb1fadd1f),LIMB(0xe714f67055eb4a), | |||||
LIMB(0xc9ae06d64067dd),LIMB(0xf7be45054760fa) }, | |||||
{ LIMB(0xbd8715f551617f),LIMB(0x8c17fbeca8f5fc), | |||||
LIMB(0xaae0eec209c06f),LIMB(0xce41ad80cbe6b8), | |||||
LIMB(0xdf360b5c828c00),LIMB(0xaf25b6bbb40e3b), | |||||
LIMB(0x8ed37f0ce4ed31),LIMB(0x72a1c3214557b9) }, | |||||
{ 1 }, | |||||
{ LIMB(0x97ca9c8ed8bde9),LIMB(0xf0b780da83304c), | |||||
LIMB(0x0d79c0a7729a69),LIMB(0xc18d3f24aebc1c), | |||||
LIMB(0x1fbb5389b3fda5),LIMB(0xbb24f674635948), | |||||
LIMB(0x723a55709a3983),LIMB(0xe1c0107a823dd4) } | |||||
{{{ LIMB(0xb39a2d57e08c7b),LIMB(0xb38639c75ff281), | |||||
LIMB(0x2ec981082b3288),LIMB(0x99fe8607e5237c), | |||||
LIMB(0x0e33fbb1fadd1f),LIMB(0xe714f67055eb4a), | |||||
LIMB(0xc9ae06d64067dd),LIMB(0xf7be45054760fa) }}}, | |||||
{{{ LIMB(0xbd8715f551617f),LIMB(0x8c17fbeca8f5fc), | |||||
LIMB(0xaae0eec209c06f),LIMB(0xce41ad80cbe6b8), | |||||
LIMB(0xdf360b5c828c00),LIMB(0xaf25b6bbb40e3b), | |||||
LIMB(0x8ed37f0ce4ed31),LIMB(0x72a1c3214557b9) }}}, | |||||
{{{ 1 }}}, | |||||
{{{ LIMB(0x97ca9c8ed8bde9),LIMB(0xf0b780da83304c), | |||||
LIMB(0x0d79c0a7729a69),LIMB(0xc18d3f24aebc1c), | |||||
LIMB(0x1fbb5389b3fda5),LIMB(0xbb24f674635948), | |||||
LIMB(0x723a55709a3983),LIMB(0xe1c0107a823dd4) }}} | |||||
}}; | }}; | ||||
/* Projective Niels coordinates */ | /* Projective Niels coordinates */ | ||||
@@ -122,7 +121,7 @@ const size_t alignof_decaf_448_precomputed_s = 32; | |||||
#endif | #endif | ||||
/** Copy x = y */ | /** Copy x = y */ | ||||
/* x is the destination, y the source; the assignment is written per limb
 * index i inside FOR_LIMB (macro defined elsewhere; presumably it runs the
 * statement once for each limb index -- confirm). */
siv gf_cpy(gf x, const gf y) { FOR_LIMB(i, x[i] = y[i]); } | |||||
siv gf_cpy(gf x, const gf y) { FOR_LIMB(i, x->limb[i] = y->limb[i]); } | |||||
/** Mostly-unoptimized multiply, but at least it's unrolled. */ | /** Mostly-unoptimized multiply, but at least it's unrolled. */ | ||||
siv gf_mul (gf c, const gf a, const gf b) { | siv gf_mul (gf c, const gf a, const gf b) { | ||||
@@ -188,18 +187,21 @@ sv cond_neg(gf x, decaf_bool_t neg) { | |||||
} | } | ||||
/** Constant time, if (swap) (x,y) = (y,x); */ | /** Constant time, if (swap) (x,y) = (y,x); */ | ||||
/* Masked XOR swap over all DECAF_448_LIMBS limbs: s is (x ^ y) masked by
 * swap and is XORed into both limbs, so the limbs are exchanged when swap is
 * all-ones and untouched when swap is zero. y is __restrict__-qualified.
 * The _Pragma is only emitted for clang when 10*major+minor > 35; it asks
 * for the loop below to be vectorized with unrolling disabled. */
siv cond_swap(gf x, decaf_word_t *__restrict__ y, decaf_bool_t swap) { | |||||
siv cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) { | |||||
int i; | int i; | ||||
#ifdef __clang__ | #ifdef __clang__ | ||||
#if 10*__clang_major__ + __clang_minor__ > 35 | #if 10*__clang_major__ + __clang_minor__ > 35 | ||||
_Pragma("clang loop unroll(disable) vectorize(enable) vectorize_width(4) interleave_count(2)") | |||||
_Pragma("clang loop unroll(disable) vectorize(enable) vectorize_width(8)") | |||||
#endif | #endif | ||||
#endif | #endif | ||||
for (i=0; i<DECAF_448_LIMBS; i++) { | for (i=0; i<DECAF_448_LIMBS; i++) { | ||||
decaf_word_t s = (x[i] ^ y[i]) & swap; | |||||
x[i] ^= s; | |||||
y[i] ^= s; | |||||
decaf_word_t s = (x->limb[i] ^ y->limb[i]) & swap; | |||||
x->limb[i] ^= s; | |||||
y->limb[i] ^= s; | |||||
} | } | ||||
/* | |||||
constant_time_cond_swap(x,y,sizeof(gf),swap); | |||||
*/ | |||||
} | } | ||||
/** | /** | ||||
@@ -226,7 +228,7 @@ static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) { | |||||
gf_sub(c,a,b); | gf_sub(c,a,b); | ||||
gf_canon(c); | gf_canon(c); | ||||
decaf_word_t ret=0; | decaf_word_t ret=0; | ||||
FOR_LIMB(i, ret |= c[i] ); | |||||
FOR_LIMB(i, ret |= c->limb[i] ); | |||||
/* Hope the compiler is too dumb to optimize this, thus noinline */ | /* Hope the compiler is too dumb to optimize this, thus noinline */ | ||||
return ((decaf_dword_t)ret - 1) >> WBITS; | return ((decaf_dword_t)ret - 1) >> WBITS; | ||||
} | } | ||||
@@ -236,15 +238,13 @@ static decaf_word_t hibit(const gf x) { | |||||
gf y; | gf y; | ||||
gf_add(y,x,x); | gf_add(y,x,x); | ||||
gf_canon(y); | gf_canon(y); | ||||
return -(y[0]&1); | |||||
return -(y->limb[0]&1); | |||||
} | } | ||||
/** Return high bit of x/2 = low bit of x mod p */ | /** Return high bit of x/2 = low bit of x mod p */ | ||||
/* Returns the negation of the low bit of the first limb after gf_canon, as a
 * decaf_word_t mask. The const form canonicalizes a local copy of x. The
 * non-const form calls gf_canon on x itself, so the caller's element is
 * modified by the call. */
static decaf_word_t lobit(const gf x) { | |||||
gf y; | |||||
gf_cpy(y,x); | |||||
gf_canon(y); | |||||
return -(y[0]&1); | |||||
static inline decaf_word_t lobit(gf x) { | |||||
gf_canon(x); | |||||
return -(x->limb[0]&1); | |||||
} | } | ||||
/* a = use_c ? c : b */ | /* a = use_c ? c : b */ | ||||
@@ -403,14 +403,14 @@ decaf_bool_t decaf_448_scalar_eq ( | |||||
/* *** API begins here *** */ | /* *** API begins here *** */ | ||||
/** identity = (0,1) */ | /** identity = (0,1) */ | ||||
/* Four initializer groups, one per coordinate, with first limbs 0, 1, 1, 0
 * in that order; all other limbs are zero-initialized. */
const decaf_448_point_t decaf_448_point_identity = {{{0},{1},{1},{0}}}; | |||||
const decaf_448_point_t decaf_448_point_identity = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}}; | |||||
static void gf_encode ( unsigned char ser[DECAF_448_SER_BYTES], gf a ) { | static void gf_encode ( unsigned char ser[DECAF_448_SER_BYTES], gf a ) { | ||||
gf_canon(a); | gf_canon(a); | ||||
int i, k=0, bits=0; | int i, k=0, bits=0; | ||||
decaf_dword_t buf=0; | decaf_dword_t buf=0; | ||||
for (i=0; i<DECAF_448_LIMBS; i++) { | for (i=0; i<DECAF_448_LIMBS; i++) { | ||||
buf |= (decaf_dword_t)a[i]<<bits; | |||||
buf |= (decaf_dword_t)a->limb[i]<<bits; | |||||
for (bits += LBITS; (bits>=8 || i==DECAF_448_LIMBS-1) && k<DECAF_448_SER_BYTES; bits-=8, buf>>=8) { | for (bits += LBITS; (bits>=8 || i==DECAF_448_LIMBS-1) && k<DECAF_448_SER_BYTES; bits-=8, buf>>=8) { | ||||
ser[k++]=buf; | ser[k++]=buf; | ||||
} | } | ||||
@@ -450,12 +450,12 @@ static decaf_bool_t gf_deser(gf s, const unsigned char ser[DECAF_448_SER_BYTES]) | |||||
for (i=0; i<DECAF_448_SER_BYTES; i++) { | for (i=0; i<DECAF_448_SER_BYTES; i++) { | ||||
buf |= (decaf_dword_t)ser[i]<<bits; | buf |= (decaf_dword_t)ser[i]<<bits; | ||||
for (bits += 8; (bits>=LBITS || i==DECAF_448_SER_BYTES-1) && k<DECAF_448_LIMBS; bits-=LBITS, buf>>=LBITS) { | for (bits += 8; (bits>=LBITS || i==DECAF_448_SER_BYTES-1) && k<DECAF_448_LIMBS; bits-=LBITS, buf>>=LBITS) { | ||||
s[k++] = buf & LMASK; | |||||
s->limb[k++] = buf & LMASK; | |||||
} | } | ||||
} | } | ||||
decaf_sdword_t accum = 0; | decaf_sdword_t accum = 0; | ||||
FOR_LIMB(i, accum = (accum + s[i] - P[i]) >> WBITS ); | |||||
FOR_LIMB(i, accum = (accum + s->limb[i] - P->limb[i]) >> WBITS ); | |||||
return accum; | return accum; | ||||
} | } | ||||
@@ -518,7 +518,7 @@ decaf_bool_t decaf_448_point_decode ( | |||||
gf_mul ( a, b, c ); | gf_mul ( a, b, c ); | ||||
gf_mul ( p->y,a,p->z ); | gf_mul ( p->y,a,p->z ); | ||||
gf_mul ( p->t,p->x,a ); | gf_mul ( p->t,p->x,a ); | ||||
p->y[0] -= zero; | |||||
p->y->limb[0] -= zero; | |||||
/* TODO: do something safe if ~succ? */ | /* TODO: do something safe if ~succ? */ | ||||
return succ; | return succ; | ||||
} | } | ||||
@@ -902,7 +902,8 @@ void decaf_448_point_from_hash_nonuniform ( | |||||
(void)gf_deser(r,ser); | (void)gf_deser(r,ser); | ||||
gf_canon(r); | gf_canon(r); | ||||
gf_sqr(a,r); | gf_sqr(a,r); | ||||
gf_mlw(urr,a,QUADRATIC_NONRESIDUE); | |||||
/* gf_mlw(urr,a,QUADRATIC_NONRESIDUE); */ | |||||
gf_sub(urr,ZERO,a); | |||||
gf_mlw(dee,ONE,EDWARDS_D); | gf_mlw(dee,ONE,EDWARDS_D); | ||||
gf_add(a,urr,ONE); | gf_add(a,urr,ONE); | ||||
gf_sub(ur2_d,dee,urr); | gf_sub(ur2_d,dee,urr); | ||||
@@ -1185,7 +1186,7 @@ decaf_bool_t decaf_448_direct_scalarmul ( | |||||
gf_mul(xz_d, xd, zd); | gf_mul(xz_d, xd, zd); | ||||
gf_mul(xz_a, xa, za); | gf_mul(xz_a, xa, za); | ||||
output_zero = gf_eq(xz_d, ZERO); | output_zero = gf_eq(xz_d, ZERO); | ||||
xz_d[0] -= output_zero; /* make xz_d always nonzero */ | |||||
xz_d->limb[0] -= output_zero; /* make xz_d always nonzero */ | |||||
zcase = output_zero | gf_eq(xz_a, ZERO); | zcase = output_zero | gf_eq(xz_a, ZERO); | ||||
za_zero = gf_eq(za, ZERO); | za_zero = gf_eq(za, ZERO); | ||||
@@ -704,7 +704,7 @@ int main(int argc, char **argv) { | |||||
unsigned char dshared[2][32]; | unsigned char dshared[2][32]; | ||||
when = now(); | when = now(); | ||||
for (i=0; i<nbase/10; i++) { | |||||
for (i=0; i<nbase; i++) { | |||||
decaf_448_derive_private_key(dpriv[i&1], sym[i&1]); | decaf_448_derive_private_key(dpriv[i&1], sym[i&1]); | ||||
} | } | ||||
when = now() - when; | when = now() - when; | ||||
@@ -714,7 +714,7 @@ int main(int argc, char **argv) { | |||||
decaf_448_private_to_public(dpub[1], dpriv[1]); | decaf_448_private_to_public(dpub[1], dpriv[1]); | ||||
when = now(); | when = now(); | ||||
for (i=0; i<nbase/10; i++) { | |||||
for (i=0; i<nbase; i++) { | |||||
decaf_bool_t ret = decaf_448_shared_secret(dshared[i&1], 32, dpriv[i&1], dpub[(i+1)&1]); | decaf_bool_t ret = decaf_448_shared_secret(dshared[i&1], 32, dpriv[i&1], dpub[(i+1)&1]); | ||||
if (ret != DECAF_SUCCESS) { | if (ret != DECAF_SUCCESS) { | ||||
printf("BUG: shared secret returns failure on %d.\n", i); | printf("BUG: shared secret returns failure on %d.\n", i); | ||||
@@ -732,7 +732,7 @@ int main(int argc, char **argv) { | |||||
const char *dmessage = "hello world"; | const char *dmessage = "hello world"; | ||||
const char *dnessage = "Jello world"; | const char *dnessage = "Jello world"; | ||||
when = now(); | when = now(); | ||||
for (i=0; i<nbase/10; i++) { | |||||
for (i=0; i<nbase; i++) { | |||||
decaf_448_sign(dsig, dpriv[0], (const unsigned char *)dmessage, 11); | decaf_448_sign(dsig, dpriv[0], (const unsigned char *)dmessage, 11); | ||||
} | } | ||||
when = now() - when; | when = now() - when; | ||||
@@ -743,7 +743,7 @@ int main(int argc, char **argv) { | |||||
} | } | ||||
when = now(); | when = now(); | ||||
for (i=0; i<nbase/10; i++) { | |||||
for (i=0; i<nbase; i++) { | |||||
decaf_bool_t ret = decaf_448_verify(dsig, dpub[0], | decaf_bool_t ret = decaf_448_verify(dsig, dpub[0], | ||||
(const unsigned char *)((i&1) ? dmessage : dnessage), 11); | (const unsigned char *)((i&1) ? dmessage : dnessage), 11); | ||||
if ((i&1) && ~ret) { | if ((i&1) && ~ret) { | ||||
@@ -762,7 +762,7 @@ int main(int argc, char **argv) { | |||||
alignof_decaf_448_precomputed_s, sizeof_decaf_448_precomputed_s)); | alignof_decaf_448_precomputed_s, sizeof_decaf_448_precomputed_s)); | ||||
assert(dpre); | assert(dpre); | ||||
when = now(); | when = now(); | ||||
for (i=0; i<nbase/10; i++) { | |||||
for (i=0; i<nbase; i++) { | |||||
decaf_448_precompute(dpre, Da); | decaf_448_precompute(dpre, Da); | ||||
} | } | ||||
when = now() - when; | when = now() - when; | ||||