| @@ -86,12 +86,45 @@ const size_t API_NS2(alignof,precomputed_s) = 32; | |||||
| #define UNROLL | #define UNROLL | ||||
| #endif | #endif | ||||
| #define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<sizeof(gf)/sizeof(word_t); i++) { op; }} | |||||
| #define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<sizeof(gf)/sizeof(word_t); i++) { op; }} | |||||
| #define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<NLIMBS; i++) { op; }} | |||||
| #define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<NLIMBS; i++) { op; }} | |||||
| void gf_serialize (uint8_t serial[SER_BYTES], const gf x) { | |||||
| gf red; | |||||
| gf_copy(red, x); | |||||
| gf_strong_reduce(red); | |||||
| unsigned int j=0, fill=0; | |||||
| dword_t buffer = 0; | |||||
| UNROLL for (unsigned int i=0; i<SER_BYTES; i++) { | |||||
| if (fill < 8 && j < NLIMBS) { | |||||
| buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill; | |||||
| fill += LIMB_PLACE_VALUE(LIMBPERM(j)); | |||||
| j++; | |||||
| } | |||||
| serial[i] = buffer; | |||||
| fill -= 8; | |||||
| buffer >>= 8; | |||||
| } | |||||
| } | |||||
| /** Copy x = y */ | |||||
| static INLINE void | |||||
| gf_cpy(gf x, const gf y) { x[0] = y[0]; } | |||||
| mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) { | |||||
| unsigned int j=0, fill=0; | |||||
| dword_t buffer = 0; | |||||
| dsword_t scarry = 0; | |||||
| UNROLL for (unsigned int i=0; i<NLIMBS; i++) { | |||||
| UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < SER_BYTES) { | |||||
| buffer |= ((dword_t)serial[j]) << fill; | |||||
| fill += 8; | |||||
| j++; | |||||
| } | |||||
| x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer; | |||||
| fill -= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||||
| buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i)); | |||||
| scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t)); | |||||
| } | |||||
| return word_is_zero(buffer) & ~word_is_zero(scarry); | |||||
| } | |||||
| /** Constant time, x = is_z ? z : y */ | /** Constant time, x = is_z ? z : y */ | ||||
| static INLINE void | static INLINE void | ||||
| @@ -120,9 +153,7 @@ cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) { | |||||
| /** Compare a==b */ | /** Compare a==b */ | ||||
| /* Not static because it's used in inverse square root. */ | /* Not static because it's used in inverse square root. */ | ||||
| decaf_word_t gf_eq(const gf a, const gf b); | decaf_word_t gf_eq(const gf a, const gf b); | ||||
| decaf_word_t | |||||
| gf_eq(const gf a, const gf b) { | |||||
| decaf_word_t gf_eq(const gf a, const gf b) { | |||||
| gf c; | gf c; | ||||
| gf_sub(c,a,b); | gf_sub(c,a,b); | ||||
| gf_strong_reduce(c); | gf_strong_reduce(c); | ||||
| @@ -153,13 +184,10 @@ gf_invert(gf y, const gf x) { | |||||
| (void)ret; assert(ret); | (void)ret; assert(ret); | ||||
| gf_sqr(t1, t2); | gf_sqr(t1, t2); | ||||
| gf_mul(t2, t1, x); // not direct to y in case of alias. | gf_mul(t2, t1, x); // not direct to y in case of alias. | ||||
| gf_cpy(y, t2); | |||||
| gf_copy(y, t2); | |||||
| } | } | ||||
| /** | |||||
| * Mul by signed int. Not constant-time WRT the sign of that int. | |||||
| * Just uses a full mul (PERF) | |||||
| */ | |||||
| /** Mul by signed int. Not constant-time WRT the sign of that int. */ | |||||
| static INLINE void | static INLINE void | ||||
| gf_mulw_sgn(gf c, const gf a, int w) { | gf_mulw_sgn(gf c, const gf a, int w) { | ||||
| if (w>0) { | if (w>0) { | ||||
| @@ -182,7 +210,7 @@ static decaf_word_t hibit(const gf x) { | |||||
| /** Return high bit of x = low bit of 2x mod p */ | /** Return high bit of x = low bit of 2x mod p */ | ||||
| static decaf_word_t lobit(const gf x) { | static decaf_word_t lobit(const gf x) { | ||||
| gf y; | gf y; | ||||
| gf_cpy(y,x); | |||||
| gf_copy(y,x); | |||||
| gf_strong_reduce(y); | gf_strong_reduce(y); | ||||
| return -(y->limb[0]&1); | return -(y->limb[0]&1); | ||||
| } | } | ||||
| @@ -394,16 +422,9 @@ API_NS(scalar_eq) ( | |||||
| return word_is_zero(diff); | return word_is_zero(diff); | ||||
| } | } | ||||
| /* *** API begins here *** */ | |||||
| /** identity = (0,1) */ | /** identity = (0,1) */ | ||||
| const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}}; | const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}}; | ||||
| static void | |||||
| gf_encode ( unsigned char ser[SER_BYTES], gf a ) { | |||||
| gf_serialize(ser, (gf_s *)a); | |||||
| } | |||||
| static void | static void | ||||
| deisogenize ( | deisogenize ( | ||||
| gf_s *__restrict__ s, | gf_s *__restrict__ s, | ||||
| @@ -508,14 +529,7 @@ deisogenize ( | |||||
| void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) { | void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) { | ||||
| gf s, mtos; | gf s, mtos; | ||||
| deisogenize(s,mtos,p,0,0,0); | deisogenize(s,mtos,p,0,0,0); | ||||
| gf_encode ( ser, s ); | |||||
| } | |||||
| /** | |||||
| * Deserialize a field element, return TRUE if < p. | |||||
| */ | |||||
| static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) { | |||||
| return gf_deserialize((gf_s *)s, ser); | |||||
| gf_serialize ( ser, s ); | |||||
| } | } | ||||
| decaf_error_t API_NS(point_decode) ( | decaf_error_t API_NS(point_decode) ( | ||||
| @@ -524,7 +538,7 @@ decaf_error_t API_NS(point_decode) ( | |||||
| decaf_bool_t allow_identity | decaf_bool_t allow_identity | ||||
| ) { | ) { | ||||
| gf s, a, b, c, d, e, f; | gf s, a, b, c, d, e, f; | ||||
| decaf_bool_t succ = gf_deser(s, ser), zero = gf_eq(s, ZERO); | |||||
| decaf_bool_t succ = gf_deserialize(s, ser), zero = gf_eq(s, ZERO); | |||||
| allow_identity = ~word_is_zero(allow_identity); | allow_identity = ~word_is_zero(allow_identity); | ||||
| succ &= allow_identity | ~zero; | succ &= allow_identity | ~zero; | ||||
| succ &= ~hibit(s); | succ &= ~hibit(s); | ||||
| @@ -592,8 +606,6 @@ decaf_error_t API_NS(point_decode) ( | |||||
| #define NEG_D 0 | #define NEG_D 0 | ||||
| #endif | #endif | ||||
| void API_NS(point_sub) ( | void API_NS(point_sub) ( | ||||
| point_t p, | point_t p, | ||||
| const point_t q, | const point_t q, | ||||
| @@ -688,8 +700,8 @@ void API_NS(point_negate) ( | |||||
| const point_t a | const point_t a | ||||
| ) { | ) { | ||||
| gf_sub(nega->x, ZERO, a->x); | gf_sub(nega->x, ZERO, a->x); | ||||
| gf_cpy(nega->y, a->y); | |||||
| gf_cpy(nega->z, a->z); | |||||
| gf_copy(nega->y, a->y); | |||||
| gf_copy(nega->z, a->z); | |||||
| gf_sub(nega->t, ZERO, a->t); | gf_sub(nega->t, ZERO, a->t); | ||||
| } | } | ||||
| @@ -827,7 +839,7 @@ niels_to_pt ( | |||||
| gf_add ( e->y, n->b, n->a ); | gf_add ( e->y, n->b, n->a ); | ||||
| gf_sub ( e->x, n->b, n->a ); | gf_sub ( e->x, n->b, n->a ); | ||||
| gf_mul ( e->t, e->y, e->x ); | gf_mul ( e->t, e->y, e->x ); | ||||
| gf_cpy ( e->z, ONE ); | |||||
| gf_copy ( e->z, ONE ); | |||||
| } | } | ||||
| static NOINLINE void | static NOINLINE void | ||||
| @@ -882,7 +894,7 @@ add_pniels_to_pt ( | |||||
| ) { | ) { | ||||
| gf L0; | gf L0; | ||||
| gf_mul ( L0, p->z, pn->z ); | gf_mul ( L0, p->z, pn->z ); | ||||
| gf_cpy ( p->z, L0 ); | |||||
| gf_copy ( p->z, L0 ); | |||||
| add_niels_to_pt( p, pn->n, before_double ); | add_niels_to_pt( p, pn->n, before_double ); | ||||
| } | } | ||||
| @@ -894,7 +906,7 @@ sub_pniels_from_pt ( | |||||
| ) { | ) { | ||||
| gf L0; | gf L0; | ||||
| gf_mul ( L0, p->z, pn->z ); | gf_mul ( L0, p->z, pn->z ); | ||||
| gf_cpy ( p->z, L0 ); | |||||
| gf_copy ( p->z, L0 ); | |||||
| sub_niels_from_pt( p, pn->n, before_double ); | sub_niels_from_pt( p, pn->n, before_double ); | ||||
| } | } | ||||
| @@ -1203,7 +1215,7 @@ void API_NS(point_from_hash_nonuniform) ( | |||||
| // TODO: simplify since we don't return a hint anymore | // TODO: simplify since we don't return a hint anymore | ||||
| // TODO: test pathological case ur0^2 = 1/(1-d) | // TODO: test pathological case ur0^2 = 1/(1-d) | ||||
| gf r0,r,a,b,c,dee,D,N,rN,e; | gf r0,r,a,b,c,dee,D,N,rN,e; | ||||
| gf_deser(r0,ser); | |||||
| gf_deserialize(r0,ser); | |||||
| gf_strong_reduce(r0); | gf_strong_reduce(r0); | ||||
| gf_sqr(a,r0); | gf_sqr(a,r0); | ||||
| #if P_MOD_8 == 5 | #if P_MOD_8 == 5 | ||||
| @@ -1265,7 +1277,7 @@ void API_NS(point_from_hash_nonuniform) ( | |||||
| /* isogenize */ | /* isogenize */ | ||||
| #if IMAGINE_TWIST | #if IMAGINE_TWIST | ||||
| gf_mul(c,a,SQRT_MINUS_ONE); | gf_mul(c,a,SQRT_MINUS_ONE); | ||||
| gf_cpy(a,c); | |||||
| gf_copy(a,c); | |||||
| #endif | #endif | ||||
| gf_sqr(c,a); /* s^2 */ | gf_sqr(c,a); /* s^2 */ | ||||
| @@ -1326,7 +1338,7 @@ API_NS(invert_elligator_nonuniform) ( | |||||
| succ &= ~(is_identity & sgn_ed_T); /* NB: there are no preimages of rotated identity. */ | succ &= ~(is_identity & sgn_ed_T); /* NB: there are no preimages of rotated identity. */ | ||||
| #endif | #endif | ||||
| gf_encode(recovered_hash, b); | |||||
| gf_serialize(recovered_hash, b); | |||||
| /* TODO: deal with overflow flag */ | /* TODO: deal with overflow flag */ | ||||
| return decaf_succeed_if(succ); | return decaf_succeed_if(succ); | ||||
| } | } | ||||
| @@ -1380,14 +1392,14 @@ void API_NS(point_debugging_torque) ( | |||||
| gf tmp; | gf tmp; | ||||
| gf_mul(tmp,p->x,SQRT_MINUS_ONE); | gf_mul(tmp,p->x,SQRT_MINUS_ONE); | ||||
| gf_mul(q->x,p->y,SQRT_MINUS_ONE); | gf_mul(q->x,p->y,SQRT_MINUS_ONE); | ||||
| gf_cpy(q->y,tmp); | |||||
| gf_cpy(q->z,p->z); | |||||
| gf_copy(q->y,tmp); | |||||
| gf_copy(q->z,p->z); | |||||
| gf_sub(q->t,ZERO,p->t); | gf_sub(q->t,ZERO,p->t); | ||||
| #else | #else | ||||
| gf_sub(q->x,ZERO,p->x); | gf_sub(q->x,ZERO,p->x); | ||||
| gf_sub(q->y,ZERO,p->y); | gf_sub(q->y,ZERO,p->y); | ||||
| gf_cpy(q->z,p->z); | |||||
| gf_cpy(q->t,p->t); | |||||
| gf_copy(q->z,p->z); | |||||
| gf_copy(q->t,p->t); | |||||
| #endif | #endif | ||||
| } | } | ||||
| @@ -1397,16 +1409,16 @@ void API_NS(point_debugging_pscale) ( | |||||
| const uint8_t factor[SER_BYTES] | const uint8_t factor[SER_BYTES] | ||||
| ) { | ) { | ||||
| gf gfac,tmp; | gf gfac,tmp; | ||||
| ignore_result(gf_deser(gfac,factor)); | |||||
| ignore_result(gf_deserialize(gfac,factor)); | |||||
| cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO)); | cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO)); | ||||
| gf_mul(tmp,p->x,gfac); | gf_mul(tmp,p->x,gfac); | ||||
| gf_cpy(q->x,tmp); | |||||
| gf_copy(q->x,tmp); | |||||
| gf_mul(tmp,p->y,gfac); | gf_mul(tmp,p->y,gfac); | ||||
| gf_cpy(q->y,tmp); | |||||
| gf_copy(q->y,tmp); | |||||
| gf_mul(tmp,p->z,gfac); | gf_mul(tmp,p->z,gfac); | ||||
| gf_cpy(q->z,tmp); | |||||
| gf_copy(q->z,tmp); | |||||
| gf_mul(tmp,p->t,gfac); | gf_mul(tmp,p->t,gfac); | ||||
| gf_cpy(q->t,tmp); | |||||
| gf_copy(q->t,tmp); | |||||
| } | } | ||||
| static void gf_batch_invert ( | static void gf_batch_invert ( | ||||
| @@ -1417,7 +1429,7 @@ static void gf_batch_invert ( | |||||
| gf t1; | gf t1; | ||||
| assert(n>1); | assert(n>1); | ||||
| gf_cpy(out[1], in[0]); | |||||
| gf_copy(out[1], in[0]); | |||||
| int i; | int i; | ||||
| for (i=1; i<(int) (n-1); i++) { | for (i=1; i<(int) (n-1); i++) { | ||||
| gf_mul(out[i+1], out[i], in[i]); | gf_mul(out[i+1], out[i], in[i]); | ||||
| @@ -1428,9 +1440,9 @@ static void gf_batch_invert ( | |||||
| for (i=n-1; i>0; i--) { | for (i=n-1; i>0; i--) { | ||||
| gf_mul(t1, out[i], out[0]); | gf_mul(t1, out[i], out[0]); | ||||
| gf_cpy(out[i], t1); | |||||
| gf_copy(out[i], t1); | |||||
| gf_mul(t1, out[0], in[i]); | gf_mul(t1, out[0], in[i]); | ||||
| gf_cpy(out[0], t1); | |||||
| gf_copy(out[0], t1); | |||||
| } | } | ||||
| } | } | ||||
| @@ -1447,15 +1459,15 @@ static void batch_normalize_niels ( | |||||
| for (i=0; i<n; i++) { | for (i=0; i<n; i++) { | ||||
| gf_mul(product, table[i]->a, zis[i]); | gf_mul(product, table[i]->a, zis[i]); | ||||
| gf_strong_reduce(product); | gf_strong_reduce(product); | ||||
| gf_cpy(table[i]->a, product); | |||||
| gf_copy(table[i]->a, product); | |||||
| gf_mul(product, table[i]->b, zis[i]); | gf_mul(product, table[i]->b, zis[i]); | ||||
| gf_strong_reduce(product); | gf_strong_reduce(product); | ||||
| gf_cpy(table[i]->b, product); | |||||
| gf_copy(table[i]->b, product); | |||||
| gf_mul(product, table[i]->c, zis[i]); | gf_mul(product, table[i]->c, zis[i]); | ||||
| gf_strong_reduce(product); | gf_strong_reduce(product); | ||||
| gf_cpy(table[i]->c, product); | |||||
| gf_copy(table[i]->c, product); | |||||
| } | } | ||||
| decaf_bzero(product,sizeof(product)); | decaf_bzero(product,sizeof(product)); | ||||
| @@ -1500,7 +1512,7 @@ void API_NS(precompute) ( | |||||
| pt_to_pniels(pn_tmp, start); | pt_to_pniels(pn_tmp, start); | ||||
| memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n)); | memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n)); | ||||
| gf_cpy(zs[idx], pn_tmp->z); | |||||
| gf_copy(zs[idx], pn_tmp->z); | |||||
| if (j >= (1u<<(t-1)) - 1) break; | if (j >= (1u<<(t-1)) - 1) break; | ||||
| int delta = (j+1) ^ ((j+1)>>1) ^ gray; | int delta = (j+1) ^ ((j+1)>>1) ^ gray; | ||||
| @@ -1733,7 +1745,7 @@ void API_NS(precompute_wnafs) ( | |||||
| prepare_wnaf_table(tmp,base,DECAF_WNAF_FIXED_TABLE_BITS); | prepare_wnaf_table(tmp,base,DECAF_WNAF_FIXED_TABLE_BITS); | ||||
| for (i=0; i<1<<DECAF_WNAF_FIXED_TABLE_BITS; i++) { | for (i=0; i<1<<DECAF_WNAF_FIXED_TABLE_BITS; i++) { | ||||
| memcpy(out[i], tmp[i]->n, sizeof(niels_t)); | memcpy(out[i], tmp[i]->n, sizeof(niels_t)); | ||||
| gf_cpy(zs[i], tmp[i]->z); | |||||
| gf_copy(zs[i], tmp[i]->z); | |||||
| } | } | ||||
| batch_normalize_niels(out, (const gf *)zs, zis, 1<<DECAF_WNAF_FIXED_TABLE_BITS); | batch_normalize_niels(out, (const gf *)zs, zis, 1<<DECAF_WNAF_FIXED_TABLE_BITS); | ||||
| @@ -13,8 +13,9 @@ f_field_h = gen_file( | |||||
| #include "word.h" | #include "word.h" | ||||
| #define __DECAF_%(gf_shortname)s_GF_DEFINED__ 1 | #define __DECAF_%(gf_shortname)s_GF_DEFINED__ 1 | ||||
| #define NLIMBS (%(gf_impl_bits)d/sizeof(word_t)/8) | |||||
| typedef struct gf_%(gf_shortname)s_s { | typedef struct gf_%(gf_shortname)s_s { | ||||
| word_t limb[%(gf_impl_bits)d/sizeof(word_t)/8]; | |||||
| word_t limb[NLIMBS]; | |||||
| } __attribute__((aligned(32))) gf_%(gf_shortname)s_s, gf_%(gf_shortname)s_t[1]; | } __attribute__((aligned(32))) gf_%(gf_shortname)s_s, gf_%(gf_shortname)s_t[1]; | ||||
| #define GF_LIT_LIMB_BITS %(gf_lit_limb_bits)d | #define GF_LIT_LIMB_BITS %(gf_lit_limb_bits)d | ||||
| @@ -33,6 +34,7 @@ typedef struct gf_%(gf_shortname)s_s { | |||||
| #define gf_isr gf_%(gf_shortname)s_isr | #define gf_isr gf_%(gf_shortname)s_isr | ||||
| #define gf_serialize gf_%(gf_shortname)s_serialize | #define gf_serialize gf_%(gf_shortname)s_serialize | ||||
| #define gf_deserialize gf_%(gf_shortname)s_deserialize | #define gf_deserialize gf_%(gf_shortname)s_deserialize | ||||
| #define MODULUS gf_%(gf_shortname)s_MODULUS | |||||
| #define SQRT_MINUS_ONE P%(gf_shortname)s_SQRT_MINUS_ONE /* might not be defined */ | #define SQRT_MINUS_ONE P%(gf_shortname)s_SQRT_MINUS_ONE /* might not be defined */ | ||||
| @@ -42,6 +44,8 @@ typedef struct gf_%(gf_shortname)s_s { | |||||
| extern "C" { | extern "C" { | ||||
| #endif | #endif | ||||
| const gf MODULUS; | |||||
| /* Defined below in f_impl.h */ | /* Defined below in f_impl.h */ | ||||
| static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; } | static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; } | ||||
| static INLINE_UNUSED void gf_add_RAW (gf out, const gf a, const gf b); | static INLINE_UNUSED void gf_add_RAW (gf out, const gf a, const gf b); | ||||
| @@ -61,4 +65,9 @@ mask_t gf_deserialize (gf x, const uint8_t serial[(GF_BITS-1)/8+1]); | |||||
| #endif | #endif | ||||
| #include "f_impl.h" /* Bring in the inline implementations */ | #include "f_impl.h" /* Bring in the inline implementations */ | ||||
| #ifndef LIMBPERM | |||||
| #define LIMBPERM(i) (i) | |||||
| #endif | |||||
| #define LIMB_MASK(i) (((1ull)<<LIMB_PLACE_VALUE(i))-1) | |||||
| """) | """) | ||||
| @@ -91,88 +91,37 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||||
| } | } | ||||
| void gf_strong_reduce (gf a) { | void gf_strong_reduce (gf a) { | ||||
| uint32_t maske = (1<<26)-1, masko = (1<<25)-1; | |||||
| /* first, clear high */ | /* first, clear high */ | ||||
| a->limb[0] += (a->limb[9]>>25)*19; | a->limb[0] += (a->limb[9]>>25)*19; | ||||
| a->limb[9] &= masko; | |||||
| a->limb[9] &= LIMB_MASK(9); | |||||
| /* now the total is less than 2p */ | /* now the total is less than 2p */ | ||||
| /* compute total_value - p. No need to reduce mod p. */ | /* compute total_value - p. No need to reduce mod p. */ | ||||
| int64_t scarry = 0; | |||||
| int i; | |||||
| for (i=0; i<10; /*i+=2*/) { | |||||
| scarry = scarry + a->limb[i] - ((i==0)?maske-18:maske); | |||||
| a->limb[i] = scarry & maske; | |||||
| scarry >>= 26; | |||||
| i++; | |||||
| scarry = scarry + a->limb[i] - masko; | |||||
| a->limb[i] = scarry & masko; | |||||
| scarry >>= 25; | |||||
| i++; | |||||
| dsword_t scarry = 0; | |||||
| for (unsigned int i=0; i<10; i++) { | |||||
| scarry = scarry + a->limb[i] - MODULUS->limb[i]; | |||||
| a->limb[i] = scarry & LIMB_MASK(i); | |||||
| scarry >>= LIMB_PLACE_VALUE(i); | |||||
| } | } | ||||
| /* uncommon case: it was >= p, so now scarry = 0 and this = x | /* uncommon case: it was >= p, so now scarry = 0 and this = x | ||||
| * common case: it was < p, so now scarry = -1 and this = x - p + 2^255 | * common case: it was < p, so now scarry = -1 and this = x - p + 2^255 | ||||
| * so let's add back in p. will carry back off the top for 2^255. | * so let's add back in p. will carry back off the top for 2^255. | ||||
| */ | */ | ||||
| assert(word_is_zero(scarry) | word_is_zero(scarry+1)); | assert(word_is_zero(scarry) | word_is_zero(scarry+1)); | ||||
| uint32_t scarry_masko = scarry & masko, scarry_maske = scarry & maske; | |||||
| uint64_t carry = 0; | |||||
| word_t scarry_0 = scarry; | |||||
| dword_t carry = 0; | |||||
| /* add it back */ | /* add it back */ | ||||
| for (i=0; i<10; /*i+=2*/) { | |||||
| carry = carry + a->limb[i] + ((i==0)?(scarry_maske&~18):scarry_maske); | |||||
| a->limb[i] = carry & maske; | |||||
| carry >>= 26; | |||||
| i++; | |||||
| carry = carry + a->limb[i] + scarry_masko; | |||||
| a->limb[i] = carry & masko; | |||||
| carry >>= 25; | |||||
| for (unsigned int i=0; i<10; i++) { | |||||
| carry = carry + a->limb[i] + (scarry_0 & MODULUS->limb[i]); | |||||
| a->limb[i] = carry & LIMB_MASK(i); | |||||
| carry >>= LIMB_PLACE_VALUE(i); | |||||
| i++; | i++; | ||||
| } | } | ||||
| assert(word_is_zero(carry + scarry)); | |||||
| } | |||||
| #define LIMB_PLACE_VALUE(i) (((i)&1)?25:26) | |||||
| void gf_serialize (uint8_t serial[32], const gf x) { | |||||
| gf red; | |||||
| gf_copy(red, x); | |||||
| gf_strong_reduce(red); | |||||
| unsigned int j=0, fill=0; | |||||
| dword_t buffer = 0; | |||||
| for (unsigned int i=0; i<32; i++) { | |||||
| if (fill < 8 && j < sizeof(red->limb)/sizeof(red->limb[0])) { | |||||
| buffer |= ((dword_t)red->limb[j]) << fill; | |||||
| fill += LIMB_PLACE_VALUE(j); | |||||
| j++; | |||||
| } | |||||
| serial[i] = buffer; | |||||
| fill -= 8; | |||||
| buffer >>= 8; | |||||
| } | |||||
| assert(word_is_zero(carry + scarry_0)); | |||||
| } | } | ||||
| mask_t gf_deserialize (gf x, const uint8_t serial[32]) { | |||||
| unsigned int j=0, fill=0; | |||||
| dword_t buffer = 0; | |||||
| for (unsigned int i=0; i<32; i++) { | |||||
| buffer |= ((dword_t)serial[i]) << fill; | |||||
| fill += 8; | |||||
| if (fill >= LIMB_PLACE_VALUE(j) || i == 31) { | |||||
| assert(j < sizeof(x->limb)/sizeof(x->limb[0])); | |||||
| word_t mask = ((1ull)<<LIMB_PLACE_VALUE(j))-1; | |||||
| x->limb[j] = (i==31) ? buffer : (buffer & mask); // FIXME: this can in theory truncate the buffer if it's not in field. | |||||
| buffer >>= LIMB_PLACE_VALUE(j); | |||||
| fill -= LIMB_PLACE_VALUE(j); | |||||
| j++; | |||||
| } | |||||
| } | |||||
| return -1; // FIXME: test whether in field. | |||||
| } | |||||
| @@ -3,8 +3,9 @@ | |||||
| */ | */ | ||||
| #define LIMB(x) (x##ull)&((1ull<<26)-1), (x##ull)>>26 | #define LIMB(x) (x##ull)&((1ull<<26)-1), (x##ull)>>26 | ||||
| #define FIELD_LITERAL(a,b,c,d,e) \ | |||||
| {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}} | |||||
| #define FIELD_LITERAL(a,b,c,d,e) {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}} | |||||
| #define LIMB_PLACE_VALUE(i) (((i)&1)?25:26) | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<10; i++) { | for (unsigned int i=0; i<10; i++) { | ||||
| @@ -97,45 +97,3 @@ void gf_strong_reduce (gf a) { | |||||
| assert(word_is_zero(carry + scarry)); | assert(word_is_zero(carry + scarry)); | ||||
| } | } | ||||
| void gf_serialize (uint8_t serial[32], const gf x) { | |||||
| int i,j; | |||||
| gf red; | |||||
| gf_copy(red, x); | |||||
| gf_strong_reduce(red); | |||||
| uint64_t *r = red->limb; | |||||
| uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12}; | |||||
| for (i=0; i<4; i++) { | |||||
| for (j=0; j<8; j++) { | |||||
| serial[8*i+j] = ser64[i]; | |||||
| ser64[i] >>= 8; | |||||
| } | |||||
| } | |||||
| } | |||||
| mask_t gf_deserialize (gf x, const uint8_t serial[32]) { | |||||
| int i,j; | |||||
| uint64_t ser64[4], mask = ((1ull<<51)-1); | |||||
| for (i=0; i<4; i++) { | |||||
| uint64_t out = 0; | |||||
| for (j=0; j<8; j++) { | |||||
| out |= ((uint64_t)serial[8*i+j])<<(8*j); | |||||
| } | |||||
| ser64[i] = out; | |||||
| } | |||||
| /* Test for >= 2^255-19 */ | |||||
| uint64_t ge = -(((__uint128_t)ser64[0]+19)>>64); | |||||
| ge &= ser64[1]; | |||||
| ge &= ser64[2]; | |||||
| ge &= (ser64[3]<<1) + 1; | |||||
| ge |= -(((__uint128_t)ser64[3]+0x8000000000000000)>>64); | |||||
| x->limb[0] = ser64[0] & mask; | |||||
| x->limb[1] = (ser64[0]>>51 | ser64[1]<<13) & mask; | |||||
| x->limb[2] = (ser64[1]>>38 | ser64[2]<<26) & mask; | |||||
| x->limb[3] = (ser64[2]>>25 | ser64[3]<<39) & mask; | |||||
| x->limb[4] = ser64[3]>>12; | |||||
| return ~word_is_zero(~ge); | |||||
| } | |||||
| @@ -4,6 +4,8 @@ | |||||
| #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | ||||
| #define LIMB_PLACE_VALUE(i) 51 | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<5; i++) { | for (unsigned int i=0; i<5; i++) { | ||||
| out->limb[i] = a->limb[i] + b->limb[i]; | out->limb[i] = a->limb[i] + b->limb[i]; | ||||
| @@ -208,45 +208,3 @@ void gf_strong_reduce (gf a) { | |||||
| assert(word_is_zero(carry + scarry)); | assert(word_is_zero(carry + scarry)); | ||||
| } | } | ||||
| void gf_serialize (uint8_t serial[32], const gf x) { | |||||
| int i,j; | |||||
| gf red; | |||||
| gf_copy(red, x); | |||||
| gf_strong_reduce(red); | |||||
| uint64_t *r = red->limb; | |||||
| uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12}; | |||||
| for (i=0; i<4; i++) { | |||||
| for (j=0; j<8; j++) { | |||||
| serial[8*i+j] = ser64[i]; | |||||
| ser64[i] >>= 8; | |||||
| } | |||||
| } | |||||
| } | |||||
| mask_t gf_deserialize (gf x, const uint8_t serial[32]) { | |||||
| int i,j; | |||||
| uint64_t ser64[4], mask = ((1ull<<51)-1); | |||||
| for (i=0; i<4; i++) { | |||||
| uint64_t out = 0; | |||||
| for (j=0; j<8; j++) { | |||||
| out |= ((uint64_t)serial[8*i+j])<<(8*j); | |||||
| } | |||||
| ser64[i] = out; | |||||
| } | |||||
| /* Test for >= 2^255-19 */ | |||||
| uint64_t ge = -(((__uint128_t)ser64[0]+19)>>64); | |||||
| ge &= ser64[1]; | |||||
| ge &= ser64[2]; | |||||
| ge &= (ser64[3]<<1) + 1; | |||||
| ge |= -(((__uint128_t)ser64[3]+0x8000000000000000)>>64); | |||||
| x->limb[0] = ser64[0] & mask; | |||||
| x->limb[1] = (ser64[0]>>51 | ser64[1]<<13) & mask; | |||||
| x->limb[2] = (ser64[1]>>38 | ser64[2]<<26) & mask; | |||||
| x->limb[3] = (ser64[2]>>25 | ser64[3]<<39) & mask; | |||||
| x->limb[4] = ser64[3]>>12; | |||||
| return ~word_is_zero(~ge); | |||||
| } | |||||
| @@ -4,6 +4,8 @@ | |||||
| #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | ||||
| #define LIMB_PLACE_VALUE(i) 51 | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<5; i++) { | for (unsigned int i=0; i<5; i++) { | ||||
| out->limb[i] = a->limb[i] + b->limb[i]; | out->limb[i] = a->limb[i] + b->limb[i]; | ||||
| @@ -18,14 +18,17 @@ const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL( | |||||
| 0x78595a6804c9e, | 0x78595a6804c9e, | ||||
| 0x2b8324804fc1d | 0x2b8324804fc1d | ||||
| )}; | )}; | ||||
| const gf MODULUS = {FIELD_LITERAL( | |||||
| 0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff | |||||
| )}; | |||||
| /* TODO put in header */ | /* TODO put in header */ | ||||
| extern const gf_25519_t decaf_255_ONE; | extern const gf_25519_t decaf_255_ONE; | ||||
| extern mask_t decaf_255_gf_eq(const gf_25519_t a, const gf_25519_t b); | extern mask_t decaf_255_gf_eq(const gf_25519_t a, const gf_25519_t b); | ||||
| /* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */ | /* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */ | ||||
| void | |||||
| gf_isr ( | |||||
| void gf_isr ( | |||||
| gf_25519_t a, | gf_25519_t a, | ||||
| const gf_25519_t x | const gf_25519_t x | ||||
| ) { | ) { | ||||
| @@ -142,53 +142,3 @@ void gf_strong_reduce (gf a) { | |||||
| assert(word_is_zero(carry + scarry)); | assert(word_is_zero(carry + scarry)); | ||||
| } | } | ||||
| void gf_serialize (uint8_t *serial, const gf x) { | |||||
| int i,j; | |||||
| gf red; | |||||
| gf_copy(red, x); | |||||
| gf_strong_reduce(red); | |||||
| for (i=0; i<8; i++) { | |||||
| uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28); | |||||
| for (j=0; j<7; j++) { | |||||
| serial[7*i+j] = limb; | |||||
| limb >>= 8; | |||||
| } | |||||
| assert(limb == 0); | |||||
| } | |||||
| } | |||||
| mask_t gf_deserialize (gf x, const uint8_t serial[56]) { | |||||
| int i,j; | |||||
| for (i=0; i<8; i++) { | |||||
| uint64_t out = 0; | |||||
| for (j=0; j<7; j++) { | |||||
| out |= ((uint64_t)serial[7*i+j])<<(8*j); | |||||
| } | |||||
| x->limb[2*i] = out & ((1ull<<28)-1); | |||||
| x->limb[2*i+1] = out >> 28; | |||||
| } | |||||
| /* Check for reduction. | |||||
| * | |||||
| * The idea is to create a variable ge which is all ones (rather, 56 ones) | |||||
| * if and only if the low $i$ words of $x$ are >= those of p. | |||||
| * | |||||
| * Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111) | |||||
| */ | |||||
| uint32_t ge = -1, mask = (1ull<<28)-1; | |||||
| for (i=0; i<8; i++) { | |||||
| ge &= x->limb[i]; | |||||
| } | |||||
| /* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */ | |||||
| ge = (ge & (x->limb[8] + 1)) | word_is_zero(x->limb[8] ^ mask); | |||||
| /* Propagate the rest */ | |||||
| for (i=9; i<16; i++) { | |||||
| ge &= x->limb[i]; | |||||
| } | |||||
| return ~word_is_zero(ge ^ mask); | |||||
| } | |||||
| @@ -5,6 +5,8 @@ | |||||
| #define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 | #define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 | ||||
| #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | ||||
| {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | ||||
| #define LIMB_PLACE_VALUE(i) 28 | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | ||||
| @@ -834,9 +834,7 @@ void gf_mulw ( | |||||
| c[1] += accum8 >> 28; | c[1] += accum8 >> 28; | ||||
| } | } | ||||
| void gf_strong_reduce ( | |||||
| gf a | |||||
| ) { | |||||
| void gf_strong_reduce (gf a) { | |||||
| word_t mask = (1ull<<28)-1; | word_t mask = (1ull<<28)-1; | ||||
| /* first, clear high */ | /* first, clear high */ | ||||
| @@ -875,59 +873,3 @@ void gf_strong_reduce ( | |||||
| assert(word_is_zero(carry + scarry)); | assert(word_is_zero(carry + scarry)); | ||||
| } | } | ||||
| void gf_serialize ( | |||||
| uint8_t *serial, | |||||
| const gf x | |||||
| ) { | |||||
| int i,j; | |||||
| gf red; | |||||
| gf_copy(red, x); | |||||
| gf_strong_reduce(red); | |||||
| for (i=0; i<8; i++) { | |||||
| uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28); | |||||
| for (j=0; j<7; j++) { | |||||
| serial[7*i+j] = limb; | |||||
| limb >>= 8; | |||||
| } | |||||
| assert(limb == 0); | |||||
| } | |||||
| } | |||||
| mask_t | |||||
| gf_deserialize ( | |||||
| gf x, | |||||
| const uint8_t serial[56] | |||||
| ) { | |||||
| int i,j; | |||||
| for (i=0; i<8; i++) { | |||||
| uint64_t out = 0; | |||||
| for (j=0; j<7; j++) { | |||||
| out |= ((uint64_t)serial[7*i+j])<<(8*j); | |||||
| } | |||||
| x->limb[2*i] = out & ((1ull<<28)-1); | |||||
| x->limb[2*i+1] = out >> 28; | |||||
| } | |||||
| /* Check for reduction. | |||||
| * | |||||
| * The idea is to create a variable ge which is all ones (rather, 56 ones) | |||||
| * if and only if the low $i$ words of $x$ are >= those of p. | |||||
| * | |||||
| * Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111) | |||||
| */ | |||||
| uint32_t ge = -1, mask = (1ull<<28)-1; | |||||
| for (i=0; i<8; i++) { | |||||
| ge &= x->limb[i]; | |||||
| } | |||||
| /* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */ | |||||
| ge = (ge & (x->limb[8] + 1)) | word_is_zero(x->limb[8] ^ mask); | |||||
| /* Propagate the rest */ | |||||
| for (i=9; i<16; i++) { | |||||
| ge &= x->limb[i]; | |||||
| } | |||||
| return ~word_is_zero(ge ^ mask); | |||||
| } | |||||
| @@ -5,6 +5,8 @@ | |||||
| #define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 | #define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28 | ||||
| #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | #define FIELD_LITERAL(a,b,c,d,e,f,g,h) \ | ||||
| {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}} | ||||
| #define LIMB_PLACE_VALUE(i) 28 | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | ||||
| @@ -684,4 +684,3 @@ mask_t gf_deserialize (gf x, const uint8_t serial[56]) { | |||||
| return ~word_is_zero(ge ^ mask); | return ~word_is_zero(ge ^ mask); | ||||
| } | } | ||||
| @@ -11,6 +11,8 @@ | |||||
| LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \ | LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \ | ||||
| LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \ | LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \ | ||||
| LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}} | LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}} | ||||
| #define LIMB_PLACE_VALUE(i) 28 | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) { | ||||
| @@ -3,6 +3,8 @@ | |||||
| */ | */ | ||||
| #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} | #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} | ||||
| #define LIMB_PLACE_VALUE(i) 56 | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<8; i++) { | for (unsigned int i=0; i<8; i++) { | ||||
| @@ -329,51 +329,3 @@ void gf_strong_reduce (gf a) { | |||||
| assert(word_is_zero(carry + scarry)); | assert(word_is_zero(carry + scarry)); | ||||
| } | } | ||||
| void gf_serialize (uint8_t *serial, const gf x) { | |||||
| /* Write the strongly reduced value of x to serial: eight limbs, seven bytes per limb, low byte of each limb first. */ | |||||
| gf red; | |||||
| gf_copy(red, x); | |||||
| gf_strong_reduce(red); | |||||
| uint8_t *dst = serial; | |||||
| for (int idx=0; idx<8; idx++) { | |||||
| for (int remaining=7; remaining>0; remaining--) { | |||||
| *dst++ = red->limb[idx]; | |||||
| red->limb[idx] >>= 8; | |||||
| } | |||||
| /* after seven bytes nothing may be left in the limb */ | |||||
| assert(red->limb[idx] == 0); | |||||
| } | |||||
| } | |||||
| mask_t gf_deserialize (gf x, const uint8_t serial[56]) { /* Unpack 56 bytes into the eight limbs of x, seven bytes per limb, then run the reduction check described below. */ | |||||
| int i,j; | |||||
| for (i=0; i<8; i++) { | |||||
| word_t out = 0; | |||||
| for (j=0; j<7; j++) { | |||||
| out |= ((word_t)serial[7*i+j])<<(8*j); /* byte j of the group lands at bit 8*j of the limb */ | |||||
| } | |||||
| x->limb[i] = out; | |||||
| } | |||||
| /* Check for reduction. | |||||
| * | |||||
| * The idea is to create a variable ge which is all ones (rather, 56 ones) | |||||
| * if and only if the low $i$ words of $x$ are >= those of p. | |||||
| * | |||||
| * Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111) | |||||
| */ | |||||
| word_t ge = -1, mask = (1ull<<56)-1; /* ge starts as all ones; mask has the low 56 bits set */ | |||||
| for (i=0; i<4; i++) { | |||||
| ge &= x->limb[i]; | |||||
| } | |||||
| /* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */ | |||||
| ge = (ge & (x->limb[4] + 1)) | word_is_zero(x->limb[4] ^ mask); | |||||
| /* Propagate the rest */ | |||||
| for (i=5; i<8; i++) { | |||||
| ge &= x->limb[i]; | |||||
| } | |||||
| return ~word_is_zero(ge ^ mask); /* ge == mask is the "x >= p" case from the comment above; NOTE(review): presumably word_is_zero yields an all-ones mask for zero, making the result 0 for unreduced input -- confirm */ | |||||
| } | |||||
| @@ -3,6 +3,7 @@ | |||||
| */ | */ | ||||
| #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} /* eight limb initializers, passed through unchanged */ | #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} /* eight limb initializers, passed through unchanged */ | ||||
| #define LIMB_PLACE_VALUE(i) 56 /* bit count the generic gf_serialize/gf_deserialize advance by for limb i; the same for every i here */ | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
| @@ -10,6 +10,11 @@ | |||||
| #include "field.h" | #include "field.h" | ||||
| const gf MODULUS = {FIELD_LITERAL( /* limb literal for the modulus: every argument is 56 one-bits except the fifth, whose lowest bit is clear */ | |||||
| 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff, | |||||
| 0xfffffffffffffe, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff | |||||
| )}; | |||||
| void | void | ||||
| gf_isr ( | gf_isr ( | ||||
| gf a, | gf a, | ||||
| @@ -329,65 +329,3 @@ void gf_strong_reduce (gf *a) { | |||||
| assert(word_is_zero(carry + scarry)); | assert(word_is_zero(carry + scarry)); | ||||
| } | } | ||||
| void gf_serialize (uint8_t *serial, const struct gf *x) { | |||||
| /* Write the strongly reduced value of x to serial: each pair of limbs is packed into 15 bytes (7 for the even limb, 8 for its top 4 bits plus the odd limb), low byte first. */ | |||||
| gf red; | |||||
| gf_copy(&red, x); | |||||
| gf_strong_reduce(&red); | |||||
| uint8_t *dst = serial; | |||||
| for (int pair=0; pair<8; pair+=2) { | |||||
| word_t acc = red.limb[pair]; | |||||
| int n; | |||||
| /* low 56 bits of the even limb */ | |||||
| for (n=0; n<7; n++) { | |||||
| *dst++ = acc; | |||||
| acc >>= 8; | |||||
| } | |||||
| assert(acc<16); | |||||
| /* the 4 bits still in acc sit directly below the odd limb */ | |||||
| acc += red.limb[pair+1]<<4; | |||||
| for (n=0; n<8; n++) { | |||||
| *dst++ = acc; | |||||
| acc >>= 8; | |||||
| } | |||||
| assert(acc==0); | |||||
| } | |||||
| } | |||||
| mask_t gf_deserialize (gf *x, const uint8_t serial[60]) { /* Unpack 60 bytes into the eight limbs of *x, 15 bytes per pair of limbs, then run the reduction check described below. */ | |||||
| int i,j,k=0; | |||||
| for (i=0; i<8; i+=2) { | |||||
| word_t r = 0; | |||||
| for (j=0; j<8; j++) { | |||||
| r |= ((word_t)serial[k++])<<(8*j); | |||||
| } | |||||
| x->limb[i] = r & ((1ull<<60)-1); /* the even limb keeps the low 60 of the 64 bits just read */ | |||||
| r >>= 60; /* the 4 leftover bits become the bottom of the odd limb */ | |||||
| for (j=0; j<7; j++) { | |||||
| r |= ((word_t)serial[k++])<<(8*j+4); | |||||
| } | |||||
| x->limb[i+1] = r; | |||||
| } | |||||
| /* Check for reduction. | |||||
| * | |||||
| * The idea is to create a variable ge which is all ones (rather, 60 ones) | |||||
| * if and only if the low $i$ words of $x$ are >= those of p. | |||||
| * | |||||
| * Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111) | |||||
| */ | |||||
| word_t ge = -1, mask = (1ull<<60)-1; /* ge starts as all ones; mask has the low 60 bits set */ | |||||
| for (i=0; i<4; i++) { | |||||
| ge &= x->limb[i]; | |||||
| } | |||||
| /* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */ | |||||
| ge = (ge & (x->limb[4] + 1)) | word_is_zero(x->limb[4] ^ mask); | |||||
| /* Propagate the rest */ | |||||
| for (i=5; i<8; i++) { | |||||
| ge &= x->limb[i]; | |||||
| } | |||||
| return ~word_is_zero(ge ^ mask); /* ge == mask is the "x >= p" case from the comment above; NOTE(review): presumably word_is_zero yields an all-ones mask for zero, making the result 0 for unreduced input -- confirm */ | |||||
| } | |||||
| @@ -2,6 +2,8 @@ | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | * Released under the MIT License. See LICENSE.txt for license information. | ||||
| */ | */ | ||||
| #define LIMB_PLACE_VALUE(i) 60 /* bit count the generic gf_serialize/gf_deserialize advance by for limb i; the same for every i here */ | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) { | ||||
| ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i]; | ||||
| @@ -26,12 +28,6 @@ void gf_sub_RAW (gf out, const gf a, const gf b) { | |||||
| */ | */ | ||||
| } | } | ||||
| void gf_copy (gf out, const gf a) { | |||||
| /* Copy a into out, one big_register_t at a time. */ | |||||
| const big_register_t *src = (const big_register_t *)a; | |||||
| big_register_t *dst = (big_register_t *)out; | |||||
| const unsigned int count = sizeof(*out)/sizeof(big_register_t); | |||||
| for (unsigned int k=0; k<count; k++) { | |||||
| dst[k] = src[k]; | |||||
| } | |||||
| } | |||||
| void gf_bias ( | void gf_bias ( | ||||
| gf a, int amt | gf a, int amt | ||||
| ) { | ) { | ||||
| @@ -10,6 +10,12 @@ | |||||
| #include "field.h" | #include "field.h" | ||||
| const gf MODULUS = {FIELD_LITERAL( /* limb literal for the modulus: every argument is 60 one-bits except the fifth, whose lowest bit is clear */ | |||||
| 0xfffffffffffffff, 0xfffffffffffffff, 0xfffffffffffffff, 0xfffffffffffffff, | |||||
| 0xffffffffffffffe, 0xfffffffffffffff, 0xfffffffffffffff, 0xfffffffffffffff | |||||
| )}; | |||||
| void | void | ||||
| gf_isr ( | gf_isr ( | ||||
| gf_a_t a, | gf_a_t a, | ||||
| @@ -320,49 +320,3 @@ void gf_strong_reduce (gf a) { | |||||
| assert(word_is_zero(carry + scarry)); | assert(word_is_zero(carry + scarry)); | ||||
| } | } | ||||
| void gf_serialize (uint8_t *serial, const struct gf x) { | |||||
| /* Stream the nine limbs of the strongly reduced x into serial, 58 bits per limb, low byte first: 65 full bytes followed by one byte holding the leftover bits. */ | |||||
| gf red; | |||||
| gf_copy(&red, x); | |||||
| gf_strong_reduce(&red); | |||||
| uint64_t acc=0; | |||||
| int pending = 0; | |||||
| uint8_t *dst = serial; | |||||
| for (int idx=0; idx<9; idx++) { | |||||
| acc |= red.limb[idx] << pending; | |||||
| pending += 58; | |||||
| /* flush every complete byte that is now buffered */ | |||||
| while (pending >= 8) { | |||||
| *dst++ = acc; | |||||
| acc >>= 8; | |||||
| pending -= 8; | |||||
| } | |||||
| assert(pending <= 6); | |||||
| } | |||||
| assert(pending); | |||||
| *dst = acc; | |||||
| } | |||||
| mask_t gf_deserialize (gf x, const uint8_t serial[66]) { /* Unpack 66 bytes into the nine limbs of x, 58 bits per limb, and return the result of the reduction check below. */ | |||||
| int i,k=0,bits=0; | |||||
| __uint128_t out = 0; | |||||
| uint64_t mask = (1ull<<58)-1; | |||||
| for (i=0; i<9; i++) { | |||||
| out >>= 58; /* drop the limb stored on the previous pass */ | |||||
| for (; bits<58; bits+=8) { | |||||
| out |= ((__uint128_t)serial[k++])<<bits; /* refill until at least 58 bits are buffered */ | |||||
| } | |||||
| x->limb[i] = out & mask; | |||||
| bits -= 58; | |||||
| } | |||||
| /* Check for reduction. First, high has to be < 2^57 */ | |||||
| mask_t good = word_is_zero(out>>57); /* out was not shifted after the last pass, so it still holds the unmasked top limb */ | |||||
| uint64_t and = -1ull; | |||||
| for (i=0; i<8; i++) { | |||||
| and &= x->limb[i]; | |||||
| } | |||||
| and &= (2*out+1); /* widen the 57-bit top limb to line up with the 58-bit limbs */ | |||||
| good &= word_is_zero((and+1)>>58); /* and+1 reaches bit 58 only when the eight low limbs and the low 57 bits of out are all ones */ | |||||
| return good; | |||||
| } | |||||
| @@ -2,6 +2,8 @@ | |||||
| * Released under the MIT License. See LICENSE.txt for license information. | * Released under the MIT License. See LICENSE.txt for license information. | ||||
| */ | */ | ||||
| #define LIMB_PLACE_VALUE(i) 58 /* bit count the generic gf_serialize/gf_deserialize advance by for limb i; the same for every i here */ | |||||
| void gf_add_RAW (gf out, const gf a, const gf b) { | void gf_add_RAW (gf out, const gf a, const gf b) { | ||||
| for (unsigned int i=0; i<9; i++) { | for (unsigned int i=0; i<9; i++) { | ||||
| out->limb[i] = a->limb[i] + b->limb[i]; | out->limb[i] = a->limb[i] + b->limb[i]; | ||||
| @@ -389,51 +389,3 @@ void gf_strong_reduce (gf *a) { | |||||
| a->limb[3] = a->limb[7] = a->limb[11] = 0; | a->limb[3] = a->limb[7] = a->limb[11] = 0; | ||||
| } | } | ||||
| void gf_serialize (uint8_t *serial, const struct gf *x) { | |||||
| /* Stream the nine limbs of the strongly reduced x into serial, 58 bits per limb, low byte first; limb i is read from storage slot LIMBPERM(i). Emits 65 full bytes followed by one byte holding the leftover bits. */ | |||||
| gf red; | |||||
| gf_copy(&red, x); | |||||
| gf_strong_reduce(&red); | |||||
| uint64_t acc=0; | |||||
| int pending = 0; | |||||
| uint8_t *dst = serial; | |||||
| for (unsigned int idx=0; idx<9; idx++) { | |||||
| acc |= red.limb[LIMBPERM(idx)] << pending; | |||||
| pending += 58; | |||||
| /* flush every complete byte that is now buffered */ | |||||
| while (pending >= 8) { | |||||
| *dst++ = acc; | |||||
| acc >>= 8; | |||||
| pending -= 8; | |||||
| } | |||||
| assert(pending <= 6); | |||||
| } | |||||
| assert(pending); | |||||
| *dst = acc; | |||||
| } | |||||
| /* Unpack 66 bytes into the nine limbs of *x, 58 bits per limb, storing limb i at slot LIMBPERM(i), and return the result of the reduction check. Slots 3, 7 and 11 are cleared before returning. */ | |||||
| /* The array bound is the byte count read below (66); LIMBPERM permutes limb indices and must not be applied to it. */ | |||||
| mask_t gf_deserialize (gf *x, const uint8_t serial[66]) { | |||||
| int i,k=0,bits=0; | |||||
| __uint128_t out = 0; | |||||
| uint64_t mask = (1ull<<58)-1; | |||||
| for (i=0; i<9; i++) { | |||||
| out >>= 58; /* drop the limb stored on the previous pass */ | |||||
| for (; bits<58; bits+=8) { | |||||
| out |= ((__uint128_t)serial[k++])<<bits; /* refill until at least 58 bits are buffered */ | |||||
| } | |||||
| x->limb[LIMBPERM(i)] = out & mask; | |||||
| bits -= 58; | |||||
| } | |||||
| /* Check for reduction. First, high has to be < 2^57 */ | |||||
| mask_t good = word_is_zero(out>>57); /* out was not shifted after the last pass, so it still holds the unmasked top limb */ | |||||
| uint64_t and = -1ull; | |||||
| for (i=0; i<8; i++) { | |||||
| and &= x->limb[LIMBPERM(i)]; | |||||
| } | |||||
| and &= (2*out+1); /* widen the 57-bit top limb to line up with the 58-bit limbs */ | |||||
| good &= word_is_zero((and+1)>>58); /* and+1 reaches bit 58 only when the eight low limbs and the low 57 bits of out are all ones */ | |||||
| x->limb[3] = x->limb[7] = x->limb[11] = 0; /* the slots LIMBPERM never yields for 0..8 */ | |||||
| return good; | |||||
| } | |||||
| @@ -4,6 +4,7 @@ | |||||
| /* FIXME: Currently this file doesn't work at all, because the struct is declared [9] and not [12] */ | /* FIXME: Currently this file doesn't work at all, because the struct is declared [9] and not [12] */ | ||||
| #define LIMBPERM(x) (((x)%3)*4 + (x)/3) /* logical limb 0..8 -> storage slot; slots 3, 7 and 11 are never produced */ | #define LIMBPERM(x) (((x)%3)*4 + (x)/3) /* logical limb 0..8 -> storage slot; slots 3, 7 and 11 are never produced */ | ||||
| #define LIMB_PLACE_VALUE(i) ((((i)&3)==3) ? 0 : 58) /* i is a storage slot: slots 3, 7, 11 ((i)&3 == 3) hold no value bits, every other slot is a 58-bit limb */ | |||||
| #define USE_P521_3x3_TRANSPOSE | #define USE_P521_3x3_TRANSPOSE | ||||
| typedef uint64x4_t uint64x3_t; /* fit it in a vector register */ | typedef uint64x4_t uint64x3_t; /* fit it in a vector register */ | ||||
| @@ -10,6 +10,12 @@ | |||||
| #include "field.h" | #include "field.h" | ||||
| const gf MODULUS = {FIELD_LITERAL( /* limb literal for the modulus: eight arguments of 58 one-bits followed by a last argument of 57 one-bits */ | |||||
| 0x3ffffffffffffff, 0x3ffffffffffffff, 0x3ffffffffffffff, | |||||
| 0x3ffffffffffffff, 0x3ffffffffffffff, 0x3ffffffffffffff, | |||||
| 0x3ffffffffffffff, 0x3ffffffffffffff, 0x1ffffffffffffff | |||||
| )}; | |||||
| void | void | ||||
| gf_isr ( | gf_isr ( | ||||
| gf_a_t a, | gf_a_t a, | ||||