Browse Source

one ser/deser to rule them all (TODO test on NEON and other places with LIMBPERM)

master
Michael Hamburg 10 years ago
parent
commit
38455f34f2
27 changed files with 136 additions and 530 deletions
  1. +69
    -57
      src/decaf.c
  2. +10
    -1
      src/gen_headers/f_field_h.py
  3. +13
    -64
      src/p25519/arch_32/f_impl.c
  4. +3
    -2
      src/p25519/arch_32/f_impl.h
  5. +0
    -42
      src/p25519/arch_ref64/f_impl.c
  6. +2
    -0
      src/p25519/arch_ref64/f_impl.h
  7. +0
    -42
      src/p25519/arch_x86_64/f_impl.c
  8. +2
    -0
      src/p25519/arch_x86_64/f_impl.h
  9. +5
    -2
      src/p25519/f_arithmetic.c
  10. +0
    -50
      src/p448/arch_32/f_impl.c
  11. +2
    -0
      src/p448/arch_32/f_impl.h
  12. +1
    -59
      src/p448/arch_arm_32/f_impl.c
  13. +2
    -0
      src/p448/arch_arm_32/f_impl.h
  14. +0
    -1
      src/p448/arch_neon/f_impl.c
  15. +2
    -0
      src/p448/arch_neon/f_impl.h
  16. +2
    -0
      src/p448/arch_ref64/f_impl.h
  17. +0
    -48
      src/p448/arch_x86_64/f_impl.c
  18. +1
    -0
      src/p448/arch_x86_64/f_impl.h
  19. +5
    -0
      src/p448/f_arithmetic.c
  20. +0
    -62
      src/p480/arch_x86_64/f_impl.c
  21. +2
    -6
      src/p480/arch_x86_64/f_impl.h
  22. +6
    -0
      src/p480/f_arithmetic.c
  23. +0
    -46
      src/p521/arch_ref64/f_impl.c
  24. +2
    -0
      src/p521/arch_ref64/f_impl.h
  25. +0
    -48
      src/p521/arch_x86_64_r12/f_impl.c
  26. +1
    -0
      src/p521/arch_x86_64_r12/f_impl.h
  27. +6
    -0
      src/p521/f_arithmetic.c

+ 69
- 57
src/decaf.c View File

@@ -86,12 +86,45 @@ const size_t API_NS2(alignof,precomputed_s) = 32;
#define UNROLL
#endif

#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<sizeof(gf)/sizeof(word_t); i++) { op; }}
#define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<sizeof(gf)/sizeof(word_t); i++) { op; }}
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<NLIMBS; i++) { op; }}
#define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<NLIMBS; i++) { op; }}

/**
 * Serialize the field element x into serial[0..SER_BYTES-1], emitting the
 * low-order byte first.  A copy of x is strongly reduced before packing, so
 * x itself is not modified.
 *
 * Limbs are visited in LIMBPERM order and each contributes
 * LIMB_PLACE_VALUE(limb index) bits to the output bit stream.
 */
void gf_serialize (uint8_t serial[SER_BYTES], const gf x) {
gf red;
gf_copy(red, x);
gf_strong_reduce(red);
/* j: next logical limb to consume; fill: number of pending bits in buffer. */
unsigned int j=0, fill=0;
dword_t buffer = 0;
UNROLL for (unsigned int i=0; i<SER_BYTES; i++) {
/* Fewer than 8 bits pending: splice the next limb in above them. */
if (fill < 8 && j < NLIMBS) {
buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill;
fill += LIMB_PLACE_VALUE(LIMBPERM(j));
j++;
}
/* Emit the low 8 bits; the assignment truncates to uint8_t. */
serial[i] = buffer;
/* fill is unsigned, so this wraps when fewer than 8 bits were pending.
 * NOTE(review): that looks reachable only once all limbs are consumed
 * (e.g. the final byte of a 255-bit field) -- confirm for zero-width limbs. */
fill -= 8;
buffer >>= 8;
}
}

/** Copy x = y */
static INLINE void
gf_cpy(gf x, const gf y) { x[0] = y[0]; }
/**
 * Deserialize SER_BYTES bytes (low-order byte first) into the limbs of x.
 *
 * Returns a mask combining two checks: no bits are left over in the buffer
 * after the last limb, and the running borrow of (x - MODULUS) is nonzero.
 * NOTE(review): presumably all-ones means "value was < p" (the wrapper this
 * replaces was documented as "return TRUE if < p") -- confirm against
 * word_is_zero's definition.
 */
mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) {
/* j: next input byte; fill: number of pending bits held in buffer. */
unsigned int j=0, fill=0;
dword_t buffer = 0;
/* Signed borrow chain for the limb-wise subtraction x - MODULUS. */
dsword_t scarry = 0;
UNROLL for (unsigned int i=0; i<NLIMBS; i++) {
/* Pull in bytes until this limb's bits are available or input runs out. */
UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < SER_BYTES) {
buffer |= ((dword_t)serial[j]) << fill;
fill += 8;
j++;
}
/* Every limb but the last is masked to its width; the last takes whatever
 * remains, so excess high bits are not masked off in the stored limb. */
x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer;
fill -= LIMB_PLACE_VALUE(LIMBPERM(i));
buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i));
/* Keep only the borrow out of this limb's subtraction (shift by a full word). */
scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t));
}
/* Succeed only if nothing is left in buffer and the subtraction borrowed. */
return word_is_zero(buffer) & ~word_is_zero(scarry);
}

/** Constant time, x = is_z ? z : y */
static INLINE void
@@ -120,9 +153,7 @@ cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) {
/** Compare a==b */
/* Not static because it's used in inverse square root. */
decaf_word_t gf_eq(const gf a, const gf b);

decaf_word_t
gf_eq(const gf a, const gf b) {
decaf_word_t gf_eq(const gf a, const gf b) {
gf c;
gf_sub(c,a,b);
gf_strong_reduce(c);
@@ -153,13 +184,10 @@ gf_invert(gf y, const gf x) {
(void)ret; assert(ret);
gf_sqr(t1, t2);
gf_mul(t2, t1, x); // not direct to y in case of alias.
gf_cpy(y, t2);
gf_copy(y, t2);
}

/**
* Mul by signed int. Not constant-time WRT the sign of that int.
* Just uses a full mul (PERF)
*/
/** Mul by signed int. Not constant-time WRT the sign of that int. */
static INLINE void
gf_mulw_sgn(gf c, const gf a, int w) {
if (w>0) {
@@ -182,7 +210,7 @@ static decaf_word_t hibit(const gf x) {
/** Return high bit of x = low bit of 2x mod p */
static decaf_word_t lobit(const gf x) {
gf y;
gf_cpy(y,x);
gf_copy(y,x);
gf_strong_reduce(y);
return -(y->limb[0]&1);
}
@@ -394,16 +422,9 @@ API_NS(scalar_eq) (
return word_is_zero(diff);
}

/* *** API begins here *** */

/** identity = (0,1) */
const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};

static void
gf_encode ( unsigned char ser[SER_BYTES], gf a ) {
gf_serialize(ser, (gf_s *)a);
}

static void
deisogenize (
gf_s *__restrict__ s,
@@ -508,14 +529,7 @@ deisogenize (
void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
gf s, mtos;
deisogenize(s,mtos,p,0,0,0);
gf_encode ( ser, s );
}

/**
* Deserialize a field element, return TRUE if < p.
*/
static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) {
return gf_deserialize((gf_s *)s, ser);
gf_serialize ( ser, s );
}

decaf_error_t API_NS(point_decode) (
@@ -524,7 +538,7 @@ decaf_error_t API_NS(point_decode) (
decaf_bool_t allow_identity
) {
gf s, a, b, c, d, e, f;
decaf_bool_t succ = gf_deser(s, ser), zero = gf_eq(s, ZERO);
decaf_bool_t succ = gf_deserialize(s, ser), zero = gf_eq(s, ZERO);
allow_identity = ~word_is_zero(allow_identity);
succ &= allow_identity | ~zero;
succ &= ~hibit(s);
@@ -592,8 +606,6 @@ decaf_error_t API_NS(point_decode) (
#define NEG_D 0
#endif



void API_NS(point_sub) (
point_t p,
const point_t q,
@@ -688,8 +700,8 @@ void API_NS(point_negate) (
const point_t a
) {
gf_sub(nega->x, ZERO, a->x);
gf_cpy(nega->y, a->y);
gf_cpy(nega->z, a->z);
gf_copy(nega->y, a->y);
gf_copy(nega->z, a->z);
gf_sub(nega->t, ZERO, a->t);
}

@@ -827,7 +839,7 @@ niels_to_pt (
gf_add ( e->y, n->b, n->a );
gf_sub ( e->x, n->b, n->a );
gf_mul ( e->t, e->y, e->x );
gf_cpy ( e->z, ONE );
gf_copy ( e->z, ONE );
}

static NOINLINE void
@@ -882,7 +894,7 @@ add_pniels_to_pt (
) {
gf L0;
gf_mul ( L0, p->z, pn->z );
gf_cpy ( p->z, L0 );
gf_copy ( p->z, L0 );
add_niels_to_pt( p, pn->n, before_double );
}

@@ -894,7 +906,7 @@ sub_pniels_from_pt (
) {
gf L0;
gf_mul ( L0, p->z, pn->z );
gf_cpy ( p->z, L0 );
gf_copy ( p->z, L0 );
sub_niels_from_pt( p, pn->n, before_double );
}

@@ -1203,7 +1215,7 @@ void API_NS(point_from_hash_nonuniform) (
// TODO: simplify since we don't return a hint anymore
// TODO: test pathological case ur0^2 = 1/(1-d)
gf r0,r,a,b,c,dee,D,N,rN,e;
gf_deser(r0,ser);
gf_deserialize(r0,ser);
gf_strong_reduce(r0);
gf_sqr(a,r0);
#if P_MOD_8 == 5
@@ -1265,7 +1277,7 @@ void API_NS(point_from_hash_nonuniform) (
/* isogenize */
#if IMAGINE_TWIST
gf_mul(c,a,SQRT_MINUS_ONE);
gf_cpy(a,c);
gf_copy(a,c);
#endif
gf_sqr(c,a); /* s^2 */
@@ -1326,7 +1338,7 @@ API_NS(invert_elligator_nonuniform) (
succ &= ~(is_identity & sgn_ed_T); /* NB: there are no preimages of rotated identity. */
#endif
gf_encode(recovered_hash, b);
gf_serialize(recovered_hash, b);
/* TODO: deal with overflow flag */
return decaf_succeed_if(succ);
}
@@ -1380,14 +1392,14 @@ void API_NS(point_debugging_torque) (
gf tmp;
gf_mul(tmp,p->x,SQRT_MINUS_ONE);
gf_mul(q->x,p->y,SQRT_MINUS_ONE);
gf_cpy(q->y,tmp);
gf_cpy(q->z,p->z);
gf_copy(q->y,tmp);
gf_copy(q->z,p->z);
gf_sub(q->t,ZERO,p->t);
#else
gf_sub(q->x,ZERO,p->x);
gf_sub(q->y,ZERO,p->y);
gf_cpy(q->z,p->z);
gf_cpy(q->t,p->t);
gf_copy(q->z,p->z);
gf_copy(q->t,p->t);
#endif
}

@@ -1397,16 +1409,16 @@ void API_NS(point_debugging_pscale) (
const uint8_t factor[SER_BYTES]
) {
gf gfac,tmp;
ignore_result(gf_deser(gfac,factor));
ignore_result(gf_deserialize(gfac,factor));
cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO));
gf_mul(tmp,p->x,gfac);
gf_cpy(q->x,tmp);
gf_copy(q->x,tmp);
gf_mul(tmp,p->y,gfac);
gf_cpy(q->y,tmp);
gf_copy(q->y,tmp);
gf_mul(tmp,p->z,gfac);
gf_cpy(q->z,tmp);
gf_copy(q->z,tmp);
gf_mul(tmp,p->t,gfac);
gf_cpy(q->t,tmp);
gf_copy(q->t,tmp);
}

static void gf_batch_invert (
@@ -1417,7 +1429,7 @@ static void gf_batch_invert (
gf t1;
assert(n>1);
gf_cpy(out[1], in[0]);
gf_copy(out[1], in[0]);
int i;
for (i=1; i<(int) (n-1); i++) {
gf_mul(out[i+1], out[i], in[i]);
@@ -1428,9 +1440,9 @@ static void gf_batch_invert (

for (i=n-1; i>0; i--) {
gf_mul(t1, out[i], out[0]);
gf_cpy(out[i], t1);
gf_copy(out[i], t1);
gf_mul(t1, out[0], in[i]);
gf_cpy(out[0], t1);
gf_copy(out[0], t1);
}
}

@@ -1447,15 +1459,15 @@ static void batch_normalize_niels (
for (i=0; i<n; i++) {
gf_mul(product, table[i]->a, zis[i]);
gf_strong_reduce(product);
gf_cpy(table[i]->a, product);
gf_copy(table[i]->a, product);
gf_mul(product, table[i]->b, zis[i]);
gf_strong_reduce(product);
gf_cpy(table[i]->b, product);
gf_copy(table[i]->b, product);
gf_mul(product, table[i]->c, zis[i]);
gf_strong_reduce(product);
gf_cpy(table[i]->c, product);
gf_copy(table[i]->c, product);
}
decaf_bzero(product,sizeof(product));
@@ -1500,7 +1512,7 @@ void API_NS(precompute) (

pt_to_pniels(pn_tmp, start);
memcpy(table->table[idx], pn_tmp->n, sizeof(pn_tmp->n));
gf_cpy(zs[idx], pn_tmp->z);
gf_copy(zs[idx], pn_tmp->z);
if (j >= (1u<<(t-1)) - 1) break;
int delta = (j+1) ^ ((j+1)>>1) ^ gray;
@@ -1733,7 +1745,7 @@ void API_NS(precompute_wnafs) (
prepare_wnaf_table(tmp,base,DECAF_WNAF_FIXED_TABLE_BITS);
for (i=0; i<1<<DECAF_WNAF_FIXED_TABLE_BITS; i++) {
memcpy(out[i], tmp[i]->n, sizeof(niels_t));
gf_cpy(zs[i], tmp[i]->z);
gf_copy(zs[i], tmp[i]->z);
}
batch_normalize_niels(out, (const gf *)zs, zis, 1<<DECAF_WNAF_FIXED_TABLE_BITS);


+ 10
- 1
src/gen_headers/f_field_h.py View File

@@ -13,8 +13,9 @@ f_field_h = gen_file(
#include "word.h"

#define __DECAF_%(gf_shortname)s_GF_DEFINED__ 1
#define NLIMBS (%(gf_impl_bits)d/sizeof(word_t)/8)
typedef struct gf_%(gf_shortname)s_s {
word_t limb[%(gf_impl_bits)d/sizeof(word_t)/8];
word_t limb[NLIMBS];
} __attribute__((aligned(32))) gf_%(gf_shortname)s_s, gf_%(gf_shortname)s_t[1];

#define GF_LIT_LIMB_BITS %(gf_lit_limb_bits)d
@@ -33,6 +34,7 @@ typedef struct gf_%(gf_shortname)s_s {
#define gf_isr gf_%(gf_shortname)s_isr
#define gf_serialize gf_%(gf_shortname)s_serialize
#define gf_deserialize gf_%(gf_shortname)s_deserialize
#define MODULUS gf_%(gf_shortname)s_MODULUS

#define SQRT_MINUS_ONE P%(gf_shortname)s_SQRT_MINUS_ONE /* might not be defined */

@@ -42,6 +44,8 @@ typedef struct gf_%(gf_shortname)s_s {
extern "C" {
#endif

/* Field modulus in limb form.  Declaration only: the single definition,
 * with its initializer, lives in the field's f_arithmetic.c.  Without
 * `extern` every includer would emit its own tentative definition. */
extern const gf MODULUS;

/* Defined below in f_impl.h */
static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; }
static INLINE_UNUSED void gf_add_RAW (gf out, const gf a, const gf b);
@@ -61,4 +65,9 @@ mask_t gf_deserialize (gf x, const uint8_t serial[(GF_BITS-1)/8+1]);
#endif

#include "f_impl.h" /* Bring in the inline implementations */

#ifndef LIMBPERM
#define LIMBPERM(i) (i)
#endif
#define LIMB_MASK(i) (((1ull)<<LIMB_PLACE_VALUE(i))-1)
""")

+ 13
- 64
src/p25519/arch_32/f_impl.c View File

@@ -91,88 +91,37 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
}

/**
 * Reduce a in place to its canonical representative modulo p = 2^255 - 19.
 *
 * Strategy: fold the bits above the top limb back into limb 0 (times 19),
 * subtract p limb by limb, then add p back under a mask if the subtraction
 * went negative.  Limb widths come from LIMB_PLACE_VALUE / LIMB_MASK and the
 * modulus limbs from MODULUS.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high */
    a->limb[0] += (a->limb[9]>>25)*19;
    a->limb[9] &= LIMB_MASK(9);

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<10; i++) {
        scarry = scarry + a->limb[i] - MODULUS->limb[i];
        a->limb[i] = scarry & LIMB_MASK(i);
        scarry >>= LIMB_PLACE_VALUE(i);
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^255
     * so let's add back in p.  will carry back off the top for 2^255.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* All-ones when the subtraction borrowed, zero otherwise. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back: exactly one limb per iteration.  The loop header alone
     * advances i; an extra increment in the body would skip the odd limbs. */
    for (unsigned int i=0; i<10; i++) {
        carry = carry + a->limb[i] + (scarry_0 & MODULUS->limb[i]);
        a->limb[i] = carry & LIMB_MASK(i);
        carry >>= LIMB_PLACE_VALUE(i);
    }

    /* The carry off the top must cancel the borrow from the subtraction. */
    assert(word_is_zero(carry + scarry_0));
}

mask_t gf_deserialize (gf x, const uint8_t serial[32]) {
unsigned int j=0, fill=0;
dword_t buffer = 0;
for (unsigned int i=0; i<32; i++) {
buffer |= ((dword_t)serial[i]) << fill;
fill += 8;
if (fill >= LIMB_PLACE_VALUE(j) || i == 31) {
assert(j < sizeof(x->limb)/sizeof(x->limb[0]));
word_t mask = ((1ull)<<LIMB_PLACE_VALUE(j))-1;
x->limb[j] = (i==31) ? buffer : (buffer & mask); // FIXME: this can in theory truncate the buffer if it's not in field.
buffer >>= LIMB_PLACE_VALUE(j);
fill -= LIMB_PLACE_VALUE(j);
j++;
}
}
return -1; // FIXME: test whether in field.
}

+ 3
- 2
src/p25519/arch_32/f_impl.h View File

@@ -3,8 +3,9 @@
*/

#define LIMB(x) (x##ull)&((1ull<<26)-1), (x##ull)>>26
#define FIELD_LITERAL(a,b,c,d,e) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}}
#define FIELD_LITERAL(a,b,c,d,e) {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}}

#define LIMB_PLACE_VALUE(i) (((i)&1)?25:26)

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<10; i++) {


+ 0
- 42
src/p25519/arch_ref64/f_impl.c View File

@@ -97,45 +97,3 @@ void gf_strong_reduce (gf a) {

assert(word_is_zero(carry + scarry));
}

void gf_serialize (uint8_t serial[32], const gf x) {
int i,j;
gf red;
gf_copy(red, x);
gf_strong_reduce(red);
uint64_t *r = red->limb;
uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12};
for (i=0; i<4; i++) {
for (j=0; j<8; j++) {
serial[8*i+j] = ser64[i];
ser64[i] >>= 8;
}
}
}

mask_t gf_deserialize (gf x, const uint8_t serial[32]) {
int i,j;
uint64_t ser64[4], mask = ((1ull<<51)-1);
for (i=0; i<4; i++) {
uint64_t out = 0;
for (j=0; j<8; j++) {
out |= ((uint64_t)serial[8*i+j])<<(8*j);
}
ser64[i] = out;
}
/* Test for >= 2^255-19 */
uint64_t ge = -(((__uint128_t)ser64[0]+19)>>64);
ge &= ser64[1];
ge &= ser64[2];
ge &= (ser64[3]<<1) + 1;
ge |= -(((__uint128_t)ser64[3]+0x8000000000000000)>>64);
x->limb[0] = ser64[0] & mask;
x->limb[1] = (ser64[0]>>51 | ser64[1]<<13) & mask;
x->limb[2] = (ser64[1]>>38 | ser64[2]<<26) & mask;
x->limb[3] = (ser64[2]>>25 | ser64[3]<<39) & mask;
x->limb[4] = ser64[3]>>12;
return ~word_is_zero(~ge);
}

+ 2
- 0
src/p25519/arch_ref64/f_impl.h View File

@@ -4,6 +4,8 @@

#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}

#define LIMB_PLACE_VALUE(i) 51

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i];


+ 0
- 42
src/p25519/arch_x86_64/f_impl.c View File

@@ -208,45 +208,3 @@ void gf_strong_reduce (gf a) {

assert(word_is_zero(carry + scarry));
}

void gf_serialize (uint8_t serial[32], const gf x) {
int i,j;
gf red;
gf_copy(red, x);
gf_strong_reduce(red);
uint64_t *r = red->limb;
uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12};
for (i=0; i<4; i++) {
for (j=0; j<8; j++) {
serial[8*i+j] = ser64[i];
ser64[i] >>= 8;
}
}
}

mask_t gf_deserialize (gf x, const uint8_t serial[32]) {
int i,j;
uint64_t ser64[4], mask = ((1ull<<51)-1);
for (i=0; i<4; i++) {
uint64_t out = 0;
for (j=0; j<8; j++) {
out |= ((uint64_t)serial[8*i+j])<<(8*j);
}
ser64[i] = out;
}
/* Test for >= 2^255-19 */
uint64_t ge = -(((__uint128_t)ser64[0]+19)>>64);
ge &= ser64[1];
ge &= ser64[2];
ge &= (ser64[3]<<1) + 1;
ge |= -(((__uint128_t)ser64[3]+0x8000000000000000)>>64);
x->limb[0] = ser64[0] & mask;
x->limb[1] = (ser64[0]>>51 | ser64[1]<<13) & mask;
x->limb[2] = (ser64[1]>>38 | ser64[2]<<26) & mask;
x->limb[3] = (ser64[2]>>25 | ser64[3]<<39) & mask;
x->limb[4] = ser64[3]>>12;
return ~word_is_zero(~ge);
}

+ 2
- 0
src/p25519/arch_x86_64/f_impl.h View File

@@ -4,6 +4,8 @@

#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}

#define LIMB_PLACE_VALUE(i) 51

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i];


+ 5
- 2
src/p25519/f_arithmetic.c View File

@@ -18,14 +18,17 @@ const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
0x78595a6804c9e,
0x2b8324804fc1d
)};

const gf MODULUS = {FIELD_LITERAL(
0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff
)};
/* TODO put in header */
extern const gf_25519_t decaf_255_ONE;
extern mask_t decaf_255_gf_eq(const gf_25519_t a, const gf_25519_t b);

/* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */
void
gf_isr (
void gf_isr (
gf_25519_t a,
const gf_25519_t x
) {


+ 0
- 50
src/p448/arch_32/f_impl.c View File

@@ -142,53 +142,3 @@ void gf_strong_reduce (gf a) {

assert(word_is_zero(carry + scarry));
}

void gf_serialize (uint8_t *serial, const gf x) {
int i,j;
gf red;
gf_copy(red, x);
gf_strong_reduce(red);
for (i=0; i<8; i++) {
uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28);
for (j=0; j<7; j++) {
serial[7*i+j] = limb;
limb >>= 8;
}
assert(limb == 0);
}
}

mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
int i,j;
for (i=0; i<8; i++) {
uint64_t out = 0;
for (j=0; j<7; j++) {
out |= ((uint64_t)serial[7*i+j])<<(8*j);
}
x->limb[2*i] = out & ((1ull<<28)-1);
x->limb[2*i+1] = out >> 28;
}
/* Check for reduction.
*
* The idea is to create a variable ge which is all ones (rather, 56 ones)
* if and only if the low $i$ words of $x$ are >= those of p.
*
* Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
*/
uint32_t ge = -1, mask = (1ull<<28)-1;
for (i=0; i<8; i++) {
ge &= x->limb[i];
}
/* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */
ge = (ge & (x->limb[8] + 1)) | word_is_zero(x->limb[8] ^ mask);
/* Propagate the rest */
for (i=9; i<16; i++) {
ge &= x->limb[i];
}
return ~word_is_zero(ge ^ mask);
}


+ 2
- 0
src/p448/arch_32/f_impl.h View File

@@ -5,6 +5,8 @@
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}
#define LIMB_PLACE_VALUE(i) 28

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {


+ 1
- 59
src/p448/arch_arm_32/f_impl.c View File

@@ -834,9 +834,7 @@ void gf_mulw (
c[1] += accum8 >> 28;
}

void gf_strong_reduce (
gf a
) {
void gf_strong_reduce (gf a) {
word_t mask = (1ull<<28)-1;

/* first, clear high */
@@ -875,59 +873,3 @@ void gf_strong_reduce (

assert(word_is_zero(carry + scarry));
}

void gf_serialize (
uint8_t *serial,
const gf x
) {
int i,j;
gf red;
gf_copy(red, x);
gf_strong_reduce(red);
for (i=0; i<8; i++) {
uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28);
for (j=0; j<7; j++) {
serial[7*i+j] = limb;
limb >>= 8;
}
assert(limb == 0);
}
}

mask_t
gf_deserialize (
gf x,
const uint8_t serial[56]
) {
int i,j;
for (i=0; i<8; i++) {
uint64_t out = 0;
for (j=0; j<7; j++) {
out |= ((uint64_t)serial[7*i+j])<<(8*j);
}
x->limb[2*i] = out & ((1ull<<28)-1);
x->limb[2*i+1] = out >> 28;
}
/* Check for reduction.
*
* The idea is to create a variable ge which is all ones (rather, 56 ones)
* if and only if the low $i$ words of $x$ are >= those of p.
*
* Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
*/
uint32_t ge = -1, mask = (1ull<<28)-1;
for (i=0; i<8; i++) {
ge &= x->limb[i];
}
/* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */
ge = (ge & (x->limb[8] + 1)) | word_is_zero(x->limb[8] ^ mask);
/* Propagate the rest */
for (i=9; i<16; i++) {
ge &= x->limb[i];
}
return ~word_is_zero(ge ^ mask);
}

+ 2
- 0
src/p448/arch_arm_32/f_impl.h View File

@@ -5,6 +5,8 @@
#define LIMB(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) \
{{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e),LIMB(f),LIMB(g),LIMB(h)}}
#define LIMB_PLACE_VALUE(i) 28

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {


+ 0
- 1
src/p448/arch_neon/f_impl.c View File

@@ -684,4 +684,3 @@ mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
return ~word_is_zero(ge ^ mask);
}


+ 2
- 0
src/p448/arch_neon/f_impl.h View File

@@ -11,6 +11,8 @@
LIMBLO(b),LIMBLO(f), LIMBHI(b),LIMBHI(f), \
LIMBLO(c),LIMBLO(g), LIMBHI(c),LIMBHI(g), \
LIMBLO(d),LIMBLO(h), LIMBHI(d),LIMBHI(h)}}
#define LIMB_PLACE_VALUE(i) 28

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {


+ 2
- 0
src/p448/arch_ref64/f_impl.h View File

@@ -3,6 +3,8 @@
*/

#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
#define LIMB_PLACE_VALUE(i) 56

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<8; i++) {


+ 0
- 48
src/p448/arch_x86_64/f_impl.c View File

@@ -329,51 +329,3 @@ void gf_strong_reduce (gf a) {

assert(word_is_zero(carry + scarry));
}

void gf_serialize (uint8_t *serial, const gf x) {
int i,j;
gf red;
gf_copy(red, x);
gf_strong_reduce(red);
for (i=0; i<8; i++) {
for (j=0; j<7; j++) {
serial[7*i+j] = red->limb[i];
red->limb[i] >>= 8;
}
assert(red->limb[i] == 0);
}
}

mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
int i,j;
for (i=0; i<8; i++) {
word_t out = 0;
for (j=0; j<7; j++) {
out |= ((word_t)serial[7*i+j])<<(8*j);
}
x->limb[i] = out;
}
/* Check for reduction.
*
* The idea is to create a variable ge which is all ones (rather, 56 ones)
* if and only if the low $i$ words of $x$ are >= those of p.
*
* Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
*/
word_t ge = -1, mask = (1ull<<56)-1;
for (i=0; i<4; i++) {
ge &= x->limb[i];
}
/* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */
ge = (ge & (x->limb[4] + 1)) | word_is_zero(x->limb[4] ^ mask);
/* Propagate the rest */
for (i=5; i<8; i++) {
ge &= x->limb[i];
}
return ~word_is_zero(ge ^ mask);
}


+ 1
- 0
src/p448/arch_x86_64/f_impl.h View File

@@ -3,6 +3,7 @@
*/

#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
#define LIMB_PLACE_VALUE(i) 56

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {


+ 5
- 0
src/p448/f_arithmetic.c View File

@@ -10,6 +10,11 @@

#include "field.h"

const gf MODULUS = {FIELD_LITERAL(
0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff,
0xfffffffffffffe, 0xffffffffffffff, 0xffffffffffffff, 0xffffffffffffff
)};

void
gf_isr (
gf a,


+ 0
- 62
src/p480/arch_x86_64/f_impl.c View File

@@ -329,65 +329,3 @@ void gf_strong_reduce (gf *a) {

assert(word_is_zero(carry + scarry));
}

void gf_serialize (uint8_t *serial, const struct gf *x) {
int i,j,k=0;
gf red;
gf_copy(&red, x);
gf_strong_reduce(&red);
word_t r = 0;
for (i=0; i<8; i+=2) {
r = red.limb[i];
for (j=0; j<7; j++) {
serial[k++] = r;
r >>= 8;
}
assert(r<16);
r += red.limb[i+1]<<4;
for (j=0; j<8; j++) {
serial[k++] = r;
r >>= 8;
}
assert(r==0);
}
}

mask_t gf_deserialize (gf *x, const uint8_t serial[60]) {
int i,j,k=0;

for (i=0; i<8; i+=2) {
word_t r = 0;
for (j=0; j<8; j++) {
r |= ((word_t)serial[k++])<<(8*j);
}
x->limb[i] = r & ((1ull<<60)-1);
r >>= 60;
for (j=0; j<7; j++) {
r |= ((word_t)serial[k++])<<(8*j+4);
}
x->limb[i+1] = r;
}
/* Check for reduction.
*
* The idea is to create a variable ge which is all ones (rather, 60 ones)
* if and only if the low $i$ words of $x$ are >= those of p.
*
* Remember p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
*/
word_t ge = -1, mask = (1ull<<60)-1;
for (i=0; i<4; i++) {
ge &= x->limb[i];
}
/* At this point, ge = 1111 iff bottom are all 1111. Now propagate if 1110, or set if 1111 */
ge = (ge & (x->limb[4] + 1)) | word_is_zero(x->limb[4] ^ mask);
/* Propagate the rest */
for (i=5; i<8; i++) {
ge &= x->limb[i];
}
return ~word_is_zero(ge ^ mask);
}


+ 2
- 6
src/p480/arch_x86_64/f_impl.h View File

@@ -2,6 +2,8 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define LIMB_PLACE_VALUE(i) 60

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
((uint64xn_t*)out)[i] = ((const uint64xn_t*)a)[i] + ((const uint64xn_t*)b)[i];
@@ -26,12 +28,6 @@ void gf_sub_RAW (gf out, const gf a, const gf b) {
*/
}

void gf_copy (gf out, const gf a) {
for (unsigned int i=0; i<sizeof(*out)/sizeof(big_register_t); i++) {
((big_register_t *)out)[i] = ((const big_register_t *)a)[i];
}
}

void gf_bias (
gf a, int amt
) {


+ 6
- 0
src/p480/f_arithmetic.c View File

@@ -10,6 +10,12 @@

#include "field.h"


const gf MODULUS = {FIELD_LITERAL(
0xfffffffffffffff, 0xfffffffffffffff, 0xfffffffffffffff, 0xfffffffffffffff,
0xffffffffffffffe, 0xfffffffffffffff, 0xfffffffffffffff, 0xfffffffffffffff
)};

void
gf_isr (
gf_a_t a,


+ 0
- 46
src/p521/arch_ref64/f_impl.c View File

@@ -320,49 +320,3 @@ void gf_strong_reduce (gf a) {

assert(word_is_zero(carry + scarry));
}

void gf_serialize (uint8_t *serial, const struct gf x) {
int i,k=0;
gf red;
gf_copy(&red, x);
gf_strong_reduce(&red);
uint64_t r=0;
int bits = 0;
for (i=0; i<9; i++) {
r |= red.limb[i] << bits;
for (bits += 58; bits >= 8; bits -= 8) {
serial[k++] = r;
r >>= 8;
}
assert(bits <= 6);
}
assert(bits);
serial[k++] = r;
}

mask_t gf_deserialize (gf x, const uint8_t serial[66]) {
int i,k=0,bits=0;
__uint128_t out = 0;
uint64_t mask = (1ull<<58)-1;
for (i=0; i<9; i++) {
out >>= 58;
for (; bits<58; bits+=8) {
out |= ((__uint128_t)serial[k++])<<bits;
}
x->limb[i] = out & mask;
bits -= 58;
}
/* Check for reduction. First, high has to be < 2^57 */
mask_t good = word_is_zero(out>>57);
uint64_t and = -1ull;
for (i=0; i<8; i++) {
and &= x->limb[i];
}
and &= (2*out+1);
good &= word_is_zero((and+1)>>58);
return good;
}

+ 2
- 0
src/p521/arch_ref64/f_impl.h View File

@@ -2,6 +2,8 @@
* Released under the MIT License. See LICENSE.txt for license information.
*/

#define LIMB_PLACE_VALUE(i) 58

void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<9; i++) {
out->limb[i] = a->limb[i] + b->limb[i];


+ 0
- 48
src/p521/arch_x86_64_r12/f_impl.c View File

@@ -389,51 +389,3 @@ void gf_strong_reduce (gf *a) {

a->limb[3] = a->limb[7] = a->limb[11] = 0;
}

void gf_serialize (uint8_t *serial, const struct gf *x) {
unsigned int i,k=0;
gf red;
gf_copy(&red, x);
gf_strong_reduce(&red);
uint64_t r=0;
int bits = 0;
for (i=0; i<9; i++) {
r |= red.limb[LIMBPERM(i)] << bits;
for (bits += 58; bits >= 8; bits -= 8) {
serial[k++] = r;
r >>= 8;
}
assert(bits <= 6);
}
assert(bits);
serial[k++] = r;
}

mask_t gf_deserialize (gf *x, const uint8_t serial[LIMBPERM(66)]) {
int i,k=0,bits=0;
__uint128_t out = 0;
uint64_t mask = (1ull<<58)-1;
for (i=0; i<9; i++) {
out >>= 58;
for (; bits<58; bits+=8) {
out |= ((__uint128_t)serial[k++])<<bits;
}
x->limb[LIMBPERM(i)] = out & mask;
bits -= 58;
}
/* Check for reduction. First, high has to be < 2^57 */
mask_t good = word_is_zero(out>>57);
uint64_t and = -1ull;
for (i=0; i<8; i++) {
and &= x->limb[LIMBPERM(i)];
}
and &= (2*out+1);
good &= word_is_zero((and+1)>>58);

x->limb[3] = x->limb[7] = x->limb[11] = 0;
return good;
}

+ 1
- 0
src/p521/arch_x86_64_r12/f_impl.h View File

@@ -4,6 +4,7 @@

/* FIXME: Currently this file doesn't work at all, because the struct is declared [9] and not [12] */
/* Map logical limb x (0..8) to its slot in the 3x4 transposed layout. */
#define LIMBPERM(x) (((x)%3)*4 + (x)/3)
/* Bits carried by physical slot i: slots 3, 7 and 11 (i mod 4 == 3) are
 * padding and carry none; every other slot is a 58-bit limb. */
#define LIMB_PLACE_VALUE(i) ((((i)&3)==3) ? 0 : 58)
#define USE_P521_3x3_TRANSPOSE

typedef uint64x4_t uint64x3_t; /* fit it in a vector register */


+ 6
- 0
src/p521/f_arithmetic.c View File

@@ -10,6 +10,12 @@

#include "field.h"

const gf MODULUS = {FIELD_LITERAL(
0x3ffffffffffffff, 0x3ffffffffffffff, 0x3ffffffffffffff,
0x3ffffffffffffff, 0x3ffffffffffffff, 0x3ffffffffffffff,
0x3ffffffffffffff, 0x3ffffffffffffff, 0x1ffffffffffffff
)};

void
gf_isr (
gf_a_t a,


Loading…
Cancel
Save