Browse Source

Wipe out the multiple layers of renaming between decaf_fast and field. There are still some serious HACKs in the include priority to avoid multiple definitions of struct gf.

master
Michael Hamburg 9 years ago
parent
commit
5af980b85a
18 changed files with 748 additions and 465 deletions
  1. +39
    -92
      src/decaf_fast.c
  2. +24
    -26
      src/decaf_gen_tables.c
  3. +54
    -44
      src/include/field.h
  4. +20
    -20
      src/p25519/arch_ref64/p25519.c
  5. +53
    -53
      src/p25519/arch_ref64/p25519.h
  6. +20
    -20
      src/p25519/arch_x86_64/p25519.c
  7. +54
    -51
      src/p25519/arch_x86_64/p25519.h
  8. +0
    -1
      src/p25519/arch_x86_64/x86-64-arith.h
  9. +323
    -0
      src/p25519/arch_x86_64/x86-64-arith.h
  10. +14
    -14
      src/p25519/f_arithmetic.c
  11. +16
    -15
      src/p25519/f_field.h
  12. +28
    -28
      src/p448/f_arithmetic.c
  13. +14
    -14
      src/p448/f_field.h
  14. +28
    -28
      src/p480/f_arithmetic.c
  15. +14
    -14
      src/p480/f_field.h
  16. +28
    -28
      src/p521/f_arithmetic.c
  17. +14
    -14
      src/p521/f_field.h
  18. +5
    -3
      src/public_include/decaf/decaf_255.h

+ 39
- 92
src/decaf_fast.c View File

@@ -27,8 +27,6 @@
#define point_t decaf_255_point_t #define point_t decaf_255_point_t
#define precomputed_s decaf_255_precomputed_s #define precomputed_s decaf_255_precomputed_s
#define SER_BYTES DECAF_255_SER_BYTES #define SER_BYTES DECAF_255_SER_BYTES
#define gf_s gf_255_s
#define gf gf_255_t


#if WBITS == 64 #if WBITS == 64
typedef __int128_t decaf_sdword_t; typedef __int128_t decaf_sdword_t;
@@ -72,7 +70,7 @@ typedef struct { niels_t n; gf z; } __attribute__((aligned(32))) pniels_s, pniel
/* Precomputed base */ /* Precomputed base */
struct precomputed_s { niels_t table [DECAF_COMBS_N<<(DECAF_COMBS_T-1)]; }; struct precomputed_s { niels_t table [DECAF_COMBS_N<<(DECAF_COMBS_T-1)]; };


extern const field_t API_NS(precomputed_base_as_fe)[];
extern const gf API_NS(precomputed_base_as_fe)[];
const precomputed_s *API_NS(precomputed_base) = const precomputed_s *API_NS(precomputed_base) =
(const precomputed_s *) &API_NS(precomputed_base_as_fe); (const precomputed_s *) &API_NS(precomputed_base_as_fe);


@@ -95,52 +93,6 @@ const size_t API_NS2(alignof,precomputed_s) = 32;
/** Copy x = y */ /** Copy x = y */
siv gf_cpy(gf x, const gf y) { x[0] = y[0]; } siv gf_cpy(gf x, const gf y) { x[0] = y[0]; }


/** Mostly-unoptimized multiply, but at least it's unrolled. */
siv gf_mul (gf c, const gf a, const gf b) {
field_mul((field_t *)c, (const field_t *)a, (const field_t *)b);
}

/** Dedicated square */
siv gf_sqr (gf c, const gf a) {
field_sqr((field_t *)c, (const field_t *)a);
}

/** Add mod p. Conservatively always weak-reduce. */
snv gf_add ( gf_s *__restrict__ c, const gf a, const gf b ) {
field_add((field_t *)c, (const field_t *)a, (const field_t *)b);
}

/** Subtract mod p. Conservatively always weak-reduce. */
snv gf_sub ( gf c, const gf a, const gf b ) {
field_sub((field_t *)c, (const field_t *)a, (const field_t *)b);
}

/** Add mod p. Conservatively always weak-reduce.) */
siv gf_bias ( gf c, int amt) {
field_bias((field_t *)c, amt);
}

/** Subtract mod p. Bias by 2 and don't reduce */
siv gf_sub_nr ( gf_s *__restrict__ c, const gf a, const gf b ) {
// FOR_LIMB_U(i, c->limb[i] = a->limb[i] - b->limb[i] + 2*P->limb[i] );
field_sub_nr((field_t *)c, (const field_t *)a, (const field_t *)b);
gf_bias(c, 2);
if (WBITS==32) field_weak_reduce((field_t*) c); // HACK
}

/** Subtract mod p. Bias by amt but don't reduce. */
siv gf_sub_nr_x ( gf c, const gf a, const gf b, int amt ) {
field_sub_nr((field_t *)c, (const field_t *)a, (const field_t *)b);
gf_bias(c, amt);
if (WBITS==32) field_weak_reduce((field_t*) c); // HACK
}

/** Add mod p. Don't reduce. */
siv gf_add_nr ( gf c, const gf a, const gf b ) {
// FOR_LIMB_U(i, c->limb[i] = a->limb[i] + b->limb[i]);
field_add_nr((field_t *)c, (const field_t *)a, (const field_t *)b);
}

/** Constant time, x = is_z ? z : y */ /** Constant time, x = is_z ? z : y */
siv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { siv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) {
constant_time_select(x,z,y,sizeof(gf),is_z); constant_time_select(x,z,y,sizeof(gf),is_z);
@@ -162,29 +114,11 @@ siv cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) {
}); });
} }


/**
* Mul by signed int. Not constant-time WRT the sign of that int.
* Just uses a full mul (PERF)
*/
siv gf_mlw(gf c, const gf a, int w) {
if (w>0) {
field_mulw((field_t *)c, (const field_t *)a, w);
} else {
field_mulw((field_t *)c, (const field_t *)a, -w);
gf_sub(c,ZERO,c);
}
}

/** Canonicalize */
siv gf_canon ( gf a ) {
field_strong_reduce((field_t *)a);
}

/** Compare a==b */ /** Compare a==b */
static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) { static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) {
gf c; gf c;
gf_sub(c,a,b); gf_sub(c,a,b);
gf_canon(c);
gf_strong_reduce(c);
decaf_word_t ret=0; decaf_word_t ret=0;
FOR_LIMB(i, ret |= c->limb[i] ); FOR_LIMB(i, ret |= c->limb[i] );
/* Hope the compiler is too dumb to optimize this, thus noinline */ /* Hope the compiler is too dumb to optimize this, thus noinline */
@@ -194,7 +128,7 @@ static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) {
/** Inverse square root using addition chain. */ /** Inverse square root using addition chain. */
static decaf_bool_t gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) { static decaf_bool_t gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) {
gf tmp0, tmp1; gf tmp0, tmp1;
field_isr((field_t *)y, (const field_t *)x);
gf_isr((gf_s *)y, (const gf_s *)x);
gf_sqr(tmp0,y); gf_sqr(tmp0,y);
gf_mul(tmp1,tmp0,x); gf_mul(tmp1,tmp0,x);
return gf_eq(tmp1,ONE) | (allow_zero & gf_eq(tmp1,ZERO)); return gf_eq(tmp1,ONE) | (allow_zero & gf_eq(tmp1,ZERO));
@@ -211,11 +145,24 @@ sv gf_invert(gf y, const gf x) {
gf_cpy(y, t2); gf_cpy(y, t2);
} }


/**
 * Multiply a by the signed small integer w and store the result in c.
 *
 * The sign of w is handled with a branch, so this routine is NOT
 * constant-time with respect to that sign.  A non-positive w is
 * multiplied as -w and the product is then negated (ZERO - c).
 * (PERF: no dedicated signed path; see gf_mulw.)
 */
static inline void gf_mulw_sgn(gf c, const gf a, int w) {
    if (w <= 0) {
        /* Non-positive multiplier: scale by the magnitude, then negate. */
        gf_mulw(c, a, -w);
        gf_sub(c, ZERO, c);
        return;
    }
    gf_mulw(c, a, w);
}

/** Return high bit of x = low bit of 2x mod p */ /** Return high bit of x = low bit of 2x mod p */
static decaf_word_t hibit(const gf x) { static decaf_word_t hibit(const gf x) {
gf y; gf y;
gf_add(y,x,x); gf_add(y,x,x);
gf_canon(y);
gf_strong_reduce(y);
return -(y->limb[0]&1); return -(y->limb[0]&1);
} }


@@ -223,7 +170,7 @@ static decaf_word_t hibit(const gf x) {
static decaf_word_t lobit(const gf x) { static decaf_word_t lobit(const gf x) {
gf y; gf y;
gf_cpy(y,x); gf_cpy(y,x);
gf_canon(y);
gf_strong_reduce(y);
return -(y->limb[0]&1); return -(y->limb[0]&1);
} }


@@ -454,7 +401,7 @@ decaf_bool_t API_NS(scalar_eq) (
const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}}; const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};


static void gf_encode ( unsigned char ser[SER_BYTES], gf a ) { static void gf_encode ( unsigned char ser[SER_BYTES], gf a ) {
field_serialize(ser, (field_t *)a);
gf_serialize(ser, (gf_s *)a);
} }
extern const gf SQRT_MINUS_ONE, SQRT_ONE_MINUS_D; /* Intern this? */ extern const gf SQRT_MINUS_ONE, SQRT_ONE_MINUS_D; /* Intern this? */
@@ -528,7 +475,7 @@ void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
* Deserialize a field element, return TRUE if < p. * Deserialize a field element, return TRUE if < p.
*/ */
static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) { static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) {
return field_deserialize((field_t *)s, ser);
return gf_deserialize((gf_s *)s, ser);
} }
decaf_bool_t API_NS(point_decode) ( decaf_bool_t API_NS(point_decode) (
@@ -544,7 +491,7 @@ decaf_bool_t API_NS(point_decode) (
gf_sub ( f, ONE, a ); /* f = 1-s^2 = 1-as^2 since a=1 */ gf_sub ( f, ONE, a ); /* f = 1-s^2 = 1-as^2 since a=1 */
succ &= ~ gf_eq( f, ZERO ); succ &= ~ gf_eq( f, ZERO );
gf_sqr ( b, f ); gf_sqr ( b, f );
gf_mlw ( c, a, 4-4*EDWARDS_D );
gf_mulw_sgn ( c, a, 4-4*EDWARDS_D );
gf_add ( c, c, b ); /* t^2 */ gf_add ( c, c, b ); /* t^2 */
gf_mul ( d, f, s ); /* s(1-s^2) for denoms */ gf_mul ( d, f, s ); /* s(1-s^2) for denoms */
gf_sqr ( e, d ); gf_sqr ( e, d );
@@ -596,7 +543,7 @@ void API_NS(point_sub) (
gf_add_nr ( b, q->y, q->x ); gf_add_nr ( b, q->y, q->x );
gf_mul ( p->y, d, b ); gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t ); gf_mul ( b, r->t, q->t );
gf_mlw ( p->x, b, -2*EDWARDS_D );
gf_mulw_sgn ( p->x, b, -2*EDWARDS_D );
gf_add_nr ( b, a, p->y ); gf_add_nr ( b, a, p->y );
gf_sub_nr ( c, p->y, a ); gf_sub_nr ( c, p->y, a );
gf_mul ( a, q->z, r->z ); gf_mul ( a, q->z, r->z );
@@ -622,7 +569,7 @@ void API_NS(point_add) (
gf_add_nr ( b, q->y, q->x ); gf_add_nr ( b, q->y, q->x );
gf_mul ( p->y, d, b ); gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t ); gf_mul ( b, r->t, q->t );
gf_mlw ( p->x, b, -2*EDWARDS_D );
gf_mulw_sgn ( p->x, b, -2*EDWARDS_D );
gf_add_nr ( b, a, p->y ); gf_add_nr ( b, a, p->y );
gf_sub_nr ( c, p->y, a ); gf_sub_nr ( c, p->y, a );
gf_mul ( a, q->z, r->z ); gf_mul ( a, q->z, r->z );
@@ -646,11 +593,11 @@ snv point_double_internal (
gf_add_nr ( d, c, a ); gf_add_nr ( d, c, a );
gf_add_nr ( p->t, q->y, q->x ); gf_add_nr ( p->t, q->y, q->x );
gf_sqr ( b, p->t ); gf_sqr ( b, p->t );
gf_sub_nr_x ( b, b, d, 3 );
gf_subx_nr ( b, b, d, 3 );
gf_sub_nr ( p->t, a, c ); gf_sub_nr ( p->t, a, c );
gf_sqr ( p->x, q->z ); gf_sqr ( p->x, q->z );
gf_add_nr ( p->z, p->x, p->x ); gf_add_nr ( p->z, p->x, p->x );
gf_sub_nr_x ( a, p->z, p->t, 4 );
gf_subx_nr ( a, p->z, p->t, 4 );
gf_mul ( p->x, a, b ); gf_mul ( p->x, a, b );
gf_mul ( p->z, p->t, a ); gf_mul ( p->z, p->t, a );
gf_mul ( p->y, p->t, d ); gf_mul ( p->y, p->t, d );
@@ -777,7 +724,7 @@ static void pt_to_pniels (
) { ) {
gf_sub ( b->n->a, a->y, a->x ); gf_sub ( b->n->a, a->y, a->x );
gf_add ( b->n->b, a->x, a->y ); gf_add ( b->n->b, a->x, a->y );
gf_mlw ( b->n->c, a->t, -2*EDWARDS_D );
gf_mulw_sgn ( b->n->c, a->t, -2*EDWARDS_D );
gf_add ( b->z, a->z, a->z ); gf_add ( b->z, a->z, a->z );
} }


@@ -1047,12 +994,12 @@ void API_NS(point_from_hash_nonuniform) (
// TODO: simplify since we don't return a hint anymore // TODO: simplify since we don't return a hint anymore
gf r0,r,a,b,c,dee,D,N,rN,e; gf r0,r,a,b,c,dee,D,N,rN,e;
gf_deser(r0,ser); gf_deser(r0,ser);
gf_canon(r0);
gf_strong_reduce(r0);
gf_sqr(a,r0); gf_sqr(a,r0);
//gf_sub(r,ZERO,a); /*gf_mlw(r,a,QUADRATIC_NONRESIDUE);*/
//gf_sub(r,ZERO,a); /*gf_mulw_sgn(r,a,QUADRATIC_NONRESIDUE);*/
gf_mul(r,a,SQRT_MINUS_ONE); gf_mul(r,a,SQRT_MINUS_ONE);
gf_mlw(dee,ONE,EDWARDS_D);
gf_mlw(c,r,EDWARDS_D);
gf_mulw_sgn(dee,ONE,EDWARDS_D);
gf_mulw_sgn(c,r,EDWARDS_D);
/* Compute D := (dr+a-d)(dr-ar-d) with a=1 */ /* Compute D := (dr+a-d)(dr-ar-d) with a=1 */
gf_sub(a,c,dee); gf_sub(a,c,dee);
@@ -1064,7 +1011,7 @@ void API_NS(point_from_hash_nonuniform) (
/* compute N := (r+1)(a-2d) */ /* compute N := (r+1)(a-2d) */
gf_add(a,r,ONE); gf_add(a,r,ONE);
gf_mlw(N,a,1-2*EDWARDS_D);
gf_mulw_sgn(N,a,1-2*EDWARDS_D);
/* e = +-1/sqrt(+-ND) */ /* e = +-1/sqrt(+-ND) */
gf_mul(rN,r,N); gf_mul(rN,r,N);
@@ -1078,8 +1025,8 @@ void API_NS(point_from_hash_nonuniform) (
/* b <- t/s */ /* b <- t/s */
cond_sel(c,r0,r,square); /* r? = sqr ? r : 1 */ cond_sel(c,r0,r,square); /* r? = sqr ? r : 1 */
/* In two steps to avoid overflow on 32-bit arch */ /* In two steps to avoid overflow on 32-bit arch */
gf_mlw(a,c,1-2*EDWARDS_D);
gf_mlw(b,a,1-2*EDWARDS_D);
gf_mulw_sgn(a,c,1-2*EDWARDS_D);
gf_mulw_sgn(b,a,1-2*EDWARDS_D);
gf_sub(c,r,ONE); gf_sub(c,r,ONE);
gf_mul(a,b,c); /* = r? * (r-1) * (a-2d)^2 with a=1 */ gf_mul(a,b,c); /* = r? * (r-1) * (a-2d)^2 with a=1 */
gf_mul(b,a,e); gf_mul(b,a,e);
@@ -1148,7 +1095,7 @@ API_NS(invert_elligator_nonuniform) (
cond_sel(b,b,ZERO,is_identity & ~sgn_t_over_s & ~sgn_s); /* identity adjust */ cond_sel(b,b,ZERO,is_identity & ~sgn_t_over_s & ~sgn_s); /* identity adjust */
} }
gf_mlw(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */
gf_mulw_sgn(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */
gf_add(a,d,b); /* num? */ gf_add(a,d,b); /* num? */
gf_sub(d,d,b); /* den? */ gf_sub(d,d,b); /* den? */
gf_mul(b,a,d); /* n*d */ gf_mul(b,a,d); /* n*d */
@@ -1199,7 +1146,7 @@ decaf_bool_t API_NS(point_valid) (
gf_sqr(b,p->y); gf_sqr(b,p->y);
gf_sub(a,b,a); gf_sub(a,b,a);
gf_sqr(b,p->t); gf_sqr(b,p->t);
gf_mlw(c,b,-EDWARDS_D);
gf_mulw_sgn(c,b,-EDWARDS_D);
gf_sqr(b,p->z); gf_sqr(b,p->z);
gf_add(b,b,c); gf_add(b,b,c);
out &= gf_eq(a,b); out &= gf_eq(a,b);
@@ -1281,15 +1228,15 @@ static void batch_normalize_niels (


for (i=0; i<n; i++) { for (i=0; i<n; i++) {
gf_mul(product, table[i]->a, zis[i]); gf_mul(product, table[i]->a, zis[i]);
gf_canon(product);
gf_strong_reduce(product);
gf_cpy(table[i]->a, product); gf_cpy(table[i]->a, product);
gf_mul(product, table[i]->b, zis[i]); gf_mul(product, table[i]->b, zis[i]);
gf_canon(product);
gf_strong_reduce(product);
gf_cpy(table[i]->b, product); gf_cpy(table[i]->b, product);
gf_mul(product, table[i]->c, zis[i]); gf_mul(product, table[i]->c, zis[i]);
gf_canon(product);
gf_strong_reduce(product);
gf_cpy(table[i]->c, product); gf_cpy(table[i]->c, product);
} }
} }
@@ -1510,7 +1457,7 @@ sv prepare_wnaf_table(
} }
} }


extern const field_t API_NS(precomputed_wnaf_as_fe)[];
extern const gf API_NS(precomputed_wnaf_as_fe)[];
static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe); static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe);
const size_t API_NS2(sizeof,precomputed_wnafs) __attribute((visibility("hidden"))) const size_t API_NS2(sizeof,precomputed_wnafs) __attribute((visibility("hidden")))
= sizeof(niels_t)<<DECAF_WNAF_FIXED_TABLE_BITS; = sizeof(niels_t)<<DECAF_WNAF_FIXED_TABLE_BITS;


+ 24
- 26
src/decaf_gen_tables.c View File

@@ -19,7 +19,7 @@
#define API_NS2(_pref,_id) _pref##_decaf_255_##_id #define API_NS2(_pref,_id) _pref##_decaf_255_##_id


/* To satisfy linker. */ /* To satisfy linker. */
const field_t API_NS(precomputed_base_as_fe)[1];
const gf API_NS(precomputed_base_as_fe)[1];
const API_NS(scalar_t) API_NS(precomputed_scalarmul_adjustment); const API_NS(scalar_t) API_NS(precomputed_scalarmul_adjustment);
const API_NS(scalar_t) API_NS(point_scalarmul_adjustment); const API_NS(scalar_t) API_NS(point_scalarmul_adjustment);
const API_NS(scalar_t) sc_r2 = {{{0}}}; const API_NS(scalar_t) sc_r2 = {{{0}}};
@@ -29,7 +29,7 @@ const unsigned char base_point_ser_for_pregen[DECAF_255_SER_BYTES];
const API_NS(point_t) API_NS(point_base); const API_NS(point_t) API_NS(point_base);


struct niels_s; struct niels_s;
const field_t *API_NS(precomputed_wnaf_as_fe);
const gf_s *API_NS(precomputed_wnaf_as_fe);
extern const size_t API_NS2(sizeof,precomputed_wnafs); extern const size_t API_NS2(sizeof,precomputed_wnafs);


void API_NS(precompute_wnafs) ( void API_NS(precompute_wnafs) (
@@ -48,26 +48,26 @@ static void scalar_print(const char *name, const API_NS(scalar_t) sc) {
printf("}}};\n\n"); printf("}}};\n\n");
} }


static void field_print(const field_t *f) {
const int FIELD_SER_BYTES = (FIELD_BITS + 7) / 8;
unsigned char ser[FIELD_SER_BYTES];
field_serialize(ser,f);
static void field_print(const gf f) {
const int GF_SER_BYTES = (GF_BITS + 7) / 8;
unsigned char ser[GF_SER_BYTES];
gf_serialize(ser,f);
int b=0, i, comma=0; int b=0, i, comma=0;
unsigned long long limb = 0; unsigned long long limb = 0;
printf("FIELD_LITERAL(");
for (i=0; i<FIELD_SER_BYTES; i++) {
printf("{FIELD_LITERAL(");
for (i=0; i<GF_SER_BYTES; i++) {
limb |= ((uint64_t)ser[i])<<b; limb |= ((uint64_t)ser[i])<<b;
b += 8; b += 8;
if (b >= FIELD_LIT_LIMB_BITS) {
limb &= (1ull<<FIELD_LIT_LIMB_BITS) -1;
b -= FIELD_LIT_LIMB_BITS;
if (b >= GF_LIT_LIMB_BITS) {
limb &= (1ull<<GF_LIT_LIMB_BITS) -1;
b -= GF_LIT_LIMB_BITS;
if (comma) printf(","); if (comma) printf(",");
comma = 1; comma = 1;
printf("0x%016llx", limb); printf("0x%016llx", limb);
limb = ((uint64_t)ser[i])>>(8-b); limb = ((uint64_t)ser[i])>>(8-b);
} }
} }
printf(")");
printf(")}");
assert(b<8); assert(b<8);
} }


@@ -88,41 +88,39 @@ int main(int argc, char **argv) {
if (ret || !preWnaf) return 1; if (ret || !preWnaf) return 1;
API_NS(precompute_wnafs)(preWnaf, real_point_base); API_NS(precompute_wnafs)(preWnaf, real_point_base);


const field_t *output;
const gf_s *output;
unsigned i; unsigned i;
printf("/** @warning: this file was automatically generated. */\n"); printf("/** @warning: this file was automatically generated. */\n");
printf("#include <decaf.h>\n\n");
printf("#include \"field.h\"\n\n"); printf("#include \"field.h\"\n\n");
printf("#include \"decaf.h\"\n\n");
printf("#define API_NS(_id) decaf_255_##_id\n"); printf("#define API_NS(_id) decaf_255_##_id\n");
printf("#define API_NS2(_pref,_id) _pref##_decaf_255_##_id\n"); printf("#define API_NS2(_pref,_id) _pref##_decaf_255_##_id\n");
output = (const field_t *)real_point_base;
output = (const gf_s *)real_point_base;
printf("const API_NS(point_t) API_NS(point_base) = {{\n"); printf("const API_NS(point_t) API_NS(point_base) = {{\n");
for (i=0; i < sizeof(API_NS(point_t)); i+=sizeof(field_t)) {
for (i=0; i < sizeof(API_NS(point_t)); i+=sizeof(gf)) {
if (i) printf(",\n "); if (i) printf(",\n ");
printf("{");
field_print(output++); field_print(output++);
printf("}");
} }
printf("\n}};\n"); printf("\n}};\n");
output = (const field_t *)pre;
printf("const field_t API_NS(precomputed_base_as_fe)[%d]\n",
(int)(API_NS2(sizeof,precomputed_s) / sizeof(field_t)));
output = (const gf_s *)pre;
printf("const gf API_NS(precomputed_base_as_fe)[%d]\n",
(int)(API_NS2(sizeof,precomputed_s) / sizeof(gf)));
printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s)); printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s));
for (i=0; i < API_NS2(sizeof,precomputed_s); i+=sizeof(field_t)) {
for (i=0; i < API_NS2(sizeof,precomputed_s); i+=sizeof(gf)) {
if (i) printf(",\n "); if (i) printf(",\n ");
field_print(output++); field_print(output++);
} }
printf("\n};\n"); printf("\n};\n");
output = (const field_t *)preWnaf;
printf("const field_t API_NS(precomputed_wnaf_as_fe)[%d]\n",
(int)(API_NS2(sizeof,precomputed_wnafs) / sizeof(field_t)));
output = (const gf_s *)preWnaf;
printf("const gf API_NS(precomputed_wnaf_as_fe)[%d]\n",
(int)(API_NS2(sizeof,precomputed_wnafs) / sizeof(gf)));
printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s)); printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s));
for (i=0; i < API_NS2(sizeof,precomputed_wnafs); i+=sizeof(field_t)) {
for (i=0; i < API_NS2(sizeof,precomputed_wnafs); i+=sizeof(gf)) {
if (i) printf(",\n "); if (i) printf(",\n ");
field_print(output++); field_print(output++);
} }


+ 54
- 44
src/include/field.h View File

@@ -1,23 +1,20 @@
/** /**
* @file field.h * @file field.h
* @brief Generic field header.
* @brief Generic gf header.
* @copyright * @copyright
* Copyright (c) 2014 Cryptography Research, Inc. \n * Copyright (c) 2014 Cryptography Research, Inc. \n
* Released under the MIT License. See LICENSE.txt for license information. * Released under the MIT License. See LICENSE.txt for license information.
* @author Mike Hamburg * @author Mike Hamburg
*/ */


#ifndef __FIELD_H__
#define __FIELD_H__
#ifndef __GF_H__
#define __GF_H__


#include "constant_time.h" #include "constant_time.h"
#include "f_field.h" #include "f_field.h"
#include <string.h> #include <string.h>


typedef struct field_t field_a_t[1];
#define field_a_restrict_t struct field_t *__restrict__

#define is32 (GOLDI_BITS == 32 || FIELD_BITS != 448)
#define is32 (GOLDI_BITS == 32 || GF_BITS != 448)
#if (is32) #if (is32)
#define IF32(s) (s) #define IF32(s) (s)
#else #else
@@ -33,9 +30,9 @@ typedef struct field_t field_a_t[1];
* If x=0, returns 0. * If x=0, returns 0.
*/ */
void void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf a,
const gf x
); );
/** /**
@@ -43,62 +40,75 @@ field_isr (
*/ */
static __inline__ void static __inline__ void
__attribute__((unused,always_inline)) __attribute__((unused,always_inline))
field_sqrn (
field_a_restrict_t y,
const field_a_t x,
gf_sqrn (
gf_s *__restrict__ y,
const gf x,
int n int n
) { ) {
field_a_t tmp;
gf tmp;
assert(n>0); assert(n>0);
if (n&1) { if (n&1) {
field_sqr(y,x);
gf_sqr(y,x);
n--; n--;
} else { } else {
field_sqr(tmp,x);
field_sqr(y,tmp);
gf_sqr(tmp,x);
gf_sqr(y,tmp);
n-=2; n-=2;
} }
for (; n; n-=2) { for (; n; n-=2) {
field_sqr(tmp,y);
field_sqr(y,tmp);
gf_sqr(tmp,y);
gf_sqr(y,tmp);
} }
} }


static __inline__ void static __inline__ void
field_subx_RAW (
field_a_t d,
const field_a_t a,
const field_a_t b
gf_subx_RAW (
gf d,
const gf a,
const gf b
) { ) {
field_sub_RAW ( d, a, b );
field_bias( d, 2 );
IF32( field_weak_reduce ( d ) );
gf_sub_RAW ( d, a, b );
gf_bias( d, 2 );
IF32( gf_weak_reduce ( d ) );
} }


static __inline__ void static __inline__ void
field_sub (
field_a_t d,
const field_a_t a,
const field_a_t b
gf_sub (
gf d,
const gf a,
const gf b
) { ) {
field_sub_RAW ( d, a, b );
field_bias( d, 2 );
field_weak_reduce ( d );
gf_sub_RAW ( d, a, b );
gf_bias( d, 2 );
gf_weak_reduce ( d );
} }


static __inline__ void static __inline__ void
field_add (
field_a_t d,
const field_a_t a,
const field_a_t b
gf_add (
gf d,
const gf a,
const gf b
) { ) {
field_add_RAW ( d, a, b );
field_weak_reduce ( d );
gf_add_RAW ( d, a, b );
gf_weak_reduce ( d );
}

#define gf_add_nr gf_add_RAW

/**
 * Subtract mod p without a full reduction: c = a - b, then bias by 2.
 *
 * Per the original note, the intent limb-wise is
 *   c->limb[i] = a->limb[i] - b->limb[i] + 2*P->limb[i].
 * When DECAF_WORD_BITS is 32 the result is additionally weak-reduced.
 */
static inline void gf_sub_nr ( gf c, const gf a, const gf b ) {
    gf_sub_RAW(c, a, b);
    gf_bias(c, 2);
    /* HACK: 32-bit word builds weak-reduce after biasing. */
    if (DECAF_WORD_BITS == 32) {
        gf_weak_reduce(c);
    }
}

/**
 * Subtract mod p without a full reduction: c = a - b, then bias by amt.
 *
 * Same shape as the fixed-bias variant, but the caller chooses the bias.
 * When DECAF_WORD_BITS is 32 the result is additionally weak-reduced.
 */
static inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) {
    gf_sub_RAW(c, a, b);
    gf_bias(c, amt);
    /* HACK: 32-bit word builds weak-reduce after biasing. */
    if (DECAF_WORD_BITS == 32) {
        gf_weak_reduce(c);
    }
}


/* FIXME: no warnings on RAW routines */
#define field_add_nr field_add_RAW
#define field_sub_nr field_sub_RAW
#define field_subx_nr field_subx_RAW


#endif // __FIELD_H__
#endif // __GF_H__

+ 20
- 20
src/p25519/arch_ref64/p25519.c View File

@@ -17,10 +17,10 @@ static __inline__ uint64_t is_zero(uint64_t a) {
} }


void void
p255_mul (
p255_t *__restrict__ cs,
const p255_t *as,
const p255_t *bs
gf_25519_mul (
gf_25519_t __restrict__ cs,
const gf_25519_t as,
const gf_25519_t bs
) { ) {
const uint64_t *a = as->limb, *b = bs->limb, mask = ((1ull<<51)-1); const uint64_t *a = as->limb, *b = bs->limb, mask = ((1ull<<51)-1);
@@ -52,9 +52,9 @@ p255_mul (
} }


void void
p255_mulw (
p255_t *__restrict__ cs,
const p255_t *as,
gf_25519_mulw (
gf_25519_t __restrict__ cs,
const gf_25519_t as,
uint64_t b uint64_t b
) { ) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1); const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
@@ -79,16 +79,16 @@ p255_mulw (
} }


void void
p255_sqr (
p255_t *__restrict__ cs,
const p255_t *as
gf_25519_t qr (
gf_25519_t __restrict__ cs,
const gf_25519_t as
) { ) {
p255_mul(cs,as,as); // TODO
gf_25519_mul(cs,as,as); // TODO
} }


void void
p255_strong_reduce (
p255_t *a
gf_25519_t trong_reduce (
gf_25519_t a
) { ) {
uint64_t mask = (1ull<<51)-1; uint64_t mask = (1ull<<51)-1;


@@ -128,14 +128,14 @@ p255_strong_reduce (
} }


void void
p255_serialize (
gf_25519_t erialize (
uint8_t serial[32], uint8_t serial[32],
const struct p255_t *x
const struct gf_25519_t x
) { ) {
int i,j; int i,j;
p255_t red;
p255_copy(&red, x);
p255_strong_reduce(&red);
gf_25519_t red;
gf_25519_copy(&red, x);
gf_25519_t trong_reduce(&red);
uint64_t *r = red.limb; uint64_t *r = red.limb;
uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12}; uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12};
for (i=0; i<4; i++) { for (i=0; i<4; i++) {
@@ -147,8 +147,8 @@ p255_serialize (
} }


mask_t mask_t
p255_deserialize (
p255_t *x,
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32] const uint8_t serial[32]
) { ) {
int i,j; int i,j;


+ 53
- 53
src/p25519/arch_ref64/p25519.h View File

@@ -1,8 +1,8 @@
/* Copyright (c) 2014 Cryptography Research, Inc. /* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information. * Released under the MIT License. See LICENSE.txt for license information.
*/ */
#ifndef __P255_H__
#define __P255_H__ 1
#ifndef __P25519_H__
#define __P25519_H__ 1


#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>
@@ -10,9 +10,9 @@


#include "word.h" #include "word.h"


typedef struct p255_t {
typedef struct gf_25519_s {
uint64_t limb[5]; uint64_t limb[5];
} p255_t;
} gf_25519_s, gf_25519_t[1];


#define LBITS 51 #define LBITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}
@@ -32,113 +32,113 @@ extern "C" {
#endif #endif


static __inline__ void static __inline__ void
p255_add_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused)); ) __attribute__((unused));
static __inline__ void static __inline__ void
p255_sub_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused)); ) __attribute__((unused));
static __inline__ void static __inline__ void
p255_copy (
p255_t *out,
const p255_t *a
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) __attribute__((unused)); ) __attribute__((unused));
static __inline__ void static __inline__ void
p255_weak_reduce (
p255_t *inout
gf_25519_weak_reduce (
gf_25519_t inout
) __attribute__((unused)); ) __attribute__((unused));
void void
p255_strong_reduce (
p255_t *inout
gf_25519_strong_reduce (
gf_25519_t inout
); );


static __inline__ void static __inline__ void
p255_bias (
p255_t *inout,
gf_25519_bias (
gf_25519_t inout,
int amount int amount
) __attribute__((unused)); ) __attribute__((unused));
void void
p255_mul (
p255_t *__restrict__ out,
const p255_t *a,
const p255_t *b
gf_25519_mul (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
const gf_25519_t b
); );


void void
p255_mulw (
p255_t *__restrict__ out,
const p255_t *a,
gf_25519_mulw (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
uint64_t b uint64_t b
); );


void void
p255_sqr (
p255_t *__restrict__ out,
const p255_t *a
gf_25519_sqr (
gf_25519_s *__restrict__ out,
const gf_25519_t a
); );


void void
p255_serialize (
gf_25519_serialize (
uint8_t serial[32], uint8_t serial[32],
const struct p255_t *x
const gf_25519_t x
); );


mask_t mask_t
p255_deserialize (
p255_t *x,
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32] const uint8_t serial[32]
); );


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void void
p255_add_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) { ) {
unsigned int i; unsigned int i;
for (i=0; i<5; i++) { for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i]; out->limb[i] = a->limb[i] + b->limb[i];
} }
p255_weak_reduce(out);
gf_25519_weak_reduce(out);
} }


void void
p255_sub_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) { ) {
unsigned int i; unsigned int i;
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36;
for (i=0; i<5; i++) { for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1); out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1);
} }
p255_weak_reduce(out);
gf_25519_weak_reduce(out);
} }


void void
p255_copy (
p255_t *out,
const p255_t *a
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) { ) {
memcpy(out,a,sizeof(*a)); memcpy(out,a,sizeof(*a));
} }


void void
p255_bias (
p255_t *a,
gf_25519_bias (
gf_25519_t a,
int amt int amt
) { ) {
(void) a; (void) a;
@@ -146,8 +146,8 @@ p255_bias (
} }


void void
p255_weak_reduce (
p255_t *a
gf_25519_weak_reduce (
gf_25519_t a
) { ) {
uint64_t mask = (1ull<<51) - 1; uint64_t mask = (1ull<<51) - 1;
uint64_t tmp = a->limb[4] >> 51; uint64_t tmp = a->limb[4] >> 51;
@@ -162,4 +162,4 @@ p255_weak_reduce (
}; /* extern "C" */ }; /* extern "C" */
#endif #endif


#endif /* __P255_H__ */
#endif /* __P25519_H__ */

+ 20
- 20
src/p25519/arch_x86_64/p25519.c View File

@@ -10,10 +10,10 @@ static inline uint64_t shr(__uint128_t x, int n) {
} }


void void
p255_mul (
p255_t *__restrict__ cs,
const p255_t *as,
const p255_t *bs
gf_25519_mul (
gf_25519_s *__restrict__ cs,
const gf_25519_t as,
const gf_25519_t bs
) { ) {
const uint64_t *a = as->limb, *b = bs->limb, mask = ((1ull<<51)-1); const uint64_t *a = as->limb, *b = bs->limb, mask = ((1ull<<51)-1);
uint64_t *c = cs->limb; uint64_t *c = cs->limb;
@@ -92,9 +92,9 @@ p255_mul (
} }


void void
p255_sqr (
p255_t *__restrict__ cs,
const p255_t *as
gf_25519_sqr (
gf_25519_s *__restrict__ cs,
const gf_25519_t as
) { ) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1); const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
uint64_t *c = cs->limb; uint64_t *c = cs->limb;
@@ -156,9 +156,9 @@ p255_sqr (
} }


void void
p255_mulw (
p255_t *__restrict__ cs,
const p255_t *as,
gf_25519_mulw (
gf_25519_s *__restrict__ cs,
const gf_25519_t as,
uint64_t b uint64_t b
) { ) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1); const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
@@ -191,8 +191,8 @@ p255_mulw (
} }


void void
p255_strong_reduce (
p255_t *a
gf_25519_strong_reduce (
gf_25519_t a
) { ) {
uint64_t mask = (1ull<<51)-1; uint64_t mask = (1ull<<51)-1;


@@ -232,15 +232,15 @@ p255_strong_reduce (
} }


void void
p255_serialize (
gf_25519_serialize (
uint8_t serial[32], uint8_t serial[32],
const struct p255_t *x
const gf_25519_t x
) { ) {
int i,j; int i,j;
p255_t red;
p255_copy(&red, x);
p255_strong_reduce(&red);
uint64_t *r = red.limb;
gf_25519_t red;
gf_25519_copy(red, x);
gf_25519_strong_reduce(red);
uint64_t *r = red->limb;
uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12}; uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12};
for (i=0; i<4; i++) { for (i=0; i<4; i++) {
for (j=0; j<8; j++) { for (j=0; j<8; j++) {
@@ -251,8 +251,8 @@ p255_serialize (
} }


mask_t mask_t
p255_deserialize (
p255_t *x,
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32] const uint8_t serial[32]
) { ) {
int i,j; int i,j;


+ 54
- 51
src/p25519/arch_x86_64/p25519.h View File

@@ -1,8 +1,8 @@
/* Copyright (c) 2014 Cryptography Research, Inc. /* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information. * Released under the MIT License. See LICENSE.txt for license information.
*/ */
#ifndef __P255_H__
#define __P255_H__ 1
#ifndef __P25519_H__
#define __P25519_H__ 1


#include <stdint.h> #include <stdint.h>
#include <assert.h> #include <assert.h>
@@ -10,9 +10,12 @@


#include "word.h" #include "word.h"


typedef struct p255_t {
#ifndef __DECAF_255_H__ // HACK FIXME
#define DECAF_WORD_BITS 64
typedef struct gf_25519_s {
uint64_t limb[5]; uint64_t limb[5];
} p255_t;
} gf_25519_s, gf_25519_t[1];
#endif


#define LBITS 51 #define LBITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}
@@ -32,80 +35,80 @@ extern "C" {
#endif #endif


static __inline__ void static __inline__ void
p255_add_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused)); ) __attribute__((unused));
static __inline__ void static __inline__ void
p255_sub_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused)); ) __attribute__((unused));
static __inline__ void static __inline__ void
p255_copy (
p255_t *out,
const p255_t *a
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) __attribute__((unused)); ) __attribute__((unused));
static __inline__ void static __inline__ void
p255_weak_reduce (
p255_t *inout
gf_25519_weak_reduce (
gf_25519_t inout
) __attribute__((unused)); ) __attribute__((unused));
void void
p255_strong_reduce (
p255_t *inout
gf_25519_strong_reduce (
gf_25519_t inout
); );


static __inline__ void static __inline__ void
p255_bias (
p255_t *inout,
gf_25519_bias (
gf_25519_t inout,
int amount int amount
) __attribute__((unused)); ) __attribute__((unused));
void void
p255_mul (
p255_t *__restrict__ out,
const p255_t *a,
const p255_t *b
gf_25519_mul (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
const gf_25519_t b
); );


void void
p255_mulw (
p255_t *__restrict__ out,
const p255_t *a,
gf_25519_mulw (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
uint64_t b uint64_t b
); );


void void
p255_sqr (
p255_t *__restrict__ out,
const p255_t *a
gf_25519_sqr (
gf_25519_s *__restrict__ out,
const gf_25519_t a
); );


void void
p255_serialize (
gf_25519_serialize (
uint8_t serial[32], uint8_t serial[32],
const struct p255_t *x
const gf_25519_t x
); );


mask_t mask_t
p255_deserialize (
p255_t *x,
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32] const uint8_t serial[32]
); );


/* -------------- Inline functions begin here -------------- */ /* -------------- Inline functions begin here -------------- */


void void
p255_add_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) { ) {
unsigned int i; unsigned int i;
for (i=0; i<5; i++) { for (i=0; i<5; i++) {
@@ -114,10 +117,10 @@ p255_add_RAW (
} }


void void
p255_sub_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) { ) {
unsigned int i; unsigned int i;
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36;
@@ -127,16 +130,16 @@ p255_sub_RAW (
} }


void void
p255_copy (
p255_t *out,
const p255_t *a
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) { ) {
memcpy(out,a,sizeof(*a)); memcpy(out,a,sizeof(*a));
} }


void void
p255_bias (
p255_t *a,
gf_25519_bias (
gf_25519_t a,
int amt int amt
) { ) {
a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt; a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt;
@@ -147,8 +150,8 @@ p255_bias (
} }


void void
p255_weak_reduce (
p255_t *a
gf_25519_weak_reduce (
gf_25519_t a
) { ) {
uint64_t mask = (1ull<<51) - 1; uint64_t mask = (1ull<<51) - 1;
uint64_t tmp = a->limb[4] >> 51; uint64_t tmp = a->limb[4] >> 51;
@@ -163,4 +166,4 @@ p255_weak_reduce (
}; /* extern "C" */ }; /* extern "C" */
#endif #endif


#endif /* __P255_H__ */
#endif /* __P25519_H__ */

+ 0
- 1
src/p25519/arch_x86_64/x86-64-arith.h View File

@@ -1 +0,0 @@
../../p448/arch_x86_64/x86-64-arith.h

+ 323
- 0
src/p25519/arch_x86_64/x86-64-arith.h View File

@@ -0,0 +1,323 @@
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/

#ifndef __X86_64_ARITH_H__
#define __X86_64_ARITH_H__

#include <stdint.h>

/* TODO: non x86-64 versions of these.
* FUTURE: autogenerate
*/

/**
 * Full 64x64 -> 128-bit unsigned multiply of two limbs read from memory.
 *
 * @param a Pointer to the first 64-bit factor.
 * @param b Pointer to the second 64-bit factor.
 * @return  The 128-bit product (*a) * (*b).
 */
static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) {
    uint64_t c, d; /* c = low 64 bits of the product, d = high 64 bits */
#ifndef __BMI2__
    /* The movq overwrites rax before mulq has read %[b], so the outputs are
     * marked early-clobber ("&"). Without it the compiler is allowed to
     * place the address of *b in rax, which the movq would then destroy.
     */
    __asm__ volatile
        ("movq %[a], %%rax;"
         "mulq %[b];"
         : [c]"=&a"(c), [d]"=&d"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "cc");
#else
    /* mulx takes one factor implicitly from rdx and leaves the flags alone,
     * hence rdx is clobbered and "cc" is not.
     */
    __asm__ volatile
        ("movq %[a], %%rdx;"
         "mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx");
#endif
    return (((__uint128_t)(d))<<64) | c;
}

/**
 * Full 64x64 -> 128-bit unsigned multiply: register value times memory limb.
 *
 * @param a First 64-bit factor, passed by value.
 * @param b Pointer to the second 64-bit factor.
 * @return  The 128-bit product a * (*b).
 */
static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) {
    uint64_t c, d; /* c = low 64 bits of the product, d = high 64 bits */
#ifndef __BMI2__
    /* rax is written by the movq before mulq reads %[b]; early-clobber
     * outputs keep the compiler from placing the address of *b (or a
     * itself) in rax/rdx.
     */
    __asm__ volatile
        ("movq %[a], %%rax;"
         "mulq %[b];"
         : [c]"=&a"(c), [d]"=&d"(d)
         : [b]"m"(*b), [a]"r"(a)
         : "cc");
#else
    /* a is pinned to rdx, the implicit source operand of mulx. */
    __asm__ volatile
        ("mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"m"(*b), [a]"d"(a));
#endif
    return (((__uint128_t)(d))<<64) | c;
}

/* Full 64x64 -> 128-bit unsigned multiply of two values passed by value.
 * Returns a * b as a __uint128_t.
 */
static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b) {
#ifndef __BMI2__
uint64_t c,d;
/* Without BMI2: a is pinned to rax, and mulq leaves the product in rdx:rax. */
__asm__ volatile
("mulq %[b];"
: [c]"=a"(c), [d]"=d"(d)
: [b]"r"(b), "a"(a)
: "cc");
/* d is the high 64 bits of the product, c the low 64 bits. */
return (((__uint128_t)(d))<<64) | c;
#else
uint64_t c,d;
/* With BMI2: a is pinned to rdx, the implicit source operand of mulx. */
__asm__ volatile
("mulx %[b], %[c], %[d];"
: [c]"=r"(c), [d]"=r"(d)
: [b]"r"(b), [a]"d"(a));
/* d is the high 64 bits of the product, c the low 64 bits. */
return (((__uint128_t)(d))<<64) | c;
#endif
}

/**
 * Doubling multiply: computes (2 * (*a)) * (*b) as a 128-bit value.
 *
 * The doubling is performed in a 64-bit register, so it wraps modulo 2^64
 * before the multiplication; callers must keep *a below 2^63 if the exact
 * product 2*a*b is wanted.
 *
 * @param a Pointer to the factor that is doubled.
 * @param b Pointer to the other factor.
 * @return  The 128-bit product ((2 * *a) mod 2^64) * (*b).
 */
static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) {
    uint64_t c, d; /* c = low 64 bits of the product, d = high 64 bits */
#ifndef __BMI2__
    /* rax is written (movq, addq) before mulq reads %[b]; early-clobber
     * outputs stop the compiler from using rax as the address register
     * for *b.
     */
    __asm__ volatile
        ("movq %[a], %%rax; "
         "addq %%rax, %%rax; "
         "mulq %[b];"
         : [c]"=&a"(c), [d]"=&d"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "cc");
#else
    /* leaq doubles rdx without touching the flags, and mulx does not touch
     * them either, so only rdx is listed as clobbered.
     */
    __asm__ volatile
        ("movq %[a], %%rdx;"
         "leaq (,%%rdx,2), %%rdx;"
         "mulx %[b], %[c], %[d];"
         : [c]"=r"(c), [d]"=r"(d)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx");
#endif
    return (((__uint128_t)(d))<<64) | c;
}

/* Multiply-accumulate: *acc += (*a) * (*b), with the 128-bit sum wrapping
 * modulo 2^128. Both factors are read from memory.
 */
static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
/* Split the accumulator into 64-bit halves so each lives in one register. */
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
/* mulx takes one factor from rdx and writes the product to c (low) and
 * d (high); addq/adcq then add it into hi:lo with carry.
 */
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
/* mulq leaves the product in rdx:rax, which is then added into hi:lo. */
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
/* Reassemble the 128-bit accumulator from its halves. */
*acc = (((__uint128_t)(hi))<<64) | lo;
}

/* Dual multiply-accumulate: computes the product (*a) * (*b) once and adds
 * it to both *acc and *acc2 (each wrapping modulo 2^128).
 */
static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, const uint64_t *a, const uint64_t *b) {
/* Split both accumulators into 64-bit halves. */
uint64_t lo = *acc, hi = *acc>>64;
uint64_t lo2 = *acc2, hi2 = *acc2>>64;
#ifdef __BMI2__
uint64_t c,d;
/* The product lands in c (low) / d (high) and is added into both
 * hi:lo and hi2:lo2 with carry propagation.
 */
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
"addq %[c], %[lo2]; "
"adcq %[d], %[hi2]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
/* The product is left in rdx:rax by mulq and added into both accumulators. */
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
"addq %%rax, %[lo2]; "
"adcq %%rdx, %[hi2]; "
: [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
/* Write both accumulators back; *acc is stored before *acc2. */
*acc = (((__uint128_t)(hi))<<64) | lo;
*acc2 = (((__uint128_t)(hi2))<<64) | lo2;
}

/* Multiply-accumulate with one factor by value and one from memory:
 * *acc += a * (*b), wrapping modulo 2^128.
 */
static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) {
/* Split the accumulator into 64-bit halves. */
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
/* a is pinned to rdx, the implicit source operand of mulx. */
__asm__ volatile
("mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"d"(a)
: "cc");
#else
/* a is copied into rax; mulq leaves the product in rdx:rax. */
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"r"(a)
: "rax", "rdx", "cc");
#endif
/* Reassemble the 128-bit accumulator from its halves. */
*acc = (((__uint128_t)(hi))<<64) | lo;
}

/**
 * Multiply-accumulate with both factors passed by value:
 * *acc += a * b, wrapping modulo 2^128.
 *
 * @param acc 128-bit accumulator, updated in place.
 * @param a   First 64-bit factor.
 * @param b   Second 64-bit factor.
 */
static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b) {
    /* Split the accumulator into 64-bit halves. */
    uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
    uint64_t c, d;
    /* a is pinned to rdx, the implicit source operand of mulx. */
    __asm__ volatile
        ("mulx %[b], %[c], %[d]; "
         "addq %[c], %[lo]; "
         "adcq %[d], %[hi]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"r"(b), [a]"d"(a)
         : "cc");
#else
    /* mulq reads a from rax and overwrites rax with the low product half.
     * A register may not be both an operand and a clobber, so rax is
     * described as a read-write operand ("+a") instead of being listed in
     * the clobber list; rdx is still a plain clobber.
     */
    __asm__ volatile
        ("mulq %[b]; "
         "addq %%rax, %[lo]; "
         "adcq %%rdx, %[hi]; "
         : [lo]"+r"(lo), [hi]"+r"(hi), "+a"(a)
         : [b]"r"(b)
         : "rdx", "cc");
#endif
    /* Reassemble the 128-bit accumulator from its halves. */
    *acc = (((__uint128_t)(hi))<<64) | lo;
}

/* Doubling multiply-accumulate: *acc += (2 * (*a)) * (*b), wrapping modulo
 * 2^128. The doubling of *a is done in a 64-bit register, so it wraps
 * modulo 2^64 before the multiplication.
 */
static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
/* Split the accumulator into 64-bit halves. */
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
/* rdx = 2 * *a; mulx multiplies it by *b into c (low) / d (high). */
__asm__ volatile
("movq %[a], %%rdx; "
"addq %%rdx, %%rdx; "
"mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
/* rax = 2 * *a; mulq multiplies it by *b into rdx:rax. */
__asm__ volatile
("movq %[a], %%rax; "
"addq %%rax, %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
/* Reassemble the 128-bit accumulator from its halves. */
*acc = (((__uint128_t)(hi))<<64) | lo;
}

/* Multiply-subtract: *acc -= (*a) * (*b), with the 128-bit difference
 * wrapping modulo 2^128.
 */
static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
/* Split the accumulator into 64-bit halves. */
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
/* The product lands in c (low) / d (high); subq/sbbq subtract it from
 * hi:lo with borrow.
 */
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"subq %[c], %[lo]; "
"sbbq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
/* The product is left in rdx:rax by mulq and subtracted from hi:lo. */
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"subq %%rax, %[lo]; "
"sbbq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
/* Reassemble the 128-bit accumulator from its halves. */
*acc = (((__uint128_t)(hi))<<64) | lo;
}

/* Doubling multiply-subtract: *acc -= (2 * (*a)) * (*b), wrapping modulo
 * 2^128. The doubling of *a is done in a 64-bit register, so it wraps
 * modulo 2^64 before the multiplication.
 */
static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
/* Split the accumulator into 64-bit halves. */
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
/* rdx = 2 * *a; the product with *b goes to c (low) / d (high) and is
 * subtracted from hi:lo with borrow.
 */
__asm__ volatile
("movq %[a], %%rdx; "
"addq %%rdx, %%rdx; "
"mulx %[b], %[c], %[d]; "
"subq %[c], %[lo]; "
"sbbq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
/* rax = 2 * *a; the product in rdx:rax is subtracted from hi:lo. */
__asm__ volatile
("movq %[a], %%rax; "
"addq %%rax, %%rax; "
"mulq %[b]; "
"subq %%rax, %[lo]; "
"sbbq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
/* Reassemble the 128-bit accumulator from its halves. */
*acc = (((__uint128_t)(hi))<<64) | lo;
}

/**
 * Multiply, reverse-subtract: *acc = (*a) * (*b) - *acc, wrapping modulo
 * 2^128.
 *
 * @param acc 128-bit accumulator; read as the subtrahend and overwritten
 *            with the result.
 * @param a   Pointer to the first 64-bit factor.
 * @param b   Pointer to the second 64-bit factor.
 */
static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
    /* c/d receive the low/high halves of the result; lo/hi are the halves
     * of the incoming accumulator.
     */
    uint64_t c, d, lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
    __asm__ volatile
        ("movq %[a], %%rdx; "
         "mulx %[b], %[c], %[d]; "
         "subq %[lo], %[c]; "
         "sbbq %[hi], %[d]; "
         : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
         : [b]"m"(*b), [a]"m"(*a)
         : "rdx", "cc");
#else
    /* Fallback for targets without BMI2, matching the other helpers in
     * this file: mulq leaves the product in rdx:rax, from which the old
     * accumulator is subtracted. The outputs are early-clobber because
     * rax/rdx are written before lo, hi and *b have all been read.
     */
    __asm__ volatile
        ("movq %[a], %%rax; "
         "mulq %[b]; "
         "subq %[lo], %%rax; "
         "sbbq %[hi], %%rdx; "
         : [c]"=&a"(c), [d]"=&d"(d)
         : [b]"m"(*b), [a]"m"(*a), [lo]"r"(lo), [hi]"r"(hi)
         : "cc");
#endif
    *acc = (((__uint128_t)(d))<<64) | c;
}

/* Unsigned 64x64 -> 128-bit multiply written in plain C: a is widened to
 * 128 bits first so the multiplication cannot overflow.
 */
static __inline__ __uint128_t widemulu(uint64_t a, uint64_t b) {
    const __uint128_t wide_a = a;
    return wide_a * b;
}

/* Signed 64x64 -> 128-bit multiply written in plain C: a is sign-extended
 * to 128 bits first so the multiplication cannot overflow.
 */
static __inline__ __int128_t widemuls(int64_t a, int64_t b) {
    const __int128_t wide_a = a;
    return wide_a * b;
}
/* Returns x unchanged, but routes it through an empty volatile asm
 * statement that declares x as a read-write register operand, so the
 * compiler must assume the value may have been modified.
 * NOTE(review): presumably used to keep the optimizer from reasoning about
 * the value (e.g. in constant-time code) -- confirm against callers.
 */
static __inline__ uint64_t opacify(uint64_t x) {
__asm__ volatile("" : "+r"(x));
return x;
}

/* Branch-free zero test: returns an all-ones mask if x == 0 and 0 otherwise
 * (converted to mask_t, which is declared outside this header).
 */
static __inline__ mask_t is_zero(uint64_t x) {
/* neg sets the carry flag iff x != 0; sbb x,x then yields 0 - carry, i.e.
 * all ones when x was nonzero and 0 when it was zero.
 */
__asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x));
/* Invert so that the "x was zero" case is the all-ones mask. */
return ~x;
}

#endif /* __X86_64_ARITH_H__ */

+ 14
- 14
src/p25519/f_arithmetic.c View File

@@ -10,7 +10,7 @@


#include "field.h" #include "field.h"


const field_a_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
0x61b274a0ea0b0, 0x61b274a0ea0b0,
0x0d5a5fc8f189d, 0x0d5a5fc8f189d,
0x7ef5e9cbd0c60, 0x7ef5e9cbd0c60,
@@ -18,7 +18,7 @@ const field_a_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
0x2b8324804fc1d 0x2b8324804fc1d
)}; )};
const field_a_t SQRT_ONE_MINUS_D = {FIELD_LITERAL( // FIXME MAGIC goes elsewhere?
const gf_25519_t SQRT_ONE_MINUS_D = {FIELD_LITERAL( // FIXME MAGIC goes elsewhere?
0x6db8831bbddec, 0x6db8831bbddec,
0x38d7b56c9c165, 0x38d7b56c9c165,
0x016b221394bdc, 0x016b221394bdc,
@@ -26,15 +26,15 @@ const field_a_t SQRT_ONE_MINUS_D = {FIELD_LITERAL( // FIXME MAGIC goes elsewhere
0x0a0d85b4032b1 0x0a0d85b4032b1
)}; )};
static const field_a_t ONE = {FIELD_LITERAL( // FIXME copy-pasted
static const gf_25519_t ONE = {FIELD_LITERAL( // FIXME copy-pasted
1,0,0,0,0 1,0,0,0,0
)}; )};


// ARCH MAGIC FIXME copy-pasted from decaf_fast.c // ARCH MAGIC FIXME copy-pasted from decaf_fast.c
static mask_t gf_eq(const field_a_t a, const field_a_t b) {
field_a_t c;
field_sub(c,a,b);
field_strong_reduce(c);
static mask_t gf_eq(const gf_25519_t a, const gf_25519_t b) {
gf_25519_t c;
gf_sub(c,a,b);
gf_strong_reduce(c);
mask_t ret=0; mask_t ret=0;
int i; int i;
for (i=0; i<5; i++) { ret |= c->limb[i]; } for (i=0; i<5; i++) { ret |= c->limb[i]; }
@@ -43,19 +43,19 @@ static mask_t gf_eq(const field_a_t a, const field_a_t b) {


/* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */ /* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */
void void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf_25519_t a,
const gf_25519_t x
) { ) {
field_a_t st[3], tmp1, tmp2;
gf_25519_t st[3], tmp1, tmp2;
const struct { unsigned char sh, idx; } ops[] = { const struct { unsigned char sh, idx; } ops[] = {
{1,2},{1,2},{3,1},{6,0},{1,2},{12,1},{25,1},{25,1},{50,0},{125,0},{2,2},{1,2} {1,2},{1,2},{3,1},{6,0},{1,2},{12,1},{25,1},{25,1},{50,0},{125,0},{2,2},{1,2}
}; };
st[0][0] = st[1][0] = st[2][0] = x[0]; st[0][0] = st[1][0] = st[2][0] = x[0];
unsigned int i; unsigned int i;
for (i=0; i<sizeof(ops)/sizeof(ops[0]); i++) { for (i=0; i<sizeof(ops)/sizeof(ops[0]); i++) {
field_sqrn(tmp1, st[1^(i&1)], ops[i].sh);
field_mul(tmp2, tmp1, st[ops[i].idx]);
gf_sqrn(tmp1, st[1^(i&1)], ops[i].sh);
gf_mul(tmp2, tmp1, st[ops[i].idx]);
st[i&1][0] = tmp2[0]; st[i&1][0] = tmp2[0];
} }
@@ -64,5 +64,5 @@ field_isr (
// ARCH MAGIC FIXME: should be cond_sel // ARCH MAGIC FIXME: should be cond_sel
for (i=0; i<5; i++) tmp1->limb[i] = (ONE->limb[i] & mask) for (i=0; i<5; i++) tmp1->limb[i] = (ONE->limb[i] & mask)
| (SQRT_MINUS_ONE->limb[i] & ~mask); | (SQRT_MINUS_ONE->limb[i] & ~mask);
field_mul(a,tmp1,st[0]);
gf_mul(a,tmp1,st[0]);
} }

+ 16
- 15
src/p25519/f_field.h View File

@@ -13,20 +13,21 @@
#include <string.h> #include <string.h>


#include "p25519.h" #include "p25519.h"
#define FIELD_LIT_LIMB_BITS 51
#define FIELD_BITS 255
#define field_t p255_t
#define field_mul p255_mul
#define field_sqr p255_sqr
#define field_add_RAW p255_add_RAW
#define field_sub_RAW p255_sub_RAW
#define field_mulw p255_mulw
#define field_bias p255_bias
#define field_isr p255_isr
#define field_weak_reduce p255_weak_reduce
#define field_strong_reduce p255_strong_reduce
#define field_serialize p255_serialize
#define field_deserialize p255_deserialize
#define SQRT_MINUS_ONE P25519_SQRT_MINUS_ONE
#define GF_LIT_LIMB_BITS 51
#define GF_BITS 255
#define gf gf_25519_t
#define gf_s gf_25519_s
#define gf_mul gf_25519_mul
#define gf_sqr gf_25519_sqr
#define gf_add_RAW gf_25519_add_RAW
#define gf_sub_RAW gf_25519_sub_RAW
#define gf_mulw gf_25519_mulw
#define gf_bias gf_25519_bias
#define gf_isr gf_25519_isr
#define gf_weak_reduce gf_25519_weak_reduce
#define gf_strong_reduce gf_25519_strong_reduce
#define gf_serialize gf_25519_serialize
#define gf_deserialize gf_25519_deserialize
#define SQRT_MINUS_ONE P25519_SQRT_MINUS_ONE


#endif /* __F_FIELD_H__ */ #endif /* __F_FIELD_H__ */

+ 28
- 28
src/p448/f_arithmetic.c View File

@@ -11,33 +11,33 @@
#include "field.h" #include "field.h"


void void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf_a_t a,
const gf_a_t x
) { ) {
field_a_t L0, L1, L2;
field_sqr ( L1, x );
field_mul ( L2, x, L1 );
field_sqr ( L1, L2 );
field_mul ( L2, x, L1 );
field_sqrn ( L1, L2, 3 );
field_mul ( L0, L2, L1 );
field_sqrn ( L1, L0, 3 );
field_mul ( L0, L2, L1 );
field_sqrn ( L2, L0, 9 );
field_mul ( L1, L0, L2 );
field_sqr ( L0, L1 );
field_mul ( L2, x, L0 );
field_sqrn ( L0, L2, 18 );
field_mul ( L2, L1, L0 );
field_sqrn ( L0, L2, 37 );
field_mul ( L1, L2, L0 );
field_sqrn ( L0, L1, 37 );
field_mul ( L1, L2, L0 );
field_sqrn ( L0, L1, 111 );
field_mul ( L2, L1, L0 );
field_sqr ( L0, L2 );
field_mul ( L1, x, L0 );
field_sqrn ( L0, L1, 223 );
field_mul ( a, L2, L0 );
gf_a_t L0, L1, L2;
gf_sqr ( L1, x );
gf_mul ( L2, x, L1 );
gf_sqr ( L1, L2 );
gf_mul ( L2, x, L1 );
gf_sqrn ( L1, L2, 3 );
gf_mul ( L0, L2, L1 );
gf_sqrn ( L1, L0, 3 );
gf_mul ( L0, L2, L1 );
gf_sqrn ( L2, L0, 9 );
gf_mul ( L1, L0, L2 );
gf_sqr ( L0, L1 );
gf_mul ( L2, x, L0 );
gf_sqrn ( L0, L2, 18 );
gf_mul ( L2, L1, L0 );
gf_sqrn ( L0, L2, 37 );
gf_mul ( L1, L2, L0 );
gf_sqrn ( L0, L1, 37 );
gf_mul ( L1, L2, L0 );
gf_sqrn ( L0, L1, 111 );
gf_mul ( L2, L1, L0 );
gf_sqr ( L0, L2 );
gf_mul ( L1, x, L0 );
gf_sqrn ( L0, L1, 223 );
gf_mul ( a, L2, L0 );
} }

+ 14
- 14
src/p448/f_field.h View File

@@ -13,19 +13,19 @@
#include <string.h> #include <string.h>


#include "p448.h" #include "p448.h"
#define FIELD_LIT_LIMB_BITS 56
#define FIELD_BITS 448
#define field_t p448_t
#define field_mul p448_mul
#define field_sqr p448_sqr
#define field_add_RAW p448_add_RAW
#define field_sub_RAW p448_sub_RAW
#define field_mulw p448_mulw
#define field_bias p448_bias
#define field_isr p448_isr
#define field_weak_reduce p448_weak_reduce
#define field_strong_reduce p448_strong_reduce
#define field_serialize p448_serialize
#define field_deserialize p448_deserialize
#define GF_LIT_LIMB_BITS 56
#define GF_BITS 448
#define gf p448_t
#define gf_mul p448_mul
#define gf_sqr p448_sqr
#define gf_add_RAW p448_add_RAW
#define gf_sub_RAW p448_sub_RAW
#define gf_mulw p448_mulw
#define gf_bias p448_bias
#define gf_isr p448_isr
#define gf_weak_reduce p448_weak_reduce
#define gf_strong_reduce p448_strong_reduce
#define gf_serialize p448_serialize
#define gf_deserialize p448_deserialize


#endif /* __F_FIELD_H__ */ #endif /* __F_FIELD_H__ */

+ 28
- 28
src/p480/f_arithmetic.c View File

@@ -11,33 +11,33 @@
#include "field.h" #include "field.h"


void void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf_a_t a,
const gf_a_t x
) { ) {
field_a_t L0, L1, L2, L3;
field_sqr ( L2, x );
field_mul ( L1, x, L2 );
field_sqrn ( L0, L1, 2 );
field_mul ( L2, L1, L0 );
field_sqrn ( L0, L2, 4 );
field_mul ( L1, L2, L0 );
field_sqr ( L0, L1 );
field_mul ( L2, x, L0 );
field_sqrn ( L0, L2, 8 );
field_mul ( L2, L1, L0 );
field_sqrn ( L0, L2, 17 );
field_mul ( L1, L2, L0 );
field_sqrn ( L0, L1, 17 );
field_mul ( L1, L2, L0 );
field_sqrn ( L3, L1, 17 );
field_mul ( L0, L2, L3 );
field_sqrn ( L2, L0, 51 );
field_mul ( L0, L1, L2 );
field_sqrn ( L1, L0, 119 );
field_mul ( L2, L0, L1 );
field_sqr ( L0, L2 );
field_mul ( L1, x, L0 );
field_sqrn ( L0, L1, 239 );
field_mul ( a, L2, L0 );
gf_a_t L0, L1, L2, L3;
gf_sqr ( L2, x );
gf_mul ( L1, x, L2 );
gf_sqrn ( L0, L1, 2 );
gf_mul ( L2, L1, L0 );
gf_sqrn ( L0, L2, 4 );
gf_mul ( L1, L2, L0 );
gf_sqr ( L0, L1 );
gf_mul ( L2, x, L0 );
gf_sqrn ( L0, L2, 8 );
gf_mul ( L2, L1, L0 );
gf_sqrn ( L0, L2, 17 );
gf_mul ( L1, L2, L0 );
gf_sqrn ( L0, L1, 17 );
gf_mul ( L1, L2, L0 );
gf_sqrn ( L3, L1, 17 );
gf_mul ( L0, L2, L3 );
gf_sqrn ( L2, L0, 51 );
gf_mul ( L0, L1, L2 );
gf_sqrn ( L1, L0, 119 );
gf_mul ( L2, L0, L1 );
gf_sqr ( L0, L2 );
gf_mul ( L1, x, L0 );
gf_sqrn ( L0, L1, 239 );
gf_mul ( a, L2, L0 );
} }

+ 14
- 14
src/p480/f_field.h View File

@@ -13,19 +13,19 @@
#include <string.h> #include <string.h>


#include "p480.h" #include "p480.h"
#define FIELD_LIT_LIMB_BITS 60
#define FIELD_BITS 480
#define field_t p480_t
#define field_mul p480_mul
#define field_sqr p480_sqr
#define field_add_RAW p480_add_RAW
#define field_sub_RAW p480_sub_RAW
#define field_mulw p480_mulw
#define field_bias p480_bias
#define field_isr p480_isr
#define field_weak_reduce p480_weak_reduce
#define field_strong_reduce p480_strong_reduce
#define field_serialize p480_serialize
#define field_deserialize p480_deserialize
#define GF_LIT_LIMB_BITS 60
#define GF_BITS 480
#define gf p480_t
#define gf_mul p480_mul
#define gf_sqr p480_sqr
#define gf_add_RAW p480_add_RAW
#define gf_sub_RAW p480_sub_RAW
#define gf_mulw p480_mulw
#define gf_bias p480_bias
#define gf_isr p480_isr
#define gf_weak_reduce p480_weak_reduce
#define gf_strong_reduce p480_strong_reduce
#define gf_serialize p480_serialize
#define gf_deserialize p480_deserialize


#endif /* __F_FIELD_H__ */ #endif /* __F_FIELD_H__ */

+ 28
- 28
src/p521/f_arithmetic.c View File

@@ -11,33 +11,33 @@
#include "field.h" #include "field.h"


void void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf_a_t a,
const gf_a_t x
) { ) {
field_a_t L0, L1, L2;
field_sqr ( L1, x );
field_mul ( L0, x, L1 );
field_sqrn ( L2, L0, 2 );
field_mul ( L1, L0, L2 );
field_sqrn ( L2, L1, 4 );
field_mul ( L0, L1, L2 );
field_sqrn ( L2, L0, 8 );
field_mul ( L1, L0, L2 );
field_sqrn ( L2, L1, 16 );
field_mul ( L0, L1, L2 );
field_sqrn ( L2, L0, 32 );
field_mul ( L1, L0, L2 );
field_sqr ( L2, L1 );
field_mul ( L0, x, L2 );
field_sqrn ( L2, L0, 64 );
field_mul ( L0, L1, L2 );
field_sqrn ( L2, L0, 129 );
field_mul ( L1, L0, L2 );
field_sqr ( L2, L1 );
field_mul ( L0, x, L2 );
field_sqrn ( L2, L0, 259 );
field_mul ( L1, L0, L2 );
field_sqr ( L0, L1 );
field_mul ( a, x, L0 );
gf_a_t L0, L1, L2;
gf_sqr ( L1, x );
gf_mul ( L0, x, L1 );
gf_sqrn ( L2, L0, 2 );
gf_mul ( L1, L0, L2 );
gf_sqrn ( L2, L1, 4 );
gf_mul ( L0, L1, L2 );
gf_sqrn ( L2, L0, 8 );
gf_mul ( L1, L0, L2 );
gf_sqrn ( L2, L1, 16 );
gf_mul ( L0, L1, L2 );
gf_sqrn ( L2, L0, 32 );
gf_mul ( L1, L0, L2 );
gf_sqr ( L2, L1 );
gf_mul ( L0, x, L2 );
gf_sqrn ( L2, L0, 64 );
gf_mul ( L0, L1, L2 );
gf_sqrn ( L2, L0, 129 );
gf_mul ( L1, L0, L2 );
gf_sqr ( L2, L1 );
gf_mul ( L0, x, L2 );
gf_sqrn ( L2, L0, 259 );
gf_mul ( L1, L0, L2 );
gf_sqr ( L0, L1 );
gf_mul ( a, x, L0 );
} }

+ 14
- 14
src/p521/f_field.h View File

@@ -13,19 +13,19 @@
#include "constant_time.h" #include "constant_time.h"


#include "p521.h" #include "p521.h"
#define FIELD_LIT_LIMB_BITS 58
#define FIELD_BITS 521
#define field_t p521_t
#define field_mul p521_mul
#define field_sqr p521_sqr
#define field_add_RAW p521_add_RAW
#define field_sub_RAW p521_sub_RAW
#define field_mulw p521_mulw
#define field_bias p521_bias
#define field_isr p521_isr
#define field_weak_reduce p521_weak_reduce
#define field_strong_reduce p521_strong_reduce
#define field_serialize p521_serialize
#define field_deserialize p521_deserialize
#define GF_LIT_LIMB_BITS 58
#define GF_BITS 521
#define gf p521_t
#define gf_mul p521_mul
#define gf_sqr p521_sqr
#define gf_add_RAW p521_add_RAW
#define gf_sub_RAW p521_sub_RAW
#define gf_mulw p521_mulw
#define gf_bias p521_bias
#define gf_isr p521_isr
#define gf_weak_reduce p521_weak_reduce
#define gf_strong_reduce p521_strong_reduce
#define gf_serialize p521_serialize
#define gf_deserialize p521_deserialize


#endif /* __F_FIELD_H__ */ #endif /* __F_FIELD_H__ */

+ 5
- 3
src/public_include/decaf/decaf_255.h View File

@@ -21,11 +21,13 @@ extern "C" {
#define DECAF_255_SCALAR_BITS 254 // Curve25519: 253 #define DECAF_255_SCALAR_BITS 254 // Curve25519: 253
#define DECAF_255_SCALAR_LIMBS (256/DECAF_WORD_BITS) #define DECAF_255_SCALAR_LIMBS (256/DECAF_WORD_BITS)


#ifndef __DECAF_GF_ALREADY_DEFINED__
/** Galois field element internal structure */ /** Galois field element internal structure */
typedef struct gf_255_s {
typedef struct gf_25519_s {
decaf_word_t limb[DECAF_255_LIMBS]; decaf_word_t limb[DECAF_255_LIMBS];
} gf_255_s, gf_255_t[1];
} gf_25519_s, gf_25519_t[1];
/** @endcond */ /** @endcond */
#endif /* __DECAF_GF_ALREADY_DEFINED__ */


/** Number of bytes in a serialized point. */ /** Number of bytes in a serialized point. */
#define DECAF_255_SER_BYTES 32 #define DECAF_255_SER_BYTES 32
@@ -34,7 +36,7 @@ typedef struct gf_255_s {
#define DECAF_255_SCALAR_BYTES 32 #define DECAF_255_SCALAR_BYTES 32


/** Twisted Edwards (-1,d-1) extended homogeneous coordinates */ /** Twisted Edwards (-1,d-1) extended homogeneous coordinates */
typedef struct decaf_255_point_s { /**@cond internal*/gf_255_t x,y,z,t;/**@endcond*/ } decaf_255_point_t[1];
typedef struct decaf_255_point_s { /**@cond internal*/gf_25519_t x,y,z,t;/**@endcond*/ } decaf_255_point_t[1];


/** Precomputed table based on a point. Can be trivial implementation. */ /** Precomputed table based on a point. Can be trivial implementation. */
struct decaf_255_precomputed_s; struct decaf_255_precomputed_s;


Loading…
Cancel
Save