From 5af980b85a299b584062a17278835c0794b0ba45 Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Sat, 11 Jul 2015 23:44:20 -0700 Subject: [PATCH] wipe out the multiple layers of rename between decaf_fast and field. still some serious HACKs in the include prio to avoid multiple definition of struct gf --- src/decaf_fast.c | 131 ++++------- src/decaf_gen_tables.c | 50 ++-- src/include/field.h | 98 ++++---- src/p25519/arch_ref64/p25519.c | 40 ++-- src/p25519/arch_ref64/p25519.h | 106 ++++----- src/p25519/arch_x86_64/p25519.c | 40 ++-- src/p25519/arch_x86_64/p25519.h | 105 +++++---- src/p25519/arch_x86_64/x86-64-arith.h | 324 +++++++++++++++++++++++++- src/p25519/f_arithmetic.c | 28 +-- src/p25519/f_field.h | 31 +-- src/p448/f_arithmetic.c | 56 ++--- src/p448/f_field.h | 28 +-- src/p480/f_arithmetic.c | 56 ++--- src/p480/f_field.h | 28 +-- src/p521/f_arithmetic.c | 56 ++--- src/p521/f_field.h | 28 +-- src/public_include/decaf/decaf_255.h | 8 +- 17 files changed, 748 insertions(+), 465 deletions(-) mode change 120000 => 100644 src/p25519/arch_x86_64/x86-64-arith.h diff --git a/src/decaf_fast.c b/src/decaf_fast.c index 4e1baa2..d0d9d4a 100644 --- a/src/decaf_fast.c +++ b/src/decaf_fast.c @@ -27,8 +27,6 @@ #define point_t decaf_255_point_t #define precomputed_s decaf_255_precomputed_s #define SER_BYTES DECAF_255_SER_BYTES -#define gf_s gf_255_s -#define gf gf_255_t #if WBITS == 64 typedef __int128_t decaf_sdword_t; @@ -72,7 +70,7 @@ typedef struct { niels_t n; gf z; } __attribute__((aligned(32))) pniels_s, pniel /* Precomputed base */ struct precomputed_s { niels_t table [DECAF_COMBS_N<<(DECAF_COMBS_T-1)]; }; -extern const field_t API_NS(precomputed_base_as_fe)[]; +extern const gf API_NS(precomputed_base_as_fe)[]; const precomputed_s *API_NS(precomputed_base) = (const precomputed_s *) &API_NS(precomputed_base_as_fe); @@ -95,52 +93,6 @@ const size_t API_NS2(alignof,precomputed_s) = 32; /** Copy x = y */ siv gf_cpy(gf x, const gf y) { x[0] = y[0]; } -/** Mostly-unoptimized multiply, but at least it's unrolled. */ -siv gf_mul (gf c, const gf a, const gf b) { - field_mul((field_t *)c, (const field_t *)a, (const field_t *)b); -} - -/** Dedicated square */ -siv gf_sqr (gf c, const gf a) { - field_sqr((field_t *)c, (const field_t *)a); -} - -/** Add mod p. Conservatively always weak-reduce. */ -snv gf_add ( gf_s *__restrict__ c, const gf a, const gf b ) { - field_add((field_t *)c, (const field_t *)a, (const field_t *)b); -} - -/** Subtract mod p. Conservatively always weak-reduce. */ -snv gf_sub ( gf c, const gf a, const gf b ) { - field_sub((field_t *)c, (const field_t *)a, (const field_t *)b); -} - -/** Add mod p. Conservatively always weak-reduce.) */ -siv gf_bias ( gf c, int amt) { - field_bias((field_t *)c, amt); -} - -/** Subtract mod p. Bias by 2 and don't reduce */ -siv gf_sub_nr ( gf_s *__restrict__ c, const gf a, const gf b ) { -// FOR_LIMB_U(i, c->limb[i] = a->limb[i] - b->limb[i] + 2*P->limb[i] ); - field_sub_nr((field_t *)c, (const field_t *)a, (const field_t *)b); - gf_bias(c, 2); - if (WBITS==32) field_weak_reduce((field_t*) c); // HACK -} - -/** Subtract mod p. Bias by amt but don't reduce. */ -siv gf_sub_nr_x ( gf c, const gf a, const gf b, int amt ) { - field_sub_nr((field_t *)c, (const field_t *)a, (const field_t *)b); - gf_bias(c, amt); - if (WBITS==32) field_weak_reduce((field_t*) c); // HACK -} - -/** Add mod p. Don't reduce. */ -siv gf_add_nr ( gf c, const gf a, const gf b ) { -// FOR_LIMB_U(i, c->limb[i] = a->limb[i] + b->limb[i]); - field_add_nr((field_t *)c, (const field_t *)a, (const field_t *)b); -} - /** Constant time, x = is_z ? z : y */ siv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { constant_time_select(x,z,y,sizeof(gf),is_z); @@ -162,29 +114,11 @@ siv cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) { }); } -/** - * Mul by signed int. Not constant-time WRT the sign of that int. - * Just uses a full mul (PERF) - */ -siv gf_mlw(gf c, const gf a, int w) { - if (w>0) { - field_mulw((field_t *)c, (const field_t *)a, w); - } else { - field_mulw((field_t *)c, (const field_t *)a, -w); - gf_sub(c,ZERO,c); - } -} - -/** Canonicalize */ -siv gf_canon ( gf a ) { - field_strong_reduce((field_t *)a); -} - /** Compare a==b */ static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) { gf c; gf_sub(c,a,b); - gf_canon(c); + gf_strong_reduce(c); decaf_word_t ret=0; FOR_LIMB(i, ret |= c->limb[i] ); /* Hope the compiler is too dumb to optimize this, thus noinline */ @@ -194,7 +128,7 @@ static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) { /** Inverse square root using addition chain. */ static decaf_bool_t gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) { gf tmp0, tmp1; - field_isr((field_t *)y, (const field_t *)x); + gf_isr((gf_s *)y, (const gf_s *)x); gf_sqr(tmp0,y); gf_mul(tmp1,tmp0,x); return gf_eq(tmp1,ONE) | (allow_zero & gf_eq(tmp1,ZERO)); @@ -211,11 +145,24 @@ sv gf_invert(gf y, const gf x) { gf_cpy(y, t2); } +/** + * Mul by signed int. Not constant-time WRT the sign of that int. + * Just uses a full mul (PERF) + */ +static inline void gf_mulw_sgn(gf c, const gf a, int w) { + if (w>0) { + gf_mulw(c, a, w); + } else { + gf_mulw(c, a, -w); + gf_sub(c,ZERO,c); + } +} + /** Return high bit of x = low bit of 2x mod p */ static decaf_word_t hibit(const gf x) { gf y; gf_add(y,x,x); - gf_canon(y); + gf_strong_reduce(y); return -(y->limb[0]&1); } @@ -223,7 +170,7 @@ static decaf_word_t hibit(const gf x) { static decaf_word_t lobit(const gf x) { gf y; gf_cpy(y,x); - gf_canon(y); + gf_strong_reduce(y); return -(y->limb[0]&1); } @@ -454,7 +401,7 @@ decaf_bool_t API_NS(scalar_eq) ( const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}}; static void gf_encode ( unsigned char ser[SER_BYTES], gf a ) { - field_serialize(ser, (field_t *)a); + gf_serialize(ser, (gf_s *)a); } extern const gf SQRT_MINUS_ONE, SQRT_ONE_MINUS_D; /* Intern this? */ @@ -528,7 +475,7 @@ void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) { * Deserialize a bool, return TRUE if < p. */ static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) { - return field_deserialize((field_t *)s, ser); + return gf_deserialize((gf_s *)s, ser); } decaf_bool_t API_NS(point_decode) ( @@ -544,7 +491,7 @@ decaf_bool_t API_NS(point_decode) ( gf_sub ( f, ONE, a ); /* f = 1-s^2 = 1-as^2 since a=1 */ succ &= ~ gf_eq( f, ZERO ); gf_sqr ( b, f ); - gf_mlw ( c, a, 4-4*EDWARDS_D ); + gf_mulw_sgn ( c, a, 4-4*EDWARDS_D ); gf_add ( c, c, b ); /* t^2 */ gf_mul ( d, f, s ); /* s(1-s^2) for denoms */ gf_sqr ( e, d ); @@ -596,7 +543,7 @@ void API_NS(point_sub) ( gf_add_nr ( b, q->y, q->x ); gf_mul ( p->y, d, b ); gf_mul ( b, r->t, q->t ); - gf_mlw ( p->x, b, -2*EDWARDS_D ); + gf_mulw_sgn ( p->x, b, -2*EDWARDS_D ); gf_add_nr ( b, a, p->y ); gf_sub_nr ( c, p->y, a ); gf_mul ( a, q->z, r->z ); @@ -622,7 +569,7 @@ void API_NS(point_add) ( gf_add_nr ( b, q->y, q->x ); gf_mul ( p->y, d, b ); gf_mul ( b, r->t, q->t ); - gf_mlw ( p->x, b, -2*EDWARDS_D ); + gf_mulw_sgn ( p->x, b, -2*EDWARDS_D ); gf_add_nr ( b, a, p->y ); gf_sub_nr ( c, p->y, a ); gf_mul ( a, q->z, r->z ); @@ -646,11 +593,11 @@ snv point_double_internal ( gf_add_nr ( d, c, a ); gf_add_nr ( p->t, q->y, q->x ); gf_sqr ( b, p->t ); - gf_sub_nr_x ( b, b, d, 3 ); + gf_subx_nr ( b, b, d, 3 ); gf_sub_nr ( p->t, a, c ); gf_sqr ( p->x, q->z ); gf_add_nr ( p->z, p->x, p->x ); - gf_sub_nr_x ( a, p->z, p->t, 4 ); + gf_subx_nr ( a, p->z, p->t, 4 ); gf_mul ( p->x, a, b ); gf_mul ( p->z, p->t, a ); gf_mul ( p->y, p->t, d ); @@ -777,7 +724,7 @@ static void pt_to_pniels ( ) { gf_sub ( b->n->a, a->y, a->x ); gf_add ( b->n->b, a->x, a->y ); - gf_mlw ( b->n->c, a->t, -2*EDWARDS_D ); + gf_mulw_sgn ( b->n->c, a->t, -2*EDWARDS_D ); gf_add ( b->z, a->z, a->z ); } @@ -1047,12 +994,12 @@ void API_NS(point_from_hash_nonuniform) ( // TODO: simplify since we don't return a hint anymore gf r0,r,a,b,c,dee,D,N,rN,e; gf_deser(r0,ser); - gf_canon(r0); + gf_strong_reduce(r0); gf_sqr(a,r0); - //gf_sub(r,ZERO,a); /*gf_mlw(r,a,QUADRATIC_NONRESIDUE);*/ + //gf_sub(r,ZERO,a); /*gf_mulw_sgn(r,a,QUADRATIC_NONRESIDUE);*/ gf_mul(r,a,SQRT_MINUS_ONE); - gf_mlw(dee,ONE,EDWARDS_D); - gf_mlw(c,r,EDWARDS_D); + gf_mulw_sgn(dee,ONE,EDWARDS_D); + gf_mulw_sgn(c,r,EDWARDS_D); /* Compute D := (dr+a-d)(dr-ar-d) with a=1 */ gf_sub(a,c,dee); @@ -1064,7 +1011,7 @@ void API_NS(point_from_hash_nonuniform) ( /* compute N := (r+1)(a-2d) */ gf_add(a,r,ONE); - gf_mlw(N,a,1-2*EDWARDS_D); + gf_mulw_sgn(N,a,1-2*EDWARDS_D); /* e = +-1/sqrt(+-ND) */ gf_mul(rN,r,N); @@ -1078,8 +1025,8 @@ void API_NS(point_from_hash_nonuniform) ( /* b <- t/s */ cond_sel(c,r0,r,square); /* r? = sqr ? r : 1 */ /* In two steps to avoid overflow on 32-bit arch */ - gf_mlw(a,c,1-2*EDWARDS_D); - gf_mlw(b,a,1-2*EDWARDS_D); + gf_mulw_sgn(a,c,1-2*EDWARDS_D); + gf_mulw_sgn(b,a,1-2*EDWARDS_D); gf_sub(c,r,ONE); gf_mul(a,b,c); /* = r? * (r-1) * (a-2d)^2 with a=1 */ gf_mul(b,a,e); @@ -1148,7 +1095,7 @@ API_NS(invert_elligator_nonuniform) ( cond_sel(b,b,ZERO,is_identity & ~sgn_t_over_s & ~sgn_s); /* identity adjust */ } - gf_mlw(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */ + gf_mulw_sgn(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */ gf_add(a,d,b); /* num? */ gf_sub(d,d,b); /* den? */ gf_mul(b,a,d); /* n*d */ @@ -1199,7 +1146,7 @@ decaf_bool_t API_NS(point_valid) ( gf_sqr(b,p->y); gf_sub(a,b,a); gf_sqr(b,p->t); - gf_mlw(c,b,-EDWARDS_D); + gf_mulw_sgn(c,b,-EDWARDS_D); gf_sqr(b,p->z); gf_add(b,b,c); out &= gf_eq(a,b); @@ -1281,15 +1228,15 @@ static void batch_normalize_niels ( for (i=0; ia, zis[i]); - gf_canon(product); + gf_strong_reduce(product); gf_cpy(table[i]->a, product); gf_mul(product, table[i]->b, zis[i]); - gf_canon(product); + gf_strong_reduce(product); gf_cpy(table[i]->b, product); gf_mul(product, table[i]->c, zis[i]); - gf_canon(product); + gf_strong_reduce(product); gf_cpy(table[i]->c, product); } } @@ -1510,7 +1457,7 @@ sv prepare_wnaf_table( } } -extern const field_t API_NS(precomputed_wnaf_as_fe)[]; +extern const gf API_NS(precomputed_wnaf_as_fe)[]; static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe); const size_t API_NS2(sizeof,precomputed_wnafs) __attribute((visibility("hidden"))) = sizeof(niels_t)<= FIELD_LIT_LIMB_BITS) { - limb &= (1ull<= GF_LIT_LIMB_BITS) { + limb &= (1ull<>(8-b); } } - printf(")"); + printf(")}"); assert(b<8); } @@ -88,41 +88,39 @@ int main(int argc, char **argv) { if (ret || !preWnaf) return 1; API_NS(precompute_wnafs)(preWnaf, real_point_base); - const field_t *output; + const gf_s *output; unsigned i; printf("/** @warning: this file was automatically generated. */\n"); + printf("#include \n\n"); printf("#include \"field.h\"\n\n"); - printf("#include \"decaf.h\"\n\n"); printf("#define API_NS(_id) decaf_255_##_id\n"); printf("#define API_NS2(_pref,_id) _pref##_decaf_255_##_id\n"); - output = (const field_t *)real_point_base; + output = (const gf_s *)real_point_base; printf("const API_NS(point_t) API_NS(point_base) = {{\n"); - for (i=0; i < sizeof(API_NS(point_t)); i+=sizeof(field_t)) { + for (i=0; i < sizeof(API_NS(point_t)); i+=sizeof(gf)) { if (i) printf(",\n "); - printf("{"); field_print(output++); - printf("}"); } printf("\n}};\n"); - output = (const field_t *)pre; - printf("const field_t API_NS(precomputed_base_as_fe)[%d]\n", - (int)(API_NS2(sizeof,precomputed_s) / sizeof(field_t))); + output = (const gf_s *)pre; + printf("const gf API_NS(precomputed_base_as_fe)[%d]\n", + (int)(API_NS2(sizeof,precomputed_s) / sizeof(gf))); printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s)); - for (i=0; i < API_NS2(sizeof,precomputed_s); i+=sizeof(field_t)) { + for (i=0; i < API_NS2(sizeof,precomputed_s); i+=sizeof(gf)) { if (i) printf(",\n "); field_print(output++); } printf("\n};\n"); - output = (const field_t *)preWnaf; - printf("const field_t API_NS(precomputed_wnaf_as_fe)[%d]\n", - (int)(API_NS2(sizeof,precomputed_wnafs) / sizeof(field_t))); + output = (const gf_s *)preWnaf; + printf("const gf API_NS(precomputed_wnaf_as_fe)[%d]\n", + (int)(API_NS2(sizeof,precomputed_wnafs) / sizeof(gf))); printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s)); - for (i=0; i < API_NS2(sizeof,precomputed_wnafs); i+=sizeof(field_t)) { + for (i=0; i < API_NS2(sizeof,precomputed_wnafs); i+=sizeof(gf)) { if (i) printf(",\n "); field_print(output++); } diff --git a/src/include/field.h b/src/include/field.h index d5c8fbc..1012416 100644 --- a/src/include/field.h +++ b/src/include/field.h @@ -1,23 +1,20 @@ /** * @file field.h - * @brief Generic field header. + * @brief Generic gf header. * @copyright * Copyright (c) 2014 Cryptography Research, Inc. \n * Released under the MIT License. See LICENSE.txt for license information. * @author Mike Hamburg */ -#ifndef __FIELD_H__ -#define __FIELD_H__ +#ifndef __GF_H__ +#define __GF_H__ #include "constant_time.h" #include "f_field.h" #include -typedef struct field_t field_a_t[1]; -#define field_a_restrict_t struct field_t *__restrict__ - -#define is32 (GOLDI_BITS == 32 || FIELD_BITS != 448) +#define is32 (GOLDI_BITS == 32 || GF_BITS != 448) #if (is32) #define IF32(s) (s) #else @@ -33,9 +30,9 @@ typedef struct field_t field_a_t[1]; * If x=0, returns 0. */ void -field_isr ( - field_a_t a, - const field_a_t x +gf_isr ( + gf a, + const gf x ); /** @@ -43,62 +40,75 @@ field_isr ( */ static __inline__ void __attribute__((unused,always_inline)) -field_sqrn ( - field_a_restrict_t y, - const field_a_t x, +gf_sqrn ( + gf_s *__restrict__ y, + const gf x, int n ) { - field_a_t tmp; + gf tmp; assert(n>0); if (n&1) { - field_sqr(y,x); + gf_sqr(y,x); n--; } else { - field_sqr(tmp,x); - field_sqr(y,tmp); + gf_sqr(tmp,x); + gf_sqr(y,tmp); n-=2; } for (; n; n-=2) { - field_sqr(tmp,y); - field_sqr(y,tmp); + gf_sqr(tmp,y); + gf_sqr(y,tmp); } } static __inline__ void -field_subx_RAW ( - field_a_t d, - const field_a_t a, - const field_a_t b +gf_subx_RAW ( + gf d, + const gf a, + const gf b ) { - field_sub_RAW ( d, a, b ); - field_bias( d, 2 ); - IF32( field_weak_reduce ( d ) ); + gf_sub_RAW ( d, a, b ); + gf_bias( d, 2 ); + IF32( gf_weak_reduce ( d ) ); } static __inline__ void -field_sub ( - field_a_t d, - const field_a_t a, - const field_a_t b +gf_sub ( + gf d, + const gf a, + const gf b ) { - field_sub_RAW ( d, a, b ); - field_bias( d, 2 ); - field_weak_reduce ( d ); + gf_sub_RAW ( d, a, b ); + gf_bias( d, 2 ); + gf_weak_reduce ( d ); } static __inline__ void -field_add ( - field_a_t d, - const field_a_t a, - const field_a_t b +gf_add ( + gf d, + const gf a, + const gf b ) { - field_add_RAW ( d, a, b ); - field_weak_reduce ( d ); + gf_add_RAW ( d, a, b ); + gf_weak_reduce ( d ); +} + +#define gf_add_nr gf_add_RAW + +/** Subtract mod p. Bias by 2 and don't reduce */ +static inline void gf_sub_nr ( gf c, const gf a, const gf b ) { +// FOR_LIMB_U(i, c->limb[i] = a->limb[i] - b->limb[i] + 2*P->limb[i] ); + gf_sub_RAW(c,a,b); + gf_bias(c, 2); + if (DECAF_WORD_BITS==32) gf_weak_reduce(c); // HACK +} + +/** Subtract mod p. Bias by amt but don't reduce. */ +static inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) { + gf_sub_RAW(c,a,b); + gf_bias(c, amt); + if (DECAF_WORD_BITS==32) gf_weak_reduce(c); // HACK } -/* FIXME: no warnings on RAW routines */ -#define field_add_nr field_add_RAW -#define field_sub_nr field_sub_RAW -#define field_subx_nr field_subx_RAW -#endif // __FIELD_H__ +#endif // __GF_H__ diff --git a/src/p25519/arch_ref64/p25519.c b/src/p25519/arch_ref64/p25519.c index 37cedb0..4381188 100644 --- a/src/p25519/arch_ref64/p25519.c +++ b/src/p25519/arch_ref64/p25519.c @@ -17,10 +17,10 @@ static __inline__ uint64_t is_zero(uint64_t a) { } void -p255_mul ( - p255_t *__restrict__ cs, - const p255_t *as, - const p255_t *bs +gf_25519_mul ( + gf_25519_t __restrict__ cs, + const gf_25519_t as, + const gf_25519_t bs ) { const uint64_t *a = as->limb, *b = bs->limb, mask = ((1ull<<51)-1); @@ -52,9 +52,9 @@ p255_mul ( } void -p255_mulw ( - p255_t *__restrict__ cs, - const p255_t *as, +gf_25519_mulw ( + gf_25519_t __restrict__ cs, + const gf_25519_t as, uint64_t b ) { const uint64_t *a = as->limb, mask = ((1ull<<51)-1); @@ -79,16 +79,16 @@ p255_mulw ( } void -p255_sqr ( - p255_t *__restrict__ cs, - const p255_t *as +gf_25519_t qr ( + gf_25519_t __restrict__ cs, + const gf_25519_t as ) { - p255_mul(cs,as,as); // TODO + gf_25519_mul(cs,as,as); // TODO } void -p255_strong_reduce ( - p255_t *a +gf_25519_t trong_reduce ( + gf_25519_t a ) { uint64_t mask = (1ull<<51)-1; @@ -128,14 +128,14 @@ p255_strong_reduce ( } void -p255_serialize ( +gf_25519_t erialize ( uint8_t serial[32], - const struct p255_t *x + const struct gf_25519_t x ) { int i,j; - p255_t red; - p255_copy(&red, x); - p255_strong_reduce(&red); + gf_25519_t red; + gf_25519_copy(&red, x); + gf_25519_t trong_reduce(&red); uint64_t *r = red.limb; uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12}; for (i=0; i<4; i++) { @@ -147,8 +147,8 @@ p255_serialize ( } mask_t -p255_deserialize ( - p255_t *x, +gf_25519_deserialize ( + gf_25519_t x, const uint8_t serial[32] ) { int i,j; diff --git a/src/p25519/arch_ref64/p25519.h b/src/p25519/arch_ref64/p25519.h index be64923..12e9c52 100644 --- a/src/p25519/arch_ref64/p25519.h +++ b/src/p25519/arch_ref64/p25519.h @@ -1,8 +1,8 @@ /* Copyright (c) 2014 Cryptography Research, Inc. * Released under the MIT License. See LICENSE.txt for license information. */ -#ifndef __P255_H__ -#define __P255_H__ 1 +#ifndef __P25519_H__ +#define __P25519_H__ 1 #include #include @@ -10,9 +10,9 @@ #include "word.h" -typedef struct p255_t { +typedef struct gf_25519_s { uint64_t limb[5]; -} p255_t; +} gf_25519_s, gf_25519_t[1]; #define LBITS 51 #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} @@ -32,113 +32,113 @@ extern "C" { #endif static __inline__ void -p255_add_RAW ( - p255_t *out, - const p255_t *a, - const p255_t *b +gf_25519_add_RAW ( + gf_25519_t out, + const gf_25519_t a, + const gf_25519_t b ) __attribute__((unused)); static __inline__ void -p255_sub_RAW ( - p255_t *out, - const p255_t *a, - const p255_t *b +gf_25519_sub_RAW ( + gf_25519_t out, + const gf_25519_t a, + const gf_25519_t b ) __attribute__((unused)); static __inline__ void -p255_copy ( - p255_t *out, - const p255_t *a +gf_25519_copy ( + gf_25519_t out, + const gf_25519_t a ) __attribute__((unused)); static __inline__ void -p255_weak_reduce ( - p255_t *inout +gf_25519_weak_reduce ( + gf_25519_t inout ) __attribute__((unused)); void -p255_strong_reduce ( - p255_t *inout +gf_25519_strong_reduce ( + gf_25519_t inout ); static __inline__ void -p255_bias ( - p255_t *inout, +gf_25519_bias ( + gf_25519_t inout, int amount ) __attribute__((unused)); void -p255_mul ( - p255_t *__restrict__ out, - const p255_t *a, - const p255_t *b +gf_25519_mul ( + gf_25519_s *__restrict__ out, + const gf_25519_t a, + const gf_25519_t b ); void -p255_mulw ( - p255_t *__restrict__ out, - const p255_t *a, +gf_25519_mulw ( + gf_25519_s *__restrict__ out, + const gf_25519_t a, uint64_t b ); void -p255_sqr ( - p255_t *__restrict__ out, - const p255_t *a +gf_25519_sqr ( + gf_25519_s *__restrict__ out, + const gf_25519_t a ); void -p255_serialize ( +gf_25519_serialize ( uint8_t serial[32], - const struct p255_t *x + const gf_25519_t x ); mask_t -p255_deserialize ( - p255_t *x, +gf_25519_deserialize ( + gf_25519_t x, const uint8_t serial[32] ); /* -------------- Inline functions begin here -------------- */ void -p255_add_RAW ( - p255_t *out, - const p255_t *a, - const p255_t *b +gf_25519_add_RAW ( + gf_25519_t out, + const gf_25519_t a, + const gf_25519_t b ) { unsigned int i; for (i=0; i<5; i++) { out->limb[i] = a->limb[i] + b->limb[i]; } - p255_weak_reduce(out); + gf_25519_weak_reduce(out); } void -p255_sub_RAW ( - p255_t *out, - const p255_t *a, - const p255_t *b +gf_25519_sub_RAW ( + gf_25519_t out, + const gf_25519_t a, + const gf_25519_t b ) { unsigned int i; uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; for (i=0; i<5; i++) { out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1); } - p255_weak_reduce(out); + gf_25519_weak_reduce(out); } void -p255_copy ( - p255_t *out, - const p255_t *a +gf_25519_copy ( + gf_25519_t out, + const gf_25519_t a ) { memcpy(out,a,sizeof(*a)); } void -p255_bias ( - p255_t *a, +gf_25519_bias ( + gf_25519_t a, int amt ) { (void) a; @@ -146,8 +146,8 @@ p255_bias ( } void -p255_weak_reduce ( - p255_t *a +gf_25519_weak_reduce ( + gf_25519_t a ) { uint64_t mask = (1ull<<51) - 1; uint64_t tmp = a->limb[4] >> 51; @@ -162,4 +162,4 @@ p255_weak_reduce ( }; /* extern "C" */ #endif -#endif /* __P255_H__ */ +#endif /* __P25519_H__ */ diff --git a/src/p25519/arch_x86_64/p25519.c b/src/p25519/arch_x86_64/p25519.c index 464522c..0e09086 100644 --- a/src/p25519/arch_x86_64/p25519.c +++ b/src/p25519/arch_x86_64/p25519.c @@ -10,10 +10,10 @@ static inline uint64_t shr(__uint128_t x, int n) { } void -p255_mul ( - p255_t *__restrict__ cs, - const p255_t *as, - const p255_t *bs +gf_25519_mul ( + gf_25519_s *__restrict__ cs, + const gf_25519_t as, + const gf_25519_t bs ) { const uint64_t *a = as->limb, *b = bs->limb, mask = ((1ull<<51)-1); uint64_t *c = cs->limb; @@ -92,9 +92,9 @@ p255_mul ( } void -p255_sqr ( - p255_t *__restrict__ cs, - const p255_t *as +gf_25519_sqr ( + gf_25519_s *__restrict__ cs, + const gf_25519_t as ) { const uint64_t *a = as->limb, mask = ((1ull<<51)-1); uint64_t *c = cs->limb; @@ -156,9 +156,9 @@ p255_sqr ( } void -p255_mulw ( - p255_t *__restrict__ cs, - const p255_t *as, +gf_25519_mulw ( + gf_25519_s *__restrict__ cs, + const gf_25519_t as, uint64_t b ) { const uint64_t *a = as->limb, mask = ((1ull<<51)-1); @@ -191,8 +191,8 @@ p255_mulw ( } void -p255_strong_reduce ( - p255_t *a +gf_25519_strong_reduce ( + gf_25519_t a ) { uint64_t mask = (1ull<<51)-1; @@ -232,15 +232,15 @@ p255_strong_reduce ( } void -p255_serialize ( +gf_25519_serialize ( uint8_t serial[32], - const struct p255_t *x + const gf_25519_t x ) { int i,j; - p255_t red; - p255_copy(&red, x); - p255_strong_reduce(&red); - uint64_t *r = red.limb; + gf_25519_t red; + gf_25519_copy(red, x); + gf_25519_strong_reduce(red); + uint64_t *r = red->limb; uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12}; for (i=0; i<4; i++) { for (j=0; j<8; j++) { @@ -251,8 +251,8 @@ p255_serialize ( } mask_t -p255_deserialize ( - p255_t *x, +gf_25519_deserialize ( + gf_25519_t x, const uint8_t serial[32] ) { int i,j; diff --git a/src/p25519/arch_x86_64/p25519.h b/src/p25519/arch_x86_64/p25519.h index 4106fcc..203b89a 100644 --- a/src/p25519/arch_x86_64/p25519.h +++ b/src/p25519/arch_x86_64/p25519.h @@ -1,8 +1,8 @@ /* Copyright (c) 2014 Cryptography Research, Inc. * Released under the MIT License. See LICENSE.txt for license information. */ -#ifndef __P255_H__ -#define __P255_H__ 1 +#ifndef __P25519_H__ +#define __P25519_H__ 1 #include #include @@ -10,9 +10,12 @@ #include "word.h" -typedef struct p255_t { +#ifndef __DECAF_255_H__ // HACK FIXME +#define DECAF_WORD_BITS 64 +typedef struct gf_25519_s { uint64_t limb[5]; -} p255_t; +} gf_25519_s, gf_25519_t[1]; +#endif #define LBITS 51 #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} @@ -32,80 +35,80 @@ extern "C" { #endif static __inline__ void -p255_add_RAW ( - p255_t *out, - const p255_t *a, - const p255_t *b +gf_25519_add_RAW ( + gf_25519_t out, + const gf_25519_t a, + const gf_25519_t b ) __attribute__((unused)); static __inline__ void -p255_sub_RAW ( - p255_t *out, - const p255_t *a, - const p255_t *b +gf_25519_sub_RAW ( + gf_25519_t out, + const gf_25519_t a, + const gf_25519_t b ) __attribute__((unused)); static __inline__ void -p255_copy ( - p255_t *out, - const p255_t *a +gf_25519_copy ( + gf_25519_t out, + const gf_25519_t a ) __attribute__((unused)); static __inline__ void -p255_weak_reduce ( - p255_t *inout +gf_25519_weak_reduce ( + gf_25519_t inout ) __attribute__((unused)); void -p255_strong_reduce ( - p255_t *inout +gf_25519_strong_reduce ( + gf_25519_t inout ); static __inline__ void -p255_bias ( - p255_t *inout, +gf_25519_bias ( + gf_25519_t inout, int amount ) __attribute__((unused)); void -p255_mul ( - p255_t *__restrict__ out, - const p255_t *a, - const p255_t *b +gf_25519_mul ( + gf_25519_s *__restrict__ out, + const gf_25519_t a, + const gf_25519_t b ); void -p255_mulw ( - p255_t *__restrict__ out, - const p255_t *a, +gf_25519_mulw ( + gf_25519_s *__restrict__ out, + const gf_25519_t a, uint64_t b ); void -p255_sqr ( - p255_t *__restrict__ out, - const p255_t *a +gf_25519_sqr ( + gf_25519_s *__restrict__ out, + const gf_25519_t a ); void -p255_serialize ( +gf_25519_serialize ( uint8_t serial[32], - const struct p255_t *x + const gf_25519_t x ); mask_t -p255_deserialize ( - p255_t *x, +gf_25519_deserialize ( + gf_25519_t x, const uint8_t serial[32] ); /* -------------- Inline functions begin here -------------- */ void -p255_add_RAW ( - p255_t *out, - const p255_t *a, - const p255_t *b +gf_25519_add_RAW ( + gf_25519_t out, + const gf_25519_t a, + const gf_25519_t b ) { unsigned int i; for (i=0; i<5; i++) { @@ -114,10 +117,10 @@ p255_add_RAW ( } void -p255_sub_RAW ( - p255_t *out, - const p255_t *a, - const p255_t *b +gf_25519_sub_RAW ( + gf_25519_t out, + const gf_25519_t a, + const gf_25519_t b ) { unsigned int i; uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36; @@ -127,16 +130,16 @@ p255_sub_RAW ( } void -p255_copy ( - p255_t *out, - const p255_t *a +gf_25519_copy ( + gf_25519_t out, + const gf_25519_t a ) { memcpy(out,a,sizeof(*a)); } void -p255_bias ( - p255_t *a, +gf_25519_bias ( + gf_25519_t a, int amt ) { a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt; @@ -147,8 +150,8 @@ p255_bias ( } void -p255_weak_reduce ( - p255_t *a +gf_25519_weak_reduce ( + gf_25519_t a ) { uint64_t mask = (1ull<<51) - 1; uint64_t tmp = a->limb[4] >> 51; @@ -163,4 +166,4 @@ p255_weak_reduce ( }; /* extern "C" */ #endif -#endif /* __P255_H__ */ +#endif /* __P25519_H__ */ diff --git a/src/p25519/arch_x86_64/x86-64-arith.h b/src/p25519/arch_x86_64/x86-64-arith.h deleted file mode 120000 index 93c6c47..0000000 --- a/src/p25519/arch_x86_64/x86-64-arith.h +++ /dev/null @@ -1 +0,0 @@ -../../p448/arch_x86_64/x86-64-arith.h \ No newline at end of file diff --git a/src/p25519/arch_x86_64/x86-64-arith.h b/src/p25519/arch_x86_64/x86-64-arith.h new file mode 100644 index 0000000..00fcc1e --- /dev/null +++ b/src/p25519/arch_x86_64/x86-64-arith.h @@ -0,0 +1,323 @@ +/* Copyright (c) 2014 Cryptography Research, Inc. + * Released under the MIT License. See LICENSE.txt for license information. + */ + +#ifndef __X86_64_ARITH_H__ +#define __X86_64_ARITH_H__ + +#include + +/* TODO: non x86-64 versions of these. + * FUTURE: autogenerate + */ + +static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) { + #ifndef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rax;" + "mulq %[b];" + : [c]"=a"(c), [d]"=d"(d) + : [b]"m"(*b), [a]"m"(*a) + : "cc"); + return (((__uint128_t)(d))<<64) | c; + #else + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx;" + "mulx %[b], %[c], %[d];" + : [c]"=r"(c), [d]"=r"(d) + : [b]"m"(*b), [a]"m"(*a) + : "rdx"); + return (((__uint128_t)(d))<<64) | c; + #endif +} + +static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) { + #ifndef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rax;" + "mulq %[b];" + : [c]"=a"(c), [d]"=d"(d) + : [b]"m"(*b), [a]"r"(a) + : "cc"); + return (((__uint128_t)(d))<<64) | c; + #else + uint64_t c,d; + __asm__ volatile + ("mulx %[b], %[c], %[d];" + : [c]"=r"(c), [d]"=r"(d) + : [b]"m"(*b), [a]"d"(a)); + return (((__uint128_t)(d))<<64) | c; + #endif +} + +static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b) { + #ifndef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("mulq %[b];" + : [c]"=a"(c), [d]"=d"(d) + : [b]"r"(b), "a"(a) + : "cc"); + return (((__uint128_t)(d))<<64) | c; + #else + uint64_t c,d; + __asm__ volatile + ("mulx %[b], %[c], %[d];" + : [c]"=r"(c), [d]"=r"(d) + : [b]"r"(b), [a]"d"(a)); + return (((__uint128_t)(d))<<64) | c; + #endif +} + +static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) { + #ifndef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rax; " + "addq %%rax, %%rax; " + "mulq %[b];" + : [c]"=a"(c), [d]"=d"(d) + : [b]"m"(*b), [a]"m"(*a) + : "cc"); + return (((__uint128_t)(d))<<64) | c; + #else + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx;" + "leaq (,%%rdx,2), %%rdx;" + "mulx %[b], %[c], %[d];" + : [c]"=r"(c), [d]"=r"(d) + : [b]"m"(*b), [a]"m"(*a) + : "rdx"); + return (((__uint128_t)(d))<<64) | c; + #endif +} + +static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "mulx %[b], %[c], %[d]; " + "addq %[c], %[lo]; " + "adcq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "mulq %[b]; " + "addq %%rax, %[lo]; " + "adcq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + + *acc = (((__uint128_t)(hi))<<64) | lo; +} + +static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + uint64_t lo2 = *acc2, hi2 = *acc2>>64; + + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "mulx %[b], %[c], %[d]; " + "addq %[c], %[lo]; " + "adcq %[d], %[hi]; " + "addq %[c], %[lo2]; " + "adcq %[d], %[hi2]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "mulq %[b]; " + "addq %%rax, %[lo]; " + "adcq %%rdx, %[hi]; " + "addq %%rax, %[lo2]; " + "adcq %%rdx, %[hi2]; " + : [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + + *acc = (((__uint128_t)(hi))<<64) | lo; + *acc2 = (((__uint128_t)(hi2))<<64) | lo2; +} + +static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("mulx %[b], %[c], %[d]; " + "addq %[c], %[lo]; " + "adcq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"d"(a) + : "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "mulq %[b]; " + "addq %%rax, %[lo]; " + "adcq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"r"(a) + : "rax", "rdx", "cc"); + #endif + + *acc = (((__uint128_t)(hi))<<64) | lo; +} + +static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b) { + uint64_t lo = *acc, hi = *acc>>64; + + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("mulx %[b], %[c], %[d]; " + "addq %[c], %[lo]; " + "adcq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"r"(b), [a]"d"(a) + : "cc"); + #else + __asm__ volatile + ("mulq %[b]; " + "addq %%rax, %[lo]; " + "adcq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"r"(b), "a"(a) + : "rax", "rdx", "cc"); + #endif + + *acc = (((__uint128_t)(hi))<<64) | lo; +} + +static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "addq %%rdx, %%rdx; " + "mulx %[b], %[c], %[d]; " + "addq %[c], %[lo]; " + "adcq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "addq %%rax, %%rax; " + "mulq %[b]; " + "addq %%rax, %[lo]; " + "adcq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + + *acc = (((__uint128_t)(hi))<<64) | lo; +} + +static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "mulx %[b], %[c], %[d]; " + "subq %[c], %[lo]; " + "sbbq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "mulq %[b]; " + "subq %%rax, %[lo]; " + "sbbq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + *acc = (((__uint128_t)(hi))<<64) | lo; +} + +static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t lo = *acc, hi = *acc>>64; + #ifdef __BMI2__ + uint64_t c,d; + __asm__ volatile + ("movq %[a], %%rdx; " + "addq %%rdx, %%rdx; " + "mulx %[b], %[c], %[d]; " + "subq %[c], %[lo]; " + "sbbq %[d], %[hi]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + #else + __asm__ volatile + ("movq %[a], %%rax; " + "addq %%rax, %%rax; " + "mulq %[b]; " + "subq %%rax, %[lo]; " + "sbbq %%rdx, %[hi]; " + : [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rax", "rdx", "cc"); + #endif + *acc = (((__uint128_t)(hi))<<64) | lo; + +} + +static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) { + uint64_t c,d, lo = *acc, hi = *acc>>64; + __asm__ volatile + ("movq %[a], %%rdx; " + "mulx %[b], %[c], %[d]; " + "subq %[lo], %[c]; " + "sbbq %[hi], %[d]; " + : [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi) + : [b]"m"(*b), [a]"m"(*a) + : "rdx", "cc"); + *acc = (((__uint128_t)(d))<<64) | c; +} + +static __inline__ __uint128_t widemulu(uint64_t a, uint64_t b) { + return ((__uint128_t)(a)) * b; +} + +static __inline__ __int128_t widemuls(int64_t a, int64_t b) { + return ((__int128_t)(a)) * b; +} + +static __inline__ uint64_t opacify(uint64_t x) { + __asm__ volatile("" : "+r"(x)); + return x; +} + +static __inline__ mask_t is_zero(uint64_t x) { + __asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x)); + return ~x; +} + +#endif /* __X86_64_ARITH_H__ */ diff --git a/src/p25519/f_arithmetic.c b/src/p25519/f_arithmetic.c index c211388..82600db 100644 --- a/src/p25519/f_arithmetic.c +++ b/src/p25519/f_arithmetic.c @@ -10,7 +10,7 @@ #include "field.h" -const field_a_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL( +const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL( 0x61b274a0ea0b0, 0x0d5a5fc8f189d, 0x7ef5e9cbd0c60, @@ -18,7 +18,7 @@ const field_a_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL( 0x2b8324804fc1d )}; -const field_a_t SQRT_ONE_MINUS_D = {FIELD_LITERAL( // FIXME MAGIC goes elsewhere? +const gf_25519_t SQRT_ONE_MINUS_D = {FIELD_LITERAL( // FIXME MAGIC goes elsewhere? 0x6db8831bbddec, 0x38d7b56c9c165, 0x016b221394bdc, @@ -26,15 +26,15 @@ const field_a_t SQRT_ONE_MINUS_D = {FIELD_LITERAL( // FIXME MAGIC goes elsewhere 0x0a0d85b4032b1 )}; -static const field_a_t ONE = {FIELD_LITERAL( // FIXME copy-pasted +static const gf_25519_t ONE = {FIELD_LITERAL( // FIXME copy-pasted 1,0,0,0,0 )}; // ARCH MAGIC FIXME copy-pasted from decaf_fast.c -static mask_t gf_eq(const field_a_t a, const field_a_t b) { - field_a_t c; - field_sub(c,a,b); - field_strong_reduce(c); +static mask_t gf_eq(const gf_25519_t a, const gf_25519_t b) { + gf_25519_t c; + gf_sub(c,a,b); + gf_strong_reduce(c); mask_t ret=0; int i; for (i=0; i<5; i++) { ret |= c->limb[i]; } @@ -43,19 +43,19 @@ static mask_t gf_eq(const field_a_t a, const field_a_t b) { /* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */ void -field_isr ( - field_a_t a, - const field_a_t x +gf_isr ( + gf_25519_t a, + const gf_25519_t x ) { - field_a_t st[3], tmp1, tmp2; + gf_25519_t st[3], tmp1, tmp2; const struct { unsigned char sh, idx; } ops[] = { {1,2},{1,2},{3,1},{6,0},{1,2},{12,1},{25,1},{25,1},{50,0},{125,0},{2,2},{1,2} }; st[0][0] = st[1][0] = st[2][0] = x[0]; unsigned int i; for (i=0; ilimb[i] = (ONE->limb[i] & mask) | (SQRT_MINUS_ONE->limb[i] & ~mask); - field_mul(a,tmp1,st[0]); + gf_mul(a,tmp1,st[0]); } diff --git a/src/p25519/f_field.h b/src/p25519/f_field.h index e63596a..c2a7ee0 100644 --- a/src/p25519/f_field.h +++ b/src/p25519/f_field.h @@ -13,20 +13,21 @@ #include #include "p25519.h" -#define FIELD_LIT_LIMB_BITS 51 -#define FIELD_BITS 255 -#define field_t p255_t -#define field_mul p255_mul -#define field_sqr p255_sqr -#define field_add_RAW p255_add_RAW -#define field_sub_RAW p255_sub_RAW -#define field_mulw p255_mulw -#define field_bias p255_bias -#define field_isr p255_isr -#define field_weak_reduce p255_weak_reduce -#define field_strong_reduce p255_strong_reduce -#define field_serialize p255_serialize -#define field_deserialize p255_deserialize -#define SQRT_MINUS_ONE P25519_SQRT_MINUS_ONE +#define GF_LIT_LIMB_BITS 51 +#define GF_BITS 255 +#define gf gf_25519_t +#define gf_s gf_25519_s +#define gf_mul gf_25519_mul +#define gf_sqr gf_25519_sqr +#define gf_add_RAW gf_25519_add_RAW +#define gf_sub_RAW gf_25519_sub_RAW +#define gf_mulw gf_25519_mulw +#define gf_bias gf_25519_bias +#define gf_isr gf_25519_isr +#define gf_weak_reduce gf_25519_weak_reduce +#define gf_strong_reduce gf_25519_strong_reduce +#define gf_serialize gf_25519_serialize +#define gf_deserialize gf_25519_deserialize +#define SQRT_MINUS_ONE P25519_SQRT_MINUS_ONE #endif /* __F_FIELD_H__ */ diff --git a/src/p448/f_arithmetic.c b/src/p448/f_arithmetic.c index 12e2b07..d73832a 100644 --- a/src/p448/f_arithmetic.c +++ b/src/p448/f_arithmetic.c @@ -11,33 +11,33 @@ #include "field.h" void -field_isr ( - field_a_t a, - const field_a_t x +gf_isr ( + gf_a_t a, + const gf_a_t x ) { - field_a_t L0, L1, L2; - field_sqr ( L1, x ); - field_mul ( L2, x, L1 ); - field_sqr ( L1, L2 ); - field_mul ( L2, x, L1 ); - field_sqrn ( L1, L2, 3 ); - field_mul ( L0, L2, L1 ); - field_sqrn ( L1, L0, 3 ); - field_mul ( L0, L2, L1 ); - field_sqrn ( L2, L0, 9 ); - field_mul ( L1, L0, L2 ); - field_sqr ( L0, L1 ); - field_mul ( L2, x, L0 ); - field_sqrn ( L0, L2, 18 ); - field_mul ( L2, L1, L0 ); - field_sqrn ( L0, L2, 37 ); - field_mul ( L1, L2, L0 ); - field_sqrn ( L0, L1, 37 ); - field_mul ( L1, L2, L0 ); - field_sqrn ( L0, L1, 111 ); - field_mul ( L2, L1, L0 ); - field_sqr ( L0, L2 ); - field_mul ( L1, x, L0 ); - field_sqrn ( L0, L1, 223 ); - field_mul ( a, L2, L0 ); + gf_a_t L0, L1, L2; + gf_sqr ( L1, x ); + gf_mul ( L2, x, L1 ); + gf_sqr ( L1, L2 ); + gf_mul ( L2, x, L1 ); + gf_sqrn ( L1, L2, 3 ); + gf_mul ( L0, L2, L1 ); + gf_sqrn ( L1, L0, 3 ); + gf_mul ( L0, L2, L1 ); + gf_sqrn ( L2, L0, 9 ); + gf_mul ( L1, L0, L2 ); + gf_sqr ( L0, L1 ); + gf_mul ( L2, x, L0 ); + gf_sqrn ( L0, L2, 18 ); + gf_mul ( L2, L1, L0 ); + gf_sqrn ( L0, L2, 37 ); + gf_mul ( L1, L2, L0 ); + gf_sqrn ( L0, L1, 37 ); + gf_mul ( L1, L2, L0 ); + gf_sqrn ( L0, L1, 111 ); + gf_mul ( L2, L1, L0 ); + gf_sqr ( L0, L2 ); + gf_mul ( L1, x, L0 ); + gf_sqrn ( L0, L1, 223 ); + gf_mul ( a, L2, L0 ); } diff --git a/src/p448/f_field.h b/src/p448/f_field.h index cc06ab7..29188e7 100644 --- a/src/p448/f_field.h +++ b/src/p448/f_field.h @@ -13,19 +13,19 @@ #include #include "p448.h" -#define FIELD_LIT_LIMB_BITS 56 -#define FIELD_BITS 448 -#define field_t p448_t -#define field_mul p448_mul -#define field_sqr p448_sqr -#define field_add_RAW p448_add_RAW -#define field_sub_RAW p448_sub_RAW -#define field_mulw p448_mulw -#define field_bias p448_bias -#define field_isr p448_isr -#define field_weak_reduce p448_weak_reduce -#define field_strong_reduce p448_strong_reduce -#define field_serialize p448_serialize -#define field_deserialize p448_deserialize +#define GF_LIT_LIMB_BITS 56 +#define GF_BITS 448 +#define gf p448_t +#define gf_mul p448_mul +#define gf_sqr p448_sqr +#define gf_add_RAW p448_add_RAW +#define gf_sub_RAW p448_sub_RAW +#define gf_mulw p448_mulw +#define gf_bias p448_bias +#define gf_isr p448_isr +#define gf_weak_reduce p448_weak_reduce +#define gf_strong_reduce p448_strong_reduce +#define gf_serialize p448_serialize +#define gf_deserialize p448_deserialize #endif /* __F_FIELD_H__ */ diff --git a/src/p480/f_arithmetic.c b/src/p480/f_arithmetic.c index 1166c3c..227cdfe 100644 --- a/src/p480/f_arithmetic.c +++ b/src/p480/f_arithmetic.c @@ -11,33 +11,33 @@ #include "field.h" void -field_isr ( - field_a_t a, - const field_a_t x +gf_isr ( + gf_a_t a, + const gf_a_t x ) { - field_a_t L0, L1, L2, L3; - field_sqr ( L2, x ); - field_mul ( L1, x, L2 ); - field_sqrn ( L0, L1, 2 ); - field_mul ( L2, L1, L0 ); - field_sqrn ( L0, L2, 4 ); - field_mul ( L1, L2, L0 ); - field_sqr ( L0, L1 ); - field_mul ( L2, x, L0 ); - field_sqrn ( L0, L2, 8 ); - field_mul ( L2, L1, L0 ); - field_sqrn ( L0, L2, 17 ); - field_mul ( L1, L2, L0 ); - field_sqrn ( L0, L1, 17 ); - field_mul ( L1, L2, L0 ); - field_sqrn ( L3, L1, 17 ); - field_mul ( L0, L2, L3 ); - field_sqrn ( L2, L0, 51 ); - field_mul ( L0, L1, L2 ); - field_sqrn ( L1, L0, 119 ); - field_mul ( L2, L0, L1 ); - field_sqr ( L0, L2 ); - field_mul ( L1, x, L0 ); - field_sqrn ( L0, L1, 239 ); - field_mul ( a, L2, L0 ); + gf_a_t L0, L1, L2, L3; + gf_sqr ( L2, x ); + gf_mul ( L1, x, L2 ); + gf_sqrn ( L0, L1, 2 ); + gf_mul ( L2, L1, L0 ); + gf_sqrn ( L0, L2, 4 ); + gf_mul ( L1, L2, L0 ); + gf_sqr ( L0, L1 ); + gf_mul ( L2, x, L0 ); + gf_sqrn ( L0, L2, 8 ); + gf_mul ( L2, L1, L0 ); + gf_sqrn ( L0, L2, 17 ); + gf_mul ( L1, L2, L0 ); + gf_sqrn ( L0, L1, 17 ); + gf_mul ( L1, L2, L0 ); + gf_sqrn ( L3, L1, 17 ); + gf_mul ( L0, L2, L3 ); + gf_sqrn ( L2, L0, 51 ); + gf_mul ( L0, L1, L2 ); + gf_sqrn ( L1, L0, 119 ); + gf_mul ( L2, L0, L1 ); + gf_sqr ( L0, L2 ); + gf_mul ( L1, x, L0 ); + gf_sqrn ( L0, L1, 239 ); + gf_mul ( a, L2, L0 ); } diff --git a/src/p480/f_field.h b/src/p480/f_field.h index 1c94a98..471e90d 100644 --- a/src/p480/f_field.h +++ b/src/p480/f_field.h @@ -13,19 +13,19 @@ #include #include "p480.h" -#define FIELD_LIT_LIMB_BITS 60 -#define FIELD_BITS 480 -#define field_t p480_t -#define field_mul p480_mul -#define field_sqr p480_sqr -#define field_add_RAW p480_add_RAW -#define field_sub_RAW p480_sub_RAW -#define field_mulw p480_mulw -#define field_bias p480_bias -#define field_isr p480_isr -#define field_weak_reduce p480_weak_reduce -#define field_strong_reduce p480_strong_reduce -#define field_serialize p480_serialize -#define field_deserialize p480_deserialize +#define GF_LIT_LIMB_BITS 60 +#define GF_BITS 480 +#define gf p480_t +#define gf_mul p480_mul +#define gf_sqr p480_sqr +#define gf_add_RAW p480_add_RAW +#define gf_sub_RAW p480_sub_RAW +#define gf_mulw p480_mulw +#define gf_bias p480_bias +#define gf_isr p480_isr +#define gf_weak_reduce p480_weak_reduce +#define gf_strong_reduce p480_strong_reduce +#define gf_serialize p480_serialize +#define gf_deserialize p480_deserialize #endif /* __F_FIELD_H__ */ diff --git a/src/p521/f_arithmetic.c b/src/p521/f_arithmetic.c index 7c36478..7ce39d8 100644 --- a/src/p521/f_arithmetic.c +++ b/src/p521/f_arithmetic.c @@ -11,33 +11,33 @@ #include "field.h" void -field_isr ( - field_a_t a, - const field_a_t x +gf_isr ( + gf_a_t a, + const gf_a_t x ) { - field_a_t L0, L1, L2; - field_sqr ( L1, x ); - field_mul ( L0, x, L1 ); - field_sqrn ( L2, L0, 2 ); - field_mul ( L1, L0, L2 ); - field_sqrn ( L2, L1, 4 ); - field_mul ( L0, L1, L2 ); - field_sqrn ( L2, L0, 8 ); - field_mul ( L1, L0, L2 ); - field_sqrn ( L2, L1, 16 ); - field_mul ( L0, L1, L2 ); - field_sqrn ( L2, L0, 32 ); - field_mul ( L1, L0, L2 ); - field_sqr ( L2, L1 ); - field_mul ( L0, x, L2 ); - field_sqrn ( L2, L0, 64 ); - field_mul ( L0, L1, L2 ); - field_sqrn ( L2, L0, 129 ); - field_mul ( L1, L0, L2 ); - field_sqr ( L2, L1 ); - field_mul ( L0, x, L2 ); - field_sqrn ( L2, L0, 259 ); - field_mul ( L1, L0, L2 ); - field_sqr ( L0, L1 ); - field_mul ( a, x, L0 ); + gf_a_t L0, L1, L2; + gf_sqr ( L1, x ); + gf_mul ( L0, x, L1 ); + gf_sqrn ( L2, L0, 2 ); + gf_mul ( L1, L0, L2 ); + gf_sqrn ( L2, L1, 4 ); + gf_mul ( L0, L1, L2 ); + gf_sqrn ( L2, L0, 8 ); + gf_mul ( L1, L0, L2 ); + gf_sqrn ( L2, L1, 16 ); + gf_mul ( L0, L1, L2 ); + gf_sqrn ( L2, L0, 32 ); + gf_mul ( L1, L0, L2 ); + gf_sqr ( L2, L1 ); + gf_mul ( L0, x, L2 ); + gf_sqrn ( L2, L0, 64 ); + gf_mul ( L0, L1, L2 ); + gf_sqrn ( L2, L0, 129 ); + gf_mul ( L1, L0, L2 ); + gf_sqr ( L2, L1 ); + gf_mul ( L0, x, L2 ); + gf_sqrn ( L2, L0, 259 ); + gf_mul ( L1, L0, L2 ); + gf_sqr ( L0, L1 ); + gf_mul ( a, x, L0 ); } diff --git a/src/p521/f_field.h b/src/p521/f_field.h index ebbb666..6a72ea7 100644 --- a/src/p521/f_field.h +++ b/src/p521/f_field.h @@ -13,19 +13,19 @@ #include "constant_time.h" #include "p521.h" -#define FIELD_LIT_LIMB_BITS 58 -#define FIELD_BITS 521 -#define field_t p521_t -#define field_mul p521_mul -#define field_sqr p521_sqr -#define field_add_RAW p521_add_RAW -#define field_sub_RAW p521_sub_RAW -#define field_mulw p521_mulw -#define field_bias p521_bias -#define field_isr p521_isr -#define field_weak_reduce p521_weak_reduce -#define field_strong_reduce p521_strong_reduce -#define field_serialize p521_serialize -#define field_deserialize p521_deserialize +#define GF_LIT_LIMB_BITS 58 +#define GF_BITS 521 +#define gf p521_t +#define gf_mul p521_mul +#define gf_sqr p521_sqr +#define gf_add_RAW p521_add_RAW +#define gf_sub_RAW p521_sub_RAW +#define gf_mulw p521_mulw +#define gf_bias p521_bias +#define gf_isr p521_isr +#define gf_weak_reduce p521_weak_reduce +#define gf_strong_reduce p521_strong_reduce +#define gf_serialize p521_serialize +#define gf_deserialize p521_deserialize #endif /* __F_FIELD_H__ */ diff --git a/src/public_include/decaf/decaf_255.h b/src/public_include/decaf/decaf_255.h index cbf09c8..fa6d939 100644 --- a/src/public_include/decaf/decaf_255.h +++ b/src/public_include/decaf/decaf_255.h @@ -21,11 +21,13 @@ extern "C" { #define DECAF_255_SCALAR_BITS 254 // Curve25519: 253 #define DECAF_255_SCALAR_LIMBS (256/DECAF_WORD_BITS) +#ifndef __DECAF_GF_ALREADY_DEFINED__ /** Galois field element internal structure */ -typedef struct gf_255_s { +typedef struct gf_25519_s { decaf_word_t limb[DECAF_255_LIMBS]; -} gf_255_s, gf_255_t[1]; +} gf_25519_s, gf_25519_t[1]; /** @endcond */ +#endif /* __DECAF_GF_ALREADY_DEFINED__ */ /** Number of bytes in a serialized point. */ #define DECAF_255_SER_BYTES 32 @@ -34,7 +36,7 @@ typedef struct gf_255_s { #define DECAF_255_SCALAR_BYTES 32 /** Twisted Edwards (-1,d-1) extended homogeneous coordinates */ -typedef struct decaf_255_point_s { /**@cond internal*/gf_255_t x,y,z,t;/**@endcond*/ } decaf_255_point_t[1]; +typedef struct decaf_255_point_s { /**@cond internal*/gf_25519_t x,y,z,t;/**@endcond*/ } decaf_255_point_t[1]; /** Precomputed table based on a point. Can be trivial implementation. */ struct decaf_255_precomputed_s;