
Wipe out the multiple layers of renaming between decaf_fast and field. Still some serious HACKs in the include order to avoid multiple definition of struct gf.

master
Michael Hamburg 10 years ago
commit 5af980b85a
18 changed files with 748 additions and 465 deletions
  1. src/decaf_fast.c  +39 -92
  2. src/decaf_gen_tables.c  +24 -26
  3. src/include/field.h  +54 -44
  4. src/p25519/arch_ref64/p25519.c  +20 -20
  5. src/p25519/arch_ref64/p25519.h  +53 -53
  6. src/p25519/arch_x86_64/p25519.c  +20 -20
  7. src/p25519/arch_x86_64/p25519.h  +54 -51
  8. src/p25519/arch_x86_64/x86-64-arith.h  +0 -1
  9. src/p25519/arch_x86_64/x86-64-arith.h  +323 -0
  10. src/p25519/f_arithmetic.c  +14 -14
  11. src/p25519/f_field.h  +16 -15
  12. src/p448/f_arithmetic.c  +28 -28
  13. src/p448/f_field.h  +14 -14
  14. src/p480/f_arithmetic.c  +28 -28
  15. src/p480/f_field.h  +14 -14
  16. src/p521/f_arithmetic.c  +28 -28
  17. src/p521/f_field.h  +14 -14
  18. src/public_include/decaf/decaf_255.h  +5 -3
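For context on the "serious HACKs in the include order" mentioned in the commit message: both the public header (decaf_255.h) and the per-arch field header (p25519.h) now want to define struct gf_25519_s, so each definition is fenced by a guard and whichever header is included first wins. A minimal sketch of the pattern, pieced together from the hunks below; note that nothing in this diff shows who sets __DECAF_GF_ALREADY_DEFINED__ (presumably an internal header defines it before including decaf_255.h):

/* src/public_include/decaf/decaf_255.h: public definition, skipped if an
 * internal header already provided the struct. */
#ifndef __DECAF_GF_ALREADY_DEFINED__
typedef struct gf_25519_s {
    decaf_word_t limb[DECAF_255_LIMBS];
} gf_25519_s, gf_25519_t[1];
#endif

/* src/p25519/arch_x86_64/p25519.h: arch definition, skipped if the public
 * header was included first (it checks decaf_255.h's include guard). */
#ifndef __DECAF_255_H__ /* HACK FIXME */
#define DECAF_WORD_BITS 64
typedef struct gf_25519_s {
    uint64_t limb[5];
} gf_25519_s, gf_25519_t[1];
#endif

The two definitions are only safe if they are layout-identical; on a 64-bit build both presumably come out as five 64-bit limbs, which is why the commit message flags this as a HACK rather than a clean solution.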

src/decaf_fast.c  +39 -92

@@ -27,8 +27,6 @@
#define point_t decaf_255_point_t
#define precomputed_s decaf_255_precomputed_s
#define SER_BYTES DECAF_255_SER_BYTES
#define gf_s gf_255_s
#define gf gf_255_t

#if WBITS == 64
typedef __int128_t decaf_sdword_t;
@@ -72,7 +70,7 @@ typedef struct { niels_t n; gf z; } __attribute__((aligned(32))) pniels_s, pniel
/* Precomputed base */
struct precomputed_s { niels_t table [DECAF_COMBS_N<<(DECAF_COMBS_T-1)]; };

extern const field_t API_NS(precomputed_base_as_fe)[];
extern const gf API_NS(precomputed_base_as_fe)[];
const precomputed_s *API_NS(precomputed_base) =
(const precomputed_s *) &API_NS(precomputed_base_as_fe);

@@ -95,52 +93,6 @@ const size_t API_NS2(alignof,precomputed_s) = 32;
/** Copy x = y */
siv gf_cpy(gf x, const gf y) { x[0] = y[0]; }

/** Mostly-unoptimized multiply, but at least it's unrolled. */
siv gf_mul (gf c, const gf a, const gf b) {
field_mul((field_t *)c, (const field_t *)a, (const field_t *)b);
}

/** Dedicated square */
siv gf_sqr (gf c, const gf a) {
field_sqr((field_t *)c, (const field_t *)a);
}

/** Add mod p. Conservatively always weak-reduce. */
snv gf_add ( gf_s *__restrict__ c, const gf a, const gf b ) {
field_add((field_t *)c, (const field_t *)a, (const field_t *)b);
}

/** Subtract mod p. Conservatively always weak-reduce. */
snv gf_sub ( gf c, const gf a, const gf b ) {
field_sub((field_t *)c, (const field_t *)a, (const field_t *)b);
}

/** Add mod p. Conservatively always weak-reduce.) */
siv gf_bias ( gf c, int amt) {
field_bias((field_t *)c, amt);
}

/** Subtract mod p. Bias by 2 and don't reduce */
siv gf_sub_nr ( gf_s *__restrict__ c, const gf a, const gf b ) {
// FOR_LIMB_U(i, c->limb[i] = a->limb[i] - b->limb[i] + 2*P->limb[i] );
field_sub_nr((field_t *)c, (const field_t *)a, (const field_t *)b);
gf_bias(c, 2);
if (WBITS==32) field_weak_reduce((field_t*) c); // HACK
}

/** Subtract mod p. Bias by amt but don't reduce. */
siv gf_sub_nr_x ( gf c, const gf a, const gf b, int amt ) {
field_sub_nr((field_t *)c, (const field_t *)a, (const field_t *)b);
gf_bias(c, amt);
if (WBITS==32) field_weak_reduce((field_t*) c); // HACK
}

/** Add mod p. Don't reduce. */
siv gf_add_nr ( gf c, const gf a, const gf b ) {
// FOR_LIMB_U(i, c->limb[i] = a->limb[i] + b->limb[i]);
field_add_nr((field_t *)c, (const field_t *)a, (const field_t *)b);
}

/** Constant time, x = is_z ? z : y */
siv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) {
constant_time_select(x,z,y,sizeof(gf),is_z);
@@ -162,29 +114,11 @@ siv cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) {
});
}

/**
* Mul by signed int. Not constant-time WRT the sign of that int.
* Just uses a full mul (PERF)
*/
siv gf_mlw(gf c, const gf a, int w) {
if (w>0) {
field_mulw((field_t *)c, (const field_t *)a, w);
} else {
field_mulw((field_t *)c, (const field_t *)a, -w);
gf_sub(c,ZERO,c);
}
}

/** Canonicalize */
siv gf_canon ( gf a ) {
field_strong_reduce((field_t *)a);
}

/** Compare a==b */
static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) {
gf c;
gf_sub(c,a,b);
gf_canon(c);
gf_strong_reduce(c);
decaf_word_t ret=0;
FOR_LIMB(i, ret |= c->limb[i] );
/* Hope the compiler is too dumb to optimize this, thus noinline */
@@ -194,7 +128,7 @@ static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) {
/** Inverse square root using addition chain. */
static decaf_bool_t gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) {
gf tmp0, tmp1;
field_isr((field_t *)y, (const field_t *)x);
gf_isr((gf_s *)y, (const gf_s *)x);
gf_sqr(tmp0,y);
gf_mul(tmp1,tmp0,x);
return gf_eq(tmp1,ONE) | (allow_zero & gf_eq(tmp1,ZERO));
@@ -211,11 +145,24 @@ sv gf_invert(gf y, const gf x) {
gf_cpy(y, t2);
}

/**
* Mul by signed int. Not constant-time WRT the sign of that int.
* Just uses a full mul (PERF)
*/
static inline void gf_mulw_sgn(gf c, const gf a, int w) {
if (w>0) {
gf_mulw(c, a, w);
} else {
gf_mulw(c, a, -w);
gf_sub(c,ZERO,c);
}
}

/** Return high bit of x = low bit of 2x mod p */
static decaf_word_t hibit(const gf x) {
gf y;
gf_add(y,x,x);
gf_canon(y);
gf_strong_reduce(y);
return -(y->limb[0]&1);
}

@@ -223,7 +170,7 @@ static decaf_word_t hibit(const gf x) {
static decaf_word_t lobit(const gf x) {
gf y;
gf_cpy(y,x);
gf_canon(y);
gf_strong_reduce(y);
return -(y->limb[0]&1);
}

@@ -454,7 +401,7 @@ decaf_bool_t API_NS(scalar_eq) (
const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};

static void gf_encode ( unsigned char ser[SER_BYTES], gf a ) {
field_serialize(ser, (field_t *)a);
gf_serialize(ser, (gf_s *)a);
}
extern const gf SQRT_MINUS_ONE, SQRT_ONE_MINUS_D; /* Intern this? */
@@ -528,7 +475,7 @@ void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
* Deserialize a bool, return TRUE if < p.
*/
static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) {
return field_deserialize((field_t *)s, ser);
return gf_deserialize((gf_s *)s, ser);
}
decaf_bool_t API_NS(point_decode) (
@@ -544,7 +491,7 @@ decaf_bool_t API_NS(point_decode) (
gf_sub ( f, ONE, a ); /* f = 1-s^2 = 1-as^2 since a=1 */
succ &= ~ gf_eq( f, ZERO );
gf_sqr ( b, f );
gf_mlw ( c, a, 4-4*EDWARDS_D );
gf_mulw_sgn ( c, a, 4-4*EDWARDS_D );
gf_add ( c, c, b ); /* t^2 */
gf_mul ( d, f, s ); /* s(1-s^2) for denoms */
gf_sqr ( e, d );
@@ -596,7 +543,7 @@ void API_NS(point_sub) (
gf_add_nr ( b, q->y, q->x );
gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t );
gf_mlw ( p->x, b, -2*EDWARDS_D );
gf_mulw_sgn ( p->x, b, -2*EDWARDS_D );
gf_add_nr ( b, a, p->y );
gf_sub_nr ( c, p->y, a );
gf_mul ( a, q->z, r->z );
@@ -622,7 +569,7 @@ void API_NS(point_add) (
gf_add_nr ( b, q->y, q->x );
gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t );
gf_mlw ( p->x, b, -2*EDWARDS_D );
gf_mulw_sgn ( p->x, b, -2*EDWARDS_D );
gf_add_nr ( b, a, p->y );
gf_sub_nr ( c, p->y, a );
gf_mul ( a, q->z, r->z );
@@ -646,11 +593,11 @@ snv point_double_internal (
gf_add_nr ( d, c, a );
gf_add_nr ( p->t, q->y, q->x );
gf_sqr ( b, p->t );
gf_sub_nr_x ( b, b, d, 3 );
gf_subx_nr ( b, b, d, 3 );
gf_sub_nr ( p->t, a, c );
gf_sqr ( p->x, q->z );
gf_add_nr ( p->z, p->x, p->x );
gf_sub_nr_x ( a, p->z, p->t, 4 );
gf_subx_nr ( a, p->z, p->t, 4 );
gf_mul ( p->x, a, b );
gf_mul ( p->z, p->t, a );
gf_mul ( p->y, p->t, d );
@@ -777,7 +724,7 @@ static void pt_to_pniels (
) {
gf_sub ( b->n->a, a->y, a->x );
gf_add ( b->n->b, a->x, a->y );
gf_mlw ( b->n->c, a->t, -2*EDWARDS_D );
gf_mulw_sgn ( b->n->c, a->t, -2*EDWARDS_D );
gf_add ( b->z, a->z, a->z );
}

@@ -1047,12 +994,12 @@ void API_NS(point_from_hash_nonuniform) (
// TODO: simplify since we don't return a hint anymore
gf r0,r,a,b,c,dee,D,N,rN,e;
gf_deser(r0,ser);
gf_canon(r0);
gf_strong_reduce(r0);
gf_sqr(a,r0);
//gf_sub(r,ZERO,a); /*gf_mlw(r,a,QUADRATIC_NONRESIDUE);*/
//gf_sub(r,ZERO,a); /*gf_mulw_sgn(r,a,QUADRATIC_NONRESIDUE);*/
gf_mul(r,a,SQRT_MINUS_ONE);
gf_mlw(dee,ONE,EDWARDS_D);
gf_mlw(c,r,EDWARDS_D);
gf_mulw_sgn(dee,ONE,EDWARDS_D);
gf_mulw_sgn(c,r,EDWARDS_D);
/* Compute D := (dr+a-d)(dr-ar-d) with a=1 */
gf_sub(a,c,dee);
@@ -1064,7 +1011,7 @@ void API_NS(point_from_hash_nonuniform) (
/* compute N := (r+1)(a-2d) */
gf_add(a,r,ONE);
gf_mlw(N,a,1-2*EDWARDS_D);
gf_mulw_sgn(N,a,1-2*EDWARDS_D);
/* e = +-1/sqrt(+-ND) */
gf_mul(rN,r,N);
@@ -1078,8 +1025,8 @@ void API_NS(point_from_hash_nonuniform) (
/* b <- t/s */
cond_sel(c,r0,r,square); /* r? = sqr ? r : 1 */
/* In two steps to avoid overflow on 32-bit arch */
gf_mlw(a,c,1-2*EDWARDS_D);
gf_mlw(b,a,1-2*EDWARDS_D);
gf_mulw_sgn(a,c,1-2*EDWARDS_D);
gf_mulw_sgn(b,a,1-2*EDWARDS_D);
gf_sub(c,r,ONE);
gf_mul(a,b,c); /* = r? * (r-1) * (a-2d)^2 with a=1 */
gf_mul(b,a,e);
@@ -1148,7 +1095,7 @@ API_NS(invert_elligator_nonuniform) (
cond_sel(b,b,ZERO,is_identity & ~sgn_t_over_s & ~sgn_s); /* identity adjust */
}
gf_mlw(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */
gf_mulw_sgn(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */
gf_add(a,d,b); /* num? */
gf_sub(d,d,b); /* den? */
gf_mul(b,a,d); /* n*d */
@@ -1199,7 +1146,7 @@ decaf_bool_t API_NS(point_valid) (
gf_sqr(b,p->y);
gf_sub(a,b,a);
gf_sqr(b,p->t);
gf_mlw(c,b,-EDWARDS_D);
gf_mulw_sgn(c,b,-EDWARDS_D);
gf_sqr(b,p->z);
gf_add(b,b,c);
out &= gf_eq(a,b);
@@ -1281,15 +1228,15 @@ static void batch_normalize_niels (

for (i=0; i<n; i++) {
gf_mul(product, table[i]->a, zis[i]);
gf_canon(product);
gf_strong_reduce(product);
gf_cpy(table[i]->a, product);
gf_mul(product, table[i]->b, zis[i]);
gf_canon(product);
gf_strong_reduce(product);
gf_cpy(table[i]->b, product);
gf_mul(product, table[i]->c, zis[i]);
gf_canon(product);
gf_strong_reduce(product);
gf_cpy(table[i]->c, product);
}
}
@@ -1510,7 +1457,7 @@ sv prepare_wnaf_table(
}
}

extern const field_t API_NS(precomputed_wnaf_as_fe)[];
extern const gf API_NS(precomputed_wnaf_as_fe)[];
static const niels_t *API_NS(wnaf_base) = (const niels_t *)API_NS(precomputed_wnaf_as_fe);
const size_t API_NS2(sizeof,precomputed_wnafs) __attribute((visibility("hidden")))
= sizeof(niels_t)<<DECAF_WNAF_FIXED_TABLE_BITS;


src/decaf_gen_tables.c  +24 -26

@@ -19,7 +19,7 @@
#define API_NS2(_pref,_id) _pref##_decaf_255_##_id

/* To satisfy linker. */
const field_t API_NS(precomputed_base_as_fe)[1];
const gf API_NS(precomputed_base_as_fe)[1];
const API_NS(scalar_t) API_NS(precomputed_scalarmul_adjustment);
const API_NS(scalar_t) API_NS(point_scalarmul_adjustment);
const API_NS(scalar_t) sc_r2 = {{{0}}};
@@ -29,7 +29,7 @@ const unsigned char base_point_ser_for_pregen[DECAF_255_SER_BYTES];
const API_NS(point_t) API_NS(point_base);

struct niels_s;
const field_t *API_NS(precomputed_wnaf_as_fe);
const gf_s *API_NS(precomputed_wnaf_as_fe);
extern const size_t API_NS2(sizeof,precomputed_wnafs);

void API_NS(precompute_wnafs) (
@@ -48,26 +48,26 @@ static void scalar_print(const char *name, const API_NS(scalar_t) sc) {
printf("}}};\n\n");
}

static void field_print(const field_t *f) {
const int FIELD_SER_BYTES = (FIELD_BITS + 7) / 8;
unsigned char ser[FIELD_SER_BYTES];
field_serialize(ser,f);
static void field_print(const gf f) {
const int GF_SER_BYTES = (GF_BITS + 7) / 8;
unsigned char ser[GF_SER_BYTES];
gf_serialize(ser,f);
int b=0, i, comma=0;
unsigned long long limb = 0;
printf("FIELD_LITERAL(");
for (i=0; i<FIELD_SER_BYTES; i++) {
printf("{FIELD_LITERAL(");
for (i=0; i<GF_SER_BYTES; i++) {
limb |= ((uint64_t)ser[i])<<b;
b += 8;
if (b >= FIELD_LIT_LIMB_BITS) {
limb &= (1ull<<FIELD_LIT_LIMB_BITS) -1;
b -= FIELD_LIT_LIMB_BITS;
if (b >= GF_LIT_LIMB_BITS) {
limb &= (1ull<<GF_LIT_LIMB_BITS) -1;
b -= GF_LIT_LIMB_BITS;
if (comma) printf(",");
comma = 1;
printf("0x%016llx", limb);
limb = ((uint64_t)ser[i])>>(8-b);
}
}
printf(")");
printf(")}");
assert(b<8);
}

@@ -88,41 +88,39 @@ int main(int argc, char **argv) {
if (ret || !preWnaf) return 1;
API_NS(precompute_wnafs)(preWnaf, real_point_base);

const field_t *output;
const gf_s *output;
unsigned i;
printf("/** @warning: this file was automatically generated. */\n");
printf("#include <decaf.h>\n\n");
printf("#include \"field.h\"\n\n");
printf("#include \"decaf.h\"\n\n");
printf("#define API_NS(_id) decaf_255_##_id\n");
printf("#define API_NS2(_pref,_id) _pref##_decaf_255_##_id\n");
output = (const field_t *)real_point_base;
output = (const gf_s *)real_point_base;
printf("const API_NS(point_t) API_NS(point_base) = {{\n");
for (i=0; i < sizeof(API_NS(point_t)); i+=sizeof(field_t)) {
for (i=0; i < sizeof(API_NS(point_t)); i+=sizeof(gf)) {
if (i) printf(",\n ");
printf("{");
field_print(output++);
printf("}");
}
printf("\n}};\n");
output = (const field_t *)pre;
printf("const field_t API_NS(precomputed_base_as_fe)[%d]\n",
(int)(API_NS2(sizeof,precomputed_s) / sizeof(field_t)));
output = (const gf_s *)pre;
printf("const gf API_NS(precomputed_base_as_fe)[%d]\n",
(int)(API_NS2(sizeof,precomputed_s) / sizeof(gf)));
printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s));
for (i=0; i < API_NS2(sizeof,precomputed_s); i+=sizeof(field_t)) {
for (i=0; i < API_NS2(sizeof,precomputed_s); i+=sizeof(gf)) {
if (i) printf(",\n ");
field_print(output++);
}
printf("\n};\n");
output = (const field_t *)preWnaf;
printf("const field_t API_NS(precomputed_wnaf_as_fe)[%d]\n",
(int)(API_NS2(sizeof,precomputed_wnafs) / sizeof(field_t)));
output = (const gf_s *)preWnaf;
printf("const gf API_NS(precomputed_wnaf_as_fe)[%d]\n",
(int)(API_NS2(sizeof,precomputed_wnafs) / sizeof(gf)));
printf("__attribute__((aligned(%d),visibility(\"hidden\"))) = {\n ", (int)API_NS2(alignof,precomputed_s));
for (i=0; i < API_NS2(sizeof,precomputed_wnafs); i+=sizeof(field_t)) {
for (i=0; i < API_NS2(sizeof,precomputed_wnafs); i+=sizeof(gf)) {
if (i) printf(",\n ");
field_print(output++);
}


src/include/field.h  +54 -44

@@ -1,23 +1,20 @@
/**
* @file field.h
* @brief Generic field header.
* @brief Generic gf header.
* @copyright
* Copyright (c) 2014 Cryptography Research, Inc. \n
* Released under the MIT License. See LICENSE.txt for license information.
* @author Mike Hamburg
*/

#ifndef __FIELD_H__
#define __FIELD_H__
#ifndef __GF_H__
#define __GF_H__

#include "constant_time.h"
#include "f_field.h"
#include <string.h>

typedef struct field_t field_a_t[1];
#define field_a_restrict_t struct field_t *__restrict__

#define is32 (GOLDI_BITS == 32 || FIELD_BITS != 448)
#define is32 (GOLDI_BITS == 32 || GF_BITS != 448)
#if (is32)
#define IF32(s) (s)
#else
@@ -33,9 +30,9 @@ typedef struct field_t field_a_t[1];
* If x=0, returns 0.
*/
void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf a,
const gf x
);
/**
@@ -43,62 +40,75 @@ field_isr (
*/
static __inline__ void
__attribute__((unused,always_inline))
field_sqrn (
field_a_restrict_t y,
const field_a_t x,
gf_sqrn (
gf_s *__restrict__ y,
const gf x,
int n
) {
field_a_t tmp;
gf tmp;
assert(n>0);
if (n&1) {
field_sqr(y,x);
gf_sqr(y,x);
n--;
} else {
field_sqr(tmp,x);
field_sqr(y,tmp);
gf_sqr(tmp,x);
gf_sqr(y,tmp);
n-=2;
}
for (; n; n-=2) {
field_sqr(tmp,y);
field_sqr(y,tmp);
gf_sqr(tmp,y);
gf_sqr(y,tmp);
}
}

static __inline__ void
field_subx_RAW (
field_a_t d,
const field_a_t a,
const field_a_t b
gf_subx_RAW (
gf d,
const gf a,
const gf b
) {
field_sub_RAW ( d, a, b );
field_bias( d, 2 );
IF32( field_weak_reduce ( d ) );
gf_sub_RAW ( d, a, b );
gf_bias( d, 2 );
IF32( gf_weak_reduce ( d ) );
}

static __inline__ void
field_sub (
field_a_t d,
const field_a_t a,
const field_a_t b
gf_sub (
gf d,
const gf a,
const gf b
) {
field_sub_RAW ( d, a, b );
field_bias( d, 2 );
field_weak_reduce ( d );
gf_sub_RAW ( d, a, b );
gf_bias( d, 2 );
gf_weak_reduce ( d );
}

static __inline__ void
field_add (
field_a_t d,
const field_a_t a,
const field_a_t b
gf_add (
gf d,
const gf a,
const gf b
) {
field_add_RAW ( d, a, b );
field_weak_reduce ( d );
gf_add_RAW ( d, a, b );
gf_weak_reduce ( d );
}

#define gf_add_nr gf_add_RAW

/** Subtract mod p. Bias by 2 and don't reduce */
static inline void gf_sub_nr ( gf c, const gf a, const gf b ) {
// FOR_LIMB_U(i, c->limb[i] = a->limb[i] - b->limb[i] + 2*P->limb[i] );
gf_sub_RAW(c,a,b);
gf_bias(c, 2);
if (DECAF_WORD_BITS==32) gf_weak_reduce(c); // HACK
}

/** Subtract mod p. Bias by amt but don't reduce. */
static inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) {
gf_sub_RAW(c,a,b);
gf_bias(c, amt);
if (DECAF_WORD_BITS==32) gf_weak_reduce(c); // HACK
}

/* FIXME: no warnings on RAW routines */
#define field_add_nr field_add_RAW
#define field_sub_nr field_sub_RAW
#define field_subx_nr field_subx_RAW

#endif // __FIELD_H__
#endif // __GF_H__

src/p25519/arch_ref64/p25519.c  +20 -20

@@ -17,10 +17,10 @@ static __inline__ uint64_t is_zero(uint64_t a) {
}

void
p255_mul (
p255_t *__restrict__ cs,
const p255_t *as,
const p255_t *bs
gf_25519_mul (
gf_25519_t __restrict__ cs,
const gf_25519_t as,
const gf_25519_t bs
) {
const uint64_t *a = as->limb, *b = bs->limb, mask = ((1ull<<51)-1);
@@ -52,9 +52,9 @@ p255_mul (
}

void
p255_mulw (
p255_t *__restrict__ cs,
const p255_t *as,
gf_25519_mulw (
gf_25519_t __restrict__ cs,
const gf_25519_t as,
uint64_t b
) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
@@ -79,16 +79,16 @@ p255_mulw (
}

void
p255_sqr (
p255_t *__restrict__ cs,
const p255_t *as
gf_25519_sqr (
gf_25519_t __restrict__ cs,
const gf_25519_t as
) {
p255_mul(cs,as,as); // TODO
gf_25519_mul(cs,as,as); // TODO
}

void
p255_strong_reduce (
p255_t *a
gf_25519_strong_reduce (
gf_25519_t a
) {
uint64_t mask = (1ull<<51)-1;

@@ -128,14 +128,14 @@ p255_strong_reduce (
}

void
p255_serialize (
gf_25519_serialize (
uint8_t serial[32],
const struct p255_t *x
const gf_25519_t x
) {
int i,j;
p255_t red;
p255_copy(&red, x);
p255_strong_reduce(&red);
gf_25519_t red;
gf_25519_copy(&red, x);
gf_25519_strong_reduce(&red);
uint64_t *r = red.limb;
uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12};
for (i=0; i<4; i++) {
@@ -147,8 +147,8 @@ p255_serialize (
}

mask_t
p255_deserialize (
p255_t *x,
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32]
) {
int i,j;


src/p25519/arch_ref64/p25519.h  +53 -53

@@ -1,8 +1,8 @@
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
#ifndef __P255_H__
#define __P255_H__ 1
#ifndef __P25519_H__
#define __P25519_H__ 1

#include <stdint.h>
#include <assert.h>
@@ -10,9 +10,9 @@

#include "word.h"

typedef struct p255_t {
typedef struct gf_25519_s {
uint64_t limb[5];
} p255_t;
} gf_25519_s, gf_25519_t[1];

#define LBITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}
@@ -32,113 +32,113 @@ extern "C" {
#endif

static __inline__ void
p255_add_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused));
static __inline__ void
p255_sub_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused));
static __inline__ void
p255_copy (
p255_t *out,
const p255_t *a
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) __attribute__((unused));
static __inline__ void
p255_weak_reduce (
p255_t *inout
gf_25519_weak_reduce (
gf_25519_t inout
) __attribute__((unused));
void
p255_strong_reduce (
p255_t *inout
gf_25519_strong_reduce (
gf_25519_t inout
);

static __inline__ void
p255_bias (
p255_t *inout,
gf_25519_bias (
gf_25519_t inout,
int amount
) __attribute__((unused));
void
p255_mul (
p255_t *__restrict__ out,
const p255_t *a,
const p255_t *b
gf_25519_mul (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
const gf_25519_t b
);

void
p255_mulw (
p255_t *__restrict__ out,
const p255_t *a,
gf_25519_mulw (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
uint64_t b
);

void
p255_sqr (
p255_t *__restrict__ out,
const p255_t *a
gf_25519_sqr (
gf_25519_s *__restrict__ out,
const gf_25519_t a
);

void
p255_serialize (
gf_25519_serialize (
uint8_t serial[32],
const struct p255_t *x
const gf_25519_t x
);

mask_t
p255_deserialize (
p255_t *x,
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32]
);

/* -------------- Inline functions begin here -------------- */

void
p255_add_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
unsigned int i;
for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] + b->limb[i];
}
p255_weak_reduce(out);
gf_25519_weak_reduce(out);
}

void
p255_sub_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
unsigned int i;
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36;
for (i=0; i<5; i++) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co2 : co1);
}
p255_weak_reduce(out);
gf_25519_weak_reduce(out);
}

void
p255_copy (
p255_t *out,
const p255_t *a
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) {
memcpy(out,a,sizeof(*a));
}

void
p255_bias (
p255_t *a,
gf_25519_bias (
gf_25519_t a,
int amt
) {
(void) a;
@@ -146,8 +146,8 @@ p255_bias (
}

void
p255_weak_reduce (
p255_t *a
gf_25519_weak_reduce (
gf_25519_t a
) {
uint64_t mask = (1ull<<51) - 1;
uint64_t tmp = a->limb[4] >> 51;
@@ -162,4 +162,4 @@ p255_weak_reduce (
}; /* extern "C" */
#endif

#endif /* __P255_H__ */
#endif /* __P25519_H__ */

src/p25519/arch_x86_64/p25519.c  +20 -20

@@ -10,10 +10,10 @@ static inline uint64_t shr(__uint128_t x, int n) {
}

void
p255_mul (
p255_t *__restrict__ cs,
const p255_t *as,
const p255_t *bs
gf_25519_mul (
gf_25519_s *__restrict__ cs,
const gf_25519_t as,
const gf_25519_t bs
) {
const uint64_t *a = as->limb, *b = bs->limb, mask = ((1ull<<51)-1);
uint64_t *c = cs->limb;
@@ -92,9 +92,9 @@ p255_mul (
}

void
p255_sqr (
p255_t *__restrict__ cs,
const p255_t *as
gf_25519_sqr (
gf_25519_s *__restrict__ cs,
const gf_25519_t as
) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
uint64_t *c = cs->limb;
@@ -156,9 +156,9 @@ p255_sqr (
}

void
p255_mulw (
p255_t *__restrict__ cs,
const p255_t *as,
gf_25519_mulw (
gf_25519_s *__restrict__ cs,
const gf_25519_t as,
uint64_t b
) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
@@ -191,8 +191,8 @@ p255_mulw (
}

void
p255_strong_reduce (
p255_t *a
gf_25519_strong_reduce (
gf_25519_t a
) {
uint64_t mask = (1ull<<51)-1;

@@ -232,15 +232,15 @@ p255_strong_reduce (
}

void
p255_serialize (
gf_25519_serialize (
uint8_t serial[32],
const struct p255_t *x
const gf_25519_t x
) {
int i,j;
p255_t red;
p255_copy(&red, x);
p255_strong_reduce(&red);
uint64_t *r = red.limb;
gf_25519_t red;
gf_25519_copy(red, x);
gf_25519_strong_reduce(red);
uint64_t *r = red->limb;
uint64_t ser64[4] = {r[0] | r[1]<<51, r[1]>>13|r[2]<<38, r[2]>>26|r[3]<<25, r[3]>>39|r[4]<<12};
for (i=0; i<4; i++) {
for (j=0; j<8; j++) {
@@ -251,8 +251,8 @@ p255_serialize (
}

mask_t
p255_deserialize (
p255_t *x,
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32]
) {
int i,j;


src/p25519/arch_x86_64/p25519.h  +54 -51

@@ -1,8 +1,8 @@
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/
#ifndef __P255_H__
#define __P255_H__ 1
#ifndef __P25519_H__
#define __P25519_H__ 1

#include <stdint.h>
#include <assert.h>
@@ -10,9 +10,12 @@

#include "word.h"

typedef struct p255_t {
#ifndef __DECAF_255_H__ // HACK FIXME
#define DECAF_WORD_BITS 64
typedef struct gf_25519_s {
uint64_t limb[5];
} p255_t;
} gf_25519_s, gf_25519_t[1];
#endif

#define LBITS 51
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}
@@ -32,80 +35,80 @@ extern "C" {
#endif

static __inline__ void
p255_add_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused));
static __inline__ void
p255_sub_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) __attribute__((unused));
static __inline__ void
p255_copy (
p255_t *out,
const p255_t *a
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) __attribute__((unused));
static __inline__ void
p255_weak_reduce (
p255_t *inout
gf_25519_weak_reduce (
gf_25519_t inout
) __attribute__((unused));
void
p255_strong_reduce (
p255_t *inout
gf_25519_strong_reduce (
gf_25519_t inout
);

static __inline__ void
p255_bias (
p255_t *inout,
gf_25519_bias (
gf_25519_t inout,
int amount
) __attribute__((unused));
void
p255_mul (
p255_t *__restrict__ out,
const p255_t *a,
const p255_t *b
gf_25519_mul (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
const gf_25519_t b
);

void
p255_mulw (
p255_t *__restrict__ out,
const p255_t *a,
gf_25519_mulw (
gf_25519_s *__restrict__ out,
const gf_25519_t a,
uint64_t b
);

void
p255_sqr (
p255_t *__restrict__ out,
const p255_t *a
gf_25519_sqr (
gf_25519_s *__restrict__ out,
const gf_25519_t a
);

void
p255_serialize (
gf_25519_serialize (
uint8_t serial[32],
const struct p255_t *x
const gf_25519_t x
);

mask_t
p255_deserialize (
p255_t *x,
gf_25519_deserialize (
gf_25519_t x,
const uint8_t serial[32]
);

/* -------------- Inline functions begin here -------------- */

void
p255_add_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_add_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
unsigned int i;
for (i=0; i<5; i++) {
@@ -114,10 +117,10 @@ p255_add_RAW (
}

void
p255_sub_RAW (
p255_t *out,
const p255_t *a,
const p255_t *b
gf_25519_sub_RAW (
gf_25519_t out,
const gf_25519_t a,
const gf_25519_t b
) {
unsigned int i;
uint64_t co1 = ((1ull<<51)-1)*2, co2 = co1-36;
@@ -127,16 +130,16 @@ p255_sub_RAW (
}

void
p255_copy (
p255_t *out,
const p255_t *a
gf_25519_copy (
gf_25519_t out,
const gf_25519_t a
) {
memcpy(out,a,sizeof(*a));
}

void
p255_bias (
p255_t *a,
gf_25519_bias (
gf_25519_t a,
int amt
) {
a->limb[0] += ((uint64_t)(amt)<<52) - 38*amt;
@@ -147,8 +150,8 @@ p255_bias (
}

void
p255_weak_reduce (
p255_t *a
gf_25519_weak_reduce (
gf_25519_t a
) {
uint64_t mask = (1ull<<51) - 1;
uint64_t tmp = a->limb[4] >> 51;
@@ -163,4 +166,4 @@ p255_weak_reduce (
}; /* extern "C" */
#endif

#endif /* __P255_H__ */
#endif /* __P25519_H__ */

src/p25519/arch_x86_64/x86-64-arith.h  +0 -1

@@ -1 +0,0 @@
../../p448/arch_x86_64/x86-64-arith.h

src/p25519/arch_x86_64/x86-64-arith.h  +323 -0

@@ -0,0 +1,323 @@
/* Copyright (c) 2014 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/

#ifndef __X86_64_ARITH_H__
#define __X86_64_ARITH_H__

#include <stdint.h>

/* TODO: non x86-64 versions of these.
* FUTURE: autogenerate
*/

static __inline__ __uint128_t widemul(const uint64_t *a, const uint64_t *b) {
#ifndef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rax;"
"mulq %[b];"
: [c]"=a"(c), [d]"=d"(d)
: [b]"m"(*b), [a]"m"(*a)
: "cc");
return (((__uint128_t)(d))<<64) | c;
#else
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx;"
"mulx %[b], %[c], %[d];"
: [c]"=r"(c), [d]"=r"(d)
: [b]"m"(*b), [a]"m"(*a)
: "rdx");
return (((__uint128_t)(d))<<64) | c;
#endif
}

static __inline__ __uint128_t widemul_rm(uint64_t a, const uint64_t *b) {
#ifndef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rax;"
"mulq %[b];"
: [c]"=a"(c), [d]"=d"(d)
: [b]"m"(*b), [a]"r"(a)
: "cc");
return (((__uint128_t)(d))<<64) | c;
#else
uint64_t c,d;
__asm__ volatile
("mulx %[b], %[c], %[d];"
: [c]"=r"(c), [d]"=r"(d)
: [b]"m"(*b), [a]"d"(a));
return (((__uint128_t)(d))<<64) | c;
#endif
}

static __inline__ __uint128_t widemul_rr(uint64_t a, uint64_t b) {
#ifndef __BMI2__
uint64_t c,d;
__asm__ volatile
("mulq %[b];"
: [c]"=a"(c), [d]"=d"(d)
: [b]"r"(b), "a"(a)
: "cc");
return (((__uint128_t)(d))<<64) | c;
#else
uint64_t c,d;
__asm__ volatile
("mulx %[b], %[c], %[d];"
: [c]"=r"(c), [d]"=r"(d)
: [b]"r"(b), [a]"d"(a));
return (((__uint128_t)(d))<<64) | c;
#endif
}

static __inline__ __uint128_t widemul2(const uint64_t *a, const uint64_t *b) {
#ifndef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rax; "
"addq %%rax, %%rax; "
"mulq %[b];"
: [c]"=a"(c), [d]"=d"(d)
: [b]"m"(*b), [a]"m"(*a)
: "cc");
return (((__uint128_t)(d))<<64) | c;
#else
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx;"
"leaq (,%%rdx,2), %%rdx;"
"mulx %[b], %[c], %[d];"
: [c]"=r"(c), [d]"=r"(d)
: [b]"m"(*b), [a]"m"(*a)
: "rdx");
return (((__uint128_t)(d))<<64) | c;
#endif
}

static __inline__ void mac(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t)(hi))<<64) | lo;
}

static __inline__ void macac(__uint128_t *acc, __uint128_t *acc2, const uint64_t *a, const uint64_t *b) {
uint64_t lo = *acc, hi = *acc>>64;
uint64_t lo2 = *acc2, hi2 = *acc2>>64;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
"addq %[c], %[lo2]; "
"adcq %[d], %[hi2]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
"addq %%rax, %[lo2]; "
"adcq %%rdx, %[hi2]; "
: [lo]"+r"(lo), [hi]"+r"(hi), [lo2]"+r"(lo2), [hi2]"+r"(hi2)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t)(hi))<<64) | lo;
*acc2 = (((__uint128_t)(hi2))<<64) | lo2;
}

static __inline__ void mac_rm(__uint128_t *acc, uint64_t a, const uint64_t *b) {
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"d"(a)
: "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"r"(a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t)(hi))<<64) | lo;
}

static __inline__ void mac_rr(__uint128_t *acc, uint64_t a, const uint64_t b) {
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"r"(b), [a]"d"(a)
: "cc");
#else
__asm__ volatile
("mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"r"(b), "a"(a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t)(hi))<<64) | lo;
}

static __inline__ void mac2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"addq %%rdx, %%rdx; "
"mulx %[b], %[c], %[d]; "
"addq %[c], %[lo]; "
"adcq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"addq %%rax, %%rax; "
"mulq %[b]; "
"addq %%rax, %[lo]; "
"adcq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t)(hi))<<64) | lo;
}

static __inline__ void msb(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"subq %[c], %[lo]; "
"sbbq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"mulq %[b]; "
"subq %%rax, %[lo]; "
"sbbq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t)(hi))<<64) | lo;
}

static __inline__ void msb2(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
uint64_t lo = *acc, hi = *acc>>64;
#ifdef __BMI2__
uint64_t c,d;
__asm__ volatile
("movq %[a], %%rdx; "
"addq %%rdx, %%rdx; "
"mulx %[b], %[c], %[d]; "
"subq %[c], %[lo]; "
"sbbq %[d], %[hi]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
#else
__asm__ volatile
("movq %[a], %%rax; "
"addq %%rax, %%rax; "
"mulq %[b]; "
"subq %%rax, %[lo]; "
"sbbq %%rdx, %[hi]; "
: [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rax", "rdx", "cc");
#endif
*acc = (((__uint128_t)(hi))<<64) | lo;
}

static __inline__ void mrs(__uint128_t *acc, const uint64_t *a, const uint64_t *b) {
uint64_t c,d, lo = *acc, hi = *acc>>64;
__asm__ volatile
("movq %[a], %%rdx; "
"mulx %[b], %[c], %[d]; "
"subq %[lo], %[c]; "
"sbbq %[hi], %[d]; "
: [c]"=r"(c), [d]"=r"(d), [lo]"+r"(lo), [hi]"+r"(hi)
: [b]"m"(*b), [a]"m"(*a)
: "rdx", "cc");
*acc = (((__uint128_t)(d))<<64) | c;
}

static __inline__ __uint128_t widemulu(uint64_t a, uint64_t b) {
return ((__uint128_t)(a)) * b;
}

static __inline__ __int128_t widemuls(int64_t a, int64_t b) {
return ((__int128_t)(a)) * b;
}
static __inline__ uint64_t opacify(uint64_t x) {
__asm__ volatile("" : "+r"(x));
return x;
}

static __inline__ mask_t is_zero(uint64_t x) {
__asm__ volatile("neg %0; sbb %0, %0;" : "+r"(x));
return ~x;
}

#endif /* __X86_64_ARITH_H__ */

src/p25519/f_arithmetic.c  +14 -14

@@ -10,7 +10,7 @@

#include "field.h"

const field_a_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
0x61b274a0ea0b0,
0x0d5a5fc8f189d,
0x7ef5e9cbd0c60,
@@ -18,7 +18,7 @@ const field_a_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
0x2b8324804fc1d
)};
const field_a_t SQRT_ONE_MINUS_D = {FIELD_LITERAL( // FIXME MAGIC goes elsewhere?
const gf_25519_t SQRT_ONE_MINUS_D = {FIELD_LITERAL( // FIXME MAGIC goes elsewhere?
0x6db8831bbddec,
0x38d7b56c9c165,
0x016b221394bdc,
@@ -26,15 +26,15 @@ const field_a_t SQRT_ONE_MINUS_D = {FIELD_LITERAL( // FIXME MAGIC goes elsewhere
0x0a0d85b4032b1
)};
static const field_a_t ONE = {FIELD_LITERAL( // FIXME copy-pasted
static const gf_25519_t ONE = {FIELD_LITERAL( // FIXME copy-pasted
1,0,0,0,0
)};

// ARCH MAGIC FIXME copy-pasted from decaf_fast.c
static mask_t gf_eq(const field_a_t a, const field_a_t b) {
field_a_t c;
field_sub(c,a,b);
field_strong_reduce(c);
static mask_t gf_eq(const gf_25519_t a, const gf_25519_t b) {
gf_25519_t c;
gf_sub(c,a,b);
gf_strong_reduce(c);
mask_t ret=0;
int i;
for (i=0; i<5; i++) { ret |= c->limb[i]; }
@@ -43,19 +43,19 @@ static mask_t gf_eq(const field_a_t a, const field_a_t b) {

/* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */
void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf_25519_t a,
const gf_25519_t x
) {
field_a_t st[3], tmp1, tmp2;
gf_25519_t st[3], tmp1, tmp2;
const struct { unsigned char sh, idx; } ops[] = {
{1,2},{1,2},{3,1},{6,0},{1,2},{12,1},{25,1},{25,1},{50,0},{125,0},{2,2},{1,2}
};
st[0][0] = st[1][0] = st[2][0] = x[0];
unsigned int i;
for (i=0; i<sizeof(ops)/sizeof(ops[0]); i++) {
field_sqrn(tmp1, st[1^(i&1)], ops[i].sh);
field_mul(tmp2, tmp1, st[ops[i].idx]);
gf_sqrn(tmp1, st[1^(i&1)], ops[i].sh);
gf_mul(tmp2, tmp1, st[ops[i].idx]);
st[i&1][0] = tmp2[0];
}
@@ -64,5 +64,5 @@ field_isr (
// ARCH MAGIC FIXME: should be cond_sel
for (i=0; i<5; i++) tmp1->limb[i] = (ONE->limb[i] & mask)
| (SQRT_MINUS_ONE->limb[i] & ~mask);
field_mul(a,tmp1,st[0]);
gf_mul(a,tmp1,st[0]);
}

src/p25519/f_field.h  +16 -15

@@ -13,20 +13,21 @@
#include <string.h>

#include "p25519.h"
#define FIELD_LIT_LIMB_BITS 51
#define FIELD_BITS 255
#define field_t p255_t
#define field_mul p255_mul
#define field_sqr p255_sqr
#define field_add_RAW p255_add_RAW
#define field_sub_RAW p255_sub_RAW
#define field_mulw p255_mulw
#define field_bias p255_bias
#define field_isr p255_isr
#define field_weak_reduce p255_weak_reduce
#define field_strong_reduce p255_strong_reduce
#define field_serialize p255_serialize
#define field_deserialize p255_deserialize
#define SQRT_MINUS_ONE P25519_SQRT_MINUS_ONE
#define GF_LIT_LIMB_BITS 51
#define GF_BITS 255
#define gf gf_25519_t
#define gf_s gf_25519_s
#define gf_mul gf_25519_mul
#define gf_sqr gf_25519_sqr
#define gf_add_RAW gf_25519_add_RAW
#define gf_sub_RAW gf_25519_sub_RAW
#define gf_mulw gf_25519_mulw
#define gf_bias gf_25519_bias
#define gf_isr gf_25519_isr
#define gf_weak_reduce gf_25519_weak_reduce
#define gf_strong_reduce gf_25519_strong_reduce
#define gf_serialize gf_25519_serialize
#define gf_deserialize gf_25519_deserialize
#define SQRT_MINUS_ONE P25519_SQRT_MINUS_ONE

#endif /* __F_FIELD_H__ */

src/p448/f_arithmetic.c  +28 -28

@@ -11,33 +11,33 @@
#include "field.h"

void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf_a_t a,
const gf_a_t x
) {
field_a_t L0, L1, L2;
field_sqr ( L1, x );
field_mul ( L2, x, L1 );
field_sqr ( L1, L2 );
field_mul ( L2, x, L1 );
field_sqrn ( L1, L2, 3 );
field_mul ( L0, L2, L1 );
field_sqrn ( L1, L0, 3 );
field_mul ( L0, L2, L1 );
field_sqrn ( L2, L0, 9 );
field_mul ( L1, L0, L2 );
field_sqr ( L0, L1 );
field_mul ( L2, x, L0 );
field_sqrn ( L0, L2, 18 );
field_mul ( L2, L1, L0 );
field_sqrn ( L0, L2, 37 );
field_mul ( L1, L2, L0 );
field_sqrn ( L0, L1, 37 );
field_mul ( L1, L2, L0 );
field_sqrn ( L0, L1, 111 );
field_mul ( L2, L1, L0 );
field_sqr ( L0, L2 );
field_mul ( L1, x, L0 );
field_sqrn ( L0, L1, 223 );
field_mul ( a, L2, L0 );
gf_a_t L0, L1, L2;
gf_sqr ( L1, x );
gf_mul ( L2, x, L1 );
gf_sqr ( L1, L2 );
gf_mul ( L2, x, L1 );
gf_sqrn ( L1, L2, 3 );
gf_mul ( L0, L2, L1 );
gf_sqrn ( L1, L0, 3 );
gf_mul ( L0, L2, L1 );
gf_sqrn ( L2, L0, 9 );
gf_mul ( L1, L0, L2 );
gf_sqr ( L0, L1 );
gf_mul ( L2, x, L0 );
gf_sqrn ( L0, L2, 18 );
gf_mul ( L2, L1, L0 );
gf_sqrn ( L0, L2, 37 );
gf_mul ( L1, L2, L0 );
gf_sqrn ( L0, L1, 37 );
gf_mul ( L1, L2, L0 );
gf_sqrn ( L0, L1, 111 );
gf_mul ( L2, L1, L0 );
gf_sqr ( L0, L2 );
gf_mul ( L1, x, L0 );
gf_sqrn ( L0, L1, 223 );
gf_mul ( a, L2, L0 );
}

src/p448/f_field.h  +14 -14

@@ -13,19 +13,19 @@
#include <string.h>

#include "p448.h"
#define FIELD_LIT_LIMB_BITS 56
#define FIELD_BITS 448
#define field_t p448_t
#define field_mul p448_mul
#define field_sqr p448_sqr
#define field_add_RAW p448_add_RAW
#define field_sub_RAW p448_sub_RAW
#define field_mulw p448_mulw
#define field_bias p448_bias
#define field_isr p448_isr
#define field_weak_reduce p448_weak_reduce
#define field_strong_reduce p448_strong_reduce
#define field_serialize p448_serialize
#define field_deserialize p448_deserialize
#define GF_LIT_LIMB_BITS 56
#define GF_BITS 448
#define gf p448_t
#define gf_mul p448_mul
#define gf_sqr p448_sqr
#define gf_add_RAW p448_add_RAW
#define gf_sub_RAW p448_sub_RAW
#define gf_mulw p448_mulw
#define gf_bias p448_bias
#define gf_isr p448_isr
#define gf_weak_reduce p448_weak_reduce
#define gf_strong_reduce p448_strong_reduce
#define gf_serialize p448_serialize
#define gf_deserialize p448_deserialize

#endif /* __F_FIELD_H__ */

src/p480/f_arithmetic.c  +28 -28

@@ -11,33 +11,33 @@
#include "field.h"

void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf_a_t a,
const gf_a_t x
) {
field_a_t L0, L1, L2, L3;
field_sqr ( L2, x );
field_mul ( L1, x, L2 );
field_sqrn ( L0, L1, 2 );
field_mul ( L2, L1, L0 );
field_sqrn ( L0, L2, 4 );
field_mul ( L1, L2, L0 );
field_sqr ( L0, L1 );
field_mul ( L2, x, L0 );
field_sqrn ( L0, L2, 8 );
field_mul ( L2, L1, L0 );
field_sqrn ( L0, L2, 17 );
field_mul ( L1, L2, L0 );
field_sqrn ( L0, L1, 17 );
field_mul ( L1, L2, L0 );
field_sqrn ( L3, L1, 17 );
field_mul ( L0, L2, L3 );
field_sqrn ( L2, L0, 51 );
field_mul ( L0, L1, L2 );
field_sqrn ( L1, L0, 119 );
field_mul ( L2, L0, L1 );
field_sqr ( L0, L2 );
field_mul ( L1, x, L0 );
field_sqrn ( L0, L1, 239 );
field_mul ( a, L2, L0 );
gf_a_t L0, L1, L2, L3;
gf_sqr ( L2, x );
gf_mul ( L1, x, L2 );
gf_sqrn ( L0, L1, 2 );
gf_mul ( L2, L1, L0 );
gf_sqrn ( L0, L2, 4 );
gf_mul ( L1, L2, L0 );
gf_sqr ( L0, L1 );
gf_mul ( L2, x, L0 );
gf_sqrn ( L0, L2, 8 );
gf_mul ( L2, L1, L0 );
gf_sqrn ( L0, L2, 17 );
gf_mul ( L1, L2, L0 );
gf_sqrn ( L0, L1, 17 );
gf_mul ( L1, L2, L0 );
gf_sqrn ( L3, L1, 17 );
gf_mul ( L0, L2, L3 );
gf_sqrn ( L2, L0, 51 );
gf_mul ( L0, L1, L2 );
gf_sqrn ( L1, L0, 119 );
gf_mul ( L2, L0, L1 );
gf_sqr ( L0, L2 );
gf_mul ( L1, x, L0 );
gf_sqrn ( L0, L1, 239 );
gf_mul ( a, L2, L0 );
}

src/p480/f_field.h  +14 -14

@@ -13,19 +13,19 @@
#include <string.h>

#include "p480.h"
#define FIELD_LIT_LIMB_BITS 60
#define FIELD_BITS 480
#define field_t p480_t
#define field_mul p480_mul
#define field_sqr p480_sqr
#define field_add_RAW p480_add_RAW
#define field_sub_RAW p480_sub_RAW
#define field_mulw p480_mulw
#define field_bias p480_bias
#define field_isr p480_isr
#define field_weak_reduce p480_weak_reduce
#define field_strong_reduce p480_strong_reduce
#define field_serialize p480_serialize
#define field_deserialize p480_deserialize
#define GF_LIT_LIMB_BITS 60
#define GF_BITS 480
#define gf p480_t
#define gf_mul p480_mul
#define gf_sqr p480_sqr
#define gf_add_RAW p480_add_RAW
#define gf_sub_RAW p480_sub_RAW
#define gf_mulw p480_mulw
#define gf_bias p480_bias
#define gf_isr p480_isr
#define gf_weak_reduce p480_weak_reduce
#define gf_strong_reduce p480_strong_reduce
#define gf_serialize p480_serialize
#define gf_deserialize p480_deserialize

#endif /* __F_FIELD_H__ */

src/p521/f_arithmetic.c  +28 -28

@@ -11,33 +11,33 @@
#include "field.h"

void
field_isr (
field_a_t a,
const field_a_t x
gf_isr (
gf_a_t a,
const gf_a_t x
) {
field_a_t L0, L1, L2;
field_sqr ( L1, x );
field_mul ( L0, x, L1 );
field_sqrn ( L2, L0, 2 );
field_mul ( L1, L0, L2 );
field_sqrn ( L2, L1, 4 );
field_mul ( L0, L1, L2 );
field_sqrn ( L2, L0, 8 );
field_mul ( L1, L0, L2 );
field_sqrn ( L2, L1, 16 );
field_mul ( L0, L1, L2 );
field_sqrn ( L2, L0, 32 );
field_mul ( L1, L0, L2 );
field_sqr ( L2, L1 );
field_mul ( L0, x, L2 );
field_sqrn ( L2, L0, 64 );
field_mul ( L0, L1, L2 );
field_sqrn ( L2, L0, 129 );
field_mul ( L1, L0, L2 );
field_sqr ( L2, L1 );
field_mul ( L0, x, L2 );
field_sqrn ( L2, L0, 259 );
field_mul ( L1, L0, L2 );
field_sqr ( L0, L1 );
field_mul ( a, x, L0 );
gf_a_t L0, L1, L2;
gf_sqr ( L1, x );
gf_mul ( L0, x, L1 );
gf_sqrn ( L2, L0, 2 );
gf_mul ( L1, L0, L2 );
gf_sqrn ( L2, L1, 4 );
gf_mul ( L0, L1, L2 );
gf_sqrn ( L2, L0, 8 );
gf_mul ( L1, L0, L2 );
gf_sqrn ( L2, L1, 16 );
gf_mul ( L0, L1, L2 );
gf_sqrn ( L2, L0, 32 );
gf_mul ( L1, L0, L2 );
gf_sqr ( L2, L1 );
gf_mul ( L0, x, L2 );
gf_sqrn ( L2, L0, 64 );
gf_mul ( L0, L1, L2 );
gf_sqrn ( L2, L0, 129 );
gf_mul ( L1, L0, L2 );
gf_sqr ( L2, L1 );
gf_mul ( L0, x, L2 );
gf_sqrn ( L2, L0, 259 );
gf_mul ( L1, L0, L2 );
gf_sqr ( L0, L1 );
gf_mul ( a, x, L0 );
}

src/p521/f_field.h  +14 -14

@@ -13,19 +13,19 @@
#include "constant_time.h"

#include "p521.h"
#define FIELD_LIT_LIMB_BITS 58
#define FIELD_BITS 521
#define field_t p521_t
#define field_mul p521_mul
#define field_sqr p521_sqr
#define field_add_RAW p521_add_RAW
#define field_sub_RAW p521_sub_RAW
#define field_mulw p521_mulw
#define field_bias p521_bias
#define field_isr p521_isr
#define field_weak_reduce p521_weak_reduce
#define field_strong_reduce p521_strong_reduce
#define field_serialize p521_serialize
#define field_deserialize p521_deserialize
#define GF_LIT_LIMB_BITS 58
#define GF_BITS 521
#define gf p521_t
#define gf_mul p521_mul
#define gf_sqr p521_sqr
#define gf_add_RAW p521_add_RAW
#define gf_sub_RAW p521_sub_RAW
#define gf_mulw p521_mulw
#define gf_bias p521_bias
#define gf_isr p521_isr
#define gf_weak_reduce p521_weak_reduce
#define gf_strong_reduce p521_strong_reduce
#define gf_serialize p521_serialize
#define gf_deserialize p521_deserialize

#endif /* __F_FIELD_H__ */

src/public_include/decaf/decaf_255.h  +5 -3

@@ -21,11 +21,13 @@ extern "C" {
#define DECAF_255_SCALAR_BITS 254 // Curve25519: 253
#define DECAF_255_SCALAR_LIMBS (256/DECAF_WORD_BITS)

#ifndef __DECAF_GF_ALREADY_DEFINED__
/** Galois field element internal structure */
typedef struct gf_255_s {
typedef struct gf_25519_s {
decaf_word_t limb[DECAF_255_LIMBS];
} gf_255_s, gf_255_t[1];
} gf_25519_s, gf_25519_t[1];
/** @endcond */
#endif /* __DECAF_GF_ALREADY_DEFINED__ */

/** Number of bytes in a serialized point. */
#define DECAF_255_SER_BYTES 32
@@ -34,7 +36,7 @@ typedef struct gf_255_s {
#define DECAF_255_SCALAR_BYTES 32

/** Twisted Edwards (-1,d-1) extended homogeneous coordinates */
typedef struct decaf_255_point_s { /**@cond internal*/gf_255_t x,y,z,t;/**@endcond*/ } decaf_255_point_t[1];
typedef struct decaf_255_point_s { /**@cond internal*/gf_25519_t x,y,z,t;/**@endcond*/ } decaf_255_point_t[1];

/** Precomputed table based on a point. Can be trivial implementation. */
struct decaf_255_precomputed_s;

