From ffb44b4c948cb3d531efef2eb6a4563c12113407 Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Mon, 26 Jan 2015 19:30:51 -0800 Subject: [PATCH] decaf tuning and simplification --- include/decaf.h | 2 +- src/decaf.c | 129 ++++++++++++++++++++---------------------------- test/bench.c | 12 +++++ 3 files changed, 66 insertions(+), 77 deletions(-) diff --git a/include/decaf.h b/include/decaf.h index d376123..a649d5a 100644 --- a/include/decaf.h +++ b/include/decaf.h @@ -17,7 +17,7 @@ typedef uint64_t decaf_word_t, decaf_bool_t; #define DECAF_SER_BYTES 56 typedef struct decaf_point_s { decaf_word_t x[DECAF_LIMBS],y[DECAF_LIMBS],z[DECAF_LIMBS],t[DECAF_LIMBS]; -} __attribute__((aligned(32))) decaf_point_t[1]; +} decaf_point_t[1]; static const decaf_bool_t DECAF_SUCCESS = -(decaf_bool_t)1, DECAF_FAILURE = 0; diff --git a/src/decaf.c b/src/decaf.c index 8ec5c47..fd08059 100644 --- a/src/decaf.c +++ b/src/decaf.c @@ -25,22 +25,22 @@ static const gf ZERO = {0}, ONE = {1}, TWO = {2}; static const word_t LMASK = (1ull<> LBITS; accum[NLIMBS-2] &= LMASK; @@ -52,44 +52,27 @@ siv gf_mul_x (gf c, const gf a, const word_t *b, int limbs_b) { FOR_LIMB(j, c[j] = accum[j] ); } -static void gf_mul( gf a, const gf b, const gf c ) { gf_mul_x(a,b,c,NLIMBS); } -static void gf_sqr( gf a, const gf b ) { gf_mul_x(a,b,b,NLIMBS); } - -siv gf_sqrn ( gf x, const gf y, int n ) { - gf_cpy(x,y); - int i; - for (i=0; i> LBITS; FOR_LIMB(j,{ @@ -110,10 +93,11 @@ siv gf_sub ( gf x, const gf y, const gf z ) { siv gf_mlw(gf a, const gf b, word_t w) { if (w>0) { - gf_mul_x(a,b,&w,1); + gf ww = {w}; + gf_mul_inline(a,b,ww); } else { - word_t ww = -w; - gf_mul_x(a,b,&ww,1); + gf ww = {-w}; + gf_mul_inline(a,b,ww); gf_sub(a,ZERO,a); } } @@ -170,36 +154,6 @@ static inline word_t hibit(const gf x) { return -(y[0]&1); } -// FIXME: 32-bit cleanliness -siv gf_ser ( uint8_t serial[DECAF_SER_BYTES], const gf x ) { - int j; - gf red; - gf_cpy(red,x); - gf_canon(red); - FOR_LIMB(i,{ - for (j=0; j<7; j++) { - serial[7*i+j] = red[i]; - red[i] >>= 8; - } - }); -} - -// FIXME: 32-bit cleanliness -static mask_t gf_deser ( gf x, const uint8_t serial[DECAF_SER_BYTES] ) { - int j; - FOR_LIMB(i, { - uint64_t out = 0; - for (j=0; j<7; j++) { - out |= ((uint64_t)serial[7*i+j])<<(8*j); - } - x[i] = out; - }); - - sdword_t accum = 0; - FOR_LIMB(i, accum = (accum + P[i] - x[i]) >> WBITS ); - return ~accum; -} - const decaf_point_t decaf_identity_point = {{{0},{1},{1},{0}}}; siv add_sub_point ( @@ -231,7 +185,7 @@ siv add_sub_point ( gf_mul ( p->t, b, c ); } -void decaf_encode( uint8_t ser[DECAF_SER_BYTES], const decaf_point_t p ) { +void decaf_encode( unsigned char ser[DECAF_SER_BYTES], const decaf_point_t p ) { gf a, b, c, d; gf_mlw ( a, p->y, 1-EDWARDS_D ); gf_mul ( c, a, p->t ); @@ -251,16 +205,39 @@ void decaf_encode( uint8_t ser[DECAF_SER_BYTES], const decaf_point_t p ) { gf_mul ( c, b, p->y ); gf_add ( a, a, c ); cond_neg ( a, hibit(a) ); - gf_ser(ser,a); + + // FIXME arch + gf_canon(a); + int j; + FOR_LIMB(i,{ + for (j=0; j<7; j++) { + ser[7*i+j] = a[i]; + a[i] >>= 8; + } + }); } decaf_bool_t decaf_decode ( decaf_point_t p, - const uint8_t ser[DECAF_SER_BYTES], + const unsigned char ser[DECAF_SER_BYTES], decaf_bool_t allow_identity ) { gf s, a, b, c, d, e; - mask_t succ = gf_deser( s, ser ); + + // FIXME arch + int j; + FOR_LIMB(i, { + word_t out = 0; + for (j=0; j<7; j++) { + out |= ((word_t)ser[7*i+j])<<(8*j); + } + s[i] = out; + }); + + sdword_t accum = 0; + FOR_LIMB(i, accum = (accum + P[i] - s[i]) >> WBITS ); + + mask_t succ = ~accum; mask_t zero = gf_eq(s, ZERO); succ &= allow_identity | ~zero; succ &= ~hibit(s); diff --git a/test/bench.c b/test/bench.c index ddf02ef..a0b478e 100644 --- a/test/bench.c +++ b/test/bench.c @@ -16,6 +16,7 @@ #include "crandom.h" #include "goldilocks.h" #include "sha512.h" +#include "decaf.h" static __inline__ void ignore_result ( int result ) { @@ -284,6 +285,17 @@ int main(int argc, char **argv) { } when = now() - when; printf("txt + txt : %5.1fns\n", when * 1e9 / i); + + decaf_point_t Da,Db,Dc; + memset(Da,0,sizeof(Da)); + memset(Db,0,sizeof(Db)); + memset(Dc,0,sizeof(Dc)); + when = now(); + for (i=0; i