Browse Source

test alternative selection-based ladder

master
Michael Hamburg 10 years ago
parent
commit
d7fef55d55
1 changed files with 37 additions and 28 deletions
  1. +37
    -28
      src/decaf_fast.c

+ 37
- 28
src/decaf_fast.c View File

@@ -13,8 +13,6 @@
#include <string.h> #include <string.h>
#include "field.h" #include "field.h"


#include "ec_point.h" // REMOVE!

#define WBITS DECAF_WORD_BITS #define WBITS DECAF_WORD_BITS


#if WBITS == 64 #if WBITS == 64
@@ -833,11 +831,16 @@ decaf_bool_t decaf_448_direct_scalarmul (
decaf_bool_t allow_identity, decaf_bool_t allow_identity,
decaf_bool_t short_circuit decaf_bool_t short_circuit
) { ) {
/* The Montgomery ladder does not short-circuit return on invalid points,
* since it detects them during recompress.
*/
(void)short_circuit; (void)short_circuit;
gf s0, x0, xa, za, xd, zd, xs, zs;
gf s0, x0, xa, za, xd, zd, xs, zs, L0, L1;
decaf_bool_t succ = gf_deser ( s0, base ); decaf_bool_t succ = gf_deser ( s0, base );
succ &= allow_identity |~ gf_eq( s0, ZERO); succ &= allow_identity |~ gf_eq( s0, ZERO);


/* Prepare the Montgomery ladder: Q = 1:0, P+Q = P */
gf_sqr ( xa, s0 ); gf_sqr ( xa, s0 );
gf_cpy ( x0, xa ); gf_cpy ( x0, xa );
gf_cpy ( za, ONE ); gf_cpy ( za, ONE );
@@ -846,26 +849,38 @@ decaf_bool_t decaf_448_direct_scalarmul (
int j; int j;
decaf_bool_t pflip = 0; decaf_bool_t pflip = 0;
for (j=448-1; j>=0; j--) { /* TODO: DECAF_SCALAR_BITS */
decaf_bool_t flip = -((scalar->limb[j/WORD_BITS]>>(j%WORD_BITS))&1);;
cond_swap(xa,xd,flip^pflip);
cond_swap(za,zd,flip^pflip);
for (j=DECAF_448_SCALAR_BITS+1; j>=0; j--) {
/* FIXME: -1, but the test cases use too many bits */
/* TODO PERF: consider a selection-based ladder. It uses more memory but is probably faster. */
/* Augmented Montgomery ladder */
decaf_bool_t flip = -((scalar->limb[j/WORD_BITS]>>(j%WORD_BITS))&1);
/* Differential add first... */
gf_add_nr ( xs, xa, za ); gf_add_nr ( xs, xa, za );
gf_sub_nr ( zs, xa, za ); gf_sub_nr ( zs, xa, za );
gf_add_nr ( xa, xd, zd ); gf_add_nr ( xa, xd, zd );
gf_sub_nr ( za, xd, zd ); gf_sub_nr ( za, xd, zd );
cond_sel(L0,xa,xs,flip^pflip);
cond_sel(L1,za,zs,flip^pflip);
gf_mul ( xd, xa, zs ); gf_mul ( xd, xa, zs );
gf_mul ( zd, xs, za ); gf_mul ( zd, xs, za );
gf_add_nr ( xs, xd, zd ); gf_add_nr ( xs, xd, zd );
gf_sub_nr ( zd, xd, zd ); gf_sub_nr ( zd, xd, zd );
gf_mul ( zs, zd, s0 ); gf_mul ( zs, zd, s0 );
gf_sqr ( zd, xa );
gf_sqr ( xa, za );
/* ... and then double */
gf_sqr ( zd, L0 );
gf_sqr ( xa, L1 );
gf_sub_nr ( za, zd, xa ); gf_sub_nr ( za, zd, xa );
gf_mul ( xd, xa, zd ); gf_mul ( xd, xa, zd );
gf_mlw ( zd, za, 1-EDWARDS_D ); gf_mlw ( zd, za, 1-EDWARDS_D );
gf_add_nr ( xa, xa, zd ); gf_add_nr ( xa, xa, zd );
gf_mul ( zd, xa, za ); gf_mul ( zd, xa, za );
/* OK, finish the dadd */
gf_sqr ( xa, xs ); gf_sqr ( xa, xs );
gf_sqr ( za, zs ); gf_sqr ( za, zs );
pflip = flip; pflip = flip;
@@ -874,7 +889,7 @@ decaf_bool_t decaf_448_direct_scalarmul (
cond_swap(za,zd,pflip); cond_swap(za,zd,pflip);
/* OK, time to reserialize! Should be easy (heh, but seriously, TODO: simplify) */ /* OK, time to reserialize! Should be easy (heh, but seriously, TODO: simplify) */
gf xz_d, xz_a, xz_s, den, L0, L1, L2, L3;
gf xz_d, xz_a, xz_s, den, L2, L3;
mask_t zcase, output_zero, sflip, za_zero; mask_t zcase, output_zero, sflip, za_zero;
gf_mul(xz_s, xs, zs); gf_mul(xz_s, xs, zs);
gf_mul(xz_d, xd, zd); gf_mul(xz_d, xd, zd);
@@ -884,7 +899,9 @@ decaf_bool_t decaf_448_direct_scalarmul (
zcase = output_zero | gf_eq(xz_a, ZERO); zcase = output_zero | gf_eq(xz_a, ZERO);
za_zero = gf_eq(za, ZERO); za_zero = gf_eq(za, ZERO);


/* Curve test in zcase */
/* Curve test in zcase, compute x0^2 + (2d-4)x0 + 1
* (we know that x0 = s0^2 is square).
*/
gf_add(L0,x0,ONE); gf_add(L0,x0,ONE);
gf_sqr(L1,L0); gf_sqr(L1,L0);
gf_mlw(L0,x0,-4*EDWARDS_D); gf_mlw(L0,x0,-4*EDWARDS_D);
@@ -896,14 +913,14 @@ decaf_bool_t decaf_448_direct_scalarmul (
gf_mul(L1, L0, xz_d); gf_mul(L1, L0, xz_d);
gf_isqrt(den, L1); gf_isqrt(den, L1);


/* Check squareness */
/* Check that the square root came out OK. */
gf_sqr(L2, den); gf_sqr(L2, den);
gf_mul(L3, L0, L2); /* x0 xa za den^2 = 1/xz_d, for later */ gf_mul(L3, L0, L2); /* x0 xa za den^2 = 1/xz_d, for later */
gf_mul(L0, L1, L2); gf_mul(L0, L1, L2);
gf_add(L0, L0, ONE); gf_add(L0, L0, ONE);
succ &= ~hibit(s0) & ~gf_eq(L0, ZERO); succ &= ~hibit(s0) & ~gf_eq(L0, ZERO);


/* Compute y/x */
/* Compute y/x for input and output point. */
gf_mul(L1, x0, xd); gf_mul(L1, x0, xd);
gf_sub(L1, zd, L1); gf_sub(L1, zd, L1);
gf_mul(L0, za, L1); /* L0 = "opq" */ gf_mul(L0, za, L1); /* L0 = "opq" */
@@ -917,30 +934,22 @@ decaf_bool_t decaf_448_direct_scalarmul (
sflip = (lobit(L1) ^ lobit(L2)) | za_zero; sflip = (lobit(L1) ^ lobit(L2)) | za_zero;
/* OK, done with y-coordinates */ /* OK, done with y-coordinates */
/* If xa==0 or za ==0:
* return 0
* Else if za == 0:
* return s0 * (sflip ? zd : xd)^2 * L3
* Else if zd == 0:
* return s0 * (sflip ? zd : xd)^2 * L3
* Else if pflip:
* return xs * zs * (sflip ? zd : xd) * L3
* Else:
* return s0 * xs * zs * (sflip ? zd : xd) * den
/* If xa==0 or za ==0: return 0
* Else if za == 0: return s0 * (sflip ? zd : xd)^2 * L3
* Else if zd == 0: return s0 * (sflip ? zd : xd)^2 * L3
* Else if pflip: return xs * zs * (sflip ? zd : xd) * L3
* Else: return s0 * xs * zs * (sflip ? zd : xd) * den
*/ */
cond_sel(xd, xd, zd, sflip); /* xd = actual xd we care about */ cond_sel(xd, xd, zd, sflip); /* xd = actual xd we care about */
cond_sel(den,den,L3,pflip|zcase); cond_sel(den,den,L3,pflip|zcase);
cond_sel(xz_s,xz_s,xd,zcase); cond_sel(xz_s,xz_s,xd,zcase);
cond_sel(s0,s0,ONE,pflip&~zcase); cond_sel(s0,s0,ONE,pflip&~zcase);
cond_sel(s0,s0,ZERO,output_zero); cond_sel(s0,s0,ZERO,output_zero);

/* compute the output xd*den*xs*zs or
* den*xd^2*s0 = (oden*s0*xd)^2 * xa * za * s0
* in zcase */
gf_mul(L0,xd,den); gf_mul(L0,xd,den);
gf_mul(L1,L0,s0); gf_mul(L1,L0,s0);
gf_mul(L0,L1,xz_s); gf_mul(L0,L1,xz_s);
cond_neg(L0,hibit(L0)); cond_neg(L0,hibit(L0));
gf_encode(scaled, L0); gf_encode(scaled, L0);




Loading…
Cancel
Save