|
|
@@ -817,14 +817,123 @@ decaf_bool_t decaf_448_direct_scalarmul ( |
|
|
|
decaf_bool_t allow_identity, |
|
|
|
decaf_bool_t short_circuit |
|
|
|
) { |
|
|
|
decaf_448_point_t basep; |
|
|
|
decaf_bool_t succ = decaf_448_point_decode(basep, base, allow_identity); |
|
|
|
/* FIXME: compiler can probably reorder this to something non-consttime even if |
|
|
|
* !short_circuit. |
|
|
|
*/ |
|
|
|
if (short_circuit && ~succ) return succ; |
|
|
|
decaf_448_point_scalarmul(basep, basep, scalar); |
|
|
|
decaf_448_point_encode(scaled, basep); |
|
|
|
gf s0, xa, za, xd, zd, xs, zs; |
|
|
|
decaf_bool_t succ = gf_deser ( s0, base ); |
|
|
|
succ &= allow_identity |~ gf_eq(s0, ZERO); |
|
|
|
(void) short_circuit; |
|
|
|
gf_sqr ( xa, s0 ); |
|
|
|
gf_cpy ( za, ONE ); |
|
|
|
gf_cpy ( xd, ONE ); |
|
|
|
gf_cpy ( zd, ZERO ); |
|
|
|
|
|
|
|
int j; |
|
|
|
decaf_bool_t pflip = 0; |
|
|
|
for (j=DECAF_448_SCALAR_BITS-1; j>=0; j--) { |
|
|
|
decaf_bool_t flip = -((scalar->limb[j/WORD_BITS]>>(j%WORD_BITS))&1);; |
|
|
|
cond_swap(xa,xd,flip^pflip); |
|
|
|
cond_swap(za,zd,flip^pflip); |
|
|
|
gf_add_nr ( xs, xa, za ); |
|
|
|
gf_sub_nr ( zs, xa, za ); |
|
|
|
gf_add_nr ( xa, xd, zd ); |
|
|
|
gf_sub_nr ( za, xd, zd ); |
|
|
|
gf_mul ( xd, xa, zs ); |
|
|
|
gf_mul ( zd, xs, za ); |
|
|
|
gf_add_nr ( xs, xd, zd ); |
|
|
|
gf_sub_nr ( zd, xd, zd ); |
|
|
|
gf_mul ( zs, zd, s0 ); |
|
|
|
gf_sqr ( zd, xa ); |
|
|
|
gf_sqr ( xa, za ); |
|
|
|
gf_sub_nr ( za, zd, xa ); |
|
|
|
gf_mul ( xd, xa, zd ); |
|
|
|
gf_mlw ( zd, za, 1-EDWARDS_D ); |
|
|
|
gf_add_nr ( xa, xa, zd ); |
|
|
|
gf_mul ( zd, xa, za ); |
|
|
|
gf_sqr ( xa, xs ); |
|
|
|
gf_sqr ( za, zs ); |
|
|
|
pflip = flip; |
|
|
|
} |
|
|
|
cond_swap(xa,xd,pflip); |
|
|
|
cond_swap(za,zd,pflip); |
|
|
|
|
|
|
|
/* OK, time to reserialize! */ |
|
|
|
gf xz_d, xz_a, x0, den, L0, L1, L2, L3, out; /* TODO: simplify */ |
|
|
|
mask_t zcase, output_zero, sflip, za_zero; |
|
|
|
gf_mul(xz_d, xd, zd); |
|
|
|
gf_mul(xz_a, xa, za); |
|
|
|
output_zero = gf_eq(xz_d, ZERO); |
|
|
|
za_zero = gf_eq(za, ZERO); |
|
|
|
cond_sel(xz_d, xz_d, ONE, output_zero); /* make xz_d always nonzero */ |
|
|
|
zcase = output_zero | gf_eq(xz_a, ZERO); |
|
|
|
|
|
|
|
gf_sqr(x0, s0); |
|
|
|
|
|
|
|
/* Curve test in zcase */ |
|
|
|
gf_cpy(L0,x0); |
|
|
|
gf_add(L0,L0,ONE); |
|
|
|
gf_sqr(L1,L0); |
|
|
|
gf_mlw(L0,x0,-4*EDWARDS_D); |
|
|
|
gf_add(L1,L1,L0); |
|
|
|
cond_sel(xz_a,xz_a,L1,zcase); |
|
|
|
|
|
|
|
/* Compute denominator */ |
|
|
|
gf_mul(L0, x0, xz_d); |
|
|
|
gf_mlw(L2, L0, 4); |
|
|
|
gf_mul(L1, L2, xz_a); |
|
|
|
gf_isqrt(den, L1); |
|
|
|
|
|
|
|
/* Check squareness */ |
|
|
|
gf_sqr(L2, den); |
|
|
|
gf_mul(L0, L1, L2); |
|
|
|
gf_add(L0, L0, ONE); |
|
|
|
succ &= ~hibit(s0) & ~gf_eq(L0, ZERO); |
|
|
|
|
|
|
|
/* Compute y/x */ |
|
|
|
gf_mul(L1, x0, xd); |
|
|
|
gf_sub(L1, zd, L1); |
|
|
|
gf_mul(L0, za, L1); /* L0 = "opq" */ |
|
|
|
gf_mul(L1, x0, zd); |
|
|
|
gf_sub(L1, L1, xd); |
|
|
|
gf_mul(L2, xa, L1); /* L2 = "pqr" */ |
|
|
|
|
|
|
|
gf_sub(L1, L0, L2); |
|
|
|
gf_add(L0, L0, L2); |
|
|
|
gf_mul(L2, L1, den); /* L2 = y0 / x0 */ |
|
|
|
gf_mul(L1, L0, den); /* L1 = yO / xO */ |
|
|
|
sflip = hibit(L1) ^ hibit(L2) ^ za_zero; |
|
|
|
cond_sel(L0, xd, zd, sflip); /* L0 = "times" */ |
|
|
|
/* OK, done with y-coordinates */ |
|
|
|
|
|
|
|
/* OK, now correct for swappage */ |
|
|
|
gf_add(den,den,den); |
|
|
|
gf_mul(L1,den,s0); |
|
|
|
gf_sqr(L2,L1); |
|
|
|
gf_mul(L3,L2,xz_a); |
|
|
|
cond_sel(den,L1,L3,pflip|zcase); |
|
|
|
|
|
|
|
/* compute the output */ |
|
|
|
gf_mul(L1,L0,den); |
|
|
|
|
|
|
|
cond_sel(L2,zs,s0,zcase); /* zs, but s0 in zcase */ |
|
|
|
gf_mul(L0,L1,L2); |
|
|
|
|
|
|
|
cond_sel(L3,xd,zd,za_zero); |
|
|
|
cond_sel(L2,xs,L3,zcase); /* xs, but zq or qq in zcase */ |
|
|
|
gf_mul(out,L0,L2); |
|
|
|
|
|
|
|
cond_sel(out,out,ZERO,output_zero); |
|
|
|
cond_neg(out,hibit(out)); |
|
|
|
|
|
|
|
/* TODO: resubroutineize? */ |
|
|
|
gf_canon(out); |
|
|
|
int i, k=0, bits=0; |
|
|
|
decaf_dword_t buf=0; |
|
|
|
for (i=0; i<DECAF_448_LIMBS; i++) { |
|
|
|
buf |= (decaf_dword_t)out[i]<<bits; |
|
|
|
for (bits += LBITS; (bits>=8 || i==DECAF_448_LIMBS-1) && k<DECAF_448_SER_BYTES; bits-=8, buf>>=8) { |
|
|
|
scaled[k++]=buf; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
return succ; |
|
|
|
} |
|
|
|
|