@@ -1064,6 +1064,106 @@ void API_NS(point_double_scalarmul) ( | |||
decaf_bzero(tmp,sizeof(tmp)); | |||
} | |||
/**
 * Dual scalar multiplication: multiply the single point b by two scalars in
 * one pass, writing one result to a1 and the other to a2.  The public header
 * describes this as a1 = scalar1 * b, a2 = scalar2 * b.
 *
 * Structure of the computation, as visible below:
 *   1. Each scalar is offset by point_scalarmul_adjustment and halved.
 *   2. `working` starts as b and is doubled WINDOW times between windows, so
 *      the doubling chain is shared by both scalars.
 *   3. For every WINDOW-bit window, the current `working` (possibly negated)
 *      is added into one accumulator slot per scalar, selected by the window
 *      value.
 *   4. The accumulator slots are finally combined as
 *      sum over k of (2k+1) * slot[k].
 *
 * b is copied into `working` before a1/a2 are written, so the outputs are
 * only stored at the very end of the function.
 *
 * All scalar-dependent temporaries are wiped with decaf_bzero before return.
 */
void API_NS(point_dual_scalarmul) ( | |||
point_t a1, | |||
point_t a2, | |||
const point_t b, | |||
const scalar_t scalar1, | |||
const scalar_t scalar2 | |||
) { | |||
/* Window geometry.  WINDOW_MASK selects a full window; WINDOW_T_MASK drops
 * the window's top bit (which is used as a sign below), leaving an index
 * into a table of NTABLE = 2^(WINDOW-1) slots. */
const int WINDOW = DECAF_WINDOW_BITS, | |||
WINDOW_MASK = (1<<WINDOW)-1, | |||
WINDOW_T_MASK = WINDOW_MASK >> 1, | |||
NTABLE = 1<<(WINDOW-1); | |||
/* Recode both scalars: add the library's adjustment constant, then halve.
 * NOTE(review): presumably sc_halve divides by two modulo sc_p so that the
 * signed-window digits below reconstruct the original scalar -- confirm
 * against sc_halve / point_scalarmul_adjustment. */
scalar_t scalar1x, scalar2x; | |||
API_NS(scalar_add)(scalar1x, scalar1, API_NS(point_scalarmul_adjustment)); | |||
sc_halve(scalar1x,scalar1x,sc_p); | |||
API_NS(scalar_add)(scalar2x, scalar2, API_NS(point_scalarmul_adjustment)); | |||
sc_halve(scalar2x,scalar2x,sc_p); | |||
/* Accumulator tables, one per scalar, with one slot per window index.
 * They do not hold multiples of b; each slot collects the doubled copies
 * of b whose window digit selected that slot. */ | |||
point_t multiples1[NTABLE], multiples2[NTABLE], working, tmp; | |||
pniels_t pn; | |||
/* `working` tracks b times successive powers of two. */
API_NS(point_copy)(working, b); | |||
/* Start every accumulator slot at the identity. */ | |||
int i,j; | |||
for (i=0; i<NTABLE; i++) { | |||
API_NS(point_copy)(multiples1[i], API_NS(point_identity)); | |||
API_NS(point_copy)(multiples2[i], API_NS(point_identity)); | |||
} | |||
/* Walk the scalars from the least significant window upward. */
for (i=0; i<SCALAR_BITS; i+=WINDOW) { | |||
if (i) { | |||
/* Advance `working` by WINDOW doublings.  NOTE(review): the third
 * argument differs for the last doubling (-1 vs 0); presumably it tells
 * point_double_internal whether another doubling follows -- confirm. */
for (j=0; j<WINDOW-1; j++) | |||
point_double_internal(working, working, -1); | |||
point_double_internal(working, working, 0); | |||
} | |||
/* Fetch the WINDOW bits starting at bit i of each recoded scalar. */ | |||
decaf_word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS), | |||
bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS); | |||
/* The window straddles a limb boundary: pull the high part from the next
 * limb, unless we are already in the last limb. */
if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) { | |||
bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS)); | |||
bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS)); | |||
} | |||
bits1 &= WINDOW_MASK; | |||
bits2 &= WINDOW_MASK; | |||
/* inv is all-ones when the window's top bit is clear and zero when it is
 * set.  XORing it in complements the window in the former case; the low
 * WINDOW-1 bits then serve as the table index and inv as the sign mask. */
decaf_word_t inv1 = (bits1>>(WINDOW-1))-1; | |||
decaf_word_t inv2 = (bits2>>(WINDOW-1))-1; | |||
bits1 ^= inv1; | |||
bits2 ^= inv2; | |||
/* Convert the current power-of-two multiple once; it is reused for both
 * scalars. */
pt_to_pniels(pn, working); | |||
/* Scalar 1: read the selected slot, add +/- pn, write it back.  The
 * lookup/insert pair replaces the direct indexed update shown in the
 * commented-out line, so the secret-derived index is not used as an
 * address (constant_time_insert touches every slot; lookup_xx presumably
 * does likewise). */
constant_time_lookup_xx(tmp, multiples1, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK); | |||
cond_neg_niels(pn->n, inv1); | |||
/* add_pniels_to_pt(multiples1[bits1 & WINDOW_T_MASK], pn, 0); */ | |||
add_pniels_to_pt(tmp, pn, 0); | |||
constant_time_insert(multiples1, tmp, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK); | |||
/* Scalar 2: pn has already been conditioned on inv1, so conditioning it
 * on inv1^inv2 leaves it conditioned on inv2 alone. */
constant_time_lookup_xx(tmp, multiples2, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK); | |||
cond_neg_niels(pn->n, inv1^inv2); | |||
/* add_pniels_to_pt(multiples2[bits2 & WINDOW_T_MASK], pn, 0); */ | |||
add_pniels_to_pt(tmp, pn, 0); | |||
constant_time_insert(multiples2, tmp, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK); | |||
} | |||
/* Combine the slots: result = sum over k of (2k+1) * slot[k].
 * The loop turns slot[i-1] into the suffix sum slot[i-1]+...+slot[NTABLE-1]
 * and adds each suffix sum into working/tmp, which yields sum of k*slot[k].
 * Doubling that and adding the total of all slots gives the odd weights. */
if (NTABLE > 1) { | |||
API_NS(point_copy)(working, multiples1[NTABLE-1]); | |||
API_NS(point_copy)(tmp , multiples2[NTABLE-1]); | |||
for (i=NTABLE-1; i>1; i--) { | |||
API_NS(point_add)(multiples1[i-1], multiples1[i-1], multiples1[i]); | |||
API_NS(point_add)(multiples2[i-1], multiples2[i-1], multiples2[i]); | |||
API_NS(point_add)(working, working, multiples1[i-1]); | |||
API_NS(point_add)(tmp, tmp, multiples2[i-1]); | |||
} | |||
/* slot[0] becomes the sum of all slots. */
API_NS(point_add)(multiples1[0], multiples1[0], multiples1[1]); | |||
API_NS(point_add)(multiples2[0], multiples2[0], multiples2[1]); | |||
point_double_internal(working, working, 0); | |||
point_double_internal(tmp, tmp, 0); | |||
API_NS(point_add)(a1, working, multiples1[0]); | |||
API_NS(point_add)(a2, tmp, multiples2[0]); | |||
} else { | |||
/* Single-slot table: the lone accumulator already is the result. */
API_NS(point_copy)(a1, multiples1[0]); | |||
API_NS(point_copy)(a2, multiples2[0]); | |||
} | |||
/* Wipe everything derived from the secret scalars. */
decaf_bzero(scalar1x,sizeof(scalar1x)); | |||
decaf_bzero(scalar2x,sizeof(scalar2x)); | |||
decaf_bzero(pn,sizeof(pn)); | |||
decaf_bzero(multiples1,sizeof(multiples1)); | |||
decaf_bzero(multiples2,sizeof(multiples2)); | |||
decaf_bzero(tmp,sizeof(tmp)); | |||
decaf_bzero(working,sizeof(working)); | |||
} | |||
decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) { | |||
/* equality mod 2-torsion compares x/y */ | |||
gf a, b; | |||
@@ -184,6 +184,73 @@ constant_time_lookup ( | |||
} | |||
} | |||
/** | |||
* @brief Constant-time equivalent of memcpy(table + elem_bytes*idx, in, elem_bytes); | |||
* | |||
* The table must be at least as aligned as elem_bytes. The input must be word aligned, | |||
* and if the output size is vector aligned it must also be vector aligned. | |||
* | |||
* The table and input must not alias. | |||
*/ | |||
static __inline__ void | |||
__attribute__((unused,always_inline)) | |||
constant_time_insert ( | |||
void *__restrict__ table_, | |||
const void *in_, | |||
word_t elem_bytes, | |||
word_t n_table, | |||
word_t idx | |||
) { | |||
big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx); | |||
/* Can't do pointer arithmetic on void* */ | |||
const unsigned char *in = (const unsigned char *)in_; | |||
unsigned char *table = (unsigned char *)table_; | |||
word_t j,k; | |||
for (j=0; j<n_table; j++, big_i-=big_one) { | |||
big_register_t br_mask = br_is_zero(big_i); | |||
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||
if (elem_bytes % sizeof(big_register_t)) { | |||
/* unaligned */ | |||
((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned | |||
= ( ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned & ~br_mask ) | |||
| ( ((const unaligned_br_t *)(in+k))->unaligned & br_mask ); | |||
} else { | |||
/* aligned */ | |||
*(big_register_t*)(&table[k+j*elem_bytes]) | |||
= ( *(big_register_t*)(&table[k+j*elem_bytes]) & ~br_mask ) | |||
| ( *(const big_register_t *)(in+k) & br_mask ); | |||
} | |||
} | |||
word_t mask = word_is_zero(idx^j); | |||
if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||
for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||
if (elem_bytes % sizeof(word_t)) { | |||
/* output unaligned, input aligned */ | |||
((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned | |||
= ( ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned & ~mask ) | |||
| ( *(const word_t *)(in+k) & mask ); | |||
} else { | |||
/* aligned */ | |||
*(word_t*)(&table[k+j*elem_bytes]) | |||
= ( *(word_t*)(&table[k+j*elem_bytes]) & ~mask ) | |||
| ( *(const word_t *)(in+k) & mask ); | |||
} | |||
} | |||
} | |||
if (elem_bytes % sizeof(word_t)) { | |||
for (; k<elem_bytes; k+=1) { | |||
table[k+j*elem_bytes] | |||
= ( table[k+j*elem_bytes] & ~mask ) | |||
| ( in[k] & mask ); | |||
} | |||
} | |||
} | |||
} | |||
/** | |||
* @brief Constant-time a = b&mask. | |||
* | |||
@@ -391,6 +391,28 @@ void decaf_255_point_double_scalarmul ( | |||
const decaf_255_point_t base2, | |||
const decaf_255_scalar_t scalar2 | |||
) API_VIS NONNULL5 NOINLINE; | |||
/** | |||
* @brief Multiply one base point by two scalars: | |||
* a1 = scalar1 * b | |||
* a2 = scalar2 * b | |||
* | |||
* Equivalent to two calls to decaf_255_point_scalarmul, but may be | |||
* faster. | |||
* | |||
* @param [out] a1 The first multiple | |||
* @param [out] a2 The second multiple | |||
* @param [in] b A point to be scaled. | |||
* @param [in] scalar1 A first scalar to multiply by. | |||
* @param [in] scalar2 A second scalar to multiply by. | |||
*/ | |||
void decaf_255_point_dual_scalarmul ( | |||
decaf_255_point_t a1, | |||
decaf_255_point_t a2, | |||
const decaf_255_point_t b, | |||
const decaf_255_scalar_t scalar1, | |||
const decaf_255_scalar_t scalar2 | |||
) API_VIS NONNULL5 NOINLINE; | |||
/** | |||
* @brief Multiply two base points by two scalars: | |||
@@ -363,6 +363,13 @@ public: | |||
Point p((NOINIT())); decaf_255_point_double_scalarmul(p.p,q.p,qs.s,r.p,rs.s); return p; | |||
} | |||
/** @brief Dual-scalar multiply, equivalent to this*r1, this*r2 but faster. */ | |||
inline void dual_scalarmul ( | |||
Point &q1, Point &q2, const Scalar &r1, const Scalar &r2 | |||
) const NOEXCEPT { | |||
decaf_255_point_dual_scalarmul(q1.p,q2.p,p,r1.s,r2.s); | |||
} | |||
/** | |||
* @brief Double-scalar multiply, equivalent to q*qs + r*rs but faster. | |||
* For those who like their scalars before the point. | |||
@@ -394,6 +394,28 @@ void decaf_448_point_double_scalarmul ( | |||
const decaf_448_point_t base2, | |||
const decaf_448_scalar_t scalar2 | |||
) API_VIS NONNULL5 NOINLINE; | |||
/** | |||
* @brief Multiply one base point by two scalars: | |||
* a1 = scalar1 * b | |||
* a2 = scalar2 * b | |||
* | |||
* Equivalent to two calls to decaf_448_point_scalarmul, but may be | |||
* faster. | |||
* | |||
* @param [out] a1 The first multiple | |||
* @param [out] a2 The second multiple | |||
* @param [in] b A point to be scaled. | |||
* @param [in] scalar1 A first scalar to multiply by. | |||
* @param [in] scalar2 A second scalar to multiply by. | |||
*/ | |||
void decaf_448_point_dual_scalarmul ( | |||
decaf_448_point_t a1, | |||
decaf_448_point_t a2, | |||
const decaf_448_point_t b, | |||
const decaf_448_scalar_t scalar1, | |||
const decaf_448_scalar_t scalar2 | |||
) API_VIS NONNULL5 NOINLINE; | |||
/** | |||
* @brief Multiply two base points by two scalars: | |||
@@ -374,6 +374,13 @@ public: | |||
) NOEXCEPT { | |||
Point p((NOINIT())); decaf_448_point_double_scalarmul(p.p,q.p,qs.s,r.p,rs.s); return p; | |||
} | |||
/** @brief Dual-scalar multiply, equivalent to this*r1, this*r2 but faster. */ | |||
inline void dual_scalarmul ( | |||
Point &q1, Point &q2, const Scalar &r1, const Scalar &r2 | |||
) const NOEXCEPT { | |||
decaf_448_point_dual_scalarmul(q1.p,q2.p,p,r1.s,r2.s); | |||
} | |||
/** | |||
* @brief Double-scalar multiply, equivalent to q*qs + r*rs but faster. | |||
@@ -358,6 +358,7 @@ static void micro() { | |||
for (Benchmark b("Point unhash uniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep2,0)); } | |||
for (Benchmark b("Point steg"); b.iter(); ) { p.steg_encode(rng); } | |||
for (Benchmark b("Point double scalarmul"); b.iter(); ) { Point::double_scalarmul(p,s,q,t); } | |||
for (Benchmark b("Point dual scalarmul"); b.iter(); ) { p.dual_scalarmul(p,q,s,t); } | |||
for (Benchmark b("Point precmp scalarmul"); b.iter(); ) { pBase * s; } | |||
for (Benchmark b("Point double scalarmul_v"); b.iter(); ) { | |||
s = Scalar(rng); | |||
@@ -286,6 +286,8 @@ static void test_ec() { | |||
Point p(rng); | |||
Point q(rng); | |||
Point d1, d2; | |||
SecureBuffer buffer(2*Point::HASH_BYTES); | |||
rng.read(buffer); | |||
Point r = Point::from_hash(buffer); | |||
@@ -305,7 +307,12 @@ static void test_ec() { | |||
if (i%10) continue; | |||
point_check(test,p,q,r,x,0,x*(p+q),x*p+x*q,"distr mul"); | |||
point_check(test,p,q,r,x,y,(x*y)*p,x*(y*p),"assoc mul"); | |||
point_check(test,p,q,r,x,y,x*p+y*q,Point::double_scalarmul(x,p,y,q),"ds mul"); | |||
point_check(test,p,q,r,x,y,x*p+y*q,Point::double_scalarmul(x,p,y,q),"double mul"); | |||
p.dual_scalarmul(d1,d2,x,y); | |||
point_check(test,p,q,r,x,y,x*p,d1,"dual mul 1"); | |||
point_check(test,p,q,r,x,y,y*p,d2,"dual mul 2"); | |||
point_check(test,base,q,r,x,y,x*base+y*q,q.non_secret_combo_with_base(y,x),"ds vt mul"); | |||
point_check(test,p,q,r,x,0,Precomputed(p)*x,p*x,"precomp mul"); | |||
point_check(test,p,q,r,0,0,r, | |||