Browse Source

dual scalarmul because of TLS discussion

master
Mike Hamburg 9 years ago
parent
commit
704b424982
8 changed files with 234 additions and 1 deletions
  1. +100
    -0
      src/decaf_fast.c
  2. +67
    -0
      src/include/constant_time.h
  3. +22
    -0
      src/public_include/decaf/decaf_255.h
  4. +7
    -0
      src/public_include/decaf/decaf_255.hxx
  5. +22
    -0
      src/public_include/decaf/decaf_448.h
  6. +7
    -0
      src/public_include/decaf/decaf_448.hxx
  7. +1
    -0
      test/bench_decaf.cxx
  8. +8
    -1
      test/test_decaf.cxx

+ 100
- 0
src/decaf_fast.c View File

@@ -1064,6 +1064,106 @@ void API_NS(point_double_scalarmul) (
decaf_bzero(tmp,sizeof(tmp));
}

/**
 * Multiply a single point by two scalars in one pass:
 *   a1 = scalar1 * b
 *   a2 = scalar2 * b
 *
 * Both products share one chain of doublings of b.  For every WINDOW-bit
 * digit of each scalar, the current doubled copy of b (negated or not) is
 * added into a per-scalar accumulator selected by that digit; afterwards the
 * accumulators are folded together, accumulator k being weighted by 2k+1.
 *
 * b is copied into a local before anything is written to a1 or a2.
 * All scalar-dependent locals are wiped before returning.
 */
void API_NS(point_dual_scalarmul) (
point_t a1,
point_t a2,
const point_t b,
const scalar_t scalar1,
const scalar_t scalar2
) {
/* WINDOW_MASK selects one WINDOW-bit digit, WINDOW_T_MASK its low WINDOW-1
 * bits (the accumulator index); NTABLE is the accumulator count per scalar. */
const int WINDOW = DECAF_WINDOW_BITS,
WINDOW_MASK = (1<<WINDOW)-1,
WINDOW_T_MASK = WINDOW_MASK >> 1,
NTABLE = 1<<(WINDOW-1);
/* Work on (scalar + point_scalarmul_adjustment) / 2 rather than on the raw
 * scalars.  NOTE(review): presumably the same signed-digit recoding used by
 * the other scalarmul routines in this file -- confirm. */
scalar_t scalar1x, scalar2x;
API_NS(scalar_add)(scalar1x, scalar1, API_NS(point_scalarmul_adjustment));
sc_halve(scalar1x,scalar1x,sc_p);
API_NS(scalar_add)(scalar2x, scalar2, API_NS(point_scalarmul_adjustment));
sc_halve(scalar2x,scalar2x,sc_p);
/* multiples1[] / multiples2[] are accumulators, not a precomputed table:
 * entry k collects the (possibly negated) doubled copies of b whose digit
 * selected index k.  working holds the current doubled copy of b. */
point_t multiples1[NTABLE], multiples2[NTABLE], working, tmp;
pniels_t pn;
API_NS(point_copy)(working, b);

/* Start every accumulator at the identity. */
int i,j;
for (i=0; i<NTABLE; i++) {
API_NS(point_copy)(multiples1[i], API_NS(point_identity));
API_NS(point_copy)(multiples2[i], API_NS(point_identity));
}

/* Walk the scalars from the least significant digit upwards. */
for (i=0; i<SCALAR_BITS; i+=WINDOW) {
if (i) {
/* Advance working by WINDOW doublings, so that working = 2^i * b.
 * NOTE(review): the last doubling passes 0 instead of -1; presumably the
 * flag tells point_double_internal whether another doubling follows. */
for (j=0; j<WINDOW-1; j++)
point_double_internal(working, working, -1);
point_double_internal(working, working, 0);
}
/* Fetch another block of bits */
decaf_word_t bits1 = scalar1x->limb[i/WBITS] >> (i%WBITS),
bits2 = scalar2x->limb[i/WBITS] >> (i%WBITS);
/* The digit reaches the top of this limb: bring in the low bits of the
 * next limb (skipped for the last limb, which has no successor). */
if (i%WBITS >= WBITS-WINDOW && i/WBITS<SCALAR_LIMBS-1) {
bits1 ^= scalar1x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
bits2 ^= scalar2x->limb[i/WBITS+1] << (WBITS - (i%WBITS));
}
bits1 &= WINDOW_MASK;
bits2 &= WINDOW_MASK;
/* inv is all-ones when the digit's top bit is clear and 0 when it is set.
 * It is used both to complement the digit and as the negation mask for
 * the point that gets added. */
decaf_word_t inv1 = (bits1>>(WINDOW-1))-1;
decaf_word_t inv2 = (bits2>>(WINDOW-1))-1;
bits1 ^= inv1;
bits2 ^= inv2;
pt_to_pniels(pn, working);

/* First scalar: multiples1[bits1 & WINDOW_T_MASK] += (+/-) working.
 * The entry is fetched into tmp, updated and written back through the
 * constant_time_* helpers instead of indexing the array with the digit. */
constant_time_lookup_xx(tmp, multiples1, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
cond_neg_niels(pn->n, inv1);
add_pniels_to_pt(tmp, pn, 0);
constant_time_insert(multiples1, tmp, sizeof(tmp), NTABLE, bits1 & WINDOW_T_MASK);
/* Second scalar: pn currently carries the sign chosen by inv1, so
 * negating under inv1^inv2 leaves it with the sign chosen by inv2. */
constant_time_lookup_xx(tmp, multiples2, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
cond_neg_niels(pn->n, inv1^inv2);
add_pniels_to_pt(tmp, pn, 0);
constant_time_insert(multiples2, tmp, sizeof(tmp), NTABLE, bits2 & WINDOW_T_MASK);
}
/* Fold the accumulators: result = sum over k of (2k+1) * multiples[k]. */
if (NTABLE > 1) {
API_NS(point_copy)(working, multiples1[NTABLE-1]);
API_NS(point_copy)(tmp , multiples2[NTABLE-1]);
/* Suffix sums: afterwards multiples[j] (j >= 1) holds the sum of the
 * original entries j..NTABLE-1, and working / tmp hold the sum of those
 * suffix sums, i.e. sum of k * multiples[k]. */
for (i=NTABLE-1; i>1; i--) {
API_NS(point_add)(multiples1[i-1], multiples1[i-1], multiples1[i]);
API_NS(point_add)(multiples2[i-1], multiples2[i-1], multiples2[i]);
API_NS(point_add)(working, working, multiples1[i-1]);
API_NS(point_add)(tmp, tmp, multiples2[i-1]);
}
/* multiples[0] becomes the plain sum of all entries ... */
API_NS(point_add)(multiples1[0], multiples1[0], multiples1[1]);
API_NS(point_add)(multiples2[0], multiples2[0], multiples2[1]);
/* ... and 2 * (sum of k * multiples[k]) + (sum of multiples[k]) gives the
 * (2k+1)-weighted total. */
point_double_internal(working, working, 0);
point_double_internal(tmp, tmp, 0);
API_NS(point_add)(a1, working, multiples1[0]);
API_NS(point_add)(a2, tmp, multiples2[0]);
} else {
/* NTABLE == 1 (WINDOW == 1): the single accumulator already is the result. */
API_NS(point_copy)(a1, multiples1[0]);
API_NS(point_copy)(a2, multiples2[0]);
}

/* Wipe every local that was derived from the scalars. */
decaf_bzero(scalar1x,sizeof(scalar1x));
decaf_bzero(scalar2x,sizeof(scalar2x));
decaf_bzero(pn,sizeof(pn));
decaf_bzero(multiples1,sizeof(multiples1));
decaf_bzero(multiples2,sizeof(multiples2));
decaf_bzero(tmp,sizeof(tmp));
decaf_bzero(working,sizeof(working));
}

decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
/* equality mod 2-torsion compares x/y */
gf a, b;


+ 67
- 0
src/include/constant_time.h View File

@@ -184,6 +184,73 @@ constant_time_lookup (
}
}

/**
 * @brief Constant-time equivalent of memcpy(table + elem_bytes*idx, in, elem_bytes);
 *
 * Every one of the n_table entries is read and written back; the input is
 * merged in under a mask that is set only for the entry whose position
 * equals idx, so the sequence of memory accesses does not depend on idx.
 * If idx >= n_table no position matches and the table contents are unchanged.
 *
 * Each entry is processed in up to three passes: whole vector registers
 * (big_register_t), then whole words (word_t), then single bytes.
 *
 * Alignment: the table must be at least as aligned as elem_bytes.  The input
 * must be word aligned, and when elem_bytes is a multiple of
 * sizeof(big_register_t) the input must be vector aligned as well.
 *
 * The table and input must not alias.
 *
 * @param [in,out] table_ Array of n_table entries of elem_bytes bytes each.
 * @param [in] in_ The entry to store; elem_bytes bytes.
 * @param [in] elem_bytes Size of one entry in bytes.
 * @param [in] n_table Number of entries in the table.
 * @param [in] idx Position of the entry to overwrite.
 */
static __inline__ void
__attribute__((unused,always_inline))
constant_time_insert (
    void *__restrict__ table_,
    const void *in_,
    word_t elem_bytes,
    word_t n_table,
    word_t idx
) {
    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
    /* Can't do pointer arithmetic on void* */
    const unsigned char *in = (const unsigned char *)in_;
    unsigned char *table = (unsigned char *)table_;
    word_t j,k;

    /* big_i counts down from idx, so it reaches zero exactly at entry idx. */
    for (j=0; j<n_table; j++, big_i-=big_one) {
        big_register_t br_mask = br_is_zero(big_i);

        /* Vector-sized chunks.  The bound is written as an addition: the
         * subtraction elem_bytes - sizeof(big_register_t) is unsigned and
         * would wrap around for entries smaller than one vector register,
         * sending the loop far out of bounds. */
        for (k=0; k+sizeof(big_register_t)<=elem_bytes; k+=sizeof(big_register_t)) {
            if (elem_bytes % sizeof(big_register_t)) {
                /* unaligned */
                ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned
                    = ( ((unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned & ~br_mask )
                    | ( ((const unaligned_br_t *)(in+k))->unaligned & br_mask );
            } else {
                /* aligned */
                *(big_register_t*)(&table[k+j*elem_bytes])
                    = ( *(big_register_t*)(&table[k+j*elem_bytes]) & ~br_mask )
                    | ( *(const big_register_t *)(in+k) & br_mask );
            }
        }

        /* Word-sized chunks of whatever the vector pass left over. */
        word_t mask = word_is_zero(idx^j);
        if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
            for (; k+sizeof(word_t)<=elem_bytes; k+=sizeof(word_t)) {
                if (elem_bytes % sizeof(word_t)) {
                    /* output unaligned, input aligned */
                    ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned
                        = ( ((unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned & ~mask )
                        | ( *(const word_t *)(in+k) & mask );
                } else {
                    /* aligned */
                    *(word_t*)(&table[k+j*elem_bytes])
                        = ( *(word_t*)(&table[k+j*elem_bytes]) & ~mask )
                        | ( *(const word_t *)(in+k) & mask );
                }
            }
        }

        /* Trailing bytes when elem_bytes is not a whole number of words. */
        if (elem_bytes % sizeof(word_t)) {
            for (; k<elem_bytes; k+=1) {
                table[k+j*elem_bytes]
                    = ( table[k+j*elem_bytes] & ~mask )
                    | ( in[k] & mask );
            }
        }
    }
}

/**
* @brief Constant-time a = b&mask.
*


+ 22
- 0
src/public_include/decaf/decaf_255.h View File

@@ -391,6 +391,28 @@ void decaf_255_point_double_scalarmul (
const decaf_255_point_t base2,
const decaf_255_scalar_t scalar2
) API_VIS NONNULL5 NOINLINE;
/**
 * @brief Multiply one base point by two scalars:
 * a1 = scalar1 * b
 * a2 = scalar2 * b
 *
 * Equivalent to two calls to decaf_255_point_scalarmul, but may be
 * faster.
 *
 * @param [out] a1 The first multiple, scalar1 * b.
 * @param [out] a2 The second multiple, scalar2 * b.
 * @param [in] b The point to be scaled.
 * @param [in] scalar1 A first scalar to multiply by.
 * @param [in] scalar2 A second scalar to multiply by.
 */
void decaf_255_point_dual_scalarmul (
decaf_255_point_t a1,
decaf_255_point_t a2,
const decaf_255_point_t b,
const decaf_255_scalar_t scalar1,
const decaf_255_scalar_t scalar2
) API_VIS NONNULL5 NOINLINE;

/**
* @brief Multiply two base points by two scalars:


+ 7
- 0
src/public_include/decaf/decaf_255.hxx View File

@@ -363,6 +363,13 @@ public:
Point p((NOINIT())); decaf_255_point_double_scalarmul(p.p,q.p,qs.s,r.p,rs.s); return p;
}
/**
 * @brief Dual-scalar multiply: computes both this*r1 and this*r2 in one call.
 * Equivalent to two separate scalar multiplications of this point, but may be faster.
 * Forwards to decaf_255_point_dual_scalarmul.
 * @param [out] q1 Receives this*r1.
 * @param [out] q2 Receives this*r2.
 * @param [in] r1 The first scalar.
 * @param [in] r2 The second scalar.
 */
inline void dual_scalarmul (
Point &q1, Point &q2, const Scalar &r1, const Scalar &r2
) const NOEXCEPT {
decaf_255_point_dual_scalarmul(q1.p,q2.p,p,r1.s,r2.s);
}
/**
* @brief Double-scalar multiply, equivalent to q*qs + r*rs but faster.
* For those who like their scalars before the point.


+ 22
- 0
src/public_include/decaf/decaf_448.h View File

@@ -394,6 +394,28 @@ void decaf_448_point_double_scalarmul (
const decaf_448_point_t base2,
const decaf_448_scalar_t scalar2
) API_VIS NONNULL5 NOINLINE;
/**
 * @brief Multiply one base point by two scalars:
 * a1 = scalar1 * b
 * a2 = scalar2 * b
 *
 * Equivalent to two calls to decaf_448_point_scalarmul, but may be
 * faster.
 *
 * @param [out] a1 The first multiple, scalar1 * b.
 * @param [out] a2 The second multiple, scalar2 * b.
 * @param [in] b The point to be scaled.
 * @param [in] scalar1 A first scalar to multiply by.
 * @param [in] scalar2 A second scalar to multiply by.
 */
void decaf_448_point_dual_scalarmul (
decaf_448_point_t a1,
decaf_448_point_t a2,
const decaf_448_point_t b,
const decaf_448_scalar_t scalar1,
const decaf_448_scalar_t scalar2
) API_VIS NONNULL5 NOINLINE;

/**
* @brief Multiply two base points by two scalars:


+ 7
- 0
src/public_include/decaf/decaf_448.hxx View File

@@ -374,6 +374,13 @@ public:
) NOEXCEPT {
Point p((NOINIT())); decaf_448_point_double_scalarmul(p.p,q.p,qs.s,r.p,rs.s); return p;
}

/**
 * @brief Dual-scalar multiply: computes both this*r1 and this*r2 in one call.
 * Equivalent to two separate scalar multiplications of this point, but may be faster.
 * Forwards to decaf_448_point_dual_scalarmul.
 * @param [out] q1 Receives this*r1.
 * @param [out] q2 Receives this*r2.
 * @param [in] r1 The first scalar.
 * @param [in] r2 The second scalar.
 */
inline void dual_scalarmul (
Point &q1, Point &q2, const Scalar &r1, const Scalar &r2
) const NOEXCEPT {
decaf_448_point_dual_scalarmul(q1.p,q2.p,p,r1.s,r2.s);
}
/**
* @brief Double-scalar multiply, equivalent to q*qs + r*rs but faster.


+ 1
- 0
test/bench_decaf.cxx View File

@@ -358,6 +358,7 @@ static void micro() {
for (Benchmark b("Point unhash uniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep2,0)); }
for (Benchmark b("Point steg"); b.iter(); ) { p.steg_encode(rng); }
for (Benchmark b("Point double scalarmul"); b.iter(); ) { Point::double_scalarmul(p,s,q,t); }
for (Benchmark b("Point dual scalarmul"); b.iter(); ) { p.dual_scalarmul(p,q,s,t); }
for (Benchmark b("Point precmp scalarmul"); b.iter(); ) { pBase * s; }
for (Benchmark b("Point double scalarmul_v"); b.iter(); ) {
s = Scalar(rng);


+ 8
- 1
test/test_decaf.cxx View File

@@ -286,6 +286,8 @@ static void test_ec() {
Point p(rng);
Point q(rng);
Point d1, d2;
SecureBuffer buffer(2*Point::HASH_BYTES);
rng.read(buffer);
Point r = Point::from_hash(buffer);
@@ -305,7 +307,12 @@ static void test_ec() {
if (i%10) continue;
point_check(test,p,q,r,x,0,x*(p+q),x*p+x*q,"distr mul");
point_check(test,p,q,r,x,y,(x*y)*p,x*(y*p),"assoc mul");
point_check(test,p,q,r,x,y,x*p+y*q,Point::double_scalarmul(x,p,y,q),"ds mul");
point_check(test,p,q,r,x,y,x*p+y*q,Point::double_scalarmul(x,p,y,q),"double mul");
p.dual_scalarmul(d1,d2,x,y);
point_check(test,p,q,r,x,y,x*p,d1,"dual mul 1");
point_check(test,p,q,r,x,y,y*p,d2,"dual mul 2");
point_check(test,base,q,r,x,y,x*base+y*q,q.non_secret_combo_with_base(y,x),"ds vt mul");
point_check(test,p,q,r,x,0,Precomputed(p)*x,p*x,"precomp mul");
point_check(test,p,q,r,0,0,r,


Loading…
Cancel
Save