Browse Source

real WNAF

master
Michael Hamburg 8 years ago
parent
commit
a9e2e08bf2
1 changed files with 38 additions and 41 deletions
  1. +38
    -41
      src/decaf.c

+ 38
- 41
src/decaf.c View File

@@ -1710,54 +1710,51 @@ static int recode_wnaf (
const scalar_t scalar,
unsigned int tableBits
) {
int current = 0, i, j;
unsigned int position = 0;
unsigned int table_size = SCALAR_BITS/(tableBits+1) + 3;
unsigned int position = table_size - 1; /* at the end */
/* place the end marker */
control[position].power = -1;
control[position].addend = 0;
position--;

/* PERF: negate scalar if it's large
* PERF: this is a pretty simplistic algorithm. I'm sure there's a faster one...
* PERF MINOR: not technically WNAF, since last digits can be adjacent. Could be rtl.
/* PERF: Could negate scalar if it's large. But then would need more cases
* in the actual code that uses it, all for an expected reduction of like 1/5 op.
* Probably not worth it.
*/
for (i=SCALAR_BITS-1; i >= 0; i--) {
int bit = (scalar->limb[i/WBITS] >> (i%WBITS)) & 1;
current = 2*current + bit;

/*
* Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0
* So current loses (tableBits+1) bits every time. It otherwise gains
* 1 bit per iteration. The number of iterations is
* (nbits + 2 + tableBits), and an additional control word is added at
* the end. So the total number of control words is at most
* ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2.
* There's also the stopper with power -1, for a total of +3.
*/
if (current >= (2<<tableBits) || current <= -1 - (2<<tableBits)) {
int delta = (current + 1) >> 1; /* |delta| < 2^tablebits */
current = -(current & 1);

for (j=i; (delta & 1) == 0; j++) {
delta >>= 1;
}
control[position].power = j+1;
uint64_t current = scalar->limb[0] & 0xFFFF;
uint32_t mask = (1<<(tableBits+1))-1;

unsigned int w;
const unsigned int B_OVER_16 = sizeof(scalar->limb[0]) / 2;
for (w = 1; w<(SCALAR_BITS-1)/16+3; w++) {
if (w < (SCALAR_BITS-1)/16+1) {
/* Refill the 16 high bits of current */
current += (uint32_t)((scalar->limb[w/B_OVER_16]>>(16*(w%B_OVER_16)))<<16);
}
while (current & 0xFFFF) {
assert(position >= 0);
uint32_t pos = __builtin_ctz((uint32_t)current), odd = (uint32_t)current >> pos;
int32_t delta = odd & mask;
if (odd & 1<<(tableBits+1)) delta -= (1<<(tableBits+1));
current -= delta << pos;
control[position].power = pos + 16*(w-1);
control[position].addend = delta;
position++;
assert(position <= SCALAR_BITS/(tableBits+1) + 2);
position--;
}
current >>= 16;
}
assert(current==0);
if (current) {
for (j=0; (current & 1) == 0; j++) {
current >>= 1;
}
control[position].power = j;
control[position].addend = current;
position++;
assert(position <= SCALAR_BITS/(tableBits+1) + 2);
position++;
unsigned int n = table_size - position;
unsigned int i;
for (i=0; i<n; i++) {
control[i] = control[i+position];
}
control[position].power = -1;
control[position].addend = 0;
return position;
return n-1;
}

static void


Loading…
Cancel
Save