|
@@ -1710,54 +1710,51 @@ static int recode_wnaf ( |
|
|
const scalar_t scalar, |
|
|
const scalar_t scalar, |
|
|
unsigned int tableBits |
|
|
unsigned int tableBits |
|
|
) { |
|
|
) { |
|
|
int current = 0, i, j; |
|
|
|
|
|
unsigned int position = 0; |
|
|
|
|
|
|
|
|
unsigned int table_size = SCALAR_BITS/(tableBits+1) + 3; |
|
|
|
|
|
unsigned int position = table_size - 1; /* at the end */ |
|
|
|
|
|
|
|
|
|
|
|
/* place the end marker */ |
|
|
|
|
|
control[position].power = -1; |
|
|
|
|
|
control[position].addend = 0; |
|
|
|
|
|
position--; |
|
|
|
|
|
|
|
|
/* PERF: negate scalar if it's large |
|
|
|
|
|
* PERF: this is a pretty simplistic algorithm. I'm sure there's a faster one... |
|
|
|
|
|
* PERF MINOR: not technically WNAF, since last digits can be adjacent. Could be rtl. |
|
|
|
|
|
|
|
|
/* PERF: Could negate scalar if it's large. But then would need more cases |
|
|
|
|
|
* in the actual code that uses it, all for an expected reduction of like 1/5 op. |
|
|
|
|
|
* Probably not worth it. |
|
|
*/ |
|
|
*/ |
|
|
for (i=SCALAR_BITS-1; i >= 0; i--) { |
|
|
|
|
|
int bit = (scalar->limb[i/WBITS] >> (i%WBITS)) & 1; |
|
|
|
|
|
current = 2*current + bit; |
|
|
|
|
|
|
|
|
|
|
|
/* |
|
|
|
|
|
* Sizing: |current| >= 2^(tableBits+1) -> |current| = 2^0 |
|
|
|
|
|
* So current loses (tableBits+1) bits every time. It otherwise gains |
|
|
|
|
|
* 1 bit per iteration. The number of iterations is |
|
|
|
|
|
* (nbits + 2 + tableBits), and an additional control word is added at |
|
|
|
|
|
* the end. So the total number of control words is at most |
|
|
|
|
|
* ceil((nbits+1) / (tableBits+1)) + 2 = floor((nbits)/(tableBits+1)) + 2. |
|
|
|
|
|
* There's also the stopper with power -1, for a total of +3. |
|
|
|
|
|
*/ |
|
|
|
|
|
if (current >= (2<<tableBits) || current <= -1 - (2<<tableBits)) { |
|
|
|
|
|
int delta = (current + 1) >> 1; /* |delta| < 2^tablebits */ |
|
|
|
|
|
current = -(current & 1); |
|
|
|
|
|
|
|
|
|
|
|
for (j=i; (delta & 1) == 0; j++) { |
|
|
|
|
|
delta >>= 1; |
|
|
|
|
|
} |
|
|
|
|
|
control[position].power = j+1; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
uint64_t current = scalar->limb[0] & 0xFFFF; |
|
|
|
|
|
uint32_t mask = (1<<(tableBits+1))-1; |
|
|
|
|
|
|
|
|
|
|
|
unsigned int w; |
|
|
|
|
|
const unsigned int B_OVER_16 = sizeof(scalar->limb[0]) / 2; |
|
|
|
|
|
for (w = 1; w<(SCALAR_BITS-1)/16+3; w++) { |
|
|
|
|
|
if (w < (SCALAR_BITS-1)/16+1) { |
|
|
|
|
|
/* Refill the 16 high bits of current */ |
|
|
|
|
|
current += (uint32_t)((scalar->limb[w/B_OVER_16]>>(16*(w%B_OVER_16)))<<16); |
|
|
|
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
while (current & 0xFFFF) { |
|
|
|
|
|
assert(position >= 0); |
|
|
|
|
|
uint32_t pos = __builtin_ctz((uint32_t)current), odd = (uint32_t)current >> pos; |
|
|
|
|
|
int32_t delta = odd & mask; |
|
|
|
|
|
if (odd & 1<<(tableBits+1)) delta -= (1<<(tableBits+1)); |
|
|
|
|
|
current -= delta << pos; |
|
|
|
|
|
control[position].power = pos + 16*(w-1); |
|
|
control[position].addend = delta; |
|
|
control[position].addend = delta; |
|
|
position++; |
|
|
|
|
|
assert(position <= SCALAR_BITS/(tableBits+1) + 2); |
|
|
|
|
|
|
|
|
position--; |
|
|
} |
|
|
} |
|
|
|
|
|
current >>= 16; |
|
|
} |
|
|
} |
|
|
|
|
|
assert(current==0); |
|
|
|
|
|
|
|
|
if (current) { |
|
|
|
|
|
for (j=0; (current & 1) == 0; j++) { |
|
|
|
|
|
current >>= 1; |
|
|
|
|
|
} |
|
|
|
|
|
control[position].power = j; |
|
|
|
|
|
control[position].addend = current; |
|
|
|
|
|
position++; |
|
|
|
|
|
assert(position <= SCALAR_BITS/(tableBits+1) + 2); |
|
|
|
|
|
|
|
|
position++; |
|
|
|
|
|
unsigned int n = table_size - position; |
|
|
|
|
|
unsigned int i; |
|
|
|
|
|
for (i=0; i<n; i++) { |
|
|
|
|
|
control[i] = control[i+position]; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
control[position].power = -1; |
|
|
|
|
|
control[position].addend = 0; |
|
|
|
|
|
return position; |
|
|
|
|
|
|
|
|
return n-1; |
|
|
} |
|
|
} |
|
|
|
|
|
|
|
|
static void |
|
|
static void |
|
|