Browse Source

one gf_strong_reduce to rule them all

master
Michael Hamburg 8 years ago
parent
commit
825751d034
12 changed files with 36 additions and 525 deletions
  1. +36
    -0
      src/decaf.c
  2. +0
    -34
      src/p25519/arch_32/f_impl.c
  3. +0
    -38
      src/p25519/arch_ref64/f_impl.c
  4. +0
    -38
      src/p25519/arch_x86_64/f_impl.c
  5. +0
    -39
      src/p448/arch_32/f_impl.c
  6. +0
    -40
      src/p448/arch_arm_32/f_impl.c
  7. +0
    -91
      src/p448/arch_neon/f_impl.c
  8. +0
    -86
      src/p448/arch_ref64/f_impl.c
  9. +0
    -40
      src/p448/arch_x86_64/f_impl.c
  10. +0
    -40
      src/p480/arch_x86_64/f_impl.c
  11. +0
    -38
      src/p521/arch_ref64/f_impl.c
  12. +0
    -41
      src/p521/arch_x86_64_r12/f_impl.c

+ 36
- 0
src/decaf.c View File

@@ -89,6 +89,9 @@ const size_t API_NS2(alignof,precomputed_s) = 32;
/* Run `op` once per limb: declares a block-local unsigned counter named by
 * the first argument and steps it from 0 up to NLIMBS-1. */
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<NLIMBS; i++) { op; }}
/* Same loop as FOR_LIMB, with the UNROLL marker placed in front of the `for`. */
#define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<NLIMBS; i++) { op; }}

/* FUTURE: move this code from per-curve to per-field header
* (like f_arithmetic.c but same for all fields)
*/
void gf_serialize (uint8_t serial[SER_BYTES], const gf x) {
gf red;
gf_copy(red, x);
@@ -126,6 +129,39 @@ mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) {
return word_is_zero(buffer) & ~word_is_zero(scarry);
}

/**
 * Strongly reduce a field element in place.
 *
 * Generic over the field: the modulus limbs come from MODULUS and the
 * per-limb widths from LIMB_MASK / LIMB_PLACE_VALUE.  Limbs are walked in
 * logical order i = 0..NLIMBS-1, and LIMBPERM(i) gives the storage slot of
 * logical limb i.  Every read AND every write of a limb must go through that
 * slot, otherwise layouts with a non-identity LIMBPERM get their result
 * written to the wrong place.
 *
 * @param a  Element to reduce; overwritten with the reduced limbs.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high */
    gf_weak_reduce(a); /* PERF: only really need one step of this */

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<NLIMBS; i++) {
        const unsigned int slot = LIMBPERM(i);
        scarry = scarry + a->limb[slot] - MODULUS->limb[slot];
        a->limb[slot] = scarry & LIMB_MASK(slot);
        scarry >>= LIMB_PLACE_VALUE(slot);
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^k,
     * where 2^k is the place value just above the top limb.
     * so let's add back in p.  will carry back off the top for 2^k.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* 0 or all-ones: selects whether p is added back, without branching. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back */
    for (unsigned int i=0; i<NLIMBS; i++) {
        const unsigned int slot = LIMBPERM(i);
        carry = carry + a->limb[slot] + (scarry_0 & MODULUS->limb[slot]);
        a->limb[slot] = carry & LIMB_MASK(slot);
        carry >>= LIMB_PLACE_VALUE(slot);
    }

    /* The carry out of the top limb must cancel the earlier borrow. */
    assert(word_is_zero(carry + scarry_0));
}

/** Constant time, x = is_z ? z : y */
static INLINE void
cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) {


+ 0
- 34
src/p25519/arch_32/f_impl.c View File

@@ -90,38 +90,4 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
gf_mul(cs,as,as); // PERF
}

/**
 * Strongly reduce a 10-limb field element in place (32-bit layout).
 *
 * Steps: fold the bits of limb 9 above bit 25 back into limb 0 (times 19),
 * subtract MODULUS limb by limb with a signed borrow, then add MODULUS back
 * under a mask when the subtraction went negative.
 *
 * @param a  Element to reduce; overwritten with the reduced limbs.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high */
    a->limb[0] += (a->limb[9]>>25)*19;
    a->limb[9] &= LIMB_MASK(9);

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<10; i++) {
        scarry = scarry + a->limb[i] - MODULUS->limb[i];
        a->limb[i] = scarry & LIMB_MASK(i);
        scarry >>= LIMB_PLACE_VALUE(i);
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^255
     * so let's add back in p.  will carry back off the top for 2^255.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* 0 or all-ones: selects whether p is added back, without branching. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back -- every limb must be visited exactly once, so the loop
     * header is the only place the index advances. */
    for (unsigned int i=0; i<10; i++) {
        carry = carry + a->limb[i] + (scarry_0 & MODULUS->limb[i]);
        a->limb[i] = carry & LIMB_MASK(i);
        carry >>= LIMB_PLACE_VALUE(i);
    }

    /* The carry out of the top limb must cancel the earlier borrow. */
    assert(word_is_zero(carry + scarry_0));
}


+ 0
- 38
src/p25519/arch_ref64/f_impl.c View File

@@ -59,41 +59,3 @@ void gf_mulw (gf_s *__restrict__ cs, const gf as, uint64_t b) {
/* Square a field element: writes as*as to cs by calling gf_mul with both
 * operands equal to `as`. */
void gf_sqr (gf_s *__restrict__ cs, const gf as) {
gf_mul(cs,as,as); // PERF: no dedicated squaring routine here
}

/**
 * Strongly reduce a 5-limb, radix-2^51 field element in place.
 *
 * The modulus limbs used here are (2^51 - 19) for limb 0 and (2^51 - 1) for
 * limbs 1..4.
 */
void gf_strong_reduce (gf a) {
    const uint64_t limb_mask = (1ull<<51)-1;

    /* Fold whatever sits above bit 51 of the top limb into limb 0, times 19. */
    a->limb[0] += (a->limb[4]>>51)*19;
    a->limb[4] &= limb_mask;

    /* The total is now below 2p.  Subtract p with a signed running borrow. */
    __int128_t borrow = 0;
    for (int k=0; k<5; k++) {
        const uint64_t p_limb = (k==0) ? limb_mask-18 : limb_mask;
        borrow += a->limb[k];
        borrow -= p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 51;
    }

    /* borrow == 0  : the value was >= p and the limbs already hold x - p.
     * borrow == -1 : the value was <  p and the limbs hold x - p + 2^255,
     *                so p is added back and the 2^255 carries off the top.
     */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const uint64_t addback = borrow & limb_mask;
    __uint128_t acc = 0;

    /* Masked add-back of p. */
    for (int k=0; k<5; k++) {
        acc += a->limb[k];
        acc += (k==0) ? (addback&~18) : addback;
        a->limb[k] = acc & limb_mask;
        acc >>= 51;
    }

    assert(word_is_zero(acc + borrow));
}

+ 0
- 38
src/p25519/arch_x86_64/f_impl.c View File

@@ -170,41 +170,3 @@ void gf_mulw (gf_s *__restrict__ cs, const gf as, uint64_t b) {
c[0] = accum & mask;
c[1] = c1 + shrld(accum,51);
}

/**
 * Strongly reduce a 5-limb, radix-2^51 field element in place.
 *
 * Works in three passes: fold the top limb's overflow into limb 0 (times 19),
 * subtract the modulus limb by limb, then conditionally add it back.
 */
void gf_strong_reduce (gf a) {
    const uint64_t mask51 = (1ull<<51)-1;
    /* Modulus limbs, least significant first: 2^51-19, then four of 2^51-1. */
    const uint64_t p_limbs[5] = { mask51-18, mask51, mask51, mask51, mask51 };

    /* clear the high bits of the top limb */
    a->limb[0] += (a->limb[4]>>51)*19;
    a->limb[4] &= mask51;

    /* total < 2p from here on; compute total - p */
    __int128_t diff = 0;
    for (int idx=0; idx<5; idx++) {
        diff += a->limb[idx];
        diff -= p_limbs[idx];
        a->limb[idx] = diff & mask51;
        diff >>= 51;
    }

    /* diff is 0 when the input was >= p (limbs now hold x - p), and -1 when
     * it was < p (limbs now hold x - p + 2^255, fixed up below). */
    assert(word_is_zero(diff) | word_is_zero(diff+1));

    const uint64_t select = diff & mask51;
    __uint128_t sum = 0;

    /* select is confined to the low 51 bits, so masking it with a modulus
     * limb gives select&~18 for limb 0 and select itself for the others. */
    for (int idx=0; idx<5; idx++) {
        sum += a->limb[idx];
        sum += select & p_limbs[idx];
        a->limb[idx] = sum & mask51;
        sum >>= 51;
    }

    assert(word_is_zero(sum + diff));
}

+ 0
- 39
src/p448/arch_32/f_impl.c View File

@@ -103,42 +103,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
gf_mul(cs,as,as); /* PERF */
}

/**
 * Strongly reduce a 16-limb, radix-2^28 field element in place.
 *
 * The modulus limbs used here are (2^28 - 2) for limb 8 and (2^28 - 1) for
 * every other limb.
 */
void gf_strong_reduce (gf a) {
    const word_t limb_mask = (1ull<<28)-1;

    /* Fold the overflow of the top limb into limbs 0 and 8, then clear it. */
    a->limb[0] += a->limb[15]>>28;
    a->limb[8] += a->limb[15]>>28;
    a->limb[15] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p with a signed running borrow. */
    dsword_t borrow = 0;
    for (int k=0; k<16; k++) {
        const word_t p_limb = (k==8) ? limb_mask-1 : limb_mask;
        borrow += a->limb[k];
        borrow -= p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 28;
    }

    /* borrow == 0  : the value was >= p and the limbs already hold x - p.
     * borrow == -1 : the value was <  p and the limbs hold x - p + 2^448,
     *                so p is added back and the 2^448 carries off the top.
     */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const word_t addback = borrow & limb_mask;
    dword_t acc = 0;

    /* Masked add-back of p. */
    for (int k=0; k<16; k++) {
        acc += a->limb[k];
        acc += (k==8) ? (addback&~1) : addback;
        a->limb[k] = acc & limb_mask;
        acc >>= 28;
    }

    assert(word_is_zero(acc + borrow));
}

+ 0
- 40
src/p448/arch_arm_32/f_impl.c View File

@@ -833,43 +833,3 @@ void gf_mulw (
c[0] = accum8 & mask;
c[1] += accum8 >> 28;
}

/**
 * Strongly reduce a 16-limb, radix-2^28 field element in place.
 *
 * Three passes: fold the top limb's overflow into limbs 8 and 0, subtract the
 * modulus limb by limb, then conditionally add the modulus back.
 */
void gf_strong_reduce (gf a) {
    const word_t mask28 = (1ull<<28)-1;

    /* first, clear high */
    a->limb[8] += a->limb[15]>>28;
    a->limb[0] += a->limb[15]>>28;
    a->limb[15] &= mask28;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* total - p: the modulus limb is mask28, except mask28-1 at index 8 */
    dsword_t diff = 0;
    for (int idx=0; idx<16; idx++) {
        const word_t p_limb = mask28 - (idx==8);
        diff += a->limb[idx];
        diff -= p_limb;
        a->limb[idx] = diff & mask28;
        diff >>= 28;
    }

    /* diff is 0 when the input was >= p (limbs now hold x - p), and -1 when
     * it was < p (limbs now hold x - p + 2^448, fixed up below). */
    assert(word_is_zero(diff) | word_is_zero(diff+1));

    const word_t select = diff & mask28;
    dword_t sum = 0;

    /* add p back under the mask; bit 0 is dropped at index 8 only */
    for (int idx=0; idx<16; idx++) {
        const word_t keep = ~(word_t)(idx==8);
        sum += a->limb[idx];
        sum += select & keep;
        a->limb[idx] = sum & mask28;
        sum >>= 28;
    }

    assert(word_is_zero(sum + diff));
}

+ 0
- 91
src/p448/arch_neon/f_impl.c View File

@@ -593,94 +593,3 @@ void gf_mulw (gf_s *__restrict__ cs, const gf as, uint64_t b) {
accum = vshrq_n_u64(accum,28);
vo[1] += vmovn_u64(accum);
}

/* PERF: vectorize? */
/**
 * Strongly reduce a 16-limb, radix-2^28 field element in place, for the
 * layout in which logical limb i is stored at a->limb[LIMBPERM(i)].
 *
 * The fold step addresses storage slots 1, 0 and 15 directly; both loops walk
 * logical indices and go through LIMBPERM.
 */
void gf_strong_reduce (gf a) {
    const word_t limb_mask = (1ull<<28)-1;

    /* Fold the overflow held in slot 15 into slots 0 and 1, then clear it. */
    a->limb[0] += a->limb[15]>>28;
    a->limb[1] += a->limb[15]>>28;
    a->limb[15] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p with a signed running borrow, in logical limb order. */
    dsword_t borrow = 0;
    for (int k=0; k<16; k++) {
        const word_t p_limb = (k==8) ? limb_mask-1 : limb_mask;
        borrow += a->limb[LIMBPERM(k)];
        borrow -= p_limb;
        a->limb[LIMBPERM(k)] = borrow & limb_mask;
        borrow >>= 28;
    }

    /* borrow == 0  : the value was >= p and the limbs already hold x - p.
     * borrow == -1 : the value was <  p and the limbs hold x - p + 2^448,
     *                so p is added back and the 2^448 carries off the top.
     */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const word_t addback = borrow & limb_mask;
    dword_t acc = 0;

    /* Masked add-back of p, again in logical limb order. */
    for (int k=0; k<16; k++) {
        acc += a->limb[LIMBPERM(k)];
        acc += (k==8) ? (addback&~1) : addback;
        a->limb[LIMBPERM(k)] = acc & limb_mask;
        acc >>= 28;
    }

    assert(word_is_zero(acc + borrow));
}

/**
 * Serialize a field element to 56 bytes, little-endian.
 *
 * A copy of x is strongly reduced, then each pair of 28-bit logical limbs
 * (2k, 2k+1) is packed into one 56-bit value and emitted as 7 bytes.
 *
 * @param serial  Output buffer; 56 bytes are written.
 * @param x       Element to serialize; not modified.
 */
void gf_serialize (uint8_t *serial, const gf x) {
    gf red;
    gf_copy(red, x);
    gf_strong_reduce(red);

    uint8_t *out = serial;
    for (int pair=0; pair<8; pair++) {
        const uint64_t low_half = red->limb[LIMBPERM(2*pair)];
        const uint64_t high_half = ((uint64_t)red->limb[LIMBPERM(2*pair+1)])<<28;
        uint64_t packed = low_half + high_half;

        for (int byte=0; byte<7; byte++) {
            *out++ = packed;   /* keeps the low 8 bits */
            packed >>= 8;
        }
        /* Seven bytes must have consumed the whole pair. */
        assert(packed == 0);
    }
}

/**
 * Deserialize 56 little-endian bytes into a 16-limb, radix-2^28 element.
 *
 * Each 7-byte group is split into two 28-bit logical limbs (2k and 2k+1),
 * stored through LIMBPERM.  A range check against p follows.
 *
 * @param x       Output element.
 * @param serial  56 input bytes.
 * @return ~word_is_zero(ge ^ mask), where ge ends up equal to the 28-bit
 *         all-ones mask exactly when the decoded limbs are >= those of p.
 *         Presumably an all-ones mask for an in-range value and zero
 *         otherwise -- assumes word_is_zero yields an all-ones/zero mask.
 */
mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
    for (int pair=0; pair<8; pair++) {
        /* Assemble the 56-bit group from its most significant byte down. */
        uint64_t group = 0;
        for (int byte=6; byte>=0; byte--) {
            group = (group<<8) | ((uint64_t)serial[7*pair+byte]);
        }
        x->limb[LIMBPERM(2*pair)] = group & ((1ull<<28)-1);
        x->limb[LIMBPERM(2*pair+1)] = group >> 28;
    }

    /* Check for reduction.
     *
     * ge is kept equal to 28 ones if and only if the limbs seen so far are
     * >= the matching limbs of p.
     *
     * p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
     */
    uint32_t ge = -1, mask = (1ull<<28)-1;
    for (int k=0; k<8; k++) {
        ge &= x->limb[LIMBPERM(k)];
    }

    /* Logical limb 8 is the 1110 limb of p: keep ge when it equals 1110,
     * force ge to all-ones when it equals 1111. */
    ge = (ge & (x->limb[LIMBPERM(8)] + 1)) | word_is_zero(x->limb[LIMBPERM(8)] ^ mask);

    /* The remaining limbs of p are all 1111. */
    for (int k=9; k<16; k++) {
        ge &= x->limb[LIMBPERM(k)];
    }

    return ~word_is_zero(ge ^ mask);
}

+ 0
- 86
src/p448/arch_ref64/f_impl.c View File

@@ -300,89 +300,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
c[0] += ((uint64_t)(accum1));
}

/**
 * Strongly reduce an 8-limb, radix-2^56 field element in place.
 *
 * The modulus limbs used here are (2^56 - 2) for limb 4 and (2^56 - 1) for
 * every other limb.
 */
void gf_strong_reduce (gf a) {
    const uint64_t limb_mask = (1ull<<56)-1;

    /* Fold the overflow of the top limb into limbs 0 and 4, then clear it. */
    a->limb[0] += a->limb[7]>>56;
    a->limb[4] += a->limb[7]>>56;
    a->limb[7] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p with a signed running borrow. */
    __int128_t borrow = 0;
    for (int k=0; k<8; k++) {
        const uint64_t p_limb = (k==4) ? limb_mask-1 : limb_mask;
        borrow += a->limb[k];
        borrow -= p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 56;
    }

    /* borrow == 0  : the value was >= p and the limbs already hold x - p.
     * borrow == -1 : the value was <  p and the limbs hold x - p + 2^448,
     *                so p is added back and the 2^448 carries off the top.
     */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const uint64_t addback = borrow & limb_mask;
    __uint128_t acc = 0;

    /* Masked add-back of p. */
    for (int k=0; k<8; k++) {
        acc += a->limb[k];
        acc += (k==4) ? (addback&~1) : addback;
        a->limb[k] = acc & limb_mask;
        acc >>= 56;
    }

    assert(word_is_zero(acc + borrow));
}

/**
 * Serialize a field element to 56 bytes, little-endian.
 *
 * A copy of x is strongly reduced, then each of its 8 limbs is emitted as
 * 7 bytes, least significant byte first.
 *
 * @param serial  Output buffer; 56 bytes are written.
 * @param x       Element to serialize; not modified.
 */
void gf_serialize (uint8_t *serial, const gf x) {
    gf red;
    gf_copy(red, x);
    gf_strong_reduce(red);

    uint8_t *out = serial;
    for (int k=0; k<8; k++) {
        /* The limb of the local copy is shifted down as bytes are emitted. */
        for (int byte=0; byte<7; byte++) {
            *out++ = red->limb[k];   /* keeps the low 8 bits */
            red->limb[k] >>= 8;
        }
        /* Seven bytes must have consumed the whole limb. */
        assert(red->limb[k] == 0);
    }
}

/**
 * Deserialize 56 little-endian bytes into an 8-limb, radix-2^56 element.
 *
 * Each 7-byte group becomes one limb.  A range check against p follows.
 *
 * @param x       Output element.
 * @param serial  56 input bytes.
 * @return ~word_is_zero(ge ^ mask), where ge ends up equal to the 56-bit
 *         all-ones mask exactly when the decoded limbs are >= those of p.
 *         Presumably an all-ones mask for an in-range value and zero
 *         otherwise -- assumes word_is_zero yields an all-ones/zero mask.
 */
mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
    for (int k=0; k<8; k++) {
        /* Assemble the limb from its most significant byte down. */
        uint64_t group = 0;
        for (int byte=6; byte>=0; byte--) {
            group = (group<<8) | ((uint64_t)serial[7*k+byte]);
        }
        x->limb[k] = group;
    }

    /* Check for reduction.
     *
     * ge is kept equal to 56 ones if and only if the limbs seen so far are
     * >= the matching limbs of p.
     *
     * p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
     */
    uint64_t ge = -1, mask = (1ull<<56)-1;
    for (int k=0; k<4; k++) {
        ge &= x->limb[k];
    }

    /* Limb 4 is the 1110 limb of p: keep ge when it equals 1110, force ge to
     * all-ones when it equals 1111. */
    ge = (ge & (x->limb[4] + 1)) | word_is_zero(x->limb[4] ^ mask);

    /* The remaining limbs of p are all 1111. */
    for (int k=5; k<8; k++) {
        ge &= x->limb[k];
    }

    return ~word_is_zero(ge ^ mask);
}

+ 0
- 40
src/p448/arch_x86_64/f_impl.c View File

@@ -289,43 +289,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1));
c[0] += ((uint64_t)(accum1));
}

/**
 * Strongly reduce an 8-limb, radix-2^56 field element in place.
 *
 * Three passes: fold the top limb's overflow into limbs 4 and 0, subtract the
 * modulus limb by limb, then conditionally add the modulus back.
 */
void gf_strong_reduce (gf a) {
    const uint64_t mask56 = (1ull<<56)-1;

    /* first, clear high */
    a->limb[4] += a->limb[7]>>56;
    a->limb[0] += a->limb[7]>>56;
    a->limb[7] &= mask56;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* total - p: the modulus limb is mask56, except mask56-1 at index 4 */
    __int128_t diff = 0;
    for (int idx=0; idx<8; idx++) {
        const uint64_t p_limb = mask56 - (uint64_t)(idx==4);
        diff += a->limb[idx];
        diff -= p_limb;
        a->limb[idx] = diff & mask56;
        diff >>= 56;
    }

    /* diff is 0 when the input was >= p (limbs now hold x - p), and -1 when
     * it was < p (limbs now hold x - p + 2^448, fixed up below). */
    assert(word_is_zero(diff) | word_is_zero(diff+1));

    const uint64_t select = diff & mask56;
    __uint128_t sum = 0;

    /* add p back under the mask; bit 0 is dropped at index 4 only */
    for (int idx=0; idx<8; idx++) {
        const uint64_t keep = ~(uint64_t)(idx==4);
        sum += a->limb[idx];
        sum += select & keep;
        a->limb[idx] = sum & mask56;
        sum >>= 56;
    }

    assert(word_is_zero(sum + diff));
}

+ 0
- 40
src/p480/arch_x86_64/f_impl.c View File

@@ -289,43 +289,3 @@ void gf_sqr (gf *__restrict__ cs, const gf *as) {
c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1));
c[0] += ((uint64_t)(accum1));
}

/**
 * Strongly reduce an 8-limb, radix-2^60 field element in place.
 *
 * The modulus limbs used here are (2^60 - 2) for limb 4 and (2^60 - 1) for
 * every other limb.
 */
void gf_strong_reduce (gf *a) {
    const uint64_t limb_mask = (1ull<<60)-1;
    /* Modulus limbs, least significant first. */
    const uint64_t p_limbs[8] = {
        limb_mask, limb_mask, limb_mask, limb_mask,
        limb_mask-1, limb_mask, limb_mask, limb_mask
    };

    /* Fold the overflow of the top limb into limbs 4 and 0, then clear it. */
    a->limb[4] += a->limb[7]>>60;
    a->limb[0] += a->limb[7]>>60;
    a->limb[7] &= limb_mask;

    /* now the total is less than 2^480 - 2^(480-60) + 2^(480-60+8) < 2p */

    /* Subtract p with a signed running borrow. */
    __int128_t borrow = 0;
    for (int k=0; k<8; k++) {
        borrow += a->limb[k];
        borrow -= p_limbs[k];
        a->limb[k] = borrow & limb_mask;
        borrow >>= 60;
    }

    /* borrow == 0  : the value was >= p and the limbs already hold x - p.
     * borrow == -1 : the value was <  p and the limbs hold x - p + 2^480,
     *                so p is added back and the 2^480 carries off the top.
     */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const uint64_t addback = borrow & limb_mask;
    __uint128_t acc = 0;

    /* addback is confined to the low 60 bits, so masking it with a modulus
     * limb gives addback&~1 for limb 4 and addback itself for the others. */
    for (int k=0; k<8; k++) {
        acc += a->limb[k];
        acc += addback & p_limbs[k];
        a->limb[k] = acc & limb_mask;
        acc >>= 60;
    }

    assert(word_is_zero(acc + borrow));
}

+ 0
- 38
src/p521/arch_ref64/f_impl.c View File

@@ -282,41 +282,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
c[8] += accum1 >> 58;
}

/**
 * Strongly reduce a 9-limb field element in place: limbs 0..7 are 58 bits
 * wide and limb 8 is 57 bits wide.
 *
 * The modulus limbs used here are all-ones at each limb's own width.
 */
void gf_strong_reduce (gf a) {
    const uint64_t low_mask = (1ull<<58)-1;   /* limbs 0..7 */
    const uint64_t top_mask = (1ull<<57)-1;   /* limb 8 */

    /* Seed the borrow with the overflow of the top limb, then clear it. */
    __int128_t borrow = a->limb[8]>>57;
    a->limb[8] &= top_mask;

    /* now the total is less than 2p */

    /* Subtract p: eight 58-bit limbs first ... */
    for (int k=0; k<8; k++) {
        borrow += a->limb[k];
        borrow -= low_mask;
        a->limb[k] = borrow & low_mask;
        borrow >>= 58;
    }
    /* ... then the 57-bit top limb. */
    borrow += a->limb[8];
    borrow -= top_mask;
    a->limb[8] = borrow & top_mask;
    borrow >>= 57;

    /* borrow == 0  : the value was >= p and the limbs already hold x - p.
     * borrow == -1 : the value was <  p and the limbs hold x - p + 2^521,
     *                so p is added back and the 2^521 carries off the top.
     */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const uint64_t addback = borrow & low_mask;
    __uint128_t acc = 0;

    /* Masked add-back of p, with the same split between low and top limbs. */
    for (int k=0; k<8; k++) {
        acc += a->limb[k];
        acc += addback;
        a->limb[k] = acc & low_mask;
        acc >>= 58;
    }
    acc += a->limb[8];
    acc += addback>>1;
    a->limb[8] = acc & (low_mask>>1);
    acc >>= 57;

    assert(word_is_zero(acc + borrow));
}

+ 0
- 41
src/p521/arch_x86_64_r12/f_impl.c View File

@@ -348,44 +348,3 @@ void gf_mulw (gf *__restrict__ cs, const gf *as, uint64_t b) {
c[3] = c[7] = c[11] = 0;
}


/**
 * Strongly reduce a 9-limb field element in place, for the layout in which
 * logical limb i is stored at a->limb[LIMBPERM(i)].  Logical limbs 0..7 are
 * 58 bits wide and logical limb 8 is 57 bits wide.
 *
 * Storage slots 3, 7 and 11 are zeroed at the end.
 */
void gf_strong_reduce (gf *a) {
    const uint64_t low_mask = (1ull<<58)-1;   /* logical limbs 0..7 */
    const uint64_t top_mask = (1ull<<57)-1;   /* logical limb 8 */

    /* Seed the borrow with the overflow of the top limb, then clear it. */
    __int128_t borrow = a->limb[LIMBPERM(8)]>>57;
    a->limb[LIMBPERM(8)] &= top_mask;

    /* now the total is less than 2p */

    /* Subtract p: eight 58-bit limbs first ... */
    for (int k=0; k<8; k++) {
        borrow += a->limb[LIMBPERM(k)];
        borrow -= low_mask;
        a->limb[LIMBPERM(k)] = borrow & low_mask;
        borrow >>= 58;
    }
    /* ... then the 57-bit top limb. */
    borrow += a->limb[LIMBPERM(8)];
    borrow -= top_mask;
    a->limb[LIMBPERM(8)] = borrow & top_mask;
    borrow >>= 57;

    /* borrow == 0  : the value was >= p and the limbs already hold x - p.
     * borrow == -1 : the value was <  p and the limbs hold x - p + 2^521,
     *                so p is added back and the 2^521 carries off the top.
     */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const uint64_t addback = borrow & low_mask;
    __uint128_t acc = 0;

    /* Masked add-back of p, with the same split between low and top limbs. */
    for (int k=0; k<8; k++) {
        acc += a->limb[LIMBPERM(k)];
        acc += addback;
        a->limb[LIMBPERM(k)] = acc & low_mask;
        acc >>= 58;
    }
    acc += a->limb[LIMBPERM(8)];
    acc += addback>>1;
    a->limb[LIMBPERM(8)] = acc & (low_mask>>1);
    acc >>= 57;

    assert(word_is_zero(acc + borrow));

    /* Clear the three storage slots that this layout zeroes explicitly. */
    a->limb[3] = a->limb[7] = a->limb[11] = 0;
}

Loading…
Cancel
Save