@@ -89,6 +89,9 @@ const size_t API_NS2(alignof,precomputed_s) = 32; | |||
/* Run `op` once for every limb index i = 0 .. NLIMBS-1.
 * The index variable is declared inside the macro's own brace scope. */
#define FOR_LIMB(i,op) { \
    unsigned int i; \
    for (i=0; i<NLIMBS; i++) { op; } \
}
/* Same iteration as FOR_LIMB, with the loop prefixed by the UNROLL annotation. */
#define FOR_LIMB_U(i,op) { \
    unsigned int i; \
    UNROLL for (i=0; i<NLIMBS; i++) { op; } \
}
/* FUTURE: move this code from per-curve to per-field header | |||
* (like f_arithmetic.c but same for all fields) | |||
*/ | |||
void gf_serialize (uint8_t serial[SER_BYTES], const gf x) { | |||
gf red; | |||
gf_copy(red, x); | |||
@@ -126,6 +129,39 @@ mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) { | |||
return word_is_zero(buffer) & ~word_is_zero(scarry); | |||
} | |||
/**
 * Reduce a field element, in place, to its canonical representative
 * modulo the field prime (MODULUS).
 *
 * Limbs are visited in logical order i = 0 .. NLIMBS-1; LIMBPERM(i) gives
 * the storage slot of logical limb i.  Every read AND every write must go
 * through the same permuted slot, otherwise a non-identity permutation
 * scatters the result into the wrong limbs.
 *
 * @param a  Element to reduce; overwritten with the reduced value.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high */
    gf_weak_reduce(a); /* PERF: only really need one step of this, but whatevs */

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<NLIMBS; i++) {
        const unsigned int k = LIMBPERM(i);
        scarry = scarry + a->limb[k] - MODULUS->limb[k];
        a->limb[k] = scarry & LIMB_MASK(k);
        scarry >>= LIMB_PLACE_VALUE(k);
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^bits,
     * where 2^bits is the total place value of all limbs.
     * so let's add back in p.  will carry back off the top for 2^bits.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* All-zero or all-one word: selects whether p is added back. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back */
    for (unsigned int i=0; i<NLIMBS; i++) {
        const unsigned int k = LIMBPERM(i);
        carry = carry + a->limb[k] + (scarry_0 & MODULUS->limb[k]);
        a->limb[k] = carry & LIMB_MASK(k);
        carry >>= LIMB_PLACE_VALUE(k);
    }

    /* The carry out of the top limb must cancel the borrow exactly. */
    assert(word_is_zero(carry + scarry_0));
}
/** Constant time, x = is_z ? z : y */ | |||
static INLINE void | |||
cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { | |||
@@ -90,38 +90,4 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||
gf_mul(cs,as,as); // PERF | |||
} | |||
/**
 * Reduce a ten-limb field element, in place, to its canonical
 * representative modulo p (the limbs of p are read from MODULUS).
 *
 * @param a  Element to reduce; overwritten with the reduced value.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high: fold the bits above bit 25 of the top limb back
     * into limb 0, multiplied by 19. */
    a->limb[0] += (a->limb[9]>>25)*19;
    a->limb[9] &= LIMB_MASK(9);

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<10; i++) {
        scarry = scarry + a->limb[i] - MODULUS->limb[i];
        a->limb[i] = scarry & LIMB_MASK(i);
        scarry >>= LIMB_PLACE_VALUE(i);
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^255
     * so let's add back in p.  will carry back off the top for 2^255.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* All-zero or all-one word: selects whether p is added back. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back; every limb must be visited exactly once, so the loop
     * header is the only place the index advances. */
    for (unsigned int i=0; i<10; i++) {
        carry = carry + a->limb[i] + (scarry_0 & MODULUS->limb[i]);
        a->limb[i] = carry & LIMB_MASK(i);
        carry >>= LIMB_PLACE_VALUE(i);
    }

    /* The carry out of the top limb must cancel the borrow exactly. */
    assert(word_is_zero(carry + scarry_0));
}
@@ -59,41 +59,3 @@ void gf_mulw (gf_s *__restrict__ cs, const gf as, uint64_t b) { | |||
/* Square a field element: cs = as * as. */
void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    /* PERF: no dedicated squaring path; delegates to the general multiply. */
    gf_mul(cs, as, as);
}
/**
 * Reduce a, in place, to its canonical representative modulo
 * p = 2^255 - 19, held as five 51-bit limbs.
 */
void gf_strong_reduce (gf a) {
    const uint64_t limb_mask = (1ull<<51)-1;

    /* Fold whatever sits above bit 51 of the top limb back into limb 0,
     * using 2^255 = 19 (mod p). */
    a->limb[0] += (a->limb[4]>>51)*19;
    a->limb[4] &= limb_mask;

    /* The value is now below 2p.  Subtract p once, limb by limb, keeping a
     * signed running borrow.  p is (mask-18, mask, mask, mask, mask). */
    __int128_t borrow = 0;
    for (int k = 0; k < 5; k++) {
        const uint64_t p_limb = (k==0) ? limb_mask-18 : limb_mask;
        borrow += a->limb[k];
        borrow -= p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 51;
    }

    /* borrow == 0: the value was >= p and is now fully reduced.
     * borrow == -1: the value was < p and now holds x - p + 2^255,
     * so p has to be added back (the 2^255 carries off the top). */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    /* Either zero or a full 51-bit mask; doubles as the limbs of p to add. */
    const uint64_t addback = borrow & limb_mask;
    __uint128_t acc = 0;
    for (int k = 0; k < 5; k++) {
        const uint64_t p_limb = (k==0) ? (addback&~18) : addback;
        acc += a->limb[k];
        acc += p_limb;
        a->limb[k] = acc & limb_mask;
        acc >>= 51;
    }

    /* The final carry must cancel the earlier borrow. */
    assert(word_is_zero(acc + borrow));
}
@@ -170,41 +170,3 @@ void gf_mulw (gf_s *__restrict__ cs, const gf as, uint64_t b) { | |||
c[0] = accum & mask; | |||
c[1] = c1 + shrld(accum,51); | |||
} | |||
/**
 * Canonicalise a in place modulo p = 2^255 - 19 (five 51-bit limbs).
 */
void gf_strong_reduce (gf a) {
    const uint64_t m51 = (1ull<<51)-1;
    int idx;

    /* Wrap the excess of the top limb around: 2^255 = 19 (mod p). */
    a->limb[0] += (a->limb[4]>>51)*19;
    a->limb[4] &= m51;

    /* Value < 2p from here on.  Compute value - p with a signed borrow;
     * the low limb of p is mask-18, all others are the full mask. */
    __int128_t diff = 0;
    idx = 0;
    while (idx < 5) {
        diff += a->limb[idx];
        diff -= (idx==0) ? m51-18 : m51;
        a->limb[idx] = diff & m51;
        diff >>= 51;
        idx++;
    }

    /* diff is 0 when the input was >= p (already reduced now) and -1 when
     * it was < p (limbs hold x - p + 2^255, so p must be restored). */
    assert(word_is_zero(diff) | word_is_zero(diff+1));

    const uint64_t restore = diff & m51;
    __uint128_t sum = 0;
    idx = 0;
    while (idx < 5) {
        sum += a->limb[idx];
        sum += (idx==0) ? (restore&~18) : restore;
        a->limb[idx] = sum & m51;
        sum >>= 51;
        idx++;
    }

    /* Carry off the top cancels the borrow. */
    assert(word_is_zero(sum + diff));
}
@@ -103,42 +103,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||
gf_mul(cs,as,as); /* PERF */ | |||
} | |||
/**
 * Reduce a, in place, to its canonical representative modulo
 * p = 2^448 - 2^224 - 1, held as sixteen 28-bit limbs.
 */
void gf_strong_reduce (gf a) {
    const word_t limb_mask = (1ull<<28)-1;

    /* Fold the overflow of the top limb back in: 2^448 = 2^224 + 1 (mod p),
     * so it lands in limb 0 and limb 8. */
    a->limb[0] += a->limb[15]>>28;
    a->limb[8] += a->limb[15]>>28;
    a->limb[15] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p once with a signed running borrow.  Every limb of p is the
     * full mask except limb 8, which is mask-1. */
    dsword_t borrow = 0;
    for (int k = 0; k < 16; k++) {
        const word_t p_limb = (k==8) ? limb_mask-1 : limb_mask;
        borrow += a->limb[k];
        borrow -= p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 28;
    }

    /* borrow == 0: value was >= p, now reduced.
     * borrow == -1: value was < p, limbs hold x - p + 2^448; add p back and
     * let the 2^448 carry off the top. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const word_t addback = borrow & limb_mask;
    dword_t acc = 0;
    for (int k = 0; k < 16; k++) {
        const word_t p_limb = (k==8) ? (addback&~1) : addback;
        acc += a->limb[k];
        acc += p_limb;
        a->limb[k] = acc & limb_mask;
        acc >>= 28;
    }

    /* The final carry must cancel the earlier borrow. */
    assert(word_is_zero(acc + borrow));
}
@@ -833,43 +833,3 @@ void gf_mulw ( | |||
c[0] = accum8 & mask; | |||
c[1] += accum8 >> 28; | |||
} | |||
/**
 * Canonicalise a in place modulo p = 2^448 - 2^224 - 1
 * (sixteen 28-bit limbs).
 */
void gf_strong_reduce (gf a) {
    const word_t m28 = (1ull<<28)-1;
    int idx;

    /* Wrap the excess above bit 28 of limb 15 into limbs 0 and 8
     * (2^448 = 2^224 + 1 mod p). */
    a->limb[0] += a->limb[15]>>28;
    a->limb[8] += a->limb[15]>>28;
    a->limb[15] &= m28;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* value - p, tracked with a signed borrow; limb 8 of p is mask-1. */
    dsword_t diff = 0;
    idx = 0;
    while (idx < 16) {
        diff += a->limb[idx];
        diff -= (idx==8) ? m28-1 : m28;
        a->limb[idx] = diff & m28;
        diff >>= 28;
        idx++;
    }

    /* diff is 0 (input >= p, done) or -1 (input < p, limbs hold
     * x - p + 2^448 and p must be restored). */
    assert(word_is_zero(diff) | word_is_zero(diff+1));

    const word_t restore = diff & m28;
    dword_t sum = 0;
    idx = 0;
    while (idx < 16) {
        sum += a->limb[idx];
        sum += (idx==8) ? (restore&~1) : restore;
        a->limb[idx] = sum & m28;
        sum >>= 28;
        idx++;
    }

    /* Carry off the top cancels the borrow. */
    assert(word_is_zero(sum + diff));
}
@@ -593,94 +593,3 @@ void gf_mulw (gf_s *__restrict__ cs, const gf as, uint64_t b) { | |||
accum = vshrq_n_u64(accum,28); | |||
vo[1] += vmovn_u64(accum); | |||
} | |||
/* PERF: vectorize? */ | |||
/**
 * Reduce a, in place, to its canonical representative modulo
 * p = 2^448 - 2^224 - 1.  The sixteen 28-bit limbs are walked in logical
 * order, with LIMBPERM(k) giving the storage slot of logical limb k.
 */
void gf_strong_reduce (gf a) {
    const word_t limb_mask = (1ull<<28)-1;

    /* Fold the overflow of slot 15 into slots 0 and 1.
     * NOTE(review): slot 1 is presumably where LIMBPERM places logical
     * limb 8 -- confirm against the LIMBPERM definition. */
    a->limb[0] += a->limb[15]>>28;
    a->limb[1] += a->limb[15]>>28;
    a->limb[15] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p once with a signed running borrow; logical limb 8 of p is
     * mask-1, all others are the full mask. */
    dsword_t borrow = 0;
    for (int k = 0; k < 16; k++) {
        const int slot = LIMBPERM(k);
        const word_t p_limb = (k==8) ? limb_mask-1 : limb_mask;
        borrow += a->limb[slot];
        borrow -= p_limb;
        a->limb[slot] = borrow & limb_mask;
        borrow >>= 28;
    }

    /* borrow == 0: value was >= p, now reduced.
     * borrow == -1: value was < p, limbs hold x - p + 2^448; add p back and
     * let the 2^448 carry off the top. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const word_t addback = borrow & limb_mask;
    dword_t acc = 0;
    for (int k = 0; k < 16; k++) {
        const int slot = LIMBPERM(k);
        const word_t p_limb = (k==8) ? (addback&~1) : addback;
        acc += a->limb[slot];
        acc += p_limb;
        a->limb[slot] = acc & limb_mask;
        acc >>= 28;
    }

    /* The final carry must cancel the earlier borrow. */
    assert(word_is_zero(acc + borrow));
}
void gf_serialize (uint8_t *serial, const gf x) { | |||
int i,j; | |||
gf red; | |||
gf_copy(red, x); | |||
gf_strong_reduce(red); | |||
for (i=0; i<8; i++) { | |||
uint64_t limb = red->limb[LIMBPERM(2*i)] + (((uint64_t)red->limb[LIMBPERM(2*i+1)])<<28); | |||
for (j=0; j<7; j++) { | |||
serial[7*i+j] = limb; | |||
limb >>= 8; | |||
} | |||
assert(limb == 0); | |||
} | |||
} | |||
/**
 * Decode 56 little-endian bytes into x and report whether the encoded
 * value was already reduced.
 *
 * Each 7-byte group becomes two logical 28-bit limbs, stored through
 * LIMBPERM.
 *
 * @param x       Receives the decoded limbs.
 * @param serial  56 input bytes.
 * @return ~word_is_zero(ge ^ mask), where ge equals mask exactly when the
 *         decoded value is >= p.  NOTE(review): word_is_zero presumably
 *         yields an all-ones mask for a zero argument, which would make
 *         the result all-ones for values < p -- confirm.
 */
mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
    for (int pair = 0; pair < 8; pair++) {
        /* Assemble 56 bits, most significant byte first. */
        uint64_t packed = 0;
        for (int b = 6; b >= 0; b--) {
            packed = (packed<<8) | (uint64_t)serial[7*pair+b];
        }
        x->limb[LIMBPERM(2*pair)] = packed & ((1ull<<28)-1);
        x->limb[LIMBPERM(2*pair+1)] = packed >> 28;
    }

    /* Check for reduction.
     *
     * ge ends up as 28 ones if and only if the limbs seen so far are >=
     * the corresponding limbs of p, where
     * p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111).
     */
    uint32_t ge = -1, mask = (1ull<<28)-1;
    for (int k = 0; k < 16; k++) {
        if (k == 8) {
            /* Limb 8 of p is 1110: keep ge if this limb is 1110 (its
             * +1 is then all ones), or force ge on if it is 1111. */
            ge = (ge & (x->limb[LIMBPERM(8)] + 1)) | word_is_zero(x->limb[LIMBPERM(8)] ^ mask);
        } else {
            /* Every other limb of p is all ones: plain AND propagation. */
            ge &= x->limb[LIMBPERM(k)];
        }
    }

    return ~word_is_zero(ge ^ mask);
}
@@ -300,89 +300,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||
c[0] += ((uint64_t)(accum1)); | |||
} | |||
/**
 * Reduce a, in place, to its canonical representative modulo
 * p = 2^448 - 2^224 - 1, held as eight 56-bit limbs.
 */
void gf_strong_reduce (gf a) {
    const uint64_t limb_mask = (1ull<<56)-1;

    /* Fold the overflow of the top limb back in: 2^448 = 2^224 + 1 (mod p),
     * so it lands in limb 0 and limb 4. */
    a->limb[0] += a->limb[7]>>56;
    a->limb[4] += a->limb[7]>>56;
    a->limb[7] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p once with a signed running borrow.  Every limb of p is the
     * full mask except limb 4, which is mask-1. */
    __int128_t borrow = 0;
    for (int k = 0; k < 8; k++) {
        const uint64_t p_limb = (k==4) ? limb_mask-1 : limb_mask;
        borrow += a->limb[k];
        borrow -= p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 56;
    }

    /* borrow == 0: value was >= p, now reduced.
     * borrow == -1: value was < p, limbs hold x - p + 2^448; add p back and
     * let the 2^448 carry off the top. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const uint64_t addback = borrow & limb_mask;
    __uint128_t acc = 0;
    for (int k = 0; k < 8; k++) {
        const uint64_t p_limb = (k==4) ? (addback&~1) : addback;
        acc += a->limb[k];
        acc += p_limb;
        a->limb[k] = acc & limb_mask;
        acc >>= 56;
    }

    /* The final carry must cancel the earlier borrow. */
    assert(word_is_zero(acc + borrow));
}
void gf_serialize (uint8_t *serial, const gf x) { | |||
int i,j; | |||
gf red; | |||
gf_copy(red, x); | |||
gf_strong_reduce(red); | |||
for (i=0; i<8; i++) { | |||
for (j=0; j<7; j++) { | |||
serial[7*i+j] = red->limb[i]; | |||
red->limb[i] >>= 8; | |||
} | |||
assert(red->limb[i] == 0); | |||
} | |||
} | |||
/**
 * Decode 56 little-endian bytes into the eight 56-bit limbs of x and
 * report whether the encoded value was already reduced.
 *
 * @param x       Receives the decoded limbs.
 * @param serial  56 input bytes.
 * @return ~word_is_zero(ge ^ mask), where ge equals mask exactly when the
 *         decoded value is >= p.  NOTE(review): word_is_zero presumably
 *         yields an all-ones mask for a zero argument, which would make
 *         the result all-ones for values < p -- confirm.
 */
mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
    for (int k = 0; k < 8; k++) {
        /* Assemble one 56-bit limb, most significant byte first. */
        uint64_t packed = 0;
        for (int b = 6; b >= 0; b--) {
            packed = (packed<<8) | (uint64_t)serial[7*k+b];
        }
        x->limb[k] = packed;
    }

    /* Check for reduction.
     *
     * ge ends up as 56 ones if and only if the limbs seen so far are >=
     * the corresponding limbs of p, where
     * p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111).
     */
    uint64_t ge = -1, mask = (1ull<<56)-1;
    for (int k = 0; k < 8; k++) {
        if (k == 4) {
            /* Limb 4 is the one holding the 1110 group: keep ge if that
             * limb is mask-1 (its +1 is then all ones), or force ge on if
             * it is the full mask. */
            ge = (ge & (x->limb[4] + 1)) | word_is_zero(x->limb[4] ^ mask);
        } else {
            /* Every other limb of p is all ones: plain AND propagation. */
            ge &= x->limb[k];
        }
    }

    return ~word_is_zero(ge ^ mask);
}
@@ -289,43 +289,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||
c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | |||
c[0] += ((uint64_t)(accum1)); | |||
} | |||
/**
 * Canonicalise a in place modulo p = 2^448 - 2^224 - 1
 * (eight 56-bit limbs).
 */
void gf_strong_reduce (gf a) {
    const uint64_t m56 = (1ull<<56)-1;
    int idx;

    /* Wrap the excess above bit 56 of limb 7 into limbs 0 and 4
     * (2^448 = 2^224 + 1 mod p). */
    a->limb[0] += a->limb[7]>>56;
    a->limb[4] += a->limb[7]>>56;
    a->limb[7] &= m56;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* value - p, tracked with a signed borrow; limb 4 of p is mask-1. */
    __int128_t diff = 0;
    idx = 0;
    while (idx < 8) {
        diff += a->limb[idx];
        diff -= (idx==4) ? m56-1 : m56;
        a->limb[idx] = diff & m56;
        diff >>= 56;
        idx++;
    }

    /* diff is 0 (input >= p, done) or -1 (input < p, limbs hold
     * x - p + 2^448 and p must be restored). */
    assert(word_is_zero(diff) | word_is_zero(diff+1));

    const uint64_t restore = diff & m56;
    __uint128_t sum = 0;
    idx = 0;
    while (idx < 8) {
        sum += a->limb[idx];
        sum += (idx==4) ? (restore&~1) : restore;
        a->limb[idx] = sum & m56;
        sum >>= 56;
        idx++;
    }

    /* Carry off the top cancels the borrow. */
    assert(word_is_zero(sum + diff));
}
@@ -289,43 +289,3 @@ void gf_sqr (gf *__restrict__ cs, const gf *as) { | |||
c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | |||
c[0] += ((uint64_t)(accum1)); | |||
} | |||
/**
 * Reduce *a, in place, to its canonical representative modulo
 * p = 2^480 - 2^240 - 1, held as eight 60-bit limbs.
 */
void gf_strong_reduce (gf *a) {
    const uint64_t limb_mask = (1ull<<60)-1;

    /* Fold the overflow of the top limb back in: 2^480 = 2^240 + 1 (mod p),
     * so it lands in limb 0 and limb 4. */
    a->limb[0] += a->limb[7]>>60;
    a->limb[4] += a->limb[7]>>60;
    a->limb[7] &= limb_mask;

    /* now the total is less than 2^480 - 2^(480-60) + 2^(480-60+8) < 2p */

    /* Subtract p once with a signed running borrow.  Every limb of p is the
     * full mask except limb 4, which is mask-1. */
    __int128_t borrow = 0;
    for (int k = 0; k < 8; k++) {
        const uint64_t p_limb = (k==4) ? limb_mask-1 : limb_mask;
        borrow += a->limb[k];
        borrow -= p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 60;
    }

    /* borrow == 0: value was >= p, now reduced.
     * borrow == -1: value was < p, limbs hold x - p + 2^480; add p back and
     * let the 2^480 carry off the top. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    const uint64_t addback = borrow & limb_mask;
    __uint128_t acc = 0;
    for (int k = 0; k < 8; k++) {
        const uint64_t p_limb = (k==4) ? (addback&~1) : addback;
        acc += a->limb[k];
        acc += p_limb;
        a->limb[k] = acc & limb_mask;
        acc >>= 60;
    }

    /* The final carry must cancel the earlier borrow. */
    assert(word_is_zero(acc + borrow));
}
@@ -282,41 +282,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||
c[8] += accum1 >> 58; | |||
} | |||
/**
 * Reduce a, in place, to its canonical representative modulo
 * p = 2^521 - 1, held as eight 58-bit limbs followed by one 57-bit limb.
 */
void gf_strong_reduce (gf a) {
    const uint64_t mask58 = (1ull<<58)-1, mask57 = (1ull<<57)-1;

    /* Anything above bit 57 of the top limb is a multiple of 2^521, which
     * is 1 (mod p): seed the running borrow with it instead of adding it
     * to limb 0 separately. */
    __int128_t borrow = a->limb[8]>>57;
    a->limb[8] &= mask57;

    /* now the total is less than 2p */

    /* Subtract p (all ones in every limb) with a signed running borrow. */
    for (int k = 0; k < 9; k++) {
        const int is_top = (k==8);
        const uint64_t m = is_top ? mask57 : mask58;
        borrow += a->limb[k];
        borrow -= m;
        a->limb[k] = borrow & m;
        borrow >>= is_top ? 57 : 58;
    }

    /* borrow == 0: value was >= p, now reduced.
     * borrow == -1: value was < p, limbs hold x - p + 2^521; add p back and
     * let the 2^521 carry off the top. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    /* Zero or a full 58-bit mask; halved for the 57-bit top limb. */
    const uint64_t addback = borrow & mask58;
    __uint128_t acc = 0;
    for (int k = 0; k < 9; k++) {
        const int is_top = (k==8);
        acc += a->limb[k];
        acc += is_top ? (addback>>1) : addback;
        a->limb[k] = acc & (is_top ? mask57 : mask58);
        acc >>= is_top ? 57 : 58;
    }

    /* The final carry must cancel the earlier borrow. */
    assert(word_is_zero(acc + borrow));
}
@@ -348,44 +348,3 @@ void gf_mulw (gf *__restrict__ cs, const gf *as, uint64_t b) { | |||
c[3] = c[7] = c[11] = 0; | |||
} | |||
/**
 * Reduce *a, in place, to its canonical representative modulo
 * p = 2^521 - 1: eight 58-bit logical limbs followed by one 57-bit limb,
 * with LIMBPERM(k) giving the storage slot of logical limb k.
 */
void gf_strong_reduce (gf *a) {
    const uint64_t mask58 = (1ull<<58)-1, mask57 = (1ull<<57)-1;

    /* Anything above bit 57 of the top logical limb is a multiple of
     * 2^521, which is 1 (mod p): seed the running borrow with it. */
    __int128_t borrow = a->limb[LIMBPERM(8)]>>57;
    a->limb[LIMBPERM(8)] &= mask57;

    /* now the total is less than 2p */

    /* Subtract p (all ones in every limb) with a signed running borrow. */
    for (int k = 0; k < 9; k++) {
        const int is_top = (k==8);
        const uint64_t m = is_top ? mask57 : mask58;
        borrow += a->limb[LIMBPERM(k)];
        borrow -= m;
        a->limb[LIMBPERM(k)] = borrow & m;
        borrow >>= is_top ? 57 : 58;
    }

    /* borrow == 0: value was >= p, now reduced.
     * borrow == -1: value was < p, limbs hold x - p + 2^521; add p back and
     * let the 2^521 carry off the top. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    /* Zero or a full 58-bit mask; halved for the 57-bit top limb. */
    const uint64_t addback = borrow & mask58;
    __uint128_t acc = 0;
    for (int k = 0; k < 9; k++) {
        const int is_top = (k==8);
        acc += a->limb[LIMBPERM(k)];
        acc += is_top ? (addback>>1) : addback;
        a->limb[LIMBPERM(k)] = acc & (is_top ? mask57 : mask58);
        acc >>= is_top ? 57 : 58;
    }

    /* The final carry must cancel the earlier borrow. */
    assert(word_is_zero(acc + borrow));

    /* Zero storage slots 3, 7 and 11.
     * NOTE(review): presumably padding slots that LIMBPERM never maps a
     * logical limb onto -- confirm against the layout definition. */
    a->limb[3] = a->limb[7] = a->limb[11] = 0;
}