@@ -89,6 +89,9 @@ const size_t API_NS2(alignof,precomputed_s) = 32; | |||||
/* Run `op` once for each limb index `i` in 0 .. NLIMBS-1.
 * `i` is declared inside the expansion's own brace scope. */
#define FOR_LIMB(i,op) {                        \
    unsigned int i=0;                           \
    for (i=0; i<NLIMBS; i++) { op; }            \
}

/* Same as FOR_LIMB, but the loop is prefixed with UNROLL
 * (defined elsewhere; presumably an unrolling hint for the compiler). */
#define FOR_LIMB_U(i,op) {                      \
    unsigned int i=0;                           \
    UNROLL for (i=0; i<NLIMBS; i++) { op; }     \
}
/* FUTURE: move this code from per-curve to per-field header | |||||
* (like f_arithmetic.c but same for all fields) | |||||
*/ | |||||
void gf_serialize (uint8_t serial[SER_BYTES], const gf x) { | void gf_serialize (uint8_t serial[SER_BYTES], const gf x) { | ||||
gf red; | gf red; | ||||
gf_copy(red, x); | gf_copy(red, x); | ||||
@@ -126,6 +129,39 @@ mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) { | |||||
return word_is_zero(buffer) & ~word_is_zero(scarry); | return word_is_zero(buffer) & ~word_is_zero(scarry); | ||||
} | } | ||||
/**
 * Reduce a field element in place to its canonical representative.
 *
 * Works in three steps: a weak reduction, a limb-wise subtraction of
 * MODULUS, and a masked add-back of MODULUS when the subtraction went
 * negative.  The add-back is selected with a mask rather than a branch.
 *
 * Limbs are visited in the order given by LIMBPERM(i), and every read and
 * write of a->limb uses that same permuted index, so the routine is also
 * correct for layouts where LIMBPERM is not the identity.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high */
    gf_weak_reduce(a); /* PERF: only really need one step of this, but whatevs */

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<NLIMBS; i++) {
        scarry = scarry + a->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)];
        /* Store to the same permuted slot that was just read. */
        a->limb[LIMBPERM(i)] = scarry & LIMB_MASK(LIMBPERM(i));
        scarry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^k,
     * where 2^k is the total place value of all limbs
     * so let's add back in p.  will carry back off the top for 2^k.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* All-zero or all-one word used to select whether p is added back. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back */
    for (unsigned int i=0; i<NLIMBS; i++) {
        carry = carry + a->limb[LIMBPERM(i)] + (scarry_0 & MODULUS->limb[LIMBPERM(i)]);
        a->limb[LIMBPERM(i)] = carry & LIMB_MASK(LIMBPERM(i));
        carry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
    }

    assert(word_is_zero(carry + scarry_0));
}
/** Constant time, x = is_z ? z : y */ | /** Constant time, x = is_z ? z : y */ | ||||
static INLINE void | static INLINE void | ||||
cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { | cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) { | ||||
@@ -90,38 +90,4 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||||
gf_mul(cs,as,as); // PERF | gf_mul(cs,as,as); // PERF | ||||
} | } | ||||
/**
 * Reduce a 10-limb field element in place to its canonical representative.
 *
 * The bits of limb 9 above bit 25 are folded into limb 0 with a factor of
 * 19, MODULUS is subtracted limb by limb, and MODULUS is then added back
 * under a mask if the subtraction went negative.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high */
    a->limb[0] += (a->limb[9]>>25)*19;
    a->limb[9] &= LIMB_MASK(9);

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<10; i++) {
        scarry = scarry + a->limb[i] - MODULUS->limb[i];
        a->limb[i] = scarry & LIMB_MASK(i);
        scarry >>= LIMB_PLACE_VALUE(i);
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^255
     * so let's add back in p.  will carry back off the top for 2^255.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* All-zero or all-one word used to select whether p is added back. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back: every limb must be visited exactly once, so the loop
     * header is the only place the index is advanced. */
    for (unsigned int i=0; i<10; i++) {
        carry = carry + a->limb[i] + (scarry_0 & MODULUS->limb[i]);
        a->limb[i] = carry & LIMB_MASK(i);
        carry >>= LIMB_PLACE_VALUE(i);
    }

    assert(word_is_zero(carry + scarry_0));
}
@@ -59,41 +59,3 @@ void gf_mulw (gf_s *__restrict__ cs, const gf as, uint64_t b) { | |||||
/**
 * Square a field element: cs = as * as.
 *
 * @param cs  Output element.
 * @param as  Input element.
 */
void gf_sqr (gf_s *__restrict__ cs, const gf as) {
    /* PERF: squaring is done by the general multiply with both operands equal. */
    gf_mul(cs, as, as);
}
/**
 * Reduce a 5-limb, 51-bits-per-limb field element in place to its canonical
 * representative modulo p = 2^255 - 19.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf a) {
    uint64_t limb_mask = (1ull<<51)-1;
    int k;

    /* Fold the bits above bit 51 of the top limb into limb 0, times 19. */
    a->limb[0] += (a->limb[4]>>51)*19;
    a->limb[4] &= limb_mask;

    /* The value is now below 2p.  Subtract p limb by limb; the signed
     * accumulator carries the borrow from one limb to the next. */
    __int128_t borrow = 0;
    for (k=0; k<5; k++) {
        uint64_t p_limb = (k==0) ? limb_mask-18 : limb_mask;
        borrow = borrow + a->limb[k] - p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 51;
    }

    /* borrow == 0:  the value was >= p and a now holds value - p.
     * borrow == -1: the value was < p and a holds value - p + 2^255;
     *               adding p back carries out of the top and restores it. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    uint64_t addback = borrow & limb_mask;
    __uint128_t carry = 0;

    /* Add p (masked by addback) back in. */
    for (k=0; k<5; k++) {
        uint64_t term = (k==0) ? (addback&~18) : addback;
        carry = carry + a->limb[k] + term;
        a->limb[k] = carry & limb_mask;
        carry >>= 51;
    }

    assert(word_is_zero(carry + borrow));
}
@@ -170,41 +170,3 @@ void gf_mulw (gf_s *__restrict__ cs, const gf as, uint64_t b) { | |||||
c[0] = accum & mask; | c[0] = accum & mask; | ||||
c[1] = c1 + shrld(accum,51); | c[1] = c1 + shrld(accum,51); | ||||
} | } | ||||
/**
 * Reduce a 5-limb, 51-bits-per-limb field element in place to its canonical
 * representative modulo p = 2^255 - 19.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf a) {
    uint64_t limb_mask = (1ull<<51)-1;
    int k;

    /* Fold the bits above bit 51 of the top limb into limb 0, times 19. */
    a->limb[0] += (a->limb[4]>>51)*19;
    a->limb[4] &= limb_mask;

    /* The value is now below 2p.  Subtract p limb by limb; the signed
     * accumulator carries the borrow from one limb to the next. */
    __int128_t borrow = 0;
    for (k=0; k<5; k++) {
        uint64_t p_limb = (k==0) ? limb_mask-18 : limb_mask;
        borrow = borrow + a->limb[k] - p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 51;
    }

    /* borrow == 0:  the value was >= p and a now holds value - p.
     * borrow == -1: the value was < p and a holds value - p + 2^255;
     *               adding p back carries out of the top and restores it. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    uint64_t addback = borrow & limb_mask;
    __uint128_t carry = 0;

    /* Add p (masked by addback) back in. */
    for (k=0; k<5; k++) {
        uint64_t term = (k==0) ? (addback&~18) : addback;
        carry = carry + a->limb[k] + term;
        a->limb[k] = carry & limb_mask;
        carry >>= 51;
    }

    assert(word_is_zero(carry + borrow));
}
@@ -103,42 +103,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||||
gf_mul(cs,as,as); /* PERF */ | gf_mul(cs,as,as); /* PERF */ | ||||
} | } | ||||
/**
 * Reduce a 16-limb, 28-bits-per-limb field element in place to its canonical
 * representative modulo p = 2^448 - 2^224 - 1.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf a) {
    word_t limb_mask = (1ull<<28)-1;
    int k;

    /* Fold the bits above bit 28 of the top limb into limbs 8 and 0. */
    a->limb[8] += a->limb[15]>>28;
    a->limb[0] += a->limb[15]>>28;
    a->limb[15] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p limb by limb; p is all-ones except for limb 8. */
    dsword_t borrow = 0;
    for (k=0; k<16; k++) {
        word_t p_limb = (k==8) ? limb_mask-1 : limb_mask;
        borrow = borrow + a->limb[k] - p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 28;
    }

    /* borrow == 0:  the value was >= p and a now holds value - p.
     * borrow == -1: the value was < p and a holds value - p + 2^448;
     *               adding p back carries out of the top and restores it. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    word_t addback = borrow & limb_mask;
    dword_t carry = 0;

    /* Add p (masked by addback) back in. */
    for (k=0; k<16; k++) {
        word_t term = (k==8) ? (addback&~1) : addback;
        carry = carry + a->limb[k] + term;
        a->limb[k] = carry & limb_mask;
        carry >>= 28;
    }

    assert(word_is_zero(carry + borrow));
}
@@ -833,43 +833,3 @@ void gf_mulw ( | |||||
c[0] = accum8 & mask; | c[0] = accum8 & mask; | ||||
c[1] += accum8 >> 28; | c[1] += accum8 >> 28; | ||||
} | } | ||||
/**
 * Reduce a 16-limb, 28-bits-per-limb field element in place to its canonical
 * representative modulo p = 2^448 - 2^224 - 1.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf a) {
    word_t limb_mask = (1ull<<28)-1;
    int k;

    /* Fold the bits above bit 28 of the top limb into limbs 8 and 0. */
    a->limb[8] += a->limb[15]>>28;
    a->limb[0] += a->limb[15]>>28;
    a->limb[15] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p limb by limb; p is all-ones except for limb 8. */
    dsword_t borrow = 0;
    for (k=0; k<16; k++) {
        word_t p_limb = (k==8) ? limb_mask-1 : limb_mask;
        borrow = borrow + a->limb[k] - p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 28;
    }

    /* borrow == 0:  the value was >= p and a now holds value - p.
     * borrow == -1: the value was < p and a holds value - p + 2^448;
     *               adding p back carries out of the top and restores it. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    word_t addback = borrow & limb_mask;
    dword_t carry = 0;

    /* Add p (masked by addback) back in. */
    for (k=0; k<16; k++) {
        word_t term = (k==8) ? (addback&~1) : addback;
        carry = carry + a->limb[k] + term;
        a->limb[k] = carry & limb_mask;
        carry >>= 28;
    }

    assert(word_is_zero(carry + borrow));
}
@@ -593,94 +593,3 @@ void gf_mulw (gf_s *__restrict__ cs, const gf as, uint64_t b) { | |||||
accum = vshrq_n_u64(accum,28); | accum = vshrq_n_u64(accum,28); | ||||
vo[1] += vmovn_u64(accum); | vo[1] += vmovn_u64(accum); | ||||
} | } | ||||
/* PERF: vectorize? */ | |||||
void gf_strong_reduce (gf a) { | |||||
word_t mask = (1ull<<28)-1; | |||||
/* first, clear high */ | |||||
a->limb[1] += a->limb[15]>>28; | |||||
a->limb[0] += a->limb[15]>>28; | |||||
a->limb[15] &= mask; | |||||
/* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */ | |||||
/* compute total_value - p. No need to reduce mod p. */ | |||||
dsword_t scarry = 0; | |||||
int i; | |||||
for (i=0; i<16; i++) { | |||||
scarry = scarry + a->limb[LIMBPERM(i)] - ((i==8)?mask-1:mask); | |||||
a->limb[LIMBPERM(i)] = scarry & mask; | |||||
scarry >>= 28; | |||||
} | |||||
/* uncommon case: it was >= p, so now scarry = 0 and this = x | |||||
* common case: it was < p, so now scarry = -1 and this = x - p + 2^448 | |||||
* so let's add back in p. will carry back off the top for 2^448. | |||||
*/ | |||||
assert(word_is_zero(scarry) | word_is_zero(scarry+1)); | |||||
word_t scarry_mask = scarry & mask; | |||||
dword_t carry = 0; | |||||
/* add it back */ | |||||
for (i=0; i<16; i++) { | |||||
carry = carry + a->limb[LIMBPERM(i)] + ((i==8)?(scarry_mask&~1):scarry_mask); | |||||
a->limb[LIMBPERM(i)] = carry & mask; | |||||
carry >>= 28; | |||||
} | |||||
assert(word_is_zero(carry + scarry)); | |||||
} | |||||
void gf_serialize (uint8_t *serial, const gf x) { | |||||
int i,j; | |||||
gf red; | |||||
gf_copy(red, x); | |||||
gf_strong_reduce(red); | |||||
for (i=0; i<8; i++) { | |||||
uint64_t limb = red->limb[LIMBPERM(2*i)] + (((uint64_t)red->limb[LIMBPERM(2*i+1)])<<28); | |||||
for (j=0; j<7; j++) { | |||||
serial[7*i+j] = limb; | |||||
limb >>= 8; | |||||
} | |||||
assert(limb == 0); | |||||
} | |||||
} | |||||
/**
 * Deserialize 56 bytes into a field element and report whether the encoded
 * value was reduced.
 *
 * Each group of 7 bytes (least significant first) is split into two 28-bit
 * limbs, stored in LIMBPERM order.
 *
 * @param x       Output element.
 * @param serial  56 input bytes.
 * @return ~word_is_zero(ge ^ mask); presumably an all-ones mask when the
 *         value is below p and zero otherwise (word_is_zero is defined
 *         elsewhere -- confirm).
 */
mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
    int k, b;

    for (k=0; k<8; k++) {
        const uint8_t *in = serial + 7*k;
        uint64_t packed = 0;
        for (b=0; b<7; b++) {
            packed |= ((uint64_t)in[b])<<(8*b);
        }
        x->limb[LIMBPERM(2*k)] = packed & ((1ull<<28)-1);
        x->limb[LIMBPERM(2*k+1)] = packed >> 28;
    }

    /* Reduction check.
     *
     * ge is maintained so that it equals mask (28 ones) exactly when the
     * limbs examined so far are >= the corresponding limbs of p.
     *
     * p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
     */
    uint32_t ge = -1, mask = (1ull<<28)-1;
    for (k=0; k<8; k++) {
        ge &= x->limb[LIMBPERM(k)];
    }

    /* ge is all ones iff the low half is all ones.  At limb 8: keep it if
     * the limb is 1110, force it on if the limb is 1111. */
    ge = (ge & (x->limb[LIMBPERM(8)] + 1)) | word_is_zero(x->limb[LIMBPERM(8)] ^ mask);

    /* The remaining limbs must all be all-ones for ge to survive. */
    for (k=9; k<16; k++) {
        ge &= x->limb[LIMBPERM(k)];
    }

    return ~word_is_zero(ge ^ mask);
}
@@ -300,89 +300,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||||
c[0] += ((uint64_t)(accum1)); | c[0] += ((uint64_t)(accum1)); | ||||
} | } | ||||
/**
 * Reduce an 8-limb, 56-bits-per-limb field element in place to its canonical
 * representative modulo p = 2^448 - 2^224 - 1.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf a) {
    uint64_t limb_mask = (1ull<<56)-1;
    int k;

    /* Fold the bits above bit 56 of the top limb into limbs 4 and 0. */
    a->limb[4] += a->limb[7]>>56;
    a->limb[0] += a->limb[7]>>56;
    a->limb[7] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p limb by limb; p is all-ones except for limb 4. */
    __int128_t borrow = 0;
    for (k=0; k<8; k++) {
        uint64_t p_limb = (k==4) ? limb_mask-1 : limb_mask;
        borrow = borrow + a->limb[k] - p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 56;
    }

    /* borrow == 0:  the value was >= p and a now holds value - p.
     * borrow == -1: the value was < p and a holds value - p + 2^448;
     *               adding p back carries out of the top and restores it. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    uint64_t addback = borrow & limb_mask;
    __uint128_t carry = 0;

    /* Add p (masked by addback) back in. */
    for (k=0; k<8; k++) {
        uint64_t term = (k==4) ? (addback&~1) : addback;
        carry = carry + a->limb[k] + term;
        a->limb[k] = carry & limb_mask;
        carry >>= 56;
    }

    assert(word_is_zero(carry + borrow));
}
void gf_serialize (uint8_t *serial, const gf x) { | |||||
int i,j; | |||||
gf red; | |||||
gf_copy(red, x); | |||||
gf_strong_reduce(red); | |||||
for (i=0; i<8; i++) { | |||||
for (j=0; j<7; j++) { | |||||
serial[7*i+j] = red->limb[i]; | |||||
red->limb[i] >>= 8; | |||||
} | |||||
assert(red->limb[i] == 0); | |||||
} | |||||
} | |||||
/**
 * Deserialize 56 bytes into an 8-limb field element and report whether the
 * encoded value was reduced.
 *
 * Each group of 7 bytes (least significant first) becomes one 56-bit limb.
 *
 * @param x       Output element.
 * @param serial  56 input bytes.
 * @return ~word_is_zero(ge ^ mask); presumably an all-ones mask when the
 *         value is below p and zero otherwise (word_is_zero is defined
 *         elsewhere -- confirm).
 */
mask_t gf_deserialize (gf x, const uint8_t serial[56]) {
    int k, b;

    for (k=0; k<8; k++) {
        const uint8_t *in = serial + 7*k;
        uint64_t packed = 0;
        for (b=0; b<7; b++) {
            packed |= ((uint64_t)in[b])<<(8*b);
        }
        x->limb[k] = packed;
    }

    /* Reduction check.
     *
     * ge is maintained so that it equals mask (56 ones) exactly when the
     * limbs examined so far are >= the corresponding limbs of p.
     *
     * p = little_endian(1111,1111,1111,1111,1110,1111,1111,1111)
     */
    uint64_t ge = -1, mask = (1ull<<56)-1;
    for (k=0; k<4; k++) {
        ge &= x->limb[k];
    }

    /* ge is all ones iff the low half is all ones.  At limb 4: keep it if
     * the limb is 1110, force it on if the limb is 1111. */
    ge = (ge & (x->limb[4] + 1)) | word_is_zero(x->limb[4] ^ mask);

    /* The remaining limbs must all be all-ones for ge to survive. */
    for (k=5; k<8; k++) {
        ge &= x->limb[k];
    }

    return ~word_is_zero(ge ^ mask);
}
@@ -289,43 +289,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||||
c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | ||||
c[0] += ((uint64_t)(accum1)); | c[0] += ((uint64_t)(accum1)); | ||||
} | } | ||||
/**
 * Reduce an 8-limb, 56-bits-per-limb field element in place to its canonical
 * representative modulo p = 2^448 - 2^224 - 1.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf a) {
    uint64_t limb_mask = (1ull<<56)-1;
    int k;

    /* Fold the bits above bit 56 of the top limb into limbs 4 and 0. */
    a->limb[4] += a->limb[7]>>56;
    a->limb[0] += a->limb[7]>>56;
    a->limb[7] &= limb_mask;

    /* now the total is less than 2^448 - 2^(448-56) + 2^(448-56+8) < 2p */

    /* Subtract p limb by limb; p is all-ones except for limb 4. */
    __int128_t borrow = 0;
    for (k=0; k<8; k++) {
        uint64_t p_limb = (k==4) ? limb_mask-1 : limb_mask;
        borrow = borrow + a->limb[k] - p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 56;
    }

    /* borrow == 0:  the value was >= p and a now holds value - p.
     * borrow == -1: the value was < p and a holds value - p + 2^448;
     *               adding p back carries out of the top and restores it. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    uint64_t addback = borrow & limb_mask;
    __uint128_t carry = 0;

    /* Add p (masked by addback) back in. */
    for (k=0; k<8; k++) {
        uint64_t term = (k==4) ? (addback&~1) : addback;
        carry = carry + a->limb[k] + term;
        a->limb[k] = carry & limb_mask;
        carry >>= 56;
    }

    assert(word_is_zero(carry + borrow));
}
@@ -289,43 +289,3 @@ void gf_sqr (gf *__restrict__ cs, const gf *as) { | |||||
c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1)); | ||||
c[0] += ((uint64_t)(accum1)); | c[0] += ((uint64_t)(accum1)); | ||||
} | } | ||||
/**
 * Reduce an 8-limb, 60-bits-per-limb field element in place to its canonical
 * representative modulo p = 2^480 - 2^240 - 1.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf *a) {
    uint64_t limb_mask = (1ull<<60)-1;
    int k;

    /* Fold the bits above bit 60 of the top limb into limbs 4 and 0. */
    a->limb[4] += a->limb[7]>>60;
    a->limb[0] += a->limb[7]>>60;
    a->limb[7] &= limb_mask;

    /* now the total is less than 2^480 - 2^(480-60) + 2^(480-60+8) < 2p */

    /* Subtract p limb by limb; p is all-ones except for limb 4. */
    __int128_t borrow = 0;
    for (k=0; k<8; k++) {
        uint64_t p_limb = (k==4) ? limb_mask-1 : limb_mask;
        borrow = borrow + a->limb[k] - p_limb;
        a->limb[k] = borrow & limb_mask;
        borrow >>= 60;
    }

    /* borrow == 0:  the value was >= p and a now holds value - p.
     * borrow == -1: the value was < p and a holds value - p + 2^480;
     *               adding p back carries out of the top and restores it. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    uint64_t addback = borrow & limb_mask;
    __uint128_t carry = 0;

    /* Add p (masked by addback) back in. */
    for (k=0; k<8; k++) {
        uint64_t term = (k==4) ? (addback&~1) : addback;
        carry = carry + a->limb[k] + term;
        a->limb[k] = carry & limb_mask;
        carry >>= 60;
    }

    assert(word_is_zero(carry + borrow));
}
@@ -282,41 +282,3 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) { | |||||
c[8] += accum1 >> 58; | c[8] += accum1 >> 58; | ||||
} | } | ||||
/**
 * Reduce a 9-limb field element (eight 58-bit limbs plus a 57-bit top limb)
 * in place to its canonical representative modulo p = 2^521 - 1.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf a) {
    uint64_t full_mask = (1ull<<58)-1, top_mask = (1ull<<57)-1;
    int k;

    /* The bits above bit 57 of the top limb seed the accumulator, so they
     * are added back in at limb 0 by the subtraction loop. */
    __int128_t borrow = a->limb[8]>>57;
    a->limb[8] &= top_mask;

    /* now the total is less than 2p */

    /* Subtract p limb by limb; every limb of p is all-ones at its width. */
    for (k=0; k<9; k++) {
        uint64_t p_limb = (k==8) ? top_mask : full_mask;
        borrow = borrow + a->limb[k] - p_limb;
        a->limb[k] = borrow & p_limb;
        borrow >>= (k==8) ? 57 : 58;
    }

    /* borrow == 0:  the value was >= p and a now holds value - p.
     * borrow == -1: the value was < p and a holds value - p + 2^521;
     *               adding p back carries out of the top and restores it. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    uint64_t addback = borrow & full_mask;
    __uint128_t carry = 0;

    /* Add p (masked by addback) back in. */
    for (k=0; k<9; k++) {
        uint64_t term = (k==8) ? (addback>>1) : addback;
        uint64_t keep = (k==8) ? full_mask>>1 : full_mask;
        carry = carry + a->limb[k] + term;
        a->limb[k] = carry & keep;
        carry >>= (k==8) ? 57 : 58;
    }

    assert(word_is_zero(carry + borrow));
}
@@ -348,44 +348,3 @@ void gf_mulw (gf *__restrict__ cs, const gf *as, uint64_t b) { | |||||
c[3] = c[7] = c[11] = 0; | c[3] = c[7] = c[11] = 0; | ||||
} | } | ||||
/**
 * Reduce a 9-limb field element (eight 58-bit limbs plus a 57-bit top limb)
 * in place to its canonical representative modulo p = 2^521 - 1.  Limbs are
 * stored in the order given by LIMBPERM, which is defined elsewhere.
 *
 * @param a  Field element, modified in place.
 */
void gf_strong_reduce (gf *a) {
    uint64_t full_mask = (1ull<<58)-1, top_mask = (1ull<<57)-1;
    int k;

    /* The bits above bit 57 of the top limb seed the accumulator, so they
     * are added back in at logical limb 0 by the subtraction loop. */
    __int128_t borrow = a->limb[LIMBPERM(8)]>>57;
    a->limb[LIMBPERM(8)] &= top_mask;

    /* now the total is less than 2p */

    /* Subtract p limb by limb; every limb of p is all-ones at its width. */
    for (k=0; k<9; k++) {
        uint64_t p_limb = (k==8) ? top_mask : full_mask;
        borrow = borrow + a->limb[LIMBPERM(k)] - p_limb;
        a->limb[LIMBPERM(k)] = borrow & p_limb;
        borrow >>= (k==8) ? 57 : 58;
    }

    /* borrow == 0:  the value was >= p and a now holds value - p.
     * borrow == -1: the value was < p and a holds value - p + 2^521;
     *               adding p back carries out of the top and restores it. */
    assert(word_is_zero(borrow) | word_is_zero(borrow+1));

    uint64_t addback = borrow & full_mask;
    __uint128_t carry = 0;

    /* Add p (masked by addback) back in. */
    for (k=0; k<9; k++) {
        uint64_t term = (k==8) ? (addback>>1) : addback;
        uint64_t keep = (k==8) ? full_mask>>1 : full_mask;
        carry = carry + a->limb[LIMBPERM(k)] + term;
        a->limb[LIMBPERM(k)] = carry & keep;
        carry >>= (k==8) ? 57 : 58;
    }

    assert(word_is_zero(carry + borrow));

    /* Clear slots 3, 7 and 11.
     * NOTE(review): presumably padding slots that LIMBPERM never maps to -- confirm. */
    a->limb[3] = a->limb[7] = a->limb[11] = 0;
}