@@ -14,7 +14,7 @@ uint64_t word_is_zero(uint64_t a) { | |||||
} | } | ||||
static __inline__ __attribute((always_inline,unused)) | static __inline__ __attribute((always_inline,unused)) | ||||
uint64_t widemul(uint64_t a, uint64_t b) { | |||||
__uint128_t widemul(uint64_t a, uint64_t b) { | |||||
return ((__uint128_t)a) * b; | return ((__uint128_t)a) * b; | ||||
} | } | ||||
@@ -7,7 +7,7 @@ | |||||
void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | ||||
const uint32_t *a = as->limb, *b = bs->limb, maske = ((1<<26)-1), masko = ((1<<25)-1); | const uint32_t *a = as->limb, *b = bs->limb, maske = ((1<<26)-1), masko = ((1<<25)-1); | ||||
uint64_t bh[9]; | |||||
uint32_t bh[9]; | |||||
int i,j; | int i,j; | ||||
for (i=0; i<9; i++) bh[i] = b[i+1] * 19; | for (i=0; i<9; i++) bh[i] = b[i+1] * 19; | ||||
@@ -18,13 +18,13 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | |||||
/* Even case. */ | /* Even case. */ | ||||
for (j=0; j<i; /*j+=2*/) { | for (j=0; j<i; /*j+=2*/) { | ||||
accum += widemul(b[i-j], a[j]); j++; | accum += widemul(b[i-j], a[j]); j++; | ||||
accum += widemul(2*b[i-j], a[j]); j++; | |||||
accum += widemul(b[i-j], 2*a[j]); j++; | |||||
} | } | ||||
accum += widemul(b[0], a[j]); j++; | accum += widemul(b[0], a[j]); j++; | ||||
accum += widemul(2*bh[8], a[j]); j++; | |||||
accum += widemul(bh[8], 2*a[j]); j++; | |||||
for (; j<10; /* j+=2*/) { | for (; j<10; /* j+=2*/) { | ||||
accum += widemul(bh[i-j+9], a[j]); j++; | accum += widemul(bh[i-j+9], a[j]); j++; | ||||
accum += widemul(2*bh[i-j+9], a[j]); j++; | |||||
accum += widemul(bh[i-j+9], 2*a[j]); j++; | |||||
} | } | ||||
c[i] = accum & maske; | c[i] = accum & maske; | ||||
accum >>= 26; | accum >>= 26; | ||||
@@ -53,25 +53,22 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | |||||
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { | void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { | ||||
const uint32_t *a = as->limb, maske = ((1<<26)-1), masko = ((1<<25)-1); | const uint32_t *a = as->limb, maske = ((1<<26)-1), masko = ((1<<25)-1); | ||||
uint32_t blo = b & maske, bhi = b>>26, bhi2 = 2*bhi; | |||||
uint32_t *c = cs->limb; | uint32_t *c = cs->limb; | ||||
uint64_t accum = 0; | |||||
accum = widemul(blo, a[0]) + widemul(bhi*38,a[9]); | |||||
uint64_t accum = widemul(b, a[0]); | |||||
c[0] = accum & maske; | c[0] = accum & maske; | ||||
accum >>= 26; | accum >>= 26; | ||||
accum += widemul(blo, a[1]) + widemul(bhi,a[0]); | |||||
accum += widemul(b, a[1]); | |||||
c[1] = accum & masko; | c[1] = accum & masko; | ||||
accum >>= 25; | accum >>= 25; | ||||
for (int i=2; i<10; /*i+=2*/) { | for (int i=2; i<10; /*i+=2*/) { | ||||
accum += widemul(blo, a[i]) + widemul(bhi2, a[i-1]); | |||||
accum += widemul(b, a[i]); | |||||
c[i] = accum & maske; | c[i] = accum & maske; | ||||
accum >>= 26; | accum >>= 26; | ||||
i++; | i++; | ||||
accum += widemul(blo, a[i]) + widemul(bhi, a[i-1]); | |||||
accum += widemul(b, a[i]); | |||||
c[i] = accum & masko; | c[i] = accum & masko; | ||||
accum >>= 25; | accum >>= 25; | ||||
i++; | i++; | ||||
@@ -2,7 +2,7 @@ | |||||
* Released under the MIT License. See LICENSE.txt for license information. | * Released under the MIT License. See LICENSE.txt for license information. | ||||
*/ | */ | ||||
#define GF_HEADROOM 5 | |||||
#define GF_HEADROOM 3 /* Would be 5, but 3*19 * 2^26+small is all that fits in a uint32_t */ | |||||
#define LIMB(x) (x##ull)&((1ull<<26)-1), (x##ull)>>26 | #define LIMB(x) (x##ull)&((1ull<<26)-1), (x##ull)>>26 | ||||
#define FIELD_LITERAL(a,b,c,d,e) {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}} | #define FIELD_LITERAL(a,b,c,d,e) {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}} | ||||
@@ -12,21 +12,20 @@ void gf_add_RAW (gf out, const gf a, const gf b) { | |||||
for (unsigned int i=0; i<10; i++) { | for (unsigned int i=0; i<10; i++) { | ||||
out->limb[i] = a->limb[i] + b->limb[i]; | out->limb[i] = a->limb[i] + b->limb[i]; | ||||
} | } | ||||
gf_weak_reduce(out); | |||||
} | } | ||||
void gf_sub_RAW (gf out, const gf a, const gf b) { | void gf_sub_RAW (gf out, const gf a, const gf b) { | ||||
uint32_t coe = ((1ull<<26)-1)*2, coo = ((1ull<<25)-1)*2, co0 = coe-36; | |||||
for (unsigned int i=0; i<10; i+=2) { | |||||
out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co0 : coe); | |||||
out->limb[i+1] = a->limb[i+1] - b->limb[i+1] + coo; | |||||
for (unsigned int i=0; i<10; i++) { | |||||
out->limb[i] = a->limb[i] - b->limb[i]; | |||||
} | } | ||||
gf_weak_reduce(out); | |||||
} | } | ||||
void gf_bias (gf a, int amt) { | void gf_bias (gf a, int amt) { | ||||
(void) a; | |||||
(void) amt; | |||||
uint32_t coe = ((1ull<<26)-1)*amt, coo = ((1ull<<25)-1)*amt, co0 = coe-18*amt; | |||||
for (unsigned int i=0; i<10; i+=2) { | |||||
a->limb[i] += ((i==0) ? co0 : coe); | |||||
a->limb[i+1] += coo; | |||||
} | |||||
} | } | ||||
void gf_weak_reduce (gf a) { | void gf_weak_reduce (gf a) { | ||||
@@ -2,7 +2,7 @@ | |||||
* Released under the MIT License. See LICENSE.txt for license information. | * Released under the MIT License. See LICENSE.txt for license information. | ||||
*/ | */ | ||||
#define GF_HEADROOM 933 | |||||
#define GF_HEADROOM 9999 /* Always reduced */ | |||||
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} | ||||
#define LIMB_PLACE_VALUE(i) 51 | #define LIMB_PLACE_VALUE(i) 51 | ||||