Browse Source

fix embarrassing arch_ref64 bug; improve code for p25519 arch_32

master
Michael Hamburg 9 years ago
parent
commit
2104923b6f
4 changed files with 18 additions and 22 deletions
  1. +1
    -1
      src/include/arch_ref64/arch_intrinsics.h
  2. +8
    -11
      src/p25519/arch_32/f_impl.c
  3. +8
    -9
      src/p25519/arch_32/f_impl.h
  4. +1
    -1
      src/p25519/arch_ref64/f_impl.h

+ 1
- 1
src/include/arch_ref64/arch_intrinsics.h View File

@@ -14,7 +14,7 @@ uint64_t word_is_zero(uint64_t a) {
} }


static __inline__ __attribute((always_inline,unused)) static __inline__ __attribute((always_inline,unused))
uint64_t widemul(uint64_t a, uint64_t b) {
__uint128_t widemul(uint64_t a, uint64_t b) {
return ((__uint128_t)a) * b; return ((__uint128_t)a) * b;
} }




+ 8
- 11
src/p25519/arch_32/f_impl.c View File

@@ -7,7 +7,7 @@
void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
const uint32_t *a = as->limb, *b = bs->limb, maske = ((1<<26)-1), masko = ((1<<25)-1); const uint32_t *a = as->limb, *b = bs->limb, maske = ((1<<26)-1), masko = ((1<<25)-1);
uint64_t bh[9];
uint32_t bh[9];
int i,j; int i,j;
for (i=0; i<9; i++) bh[i] = b[i+1] * 19; for (i=0; i<9; i++) bh[i] = b[i+1] * 19;
@@ -18,13 +18,13 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
/* Even case. */ /* Even case. */
for (j=0; j<i; /*j+=2*/) { for (j=0; j<i; /*j+=2*/) {
accum += widemul(b[i-j], a[j]); j++; accum += widemul(b[i-j], a[j]); j++;
accum += widemul(2*b[i-j], a[j]); j++;
accum += widemul(b[i-j], 2*a[j]); j++;
} }
accum += widemul(b[0], a[j]); j++; accum += widemul(b[0], a[j]); j++;
accum += widemul(2*bh[8], a[j]); j++;
accum += widemul(bh[8], 2*a[j]); j++;
for (; j<10; /* j+=2*/) { for (; j<10; /* j+=2*/) {
accum += widemul(bh[i-j+9], a[j]); j++; accum += widemul(bh[i-j+9], a[j]); j++;
accum += widemul(2*bh[i-j+9], a[j]); j++;
accum += widemul(bh[i-j+9], 2*a[j]); j++;
} }
c[i] = accum & maske; c[i] = accum & maske;
accum >>= 26; accum >>= 26;
@@ -53,25 +53,22 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {


void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint32_t *a = as->limb, maske = ((1<<26)-1), masko = ((1<<25)-1); const uint32_t *a = as->limb, maske = ((1<<26)-1), masko = ((1<<25)-1);
uint32_t blo = b & maske, bhi = b>>26, bhi2 = 2*bhi;
uint32_t *c = cs->limb; uint32_t *c = cs->limb;
uint64_t accum = 0;

accum = widemul(blo, a[0]) + widemul(bhi*38,a[9]);
uint64_t accum = widemul(b, a[0]);
c[0] = accum & maske; c[0] = accum & maske;
accum >>= 26; accum >>= 26;


accum += widemul(blo, a[1]) + widemul(bhi,a[0]);
accum += widemul(b, a[1]);
c[1] = accum & masko; c[1] = accum & masko;
accum >>= 25; accum >>= 25;


for (int i=2; i<10; /*i+=2*/) { for (int i=2; i<10; /*i+=2*/) {
accum += widemul(blo, a[i]) + widemul(bhi2, a[i-1]);
accum += widemul(b, a[i]);
c[i] = accum & maske; c[i] = accum & maske;
accum >>= 26; accum >>= 26;
i++; i++;


accum += widemul(blo, a[i]) + widemul(bhi, a[i-1]);
accum += widemul(b, a[i]);
c[i] = accum & masko; c[i] = accum & masko;
accum >>= 25; accum >>= 25;
i++; i++;


+ 8
- 9
src/p25519/arch_32/f_impl.h View File

@@ -2,7 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information. * Released under the MIT License. See LICENSE.txt for license information.
*/ */


#define GF_HEADROOM 5
#define GF_HEADROOM 3 /* Would be 5, but 3*19 * 2^26+small is all that fits in a uint32_t */
#define LIMB(x) (x##ull)&((1ull<<26)-1), (x##ull)>>26 #define LIMB(x) (x##ull)&((1ull<<26)-1), (x##ull)>>26
#define FIELD_LITERAL(a,b,c,d,e) {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}} #define FIELD_LITERAL(a,b,c,d,e) {{LIMB(a),LIMB(b),LIMB(c),LIMB(d),LIMB(e)}}


@@ -12,21 +12,20 @@ void gf_add_RAW (gf out, const gf a, const gf b) {
for (unsigned int i=0; i<10; i++) { for (unsigned int i=0; i<10; i++) {
out->limb[i] = a->limb[i] + b->limb[i]; out->limb[i] = a->limb[i] + b->limb[i];
} }
gf_weak_reduce(out);
} }


void gf_sub_RAW (gf out, const gf a, const gf b) { void gf_sub_RAW (gf out, const gf a, const gf b) {
uint32_t coe = ((1ull<<26)-1)*2, coo = ((1ull<<25)-1)*2, co0 = coe-36;
for (unsigned int i=0; i<10; i+=2) {
out->limb[i] = a->limb[i] - b->limb[i] + ((i==0) ? co0 : coe);
out->limb[i+1] = a->limb[i+1] - b->limb[i+1] + coo;
for (unsigned int i=0; i<10; i++) {
out->limb[i] = a->limb[i] - b->limb[i];
} }
gf_weak_reduce(out);
} }


void gf_bias (gf a, int amt) { void gf_bias (gf a, int amt) {
(void) a;
(void) amt;
uint32_t coe = ((1ull<<26)-1)*amt, coo = ((1ull<<25)-1)*amt, co0 = coe-18*amt;
for (unsigned int i=0; i<10; i+=2) {
a->limb[i] += ((i==0) ? co0 : coe);
a->limb[i+1] += coo;
}
} }


void gf_weak_reduce (gf a) { void gf_weak_reduce (gf a) {


+ 1
- 1
src/p25519/arch_ref64/f_impl.h View File

@@ -2,7 +2,7 @@
* Released under the MIT License. See LICENSE.txt for license information. * Released under the MIT License. See LICENSE.txt for license information.
*/ */


#define GF_HEADROOM 933
#define GF_HEADROOM 9999 /* Always reduced */
#define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }} #define FIELD_LITERAL(a,b,c,d,e) {{ a,b,c,d,e }}


#define LIMB_PLACE_VALUE(i) 51 #define LIMB_PLACE_VALUE(i) 51


Loading…
Cancel
Save