Browse Source

unroll loops in arch_32/f_impl.c except on -Os

master
Michael Hamburg 8 years ago
parent
commit
4a13ad4b8b
4 changed files with 27 additions and 32 deletions
  1. +3
    -5
      src/GENERATED/c/curve25519/decaf.c
  2. +3
    -5
      src/GENERATED/c/ed448goldilocks/decaf.c
  3. +18
    -17
      src/p448/arch_32/f_impl.c
  4. +3
    -5
      src/per_curve/decaf.tmpl.c

+ 3
- 5
src/GENERATED/c/curve25519/decaf.c View File

@@ -1156,12 +1156,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] & 0x80);
enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] &= ~0x80;
mask_t succ = DECAF_TRUE;
mask_t succ = gf_deserialize(p->y, enc2, 1);
#if 7 == 0
succ = word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]);
succ &= word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]);
#endif
succ &= gf_deserialize(p->y, enc2, 1);

gf_sqr(p->x,p->y);
gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
@@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
decaf_bzero(enc2,sizeof(enc2));
assert(API_NS(point_valid)(p) || ~succ);
return decaf_succeed_if(succ);
return decaf_succeed_if(mask_to_bool(succ));
}

decaf_error_t decaf_x25519 (


+ 3
- 5
src/GENERATED/c/ed448goldilocks/decaf.c View File

@@ -1156,12 +1156,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] & 0x80);
enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] &= ~0x80;
mask_t succ = DECAF_TRUE;
mask_t succ = gf_deserialize(p->y, enc2, 1);
#if 0 == 0
succ = word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]);
succ &= word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]);
#endif
succ &= gf_deserialize(p->y, enc2, 1);

gf_sqr(p->x,p->y);
gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
@@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
decaf_bzero(enc2,sizeof(enc2));
assert(API_NS(point_valid)(p) || ~succ);
return decaf_succeed_if(succ);
return decaf_succeed_if(mask_to_bool(succ));
}

decaf_error_t decaf_x448 (


+ 18
- 17
src/p448/arch_32/f_impl.c View File

@@ -4,6 +4,14 @@

#include "f_field.h"

/*
 * Select the implementation of FOR_LIMB, the limb-loop helper used below.
 *
 * The manually unrolled form is chosen when either:
 *   - __OPTIMIZE__ is defined, __OPTIMIZE_SIZE__ is not defined, and
 *     I_HATE_UNROLLED_LOOPS is undefined or evaluates to 0; or
 *   - DECAF_FORCE_UNROLL is defined.
 * Otherwise FOR_LIMB is an ordinary for loop.
 */
#if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) && !I_HATE_UNROLLED_LOOPS) \
|| defined(DECAF_FORCE_UNROLL)
/* Paste the token sequence _x eight times in a row. */
#define REPEAT8(_x) _x _x _x _x _x _x _x _x
/*
 * Unrolled form: assign _start to _i, then emit eight copies of
 * "if (_i<_end) { _x; } _i++;".
 *
 * Consequences of this expansion:
 *   - At most 8 iterations of _x are executed; a range wider than 8 would
 *     be cut short without any diagnostic.
 *   - _i is always incremented 8 times, so it finishes at _start+8, which
 *     differs from the loop form below (it stops as soon as _i<_end fails).
 *     Code following the macro should not depend on the final value of _i.
 *   - _end and _x are expanded once per copy, and no argument is
 *     parenthesized, so they should be simple expressions.
 */
#define FOR_LIMB(_i,_start,_end,_x) do { _i=_start; REPEAT8( if (_i<_end) { _x; } _i++;) } while (0)
#else
/* Plain loop form: run _x for _i = _start while _i<_end, incrementing _i. */
#define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0)
#endif

void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
const uint32_t *a = as->limb, *b = bs->limb;
uint32_t *c = cs->limb;
@@ -19,24 +27,24 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
bb[i] = b[i] + b[i+8];
}
for (j=0; j<8; j++) {
FOR_LIMB(j,0,8,{
accum2 = 0;
for (i=0; i<=j; i++) {
FOR_LIMB (i,0,j+1,{
accum2 += widemul(a[j-i],b[i]);
accum1 += widemul(aa[j-i],bb[i]);
accum0 += widemul(a[8+j-i], b[8+i]);
}
});
accum1 -= accum2;
accum0 += accum2;
accum2 = 0;
for (; i<8; i++) {
FOR_LIMB (i,j+1,8,{
accum0 -= widemul(a[8+j-i], b[i]);
accum2 += widemul(aa[8+j-i], bb[i]);
accum1 += widemul(a[16+j-i], b[8+i]);
}
});

accum1 += accum2;
accum0 += accum2;
@@ -46,7 +54,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {

accum0 >>= 28;
accum1 >>= 28;
}
});
accum0 += accum1;
accum0 += c[8];
@@ -66,24 +74,17 @@ void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint32_t *a = as->limb;
uint32_t *c = cs->limb;

uint64_t accum0, accum8;
uint64_t accum0 = 0, accum8 = 0;
uint32_t mask = (1ull<<28)-1;

int i;

accum0 = widemul(b, a[0]);
accum8 = widemul(b, a[8]);

c[0] = accum0 & mask; accum0 >>= 28;
c[8] = accum8 & mask; accum8 >>= 28;
for (i=1; i<8; i++) {
FOR_LIMB(i,0,8,{
accum0 += widemul(b, a[i]);
accum8 += widemul(b, a[i+8]);

c[i] = accum0 & mask; accum0 >>= 28;
c[i+8] = accum8 & mask; accum8 >>= 28;
}
});

accum0 += accum8 + c[8];
c[8] = accum0 & mask;


+ 3
- 5
src/per_curve/decaf.tmpl.c View File

@@ -1145,12 +1145,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] & 0x80);
enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] &= ~0x80;
mask_t succ = DECAF_TRUE;
mask_t succ = gf_deserialize(p->y, enc2, 1);
#if $(gf_bits % 8) == 0
succ = word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]);
succ &= word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]);
#endif
succ &= gf_deserialize(p->y, enc2, 1);

gf_sqr(p->x,p->y);
gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */
@@ -1236,7 +1234,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) (
decaf_bzero(enc2,sizeof(enc2));
assert(API_NS(point_valid)(p) || ~succ);
return decaf_succeed_if(succ);
return decaf_succeed_if(mask_to_bool(succ));
}

decaf_error_t decaf_x$(gf_shortname) (


Loading…
Cancel
Save