@@ -1156,12 +1156,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||||
mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] & 0x80); | mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] & 0x80); | ||||
enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] &= ~0x80; | enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] &= ~0x80; | ||||
mask_t succ = DECAF_TRUE; | |||||
mask_t succ = gf_deserialize(p->y, enc2, 1); | |||||
#if 7 == 0 | #if 7 == 0 | ||||
succ = word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]); | |||||
succ &= word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]); | |||||
#endif | #endif | ||||
succ &= gf_deserialize(p->y, enc2, 1); | |||||
gf_sqr(p->x,p->y); | gf_sqr(p->x,p->y); | ||||
gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ | gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ | ||||
@@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||||
decaf_bzero(enc2,sizeof(enc2)); | decaf_bzero(enc2,sizeof(enc2)); | ||||
assert(API_NS(point_valid)(p) || ~succ); | assert(API_NS(point_valid)(p) || ~succ); | ||||
return decaf_succeed_if(succ); | |||||
return decaf_succeed_if(mask_to_bool(succ)); | |||||
} | } | ||||
decaf_error_t decaf_x25519 ( | decaf_error_t decaf_x25519 ( | ||||
@@ -1156,12 +1156,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||||
mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] & 0x80); | mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] & 0x80); | ||||
enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] &= ~0x80; | enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] &= ~0x80; | ||||
mask_t succ = DECAF_TRUE; | |||||
mask_t succ = gf_deserialize(p->y, enc2, 1); | |||||
#if 0 == 0 | #if 0 == 0 | ||||
succ = word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]); | |||||
succ &= word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]); | |||||
#endif | #endif | ||||
succ &= gf_deserialize(p->y, enc2, 1); | |||||
gf_sqr(p->x,p->y); | gf_sqr(p->x,p->y); | ||||
gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ | gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ | ||||
@@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||||
decaf_bzero(enc2,sizeof(enc2)); | decaf_bzero(enc2,sizeof(enc2)); | ||||
assert(API_NS(point_valid)(p) || ~succ); | assert(API_NS(point_valid)(p) || ~succ); | ||||
return decaf_succeed_if(succ); | |||||
return decaf_succeed_if(mask_to_bool(succ)); | |||||
} | } | ||||
decaf_error_t decaf_x448 ( | decaf_error_t decaf_x448 ( | ||||
@@ -4,6 +4,14 @@ | |||||
#include "f_field.h" | #include "f_field.h" | ||||
/*
 * FOR_LIMB(_i, _start, _end, _x): run statement/block _x once for each value
 * of the index variable _i in the half-open range [_start, _end).
 *
 * Two expansions are selected at preprocessing time:
 *
 *  - Unrolled form, chosen when __OPTIMIZE__ is defined, __OPTIMIZE_SIZE__ is
 *    not defined and I_HATE_UNROLLED_LOOPS evaluates to 0, or whenever
 *    DECAF_FORCE_UNROLL is defined.  REPEAT8 pastes its argument eight times,
 *    so the expansion is eight guarded copies of `if (_i<_end) { _x; } _i++;`.
 *    Consequences:
 *      * at most 8 iterations are executed; a range longer than 8 would be
 *        silently truncated;
 *      * on exit _i equals _start+8 regardless of _end.
 *
 *  - Loop form (otherwise): an ordinary `for` loop; on exit _i equals _end
 *    when _start <= _end.
 *
 * Because the final value of _i differs between the two forms, code after a
 * FOR_LIMB should not rely on the index value it leaves behind.
 *
 * The macro arguments are substituted textually without parentheses and _end
 * is evaluated on every guard/iteration, so pass simple, side-effect-free
 * expressions.
 */
#if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) && !I_HATE_UNROLLED_LOOPS) \ | |||||
|| defined(DECAF_FORCE_UNROLL) | |||||
#define REPEAT8(_x) _x _x _x _x _x _x _x _x | |||||
#define FOR_LIMB(_i,_start,_end,_x) do { _i=_start; REPEAT8( if (_i<_end) { _x; } _i++;) } while (0) | |||||
#else | |||||
#define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0) | |||||
#endif | |||||
void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | ||||
const uint32_t *a = as->limb, *b = bs->limb; | const uint32_t *a = as->limb, *b = bs->limb; | ||||
uint32_t *c = cs->limb; | uint32_t *c = cs->limb; | ||||
@@ -19,24 +27,24 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | |||||
bb[i] = b[i] + b[i+8]; | bb[i] = b[i] + b[i+8]; | ||||
} | } | ||||
for (j=0; j<8; j++) { | |||||
FOR_LIMB(j,0,8,{ | |||||
accum2 = 0; | accum2 = 0; | ||||
for (i=0; i<=j; i++) { | |||||
FOR_LIMB (i,0,j+1,{ | |||||
accum2 += widemul(a[j-i],b[i]); | accum2 += widemul(a[j-i],b[i]); | ||||
accum1 += widemul(aa[j-i],bb[i]); | accum1 += widemul(aa[j-i],bb[i]); | ||||
accum0 += widemul(a[8+j-i], b[8+i]); | accum0 += widemul(a[8+j-i], b[8+i]); | ||||
} | |||||
}); | |||||
accum1 -= accum2; | accum1 -= accum2; | ||||
accum0 += accum2; | accum0 += accum2; | ||||
accum2 = 0; | accum2 = 0; | ||||
for (; i<8; i++) { | |||||
FOR_LIMB (i,j+1,8,{ | |||||
accum0 -= widemul(a[8+j-i], b[i]); | accum0 -= widemul(a[8+j-i], b[i]); | ||||
accum2 += widemul(aa[8+j-i], bb[i]); | accum2 += widemul(aa[8+j-i], bb[i]); | ||||
accum1 += widemul(a[16+j-i], b[8+i]); | accum1 += widemul(a[16+j-i], b[8+i]); | ||||
} | |||||
}); | |||||
accum1 += accum2; | accum1 += accum2; | ||||
accum0 += accum2; | accum0 += accum2; | ||||
@@ -46,7 +54,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | |||||
accum0 >>= 28; | accum0 >>= 28; | ||||
accum1 >>= 28; | accum1 >>= 28; | ||||
} | |||||
}); | |||||
accum0 += accum1; | accum0 += accum1; | ||||
accum0 += c[8]; | accum0 += c[8]; | ||||
@@ -66,24 +74,17 @@ void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { | |||||
const uint32_t *a = as->limb; | const uint32_t *a = as->limb; | ||||
uint32_t *c = cs->limb; | uint32_t *c = cs->limb; | ||||
uint64_t accum0, accum8; | |||||
uint64_t accum0 = 0, accum8 = 0; | |||||
uint32_t mask = (1ull<<28)-1; | uint32_t mask = (1ull<<28)-1; | ||||
int i; | int i; | ||||
accum0 = widemul(b, a[0]); | |||||
accum8 = widemul(b, a[8]); | |||||
c[0] = accum0 & mask; accum0 >>= 28; | |||||
c[8] = accum8 & mask; accum8 >>= 28; | |||||
for (i=1; i<8; i++) { | |||||
FOR_LIMB(i,0,8,{ | |||||
accum0 += widemul(b, a[i]); | accum0 += widemul(b, a[i]); | ||||
accum8 += widemul(b, a[i+8]); | accum8 += widemul(b, a[i+8]); | ||||
c[i] = accum0 & mask; accum0 >>= 28; | c[i] = accum0 & mask; accum0 >>= 28; | ||||
c[i+8] = accum8 & mask; accum8 >>= 28; | c[i+8] = accum8 & mask; accum8 >>= 28; | ||||
} | |||||
}); | |||||
accum0 += accum8 + c[8]; | accum0 += accum8 + c[8]; | ||||
c[8] = accum0 & mask; | c[8] = accum0 & mask; | ||||
@@ -1145,12 +1145,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||||
mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] & 0x80); | mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] & 0x80); | ||||
enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] &= ~0x80; | enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] &= ~0x80; | ||||
mask_t succ = DECAF_TRUE; | |||||
mask_t succ = gf_deserialize(p->y, enc2, 1); | |||||
#if $(gf_bits % 8) == 0 | #if $(gf_bits % 8) == 0 | ||||
succ = word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]); | |||||
succ &= word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]); | |||||
#endif | #endif | ||||
succ &= gf_deserialize(p->y, enc2, 1); | |||||
gf_sqr(p->x,p->y); | gf_sqr(p->x,p->y); | ||||
gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ | gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ | ||||
@@ -1236,7 +1234,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||||
decaf_bzero(enc2,sizeof(enc2)); | decaf_bzero(enc2,sizeof(enc2)); | ||||
assert(API_NS(point_valid)(p) || ~succ); | assert(API_NS(point_valid)(p) || ~succ); | ||||
return decaf_succeed_if(succ); | |||||
return decaf_succeed_if(mask_to_bool(succ)); | |||||
} | } | ||||
decaf_error_t decaf_x$(gf_shortname) ( | decaf_error_t decaf_x$(gf_shortname) ( | ||||