| @@ -1156,12 +1156,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||
| mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] & 0x80); | |||
| enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1] &= ~0x80; | |||
| mask_t succ = DECAF_TRUE; | |||
| mask_t succ = gf_deserialize(p->y, enc2, 1); | |||
| #if 7 == 0 | |||
| succ = word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]); | |||
| succ &= word_is_zero(enc2[DECAF_EDDSA_25519_PRIVATE_BYTES-1]); | |||
| #endif | |||
| succ &= gf_deserialize(p->y, enc2, 1); | |||
| gf_sqr(p->x,p->y); | |||
| gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ | |||
| @@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||
| decaf_bzero(enc2,sizeof(enc2)); | |||
| assert(API_NS(point_valid)(p) || ~succ); | |||
| return decaf_succeed_if(succ); | |||
| return decaf_succeed_if(mask_to_bool(succ)); | |||
| } | |||
| decaf_error_t decaf_x25519 ( | |||
| @@ -1156,12 +1156,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||
| mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] & 0x80); | |||
| enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1] &= ~0x80; | |||
| mask_t succ = DECAF_TRUE; | |||
| mask_t succ = gf_deserialize(p->y, enc2, 1); | |||
| #if 0 == 0 | |||
| succ = word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]); | |||
| succ &= word_is_zero(enc2[DECAF_EDDSA_448_PRIVATE_BYTES-1]); | |||
| #endif | |||
| succ &= gf_deserialize(p->y, enc2, 1); | |||
| gf_sqr(p->x,p->y); | |||
| gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ | |||
| @@ -1247,7 +1245,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||
| decaf_bzero(enc2,sizeof(enc2)); | |||
| assert(API_NS(point_valid)(p) || ~succ); | |||
| return decaf_succeed_if(succ); | |||
| return decaf_succeed_if(mask_to_bool(succ)); | |||
| } | |||
| decaf_error_t decaf_x448 ( | |||
| @@ -4,6 +4,14 @@ | |||
| #include "f_field.h" | |||
| #if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__) && !I_HATE_UNROLLED_LOOPS) \ | | |
| || defined(DECAF_FORCE_UNROLL) /* Unrolled FOR_LIMB: used when __OPTIMIZE__ is defined, __OPTIMIZE_SIZE__ is not and I_HATE_UNROLLED_LOOPS is zero or undefined, or whenever DECAF_FORCE_UNROLL is defined. */ | | |
| #define REPEAT8(_x) _x _x _x _x _x _x _x _x /* Pastes _x eight times in a row. */ | | |
| #define FOR_LIMB(_i,_start,_end,_x) do { _i=_start; REPEAT8( if (_i<_end) { _x; } _i++;) } while (0) /* Runs _x for _i = _start, _start+1, ... while _i < _end, as eight guarded copies: at most 8 iterations are executed, _i and _end are evaluated in every copy, and _i is left at _start+8 afterwards. */ | | |
| #else /* Fallback: ordinary loop. */ | | |
| #define FOR_LIMB(_i,_start,_end,_x) do { for (_i=_start; _i<_end; _i++) _x; } while (0) /* Same iteration order as the unrolled form, but with no 8-iteration cap; _i is left at the first value that is not < _end, so callers must not rely on _i after the macro. */ | | |
| #endif /* unrolled vs. plain-loop FOR_LIMB */ | | |
| void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | |||
| const uint32_t *a = as->limb, *b = bs->limb; | |||
| uint32_t *c = cs->limb; | |||
| @@ -19,24 +27,24 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | |||
| bb[i] = b[i] + b[i+8]; | |||
| } | |||
| for (j=0; j<8; j++) { | |||
| FOR_LIMB(j,0,8,{ | |||
| accum2 = 0; | |||
| for (i=0; i<=j; i++) { | |||
| FOR_LIMB (i,0,j+1,{ | |||
| accum2 += widemul(a[j-i],b[i]); | |||
| accum1 += widemul(aa[j-i],bb[i]); | |||
| accum0 += widemul(a[8+j-i], b[8+i]); | |||
| } | |||
| }); | |||
| accum1 -= accum2; | |||
| accum0 += accum2; | |||
| accum2 = 0; | |||
| for (; i<8; i++) { | |||
| FOR_LIMB (i,j+1,8,{ | |||
| accum0 -= widemul(a[8+j-i], b[i]); | |||
| accum2 += widemul(aa[8+j-i], bb[i]); | |||
| accum1 += widemul(a[16+j-i], b[8+i]); | |||
| } | |||
| }); | |||
| accum1 += accum2; | |||
| accum0 += accum2; | |||
| @@ -46,7 +54,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) { | |||
| accum0 >>= 28; | |||
| accum1 >>= 28; | |||
| } | |||
| }); | |||
| accum0 += accum1; | |||
| accum0 += c[8]; | |||
| @@ -66,24 +74,17 @@ void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) { | |||
| const uint32_t *a = as->limb; | |||
| uint32_t *c = cs->limb; | |||
| uint64_t accum0, accum8; | |||
| uint64_t accum0 = 0, accum8 = 0; | |||
| uint32_t mask = (1ull<<28)-1; | |||
| int i; | |||
| accum0 = widemul(b, a[0]); | |||
| accum8 = widemul(b, a[8]); | |||
| c[0] = accum0 & mask; accum0 >>= 28; | |||
| c[8] = accum8 & mask; accum8 >>= 28; | |||
| for (i=1; i<8; i++) { | |||
| FOR_LIMB(i,0,8,{ | |||
| accum0 += widemul(b, a[i]); | |||
| accum8 += widemul(b, a[i+8]); | |||
| c[i] = accum0 & mask; accum0 >>= 28; | |||
| c[i+8] = accum8 & mask; accum8 >>= 28; | |||
| } | |||
| }); | |||
| accum0 += accum8 + c[8]; | |||
| c[8] = accum0 & mask; | |||
| @@ -1145,12 +1145,10 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||
| mask_t low = ~word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] & 0x80); | |||
| enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1] &= ~0x80; | |||
| mask_t succ = DECAF_TRUE; | |||
| mask_t succ = gf_deserialize(p->y, enc2, 1); | |||
| #if $(gf_bits % 8) == 0 | |||
| succ = word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]); | |||
| succ &= word_is_zero(enc2[DECAF_EDDSA_$(gf_shortname)_PRIVATE_BYTES-1]); | |||
| #endif | |||
| succ &= gf_deserialize(p->y, enc2, 1); | |||
| gf_sqr(p->x,p->y); | |||
| gf_sub(p->z,ONE,p->x); /* num = 1-y^2 */ | |||
| @@ -1236,7 +1234,7 @@ decaf_error_t API_NS(point_decode_like_eddsa_and_ignore_cofactor) ( | |||
| decaf_bzero(enc2,sizeof(enc2)); | |||
| assert(API_NS(point_valid)(p) || ~succ); | |||
| return decaf_succeed_if(succ); | |||
| return decaf_succeed_if(mask_to_bool(succ)); | |||
| } | |||
| decaf_error_t decaf_x$(gf_shortname) ( | |||