Continuing demagication and factoring of the field code. High-level ops are removed from p448.h and put in field.h, so that they won't need rewriting for new fields and architectures. Created constant_time.h, which contains constant-time lookups, condswaps, etc. That way the code is the same on all architectures, instead of varying depending on whether the field size is a multiple of the vector register size. I should still add a constant_time_select to factor out field_cond_negate. TODO: I need to test this for correctness and performance on various platforms. It works on my Mac, but since Yosemite the timing is totally unpredictable (background tasks? variable boost?). [branch: master]
| @@ -22,13 +22,6 @@ p448_set_ui ( | |||
| p448_t *out, | |||
| uint64_t x | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t do_swap | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_add ( | |||
| @@ -114,13 +107,6 @@ p448_sqr ( | |||
| p448_t *__restrict__ out, | |||
| const p448_t *a | |||
| ); | |||
| static __inline__ void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) __attribute__((unused,always_inline)); | |||
| void | |||
| p448_serialize ( | |||
| @@ -133,24 +119,6 @@ p448_deserialize ( | |||
| p448_t *x, | |||
| const uint8_t serial[56] | |||
| ); | |||
| static __inline__ void | |||
| p448_mask( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) __attribute__((unused,always_inline)); | |||
| /** | |||
| * Returns 1/x. | |||
| * | |||
| * If x=0, returns 0. | |||
| */ | |||
| void | |||
| p448_inverse ( | |||
| struct p448_t* a, | |||
| const struct p448_t* x | |||
| ); | |||
| static inline mask_t | |||
| p448_eq ( | |||
| @@ -172,24 +140,6 @@ p448_set_ui ( | |||
| out->limb[i] = 0; | |||
| } | |||
| } | |||
| void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t doswap | |||
| ) { | |||
| big_register_t *aa = (big_register_t*)a; | |||
| big_register_t *bb = (big_register_t*)b; | |||
| big_register_t m = br_set_to_mask(doswap); | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
| big_register_t x = m & (aa[i]^bb[i]); | |||
| aa[i] ^= x; | |||
| bb[i] ^= x; | |||
| } | |||
| } | |||
| void | |||
| p448_add ( | |||
| @@ -315,28 +265,6 @@ p448_weak_reduce ( | |||
| a->limb[0] = (a->limb[0] & mask) + tmp; | |||
| } | |||
| void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) { | |||
| p448_t tmp; | |||
| assert(n>0); | |||
| if (n&1) { | |||
| p448_sqr(y,x); | |||
| n--; | |||
| } else { | |||
| p448_sqr(&tmp,x); | |||
| p448_sqr(y,&tmp); | |||
| n-=2; | |||
| } | |||
| for (; n; n-=2) { | |||
| p448_sqr(&tmp,y); | |||
| p448_sqr(y,&tmp); | |||
| } | |||
| } | |||
| mask_t | |||
| p448_eq ( | |||
| const struct p448_t *a, | |||
| @@ -352,18 +280,6 @@ p448_eq ( | |||
| return p448_is_zero(&ra); | |||
| } | |||
| void | |||
| p448_mask ( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
| a->limb[i] = b->limb[i] & mask; | |||
| } | |||
| } | |||
| #ifdef __cplusplus | |||
| }; /* extern "C" */ | |||
| #endif | |||
| @@ -22,13 +22,6 @@ p448_set_ui ( | |||
| p448_t *out, | |||
| uint64_t x | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t do_swap | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_add ( | |||
| @@ -114,13 +107,6 @@ p448_sqr ( | |||
| p448_t *__restrict__ out, | |||
| const p448_t *a | |||
| ); | |||
| static __inline__ void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) __attribute__((unused,always_inline)); | |||
| void | |||
| p448_serialize ( | |||
| @@ -133,24 +119,6 @@ p448_deserialize ( | |||
| p448_t *x, | |||
| const uint8_t serial[56] | |||
| ); | |||
| static __inline__ void | |||
| p448_mask( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) __attribute__((unused,always_inline)); | |||
| /** | |||
| * Returns 1/x. | |||
| * | |||
| * If x=0, returns 0. | |||
| */ | |||
| void | |||
| p448_inverse ( | |||
| struct p448_t* a, | |||
| const struct p448_t* x | |||
| ); | |||
| static inline mask_t | |||
| p448_eq ( | |||
| @@ -172,28 +140,6 @@ p448_set_ui ( | |||
| out->limb[i] = 0; | |||
| } | |||
| } | |||
| void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t doswap | |||
| ) { | |||
| big_register_t *aa = (big_register_t*)a; | |||
| big_register_t *bb = (big_register_t*)b; | |||
| #if __ARM_NEON__ | |||
| big_register_t m = vdupq_n_u32(doswap); | |||
| #else | |||
| big_register_t m = doswap; | |||
| #endif | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
| big_register_t x = m & (aa[i]^bb[i]); | |||
| aa[i] ^= x; | |||
| bb[i] ^= x; | |||
| } | |||
| } | |||
| void | |||
| p448_add ( | |||
| @@ -323,28 +269,6 @@ p448_weak_reduce ( | |||
| a->limb[0] = (a->limb[0] & mask) + tmp; | |||
| } | |||
| void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) { | |||
| p448_t tmp; | |||
| assert(n>0); | |||
| if (n&1) { | |||
| p448_sqr(y,x); | |||
| n--; | |||
| } else { | |||
| p448_sqr(&tmp,x); | |||
| p448_sqr(y,&tmp); | |||
| n-=2; | |||
| } | |||
| for (; n; n-=2) { | |||
| p448_sqr(&tmp,y); | |||
| p448_sqr(y,&tmp); | |||
| } | |||
| } | |||
| mask_t | |||
| p448_eq ( | |||
| const struct p448_t *a, | |||
| @@ -360,18 +284,6 @@ p448_eq ( | |||
| return p448_is_zero(&ra); | |||
| } | |||
| void | |||
| p448_mask ( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
| a->limb[i] = b->limb[i] & mask; | |||
| } | |||
| } | |||
| #ifdef __cplusplus | |||
| }; /* extern "C" */ | |||
| #endif | |||
| @@ -22,13 +22,6 @@ p448_set_ui ( | |||
| p448_t *out, | |||
| uint64_t x | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t do_swap | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_add ( | |||
| @@ -114,13 +107,6 @@ p448_sqr ( | |||
| p448_t *__restrict__ out, | |||
| const p448_t *a | |||
| ); | |||
| static __inline__ void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) __attribute__((unused,always_inline)); | |||
| void | |||
| p448_serialize ( | |||
| @@ -133,24 +119,6 @@ p448_deserialize ( | |||
| p448_t *x, | |||
| const uint8_t serial[56] | |||
| ); | |||
| static __inline__ void | |||
| p448_mask( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) __attribute__((unused,always_inline)); | |||
| /** | |||
| * Returns 1/x. | |||
| * | |||
| * If x=0, returns 0. | |||
| */ | |||
| void | |||
| p448_inverse ( | |||
| struct p448_t* a, | |||
| const struct p448_t* x | |||
| ); | |||
| static inline mask_t | |||
| p448_eq ( | |||
| @@ -172,24 +140,6 @@ p448_set_ui ( | |||
| out->limb[i] = 0; | |||
| } | |||
| } | |||
| void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t doswap | |||
| ) { | |||
| big_register_t *aa = (big_register_t*)a; | |||
| big_register_t *bb = (big_register_t*)b; | |||
| big_register_t m = br_set_to_mask(doswap); | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
| big_register_t x = m & (aa[i]^bb[i]); | |||
| aa[i] ^= x; | |||
| bb[i] ^= x; | |||
| } | |||
| } | |||
| void | |||
| p448_add ( | |||
| @@ -315,28 +265,6 @@ p448_weak_reduce ( | |||
| a->limb[0] = (a->limb[0] & mask) + tmp; | |||
| } | |||
| void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) { | |||
| p448_t tmp; | |||
| assert(n>0); | |||
| if (n&1) { | |||
| p448_sqr(y,x); | |||
| n--; | |||
| } else { | |||
| p448_sqr(&tmp,x); | |||
| p448_sqr(y,&tmp); | |||
| n-=2; | |||
| } | |||
| for (; n; n-=2) { | |||
| p448_sqr(&tmp,y); | |||
| p448_sqr(y,&tmp); | |||
| } | |||
| } | |||
| mask_t | |||
| p448_eq ( | |||
| const struct p448_t *a, | |||
| @@ -352,18 +280,6 @@ p448_eq ( | |||
| return p448_is_zero(&ra); | |||
| } | |||
| void | |||
| p448_mask ( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
| a->limb[i] = b->limb[i] & mask; | |||
| } | |||
| } | |||
| #ifdef __cplusplus | |||
| }; /* extern "C" */ | |||
| #endif | |||
| @@ -25,13 +25,6 @@ p448_set_ui ( | |||
| p448_t *out, | |||
| uint64_t x | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t do_swap | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_add ( | |||
| @@ -117,13 +110,6 @@ p448_sqr ( | |||
| p448_t *__restrict__ out, | |||
| const p448_t *a | |||
| ); | |||
| static __inline__ void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) __attribute__((unused,always_inline)); | |||
| void | |||
| p448_serialize ( | |||
| @@ -136,24 +122,6 @@ p448_deserialize ( | |||
| p448_t *x, | |||
| const uint8_t serial[56] | |||
| ); | |||
| static __inline__ void | |||
| p448_mask( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) __attribute__((unused,always_inline)); | |||
| /** | |||
| * Returns 1/x. | |||
| * | |||
| * If x=0, returns 0. | |||
| */ | |||
| void | |||
| p448_inverse ( | |||
| struct p448_t* a, | |||
| const struct p448_t* x | |||
| ); | |||
| static inline mask_t | |||
| p448_eq ( | |||
| @@ -175,24 +143,6 @@ p448_set_ui ( | |||
| out->limb[0] = x & ((1<<28)-1); | |||
| out->limb[2] = x>>28; | |||
| } | |||
| void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t doswap | |||
| ) { | |||
| big_register_t *aa = (big_register_t*)a; | |||
| big_register_t *bb = (big_register_t*)b; | |||
| big_register_t m = br_set_to_mask(doswap); | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
| big_register_t x = m & (aa[i]^bb[i]); | |||
| aa[i] ^= x; | |||
| bb[i] ^= x; | |||
| } | |||
| } | |||
| void | |||
| p448_add ( | |||
| @@ -313,28 +263,6 @@ p448_weak_reduce ( | |||
| aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2); | |||
| } | |||
| void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) { | |||
| p448_t tmp; | |||
| assert(n>0); | |||
| if (n&1) { | |||
| p448_sqr(y,x); | |||
| n--; | |||
| } else { | |||
| p448_sqr(&tmp,x); | |||
| p448_sqr(y,&tmp); | |||
| n-=2; | |||
| } | |||
| for (; n; n-=2) { | |||
| p448_sqr(&tmp,y); | |||
| p448_sqr(y,&tmp); | |||
| } | |||
| } | |||
| mask_t | |||
| p448_eq ( | |||
| const struct p448_t *a, | |||
| @@ -350,18 +278,6 @@ p448_eq ( | |||
| return p448_is_zero(&ra); | |||
| } | |||
| void | |||
| p448_mask ( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
| a->limb[i] = b->limb[i] & mask; | |||
| } | |||
| } | |||
| #ifdef __cplusplus | |||
| }; /* extern "C" */ | |||
| #endif | |||
| @@ -23,13 +23,6 @@ p448_set_ui ( | |||
| p448_t *out, | |||
| uint64_t x | |||
| ) __attribute__((unused)); | |||
| static __inline__ void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t do_swap | |||
| ) __attribute__((unused)); | |||
| static __inline__ void | |||
| p448_add ( | |||
| @@ -121,13 +114,6 @@ p448_sqr ( | |||
| p448_t *__restrict__ out, | |||
| const p448_t *a | |||
| ); | |||
| static __inline__ void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) __attribute__((unused)); | |||
| void | |||
| p448_serialize ( | |||
| @@ -140,24 +126,6 @@ p448_deserialize ( | |||
| p448_t *x, | |||
| const uint8_t serial[56] | |||
| ); | |||
| static __inline__ void | |||
| p448_mask( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) __attribute__((unused)); | |||
| /** | |||
| * Returns 1/x. | |||
| * | |||
| * If x=0, returns 0. | |||
| */ | |||
| void | |||
| p448_inverse ( | |||
| struct p448_t* a, | |||
| const struct p448_t* x | |||
| ); | |||
| static inline mask_t | |||
| p448_eq ( | |||
| @@ -178,20 +146,6 @@ p448_set_ui ( | |||
| out->limb[i] = 0; | |||
| } | |||
| } | |||
| void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t doswap | |||
| ) { | |||
| unsigned int i; | |||
| for (i=0; i<8; i++) { | |||
| uint64_t x = doswap & (a->limb[i]^b->limb[i]); | |||
| a->limb[i] ^= x; | |||
| b->limb[i] ^= x; | |||
| } | |||
| } | |||
| void | |||
| p448_add ( | |||
| @@ -313,28 +267,6 @@ p448_weak_reduce ( | |||
| a->limb[0] = (a->limb[0] & mask) + tmp; | |||
| } | |||
| void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) { | |||
| p448_t tmp; | |||
| assert(n>0); | |||
| if (n&1) { | |||
| p448_sqr(y,x); | |||
| n--; | |||
| } else { | |||
| p448_sqr(&tmp,x); | |||
| p448_sqr(y,&tmp); | |||
| n-=2; | |||
| } | |||
| for (; n; n-=2) { | |||
| p448_sqr(&tmp,y); | |||
| p448_sqr(y,&tmp); | |||
| } | |||
| } | |||
| mask_t | |||
| p448_eq ( | |||
| const struct p448_t *a, | |||
| @@ -347,18 +279,6 @@ p448_eq ( | |||
| return p448_is_zero(&ra); | |||
| } | |||
| void | |||
| p448_mask ( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) { | |||
| unsigned int i; | |||
| for (i=0; i<8; i++) { | |||
| a->limb[i] = b->limb[i] & mask; | |||
| } | |||
| } | |||
| #ifdef __cplusplus | |||
| }; /* extern "C" */ | |||
| #endif | |||
| @@ -22,13 +22,6 @@ p448_set_ui ( | |||
| p448_t *out, | |||
| uint64_t x | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t do_swap | |||
| ) __attribute__((unused,always_inline)); | |||
| static __inline__ void | |||
| p448_add ( | |||
| @@ -114,13 +107,6 @@ p448_sqr ( | |||
| p448_t *__restrict__ out, | |||
| const p448_t *a | |||
| ); | |||
| static __inline__ void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) __attribute__((unused,always_inline)); | |||
| void | |||
| p448_serialize ( | |||
| @@ -133,24 +119,6 @@ p448_deserialize ( | |||
| p448_t *x, | |||
| const uint8_t serial[56] | |||
| ); | |||
| static __inline__ void | |||
| p448_mask( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) __attribute__((unused,always_inline)); | |||
| /** | |||
| * Returns 1/x. | |||
| * | |||
| * If x=0, returns 0. | |||
| */ | |||
| void | |||
| p448_inverse ( | |||
| struct p448_t* a, | |||
| const struct p448_t* x | |||
| ); | |||
| static inline mask_t | |||
| p448_eq ( | |||
| @@ -171,24 +139,6 @@ p448_set_ui ( | |||
| out->limb[i] = 0; | |||
| } | |||
| } | |||
| void | |||
| p448_cond_swap ( | |||
| p448_t *a, | |||
| p448_t *b, | |||
| mask_t doswap | |||
| ) { | |||
| big_register_t *aa = (big_register_t*)a; | |||
| big_register_t *bb = (big_register_t*)b; | |||
| big_register_t m = br_set_to_mask(doswap); | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
| big_register_t x = m & (aa[i]^bb[i]); | |||
| aa[i] ^= x; | |||
| bb[i] ^= x; | |||
| } | |||
| } | |||
| void | |||
| p448_add ( | |||
| @@ -331,55 +281,6 @@ p448_weak_reduce ( | |||
| a->limb[0] = (a->limb[0] & mask) + tmp; | |||
| } | |||
| void | |||
| p448_sqrn ( | |||
| p448_t *__restrict__ y, | |||
| const p448_t *x, | |||
| int n | |||
| ) { | |||
| p448_t tmp; | |||
| assert(n>0); | |||
| if (n&1) { | |||
| p448_sqr(y,x); | |||
| n--; | |||
| } else { | |||
| p448_sqr(&tmp,x); | |||
| p448_sqr(y,&tmp); | |||
| n-=2; | |||
| } | |||
| for (; n; n-=2) { | |||
| p448_sqr(&tmp,y); | |||
| p448_sqr(y,&tmp); | |||
| } | |||
| } | |||
| mask_t | |||
| p448_eq ( | |||
| const struct p448_t *a, | |||
| const struct p448_t *b | |||
| ) { | |||
| struct p448_t ra, rb; | |||
| p448_copy(&ra, a); | |||
| p448_copy(&rb, b); | |||
| p448_weak_reduce(&ra); | |||
| p448_weak_reduce(&rb); | |||
| p448_sub(&ra, &ra, &rb); | |||
| p448_bias(&ra, 2); | |||
| return p448_is_zero(&ra); | |||
| } | |||
| void | |||
| p448_mask ( | |||
| struct p448_t *a, | |||
| const struct p448_t *b, | |||
| mask_t mask | |||
| ) { | |||
| unsigned int i; | |||
| for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
| a->limb[i] = b->limb[i] & mask; | |||
| } | |||
| } | |||
| #ifdef __cplusplus | |||
| }; /* extern "C" */ | |||
| #endif | |||
| @@ -11,6 +11,21 @@ | |||
| #include "field.h" | |||
| #include "ec_point.h" // TODO | |||
| mask_t | |||
| field_eq ( | |||
| const struct field_t *a, | |||
| const struct field_t *b | |||
| ) { | |||
| struct field_t ra, rb; | |||
| field_copy(&ra, a); | |||
| field_copy(&rb, b); | |||
| field_weak_reduce(&ra); | |||
| field_weak_reduce(&rb); | |||
| field_sub(&ra, &ra, &rb); | |||
| field_bias(&ra, 2); | |||
| return field_is_zero(&ra); | |||
| } | |||
| void | |||
| field_inverse ( | |||
| struct field_t* a, | |||
| @@ -52,8 +52,30 @@ field_mulw_scc_wr ( | |||
| field_weak_reduce(out); | |||
| } | |||
| void | |||
| field_isr ( | |||
| static __inline__ void | |||
| field_sqrn ( | |||
| field_t *__restrict__ y, | |||
| const field_t *x, | |||
| int n | |||
| ) { | |||
| field_t tmp; | |||
| assert(n>0); | |||
| if (n&1) { | |||
| field_sqr(y,x); | |||
| n--; | |||
| } else { | |||
| field_sqr(&tmp,x); | |||
| field_sqr(y,&tmp); | |||
| n-=2; | |||
| } | |||
| for (; n; n-=2) { | |||
| field_sqr(&tmp,y); | |||
| field_sqr(y,&tmp); | |||
| } | |||
| } | |||
| void | |||
| field_isr ( /* TODO: MAGIC */ | |||
| struct field_t* a, | |||
| const struct field_t* x | |||
| ) { | |||
| @@ -433,7 +455,7 @@ serialize_montgomery ( | |||
| field_mul ( &L0, &a->xd, &L2 ); | |||
| L5 = field_is_zero( &a->zd ); | |||
| L6 = - L5; | |||
| field_mask ( &L1, &L0, L5 ); | |||
| constant_time_mask ( &L1, &L0, sizeof(L1), L5 ); | |||
| field_add ( &L2, &L1, &a->zd ); | |||
| L4 = ~ L5; | |||
| field_mul ( &L1, sbz, &L3 ); | |||
| @@ -446,7 +468,7 @@ serialize_montgomery ( | |||
| field_mul ( &L2, &L1, &L0 ); | |||
| field_sqr ( &L1, &L0 ); | |||
| field_mul ( &L0, &L3, &L1 ); | |||
| field_mask ( b, &L2, L4 ); | |||
| constant_time_mask ( b, &L2, sizeof(L1), L4 ); | |||
| field_subw ( &L0, 1 ); | |||
| field_bias ( &L0, 1 ); | |||
| L5 = field_is_zero( &L0 ); | |||
| @@ -0,0 +1,230 @@ | |||
| /** | |||
| * @file constant_time.h | |||
| * @copyright | |||
| * Copyright (c) 2014 Cryptography Research, Inc. \n | |||
| * Released under the MIT License. See LICENSE.txt for license information. | |||
| * @author Mike Hamburg | |||
| * | |||
| * @brief Constant-time routines. | |||
| */ | |||
| #ifndef __CONSTANT_TIME_H__ | |||
| #define __CONSTANT_TIME_H__ 1 | |||
| #include "word.h" | |||
| /* | |||
| * Constant-time operations on hopefully-compile-time-sized memory | |||
| * regions. Needed for flexibility / demagication: not all fields | |||
| * have sizes which are multiples of the vector width, necessitating | |||
| * a change from the Ed448 versions. | |||
| * | |||
| * These routines would be much simpler to define at the byte level, | |||
| * but if not vectorized they would be a significant fraction of the | |||
| * runtime. Eg on NEON-less ARM, constant_time_lookup is like 15% of | |||
| * signing time, vs 6% on Haswell with its fancy AVX2 vectors. | |||
| * | |||
| * If the compiler could do a good job of autovectorizing the code, | |||
| * we could just leave it with the byte definition. But that's unlikely | |||
| * on most deployed compilers, especially if you consider that pcmpeq[size] | |||
| * is much faster than moving a scalar to the vector unit (which is what | |||
| * a naive autovectorizer will do with constant_time_lookup on Intel). | |||
| * | |||
| * Instead, we're putting our trust in the loop unroller and unswitcher. | |||
| * | |||
| * TODO: verify correctness and performance on each platform, to make sure | |||
| * that there are no regressions. | |||
| */ | |||
| /** | |||
| * Unaligned big (vector?) register. | |||
| */ | |||
| typedef struct { | |||
| big_register_t unaligned; | |||
| } __attribute__((packed)) unaligned_br_t; | |||
| /** | |||
| * Unaligned word register, for architectures where that matters. | |||
| */ | |||
| typedef struct { | |||
| word_t unaligned; | |||
| } __attribute__((packed)) unaligned_word_t; | |||
| /** | |||
| * @brief Constant-time conditional swap. | |||
| * | |||
| * If doswap, then swap elem_bytes between *a and *b. | |||
| * | |||
| * *a and *b must not alias. Also, they must be at least as aligned | |||
| * as their sizes, if the CPU cares about that sort of thing. | |||
| */ | |||
| static __inline__ void | |||
| __attribute__((unused,always_inline)) | |||
| constant_time_cond_swap ( | |||
| void *__restrict__ a_, | |||
| void *__restrict__ b_, | |||
| word_t elem_bytes, | |||
| mask_t doswap | |||
| ) { | |||
| word_t k; | |||
| unsigned char *a = (unsigned char *)a_; | |||
| unsigned char *b = (unsigned char *)b_; | |||
| big_register_t br_mask = br_set_to_mask(doswap); | |||
| for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||
| if (elem_bytes % sizeof(big_register_t)) { | |||
| /* unaligned */ | |||
| big_register_t xor = | |||
| ((unaligned_br_t*)(&a[k]))->unaligned | |||
| ^ ((unaligned_br_t*)(&b[k]))->unaligned; | |||
| xor &= br_mask; | |||
| ((unaligned_br_t*)(&a[k]))->unaligned ^= xor; | |||
| ((unaligned_br_t*)(&b[k]))->unaligned ^= xor; | |||
| } else { | |||
| /* aligned */ | |||
| big_register_t xor = | |||
| *((big_register_t*)(&a[k])) | |||
| ^ *((big_register_t*)(&b[k])); | |||
| xor &= br_mask; | |||
| *((big_register_t*)(&a[k])) ^= xor; | |||
| *((big_register_t*)(&b[k])) ^= xor; | |||
| } | |||
| } | |||
| if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||
| for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||
| if (elem_bytes % sizeof(word_t)) { | |||
| /* unaligned */ | |||
| word_t xor = | |||
| ((unaligned_word_t*)(&a[k]))->unaligned | |||
| ^ ((unaligned_word_t*)(&b[k]))->unaligned; | |||
| xor &= doswap; | |||
| ((unaligned_word_t*)(&a[k]))->unaligned ^= xor; | |||
| ((unaligned_word_t*)(&b[k]))->unaligned ^= xor; | |||
| } else { | |||
| /* aligned */ | |||
| word_t xor = | |||
| *((word_t*)(&a[k])) | |||
| ^ *((word_t*)(&b[k])); | |||
| xor &= doswap; | |||
| *((word_t*)(&a[k])) ^= xor; | |||
| *((word_t*)(&b[k])) ^= xor; | |||
| } | |||
| } | |||
| } | |||
| if (elem_bytes % sizeof(word_t)) { | |||
| for (; k<elem_bytes; k+=1) { | |||
| unsigned char xor = a[k] ^ b[k]; | |||
| xor &= doswap; | |||
| a[k] ^= xor; | |||
| b[k] ^= xor; | |||
| } | |||
| } | |||
| } | |||
| /** | |||
| * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes); | |||
| * | |||
| * The table must be at least as aligned as elem_bytes. The output must be vector aligned. | |||
| * The table and output must not alias. | |||
| */ | |||
| static __inline__ void | |||
| __attribute__((unused,always_inline)) | |||
| constant_time_lookup ( | |||
| void *__restrict__ out_, | |||
| const void *table_, | |||
| word_t elem_bytes, | |||
| word_t n_table, | |||
| word_t idx | |||
| ) { | |||
| big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx); | |||
| /* Can't do pointer arithmetic on void* */ | |||
| unsigned char *out = (unsigned char *)out_; | |||
| const unsigned char *table = (const unsigned char *)table_; | |||
| word_t j,k; | |||
| really_memset(out, 0, elem_bytes); | |||
| for (j=0; j<n_table; j++, big_i-=big_one) { | |||
| big_register_t br_mask = br_is_zero(big_i); | |||
| for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||
| if (elem_bytes % sizeof(big_register_t)) { | |||
| /* input unaligned, output aligned */ | |||
| *(big_register_t *)(out+k) |= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned; | |||
| } else { | |||
| /* aligned */ | |||
| *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]); | |||
| } | |||
| } | |||
| word_t mask = word_is_zero(idx^j); | |||
| if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||
| for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||
| if (elem_bytes % sizeof(word_t)) { | |||
| /* input unaligned, output aligned */ | |||
| *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned; | |||
| } else { | |||
| /* aligned */ | |||
| *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]); | |||
| } | |||
| } | |||
| } | |||
| if (elem_bytes % sizeof(word_t)) { | |||
| for (; k<elem_bytes; k+=1) { | |||
| out[k] |= mask & table[k+j*elem_bytes]; | |||
| } | |||
| } | |||
| } | |||
| } | |||
| /** | |||
| * @brief Constant-time a = b&mask. | |||
| * | |||
| * The input and output must be at least as aligned as elem_bytes. | |||
| */ | |||
| static __inline__ void | |||
| __attribute__((unused,always_inline)) | |||
| constant_time_mask ( | |||
| void *__restrict__ a_, | |||
| const void *b_, | |||
| word_t elem_bytes, | |||
| mask_t mask | |||
| ) { | |||
| unsigned char *a = (unsigned char *)a_; | |||
| const unsigned char *b = (const unsigned char *)b_; | |||
| word_t k; | |||
| big_register_t br_mask = br_set_to_mask(mask); | |||
| for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||
| if (elem_bytes % sizeof(big_register_t)) { | |||
| /* unaligned */ | |||
| ((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned; | |||
| } else { | |||
| /* aligned */ | |||
| *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]); | |||
| } | |||
| } | |||
| if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||
| for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||
| if (elem_bytes % sizeof(word_t)) { | |||
| /* unaligned */ | |||
| ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned; | |||
| } else { | |||
| /* aligned */ | |||
| *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]); | |||
| } | |||
| } | |||
| } | |||
| if (elem_bytes % sizeof(word_t)) { | |||
| for (; k<elem_bytes; k+=1) { | |||
| a[k] = mask & b[k]; | |||
| } | |||
| } | |||
| } | |||
| #endif /* __CONSTANT_TIME_H__ */ | |||
| @@ -11,6 +11,7 @@ | |||
| #define __CC_INCLUDED_EC_POINT_H__ | |||
| #include "field.h" | |||
| #include "constant_time.h" | |||
| #ifdef __cplusplus | |||
| extern "C" { | |||
| @@ -150,43 +151,6 @@ copy_tw_pniels ( | |||
| const struct tw_pniels_t* ds | |||
| ) __attribute__((unused,always_inline)); | |||
| /** | |||
| * Returns 1/sqrt(+- x). | |||
| * | |||
| * The Legendre symbol of the result is the same as that of the | |||
| * input. | |||
| * | |||
| * If x=0, returns 0. | |||
| */ | |||
| void | |||
| field_isr ( | |||
| struct field_t* a, | |||
| const struct field_t* x | |||
| ); | |||
| /** | |||
| * Batch inverts out[i] = 1/in[i] | |||
| * | |||
| * If any input is zero, all the outputs will be zero. | |||
| */ | |||
| void | |||
| field_simultaneous_invert ( | |||
| struct p448_t *__restrict__ out, | |||
| const struct p448_t *in, | |||
| unsigned int n | |||
| ); | |||
| /** | |||
| * Returns 1/x. | |||
| * | |||
| * If x=0, returns 0. | |||
| */ | |||
| void | |||
| field_inverse ( | |||
| struct field_t* a, | |||
| const struct field_t* x | |||
| ); | |||
| /** | |||
| * Add two points on a twisted Edwards curve, one in Extensible form | |||
| * and the other in half-Niels form. | |||
| @@ -490,7 +454,7 @@ cond_negate_tw_niels ( | |||
| struct tw_niels_t *n, | |||
| mask_t doNegate | |||
| ) { | |||
| field_cond_swap(&n->a, &n->b, doNegate); | |||
| constant_time_cond_swap(&n->a, &n->b, sizeof(n->a), doNegate); | |||
| field_cond_neg(&n->c, doNegate); | |||
| } | |||
| @@ -9,21 +9,13 @@ | |||
| #ifndef __FIELD_H__ | |||
| #define __FIELD_H__ | |||
| #include "p448.h" | |||
| #include <string.h> | |||
| #include "p448.h" | |||
| #define FIELD_BITS 448 | |||
| #define FIELD_BYTES (1+(FIELD_BITS-1)/8) | |||
| #define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t)) | |||
| /** | |||
| * @brief For GMP tests: little-endian representation of the field modulus. | |||
| */ | |||
| extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; | |||
| #define field_t p448_t | |||
| #define field_mul p448_mul | |||
| #define field_sqr p448_sqr | |||
| #define field_sqrn p448_sqrn | |||
| #define field_add p448_add | |||
| #define field_sub p448_sub | |||
| #define field_mulw p448_mulw | |||
| @@ -32,15 +24,80 @@ extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; | |||
| #define field_neg p448_neg | |||
| #define field_set_ui p448_set_ui | |||
| #define field_bias p448_bias | |||
| #define field_copy p448_copy | |||
| #define field_mask p448_mask | |||
| #define field_weak_reduce p448_weak_reduce | |||
| #define field_strong_reduce p448_strong_reduce | |||
| #define field_cond_swap p448_cond_swap | |||
| #define field_cond_neg p448_cond_neg | |||
| #define field_serialize p448_serialize | |||
| #define field_deserialize p448_deserialize | |||
| #define field_eq p448_eq | |||
| #define field_is_zero p448_is_zero | |||
/** @brief Bytes in a field element (FIELD_BITS rounded up to whole bytes) */
#define FIELD_BYTES (1+(FIELD_BITS-1)/8)
/**
 * @brief Words in a field element (FIELD_BYTES rounded up to whole words).
 *
 * sizeof(word_t) is a byte count, so the division must start from
 * FIELD_BYTES, not FIELD_BITS.
 */
#define FIELD_WORDS (1+(FIELD_BYTES-1)/sizeof(word_t))
| /** | |||
| * @brief For GMP tests: little-endian representation of the field modulus. | |||
| */ | |||
| extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; | |||
| /** | |||
| * Copy one field element to another. | |||
| */ | |||
| static inline void | |||
| __attribute__((unused,always_inline)) | |||
| field_copy ( | |||
| struct field_t *__restrict__ a, | |||
| const struct field_t *__restrict__ b | |||
| ) { | |||
| memcpy(a,b,sizeof(*a)); | |||
| } | |||
/**
 * Inverse square root: a = 1/sqrt(+- x).
 *
 * The result has the same Legendre symbol as the input.
 * An input of zero yields zero.
 *
 * @param a Output element.
 * @param x Input element.
 */
void field_isr (struct field_t *a, const struct field_t *x);
/**
 * Batch inverts out[i] = 1/in[i]
 *
 * If any input is zero, all the outputs will be zero.
 *
 * Declared in terms of field_t like the rest of this header, rather than
 * naming p448_t directly, so it does not need editing for another field.
 *
 * @param out Array of n results; must not alias in.
 * @param in  Array of n elements to invert.
 * @param n   Number of elements in each array.
 */
void
field_simultaneous_invert (
    struct field_t *__restrict__ out,
    const struct field_t *in,
    unsigned int n
);
/**
 * Field inversion: a = 1/x.
 *
 * An input of zero yields zero.
 *
 * @param a Output element.
 * @param x Input element.
 */
void field_inverse (struct field_t *a, const struct field_t *x);
| /** | |||
| * Returns -1 if a==b, 0 otherwise. | |||
| */ | |||
| mask_t | |||
| field_eq ( | |||
| const struct field_t *a, | |||
| const struct field_t *b | |||
| ); | |||
| #endif /* __FIELD_H__ */ | |||
| @@ -143,6 +143,15 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); | |||
| return (big_register_t)x; | |||
| } | |||
| #endif | |||
| /** | |||
| * Return -1 if x==0, and 0 otherwise. | |||
| */ | |||
| static __inline__ mask_t | |||
| __attribute__((always_inline,unused)) | |||
| word_is_zero(word_t x) { | |||
| return (mask_t)((((dword_t)(x)) - 1)>>WORD_BITS); | |||
| } | |||
| #if __AVX2__ | |||
| static __inline__ big_register_t | |||
| @@ -11,6 +11,7 @@ | |||
| #include "intrinsics.h" | |||
| #include "scalarmul.h" | |||
| #include "barrett_field.h" | |||
| #include "constant_time.h" | |||
| mask_t | |||
| montgomery_ladder ( | |||
| @@ -29,15 +30,15 @@ montgomery_ladder ( | |||
| word_t w = scalar[j]; | |||
| for (i=n; i>=0; i--) { | |||
| mask_t flip = -((w>>i)&1); | |||
| field_cond_swap(&mont.xa,&mont.xd,flip^pflip); | |||
| field_cond_swap(&mont.za,&mont.zd,flip^pflip); | |||
| constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),flip^pflip); | |||
| constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),flip^pflip); | |||
| montgomery_step(&mont); | |||
| pflip = flip; | |||
| } | |||
| n = WORD_BITS-1; | |||
| } | |||
| field_cond_swap(&mont.xa,&mont.xd,pflip); | |||
| field_cond_swap(&mont.za,&mont.zd,pflip); | |||
| constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),pflip); | |||
| constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),pflip); | |||
| assert(n_extra_doubles < INT_MAX); | |||
| for (j=0; j<(int)n_extra_doubles; j++) { | |||
| @@ -47,6 +48,29 @@ montgomery_ladder ( | |||
| return serialize_montgomery(out, &mont, in); | |||
| } | |||
| static __inline__ void | |||
| __attribute__((unused,always_inline)) | |||
| constant_time_lookup_tw_pniels ( | |||
| struct tw_pniels_t *out, | |||
| const struct tw_pniels_t *in, | |||
| int nin, | |||
| int idx | |||
| ) { | |||
| constant_time_lookup(out,in,sizeof(*out),nin,idx); | |||
| } | |||
| static __inline__ void | |||
| __attribute__((unused,always_inline)) | |||
| constant_time_lookup_tw_niels ( | |||
| struct tw_niels_t *out, | |||
| const struct tw_niels_t *in, | |||
| int nin, | |||
| int idx | |||
| ) { | |||
| constant_time_lookup(out,in,sizeof(*out),nin,idx); | |||
| } | |||
| /* | |||
| static __inline__ void | |||
| constant_time_lookup_tw_pniels ( | |||
| struct tw_pniels_t *out, | |||
| @@ -90,6 +114,7 @@ constant_time_lookup_tw_niels ( | |||
| } | |||
| } | |||
| } | |||
| */ | |||
| static void | |||
| convert_to_signed_window_form ( | |||