Continuing demagication and factoring of field code.

Removing high-level ops from p448.h and putting them in field.h. That way they won't need rewriting for new fields and architectures.

Create constant_time.h, which contains constant-time lookups, conditional swaps, etc. That way the code is the same on all architectures, instead of varying depending on whether the field size is a multiple of the vector register size. I should still add a constant_time_select to factor out field_cond_negate.

TODO: I need to test this for correctness and performance on various platforms. It works on my Mac, but since Yosemite the timing has been totally unpredictable (background tasks? variable boost?).

(branch: master)
@@ -22,13 +22,6 @@ p448_set_ui ( | |||
p448_t *out, | |||
uint64_t x | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t do_swap | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_add ( | |||
@@ -114,13 +107,6 @@ p448_sqr ( | |||
p448_t *__restrict__ out, | |||
const p448_t *a | |||
); | |||
static __inline__ void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) __attribute__((unused,always_inline)); | |||
void | |||
p448_serialize ( | |||
@@ -133,24 +119,6 @@ p448_deserialize ( | |||
p448_t *x, | |||
const uint8_t serial[56] | |||
); | |||
static __inline__ void | |||
p448_mask( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) __attribute__((unused,always_inline)); | |||
/** | |||
* Returns 1/x. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
p448_inverse ( | |||
struct p448_t* a, | |||
const struct p448_t* x | |||
); | |||
static inline mask_t | |||
p448_eq ( | |||
@@ -172,24 +140,6 @@ p448_set_ui ( | |||
out->limb[i] = 0; | |||
} | |||
} | |||
void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t doswap | |||
) { | |||
big_register_t *aa = (big_register_t*)a; | |||
big_register_t *bb = (big_register_t*)b; | |||
big_register_t m = br_set_to_mask(doswap); | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
big_register_t x = m & (aa[i]^bb[i]); | |||
aa[i] ^= x; | |||
bb[i] ^= x; | |||
} | |||
} | |||
void | |||
p448_add ( | |||
@@ -315,28 +265,6 @@ p448_weak_reduce ( | |||
a->limb[0] = (a->limb[0] & mask) + tmp; | |||
} | |||
void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) { | |||
p448_t tmp; | |||
assert(n>0); | |||
if (n&1) { | |||
p448_sqr(y,x); | |||
n--; | |||
} else { | |||
p448_sqr(&tmp,x); | |||
p448_sqr(y,&tmp); | |||
n-=2; | |||
} | |||
for (; n; n-=2) { | |||
p448_sqr(&tmp,y); | |||
p448_sqr(y,&tmp); | |||
} | |||
} | |||
mask_t | |||
p448_eq ( | |||
const struct p448_t *a, | |||
@@ -352,18 +280,6 @@ p448_eq ( | |||
return p448_is_zero(&ra); | |||
} | |||
void | |||
p448_mask ( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) { | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
a->limb[i] = b->limb[i] & mask; | |||
} | |||
} | |||
#ifdef __cplusplus | |||
}; /* extern "C" */ | |||
#endif | |||
@@ -22,13 +22,6 @@ p448_set_ui ( | |||
p448_t *out, | |||
uint64_t x | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t do_swap | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_add ( | |||
@@ -114,13 +107,6 @@ p448_sqr ( | |||
p448_t *__restrict__ out, | |||
const p448_t *a | |||
); | |||
static __inline__ void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) __attribute__((unused,always_inline)); | |||
void | |||
p448_serialize ( | |||
@@ -133,24 +119,6 @@ p448_deserialize ( | |||
p448_t *x, | |||
const uint8_t serial[56] | |||
); | |||
static __inline__ void | |||
p448_mask( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) __attribute__((unused,always_inline)); | |||
/** | |||
* Returns 1/x. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
p448_inverse ( | |||
struct p448_t* a, | |||
const struct p448_t* x | |||
); | |||
static inline mask_t | |||
p448_eq ( | |||
@@ -172,28 +140,6 @@ p448_set_ui ( | |||
out->limb[i] = 0; | |||
} | |||
} | |||
void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t doswap | |||
) { | |||
big_register_t *aa = (big_register_t*)a; | |||
big_register_t *bb = (big_register_t*)b; | |||
#if __ARM_NEON__ | |||
big_register_t m = vdupq_n_u32(doswap); | |||
#else | |||
big_register_t m = doswap; | |||
#endif | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
big_register_t x = m & (aa[i]^bb[i]); | |||
aa[i] ^= x; | |||
bb[i] ^= x; | |||
} | |||
} | |||
void | |||
p448_add ( | |||
@@ -323,28 +269,6 @@ p448_weak_reduce ( | |||
a->limb[0] = (a->limb[0] & mask) + tmp; | |||
} | |||
void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) { | |||
p448_t tmp; | |||
assert(n>0); | |||
if (n&1) { | |||
p448_sqr(y,x); | |||
n--; | |||
} else { | |||
p448_sqr(&tmp,x); | |||
p448_sqr(y,&tmp); | |||
n-=2; | |||
} | |||
for (; n; n-=2) { | |||
p448_sqr(&tmp,y); | |||
p448_sqr(y,&tmp); | |||
} | |||
} | |||
mask_t | |||
p448_eq ( | |||
const struct p448_t *a, | |||
@@ -360,18 +284,6 @@ p448_eq ( | |||
return p448_is_zero(&ra); | |||
} | |||
void | |||
p448_mask ( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) { | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
a->limb[i] = b->limb[i] & mask; | |||
} | |||
} | |||
#ifdef __cplusplus | |||
}; /* extern "C" */ | |||
#endif | |||
@@ -22,13 +22,6 @@ p448_set_ui ( | |||
p448_t *out, | |||
uint64_t x | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t do_swap | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_add ( | |||
@@ -114,13 +107,6 @@ p448_sqr ( | |||
p448_t *__restrict__ out, | |||
const p448_t *a | |||
); | |||
static __inline__ void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) __attribute__((unused,always_inline)); | |||
void | |||
p448_serialize ( | |||
@@ -133,24 +119,6 @@ p448_deserialize ( | |||
p448_t *x, | |||
const uint8_t serial[56] | |||
); | |||
static __inline__ void | |||
p448_mask( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) __attribute__((unused,always_inline)); | |||
/** | |||
* Returns 1/x. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
p448_inverse ( | |||
struct p448_t* a, | |||
const struct p448_t* x | |||
); | |||
static inline mask_t | |||
p448_eq ( | |||
@@ -172,24 +140,6 @@ p448_set_ui ( | |||
out->limb[i] = 0; | |||
} | |||
} | |||
void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t doswap | |||
) { | |||
big_register_t *aa = (big_register_t*)a; | |||
big_register_t *bb = (big_register_t*)b; | |||
big_register_t m = br_set_to_mask(doswap); | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
big_register_t x = m & (aa[i]^bb[i]); | |||
aa[i] ^= x; | |||
bb[i] ^= x; | |||
} | |||
} | |||
void | |||
p448_add ( | |||
@@ -315,28 +265,6 @@ p448_weak_reduce ( | |||
a->limb[0] = (a->limb[0] & mask) + tmp; | |||
} | |||
void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) { | |||
p448_t tmp; | |||
assert(n>0); | |||
if (n&1) { | |||
p448_sqr(y,x); | |||
n--; | |||
} else { | |||
p448_sqr(&tmp,x); | |||
p448_sqr(y,&tmp); | |||
n-=2; | |||
} | |||
for (; n; n-=2) { | |||
p448_sqr(&tmp,y); | |||
p448_sqr(y,&tmp); | |||
} | |||
} | |||
mask_t | |||
p448_eq ( | |||
const struct p448_t *a, | |||
@@ -352,18 +280,6 @@ p448_eq ( | |||
return p448_is_zero(&ra); | |||
} | |||
void | |||
p448_mask ( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) { | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
a->limb[i] = b->limb[i] & mask; | |||
} | |||
} | |||
#ifdef __cplusplus | |||
}; /* extern "C" */ | |||
#endif | |||
@@ -25,13 +25,6 @@ p448_set_ui ( | |||
p448_t *out, | |||
uint64_t x | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t do_swap | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_add ( | |||
@@ -117,13 +110,6 @@ p448_sqr ( | |||
p448_t *__restrict__ out, | |||
const p448_t *a | |||
); | |||
static __inline__ void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) __attribute__((unused,always_inline)); | |||
void | |||
p448_serialize ( | |||
@@ -136,24 +122,6 @@ p448_deserialize ( | |||
p448_t *x, | |||
const uint8_t serial[56] | |||
); | |||
static __inline__ void | |||
p448_mask( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) __attribute__((unused,always_inline)); | |||
/** | |||
* Returns 1/x. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
p448_inverse ( | |||
struct p448_t* a, | |||
const struct p448_t* x | |||
); | |||
static inline mask_t | |||
p448_eq ( | |||
@@ -175,24 +143,6 @@ p448_set_ui ( | |||
out->limb[0] = x & ((1<<28)-1); | |||
out->limb[2] = x>>28; | |||
} | |||
void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t doswap | |||
) { | |||
big_register_t *aa = (big_register_t*)a; | |||
big_register_t *bb = (big_register_t*)b; | |||
big_register_t m = br_set_to_mask(doswap); | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
big_register_t x = m & (aa[i]^bb[i]); | |||
aa[i] ^= x; | |||
bb[i] ^= x; | |||
} | |||
} | |||
void | |||
p448_add ( | |||
@@ -313,28 +263,6 @@ p448_weak_reduce ( | |||
aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2); | |||
} | |||
void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) { | |||
p448_t tmp; | |||
assert(n>0); | |||
if (n&1) { | |||
p448_sqr(y,x); | |||
n--; | |||
} else { | |||
p448_sqr(&tmp,x); | |||
p448_sqr(y,&tmp); | |||
n-=2; | |||
} | |||
for (; n; n-=2) { | |||
p448_sqr(&tmp,y); | |||
p448_sqr(y,&tmp); | |||
} | |||
} | |||
mask_t | |||
p448_eq ( | |||
const struct p448_t *a, | |||
@@ -350,18 +278,6 @@ p448_eq ( | |||
return p448_is_zero(&ra); | |||
} | |||
void | |||
p448_mask ( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) { | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
a->limb[i] = b->limb[i] & mask; | |||
} | |||
} | |||
#ifdef __cplusplus | |||
}; /* extern "C" */ | |||
#endif | |||
@@ -23,13 +23,6 @@ p448_set_ui ( | |||
p448_t *out, | |||
uint64_t x | |||
) __attribute__((unused)); | |||
static __inline__ void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t do_swap | |||
) __attribute__((unused)); | |||
static __inline__ void | |||
p448_add ( | |||
@@ -121,13 +114,6 @@ p448_sqr ( | |||
p448_t *__restrict__ out, | |||
const p448_t *a | |||
); | |||
static __inline__ void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) __attribute__((unused)); | |||
void | |||
p448_serialize ( | |||
@@ -140,24 +126,6 @@ p448_deserialize ( | |||
p448_t *x, | |||
const uint8_t serial[56] | |||
); | |||
static __inline__ void | |||
p448_mask( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) __attribute__((unused)); | |||
/** | |||
* Returns 1/x. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
p448_inverse ( | |||
struct p448_t* a, | |||
const struct p448_t* x | |||
); | |||
static inline mask_t | |||
p448_eq ( | |||
@@ -178,20 +146,6 @@ p448_set_ui ( | |||
out->limb[i] = 0; | |||
} | |||
} | |||
void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t doswap | |||
) { | |||
unsigned int i; | |||
for (i=0; i<8; i++) { | |||
uint64_t x = doswap & (a->limb[i]^b->limb[i]); | |||
a->limb[i] ^= x; | |||
b->limb[i] ^= x; | |||
} | |||
} | |||
void | |||
p448_add ( | |||
@@ -313,28 +267,6 @@ p448_weak_reduce ( | |||
a->limb[0] = (a->limb[0] & mask) + tmp; | |||
} | |||
void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) { | |||
p448_t tmp; | |||
assert(n>0); | |||
if (n&1) { | |||
p448_sqr(y,x); | |||
n--; | |||
} else { | |||
p448_sqr(&tmp,x); | |||
p448_sqr(y,&tmp); | |||
n-=2; | |||
} | |||
for (; n; n-=2) { | |||
p448_sqr(&tmp,y); | |||
p448_sqr(y,&tmp); | |||
} | |||
} | |||
mask_t | |||
p448_eq ( | |||
const struct p448_t *a, | |||
@@ -347,18 +279,6 @@ p448_eq ( | |||
return p448_is_zero(&ra); | |||
} | |||
void | |||
p448_mask ( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) { | |||
unsigned int i; | |||
for (i=0; i<8; i++) { | |||
a->limb[i] = b->limb[i] & mask; | |||
} | |||
} | |||
#ifdef __cplusplus | |||
}; /* extern "C" */ | |||
#endif | |||
@@ -22,13 +22,6 @@ p448_set_ui ( | |||
p448_t *out, | |||
uint64_t x | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t do_swap | |||
) __attribute__((unused,always_inline)); | |||
static __inline__ void | |||
p448_add ( | |||
@@ -114,13 +107,6 @@ p448_sqr ( | |||
p448_t *__restrict__ out, | |||
const p448_t *a | |||
); | |||
static __inline__ void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) __attribute__((unused,always_inline)); | |||
void | |||
p448_serialize ( | |||
@@ -133,24 +119,6 @@ p448_deserialize ( | |||
p448_t *x, | |||
const uint8_t serial[56] | |||
); | |||
static __inline__ void | |||
p448_mask( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) __attribute__((unused,always_inline)); | |||
/** | |||
* Returns 1/x. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
p448_inverse ( | |||
struct p448_t* a, | |||
const struct p448_t* x | |||
); | |||
static inline mask_t | |||
p448_eq ( | |||
@@ -171,24 +139,6 @@ p448_set_ui ( | |||
out->limb[i] = 0; | |||
} | |||
} | |||
void | |||
p448_cond_swap ( | |||
p448_t *a, | |||
p448_t *b, | |||
mask_t doswap | |||
) { | |||
big_register_t *aa = (big_register_t*)a; | |||
big_register_t *bb = (big_register_t*)b; | |||
big_register_t m = br_set_to_mask(doswap); | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) { | |||
big_register_t x = m & (aa[i]^bb[i]); | |||
aa[i] ^= x; | |||
bb[i] ^= x; | |||
} | |||
} | |||
void | |||
p448_add ( | |||
@@ -331,55 +281,6 @@ p448_weak_reduce ( | |||
a->limb[0] = (a->limb[0] & mask) + tmp; | |||
} | |||
void | |||
p448_sqrn ( | |||
p448_t *__restrict__ y, | |||
const p448_t *x, | |||
int n | |||
) { | |||
p448_t tmp; | |||
assert(n>0); | |||
if (n&1) { | |||
p448_sqr(y,x); | |||
n--; | |||
} else { | |||
p448_sqr(&tmp,x); | |||
p448_sqr(y,&tmp); | |||
n-=2; | |||
} | |||
for (; n; n-=2) { | |||
p448_sqr(&tmp,y); | |||
p448_sqr(y,&tmp); | |||
} | |||
} | |||
mask_t | |||
p448_eq ( | |||
const struct p448_t *a, | |||
const struct p448_t *b | |||
) { | |||
struct p448_t ra, rb; | |||
p448_copy(&ra, a); | |||
p448_copy(&rb, b); | |||
p448_weak_reduce(&ra); | |||
p448_weak_reduce(&rb); | |||
p448_sub(&ra, &ra, &rb); | |||
p448_bias(&ra, 2); | |||
return p448_is_zero(&ra); | |||
} | |||
void | |||
p448_mask ( | |||
struct p448_t *a, | |||
const struct p448_t *b, | |||
mask_t mask | |||
) { | |||
unsigned int i; | |||
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) { | |||
a->limb[i] = b->limb[i] & mask; | |||
} | |||
} | |||
#ifdef __cplusplus | |||
}; /* extern "C" */ | |||
#endif | |||
@@ -11,6 +11,21 @@ | |||
#include "field.h" | |||
#include "ec_point.h" // TODO | |||
mask_t | |||
field_eq ( | |||
const struct field_t *a, | |||
const struct field_t *b | |||
) { | |||
struct field_t ra, rb; | |||
field_copy(&ra, a); | |||
field_copy(&rb, b); | |||
field_weak_reduce(&ra); | |||
field_weak_reduce(&rb); | |||
field_sub(&ra, &ra, &rb); | |||
field_bias(&ra, 2); | |||
return field_is_zero(&ra); | |||
} | |||
void | |||
field_inverse ( | |||
struct field_t* a, | |||
@@ -52,8 +52,30 @@ field_mulw_scc_wr ( | |||
field_weak_reduce(out); | |||
} | |||
void | |||
field_isr ( | |||
static __inline__ void | |||
field_sqrn ( | |||
field_t *__restrict__ y, | |||
const field_t *x, | |||
int n | |||
) { | |||
field_t tmp; | |||
assert(n>0); | |||
if (n&1) { | |||
field_sqr(y,x); | |||
n--; | |||
} else { | |||
field_sqr(&tmp,x); | |||
field_sqr(y,&tmp); | |||
n-=2; | |||
} | |||
for (; n; n-=2) { | |||
field_sqr(&tmp,y); | |||
field_sqr(y,&tmp); | |||
} | |||
} | |||
void | |||
field_isr ( /* TODO: MAGIC */ | |||
struct field_t* a, | |||
const struct field_t* x | |||
) { | |||
@@ -433,7 +455,7 @@ serialize_montgomery ( | |||
field_mul ( &L0, &a->xd, &L2 ); | |||
L5 = field_is_zero( &a->zd ); | |||
L6 = - L5; | |||
field_mask ( &L1, &L0, L5 ); | |||
constant_time_mask ( &L1, &L0, sizeof(L1), L5 ); | |||
field_add ( &L2, &L1, &a->zd ); | |||
L4 = ~ L5; | |||
field_mul ( &L1, sbz, &L3 ); | |||
@@ -446,7 +468,7 @@ serialize_montgomery ( | |||
field_mul ( &L2, &L1, &L0 ); | |||
field_sqr ( &L1, &L0 ); | |||
field_mul ( &L0, &L3, &L1 ); | |||
field_mask ( b, &L2, L4 ); | |||
constant_time_mask ( b, &L2, sizeof(L1), L4 ); | |||
field_subw ( &L0, 1 ); | |||
field_bias ( &L0, 1 ); | |||
L5 = field_is_zero( &L0 ); | |||
@@ -0,0 +1,230 @@ | |||
/** | |||
* @file constant_time.h | |||
* @copyright | |||
* Copyright (c) 2014 Cryptography Research, Inc. \n | |||
* Released under the MIT License. See LICENSE.txt for license information. | |||
* @author Mike Hamburg | |||
* | |||
* @brief Constant-time routines. | |||
*/ | |||
#ifndef __CONSTANT_TIME_H__ | |||
#define __CONSTANT_TIME_H__ 1 | |||
#include "word.h" | |||
/* | |||
* Constant-time operations on hopefully-compile-time-sized memory | |||
* regions. Needed for flexibility / demagication: not all fields | |||
* have sizes which are multiples of the vector width, necessitating | |||
* a change from the Ed448 versions. | |||
* | |||
* These routines would be much simpler to define at the byte level, | |||
* but if not vectorized they would be a significant fraction of the | |||
* runtime. Eg on NEON-less ARM, constant_time_lookup is like 15% of | |||
* signing time, vs 6% on Haswell with its fancy AVX2 vectors. | |||
* | |||
* If the compiler could do a good job of autovectorizing the code, | |||
* we could just leave it with the byte definition. But that's unlikely | |||
* on most deployed compilers, especially if you consider that pcmpeq[size] | |||
* is much faster than moving a scalar to the vector unit (which is what | |||
* a naive autovectorizer will do with constant_time_lookup on Intel). | |||
* | |||
* Instead, we're putting our trust in the loop unroller and unswitcher. | |||
* | |||
* TODO: verify correctness and performance on each platform, to make sure | |||
* that there are no regressions. | |||
*/ | |||
/** | |||
* Unaligned big (vector?) register. | |||
*/ | |||
typedef struct { | |||
big_register_t unaligned; | |||
} __attribute__((packed)) unaligned_br_t; | |||
/** | |||
* Unaligned word register, for architectures where that matters. | |||
*/ | |||
typedef struct { | |||
word_t unaligned; | |||
} __attribute__((packed)) unaligned_word_t; | |||
/** | |||
* @brief Constant-time conditional swap. | |||
* | |||
* If doswap, then swap elem_bytes between *a and *b. | |||
* | |||
* *a and *b must not alias. Also, they must be at least as aligned | |||
* as their sizes, if the CPU cares about that sort of thing. | |||
*/ | |||
static __inline__ void | |||
__attribute__((unused,always_inline)) | |||
constant_time_cond_swap ( | |||
void *__restrict__ a_, | |||
void *__restrict__ b_, | |||
word_t elem_bytes, | |||
mask_t doswap | |||
) { | |||
word_t k; | |||
unsigned char *a = (unsigned char *)a_; | |||
unsigned char *b = (unsigned char *)b_; | |||
big_register_t br_mask = br_set_to_mask(doswap); | |||
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||
if (elem_bytes % sizeof(big_register_t)) { | |||
/* unaligned */ | |||
big_register_t xor = | |||
((unaligned_br_t*)(&a[k]))->unaligned | |||
^ ((unaligned_br_t*)(&b[k]))->unaligned; | |||
xor &= br_mask; | |||
((unaligned_br_t*)(&a[k]))->unaligned ^= xor; | |||
((unaligned_br_t*)(&b[k]))->unaligned ^= xor; | |||
} else { | |||
/* aligned */ | |||
big_register_t xor = | |||
*((big_register_t*)(&a[k])) | |||
^ *((big_register_t*)(&b[k])); | |||
xor &= br_mask; | |||
*((big_register_t*)(&a[k])) ^= xor; | |||
*((big_register_t*)(&b[k])) ^= xor; | |||
} | |||
} | |||
if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||
for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||
if (elem_bytes % sizeof(word_t)) { | |||
/* unaligned */ | |||
word_t xor = | |||
((unaligned_word_t*)(&a[k]))->unaligned | |||
^ ((unaligned_word_t*)(&b[k]))->unaligned; | |||
xor &= doswap; | |||
((unaligned_word_t*)(&a[k]))->unaligned ^= xor; | |||
((unaligned_word_t*)(&b[k]))->unaligned ^= xor; | |||
} else { | |||
/* aligned */ | |||
word_t xor = | |||
*((word_t*)(&a[k])) | |||
^ *((word_t*)(&b[k])); | |||
xor &= doswap; | |||
*((word_t*)(&a[k])) ^= xor; | |||
*((word_t*)(&b[k])) ^= xor; | |||
} | |||
} | |||
} | |||
if (elem_bytes % sizeof(word_t)) { | |||
for (; k<elem_bytes; k+=1) { | |||
unsigned char xor = a[k] ^ b[k]; | |||
xor &= doswap; | |||
a[k] ^= xor; | |||
b[k] ^= xor; | |||
} | |||
} | |||
} | |||
/** | |||
* @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes); | |||
* | |||
* The table must be at least as aligned as elem_bytes. The output must be vector aligned. | |||
* The table and output must not alias. | |||
*/ | |||
static __inline__ void | |||
__attribute__((unused,always_inline)) | |||
constant_time_lookup ( | |||
void *__restrict__ out_, | |||
const void *table_, | |||
word_t elem_bytes, | |||
word_t n_table, | |||
word_t idx | |||
) { | |||
big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx); | |||
/* Can't do pointer arithmetic on void* */ | |||
unsigned char *out = (unsigned char *)out_; | |||
const unsigned char *table = (const unsigned char *)table_; | |||
word_t j,k; | |||
really_memset(out, 0, elem_bytes); | |||
for (j=0; j<n_table; j++, big_i-=big_one) { | |||
big_register_t br_mask = br_is_zero(big_i); | |||
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||
if (elem_bytes % sizeof(big_register_t)) { | |||
/* input unaligned, output aligned */ | |||
*(big_register_t *)(out+k) |= br_mask & ((const unaligned_br_t*)(&table[k+j*elem_bytes]))->unaligned; | |||
} else { | |||
/* aligned */ | |||
*(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]); | |||
} | |||
} | |||
word_t mask = word_is_zero(idx^j); | |||
if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||
for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||
if (elem_bytes % sizeof(word_t)) { | |||
/* input unaligned, output aligned */ | |||
*(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned; | |||
} else { | |||
/* aligned */ | |||
*(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]); | |||
} | |||
} | |||
} | |||
if (elem_bytes % sizeof(word_t)) { | |||
for (; k<elem_bytes; k+=1) { | |||
out[k] |= mask & table[k+j*elem_bytes]; | |||
} | |||
} | |||
} | |||
} | |||
/** | |||
* @brief Constant-time a = b&mask. | |||
* | |||
* The input and output must be at least as aligned as elem_bytes. | |||
*/ | |||
static __inline__ void | |||
__attribute__((unused,always_inline)) | |||
constant_time_mask ( | |||
void *__restrict__ a_, | |||
const void *b_, | |||
word_t elem_bytes, | |||
mask_t mask | |||
) { | |||
unsigned char *a = (unsigned char *)a_; | |||
const unsigned char *b = (const unsigned char *)b_; | |||
word_t k; | |||
big_register_t br_mask = br_set_to_mask(mask); | |||
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { | |||
if (elem_bytes % sizeof(big_register_t)) { | |||
/* unaligned */ | |||
((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned; | |||
} else { | |||
/* aligned */ | |||
*(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]); | |||
} | |||
} | |||
if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { | |||
for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { | |||
if (elem_bytes % sizeof(word_t)) { | |||
/* unaligned */ | |||
((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned; | |||
} else { | |||
/* aligned */ | |||
*(word_t *)(a+k) = mask & *(const word_t*)(&b[k]); | |||
} | |||
} | |||
} | |||
if (elem_bytes % sizeof(word_t)) { | |||
for (; k<elem_bytes; k+=1) { | |||
a[k] = mask & b[k]; | |||
} | |||
} | |||
} | |||
#endif /* __CONSTANT_TIME_H__ */ |
@@ -11,6 +11,7 @@ | |||
#define __CC_INCLUDED_EC_POINT_H__ | |||
#include "field.h" | |||
#include "constant_time.h" | |||
#ifdef __cplusplus | |||
extern "C" { | |||
@@ -150,43 +151,6 @@ copy_tw_pniels ( | |||
const struct tw_pniels_t* ds | |||
) __attribute__((unused,always_inline)); | |||
/** | |||
* Returns 1/sqrt(+- x). | |||
* | |||
* The Legendre symbol of the result is the same as that of the | |||
* input. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
field_isr ( | |||
struct field_t* a, | |||
const struct field_t* x | |||
); | |||
/** | |||
* Batch inverts out[i] = 1/in[i] | |||
* | |||
* If any input is zero, all the outputs will be zero. | |||
*/ | |||
void | |||
field_simultaneous_invert ( | |||
struct p448_t *__restrict__ out, | |||
const struct p448_t *in, | |||
unsigned int n | |||
); | |||
/** | |||
* Returns 1/x. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
field_inverse ( | |||
struct field_t* a, | |||
const struct field_t* x | |||
); | |||
/** | |||
* Add two points on a twisted Edwards curve, one in Extensible form | |||
* and the other in half-Niels form. | |||
@@ -490,7 +454,7 @@ cond_negate_tw_niels ( | |||
struct tw_niels_t *n, | |||
mask_t doNegate | |||
) { | |||
field_cond_swap(&n->a, &n->b, doNegate); | |||
constant_time_cond_swap(&n->a, &n->b, sizeof(n->a), doNegate); | |||
field_cond_neg(&n->c, doNegate); | |||
} | |||
@@ -9,21 +9,13 @@ | |||
#ifndef __FIELD_H__ | |||
#define __FIELD_H__ | |||
#include "p448.h" | |||
#include <string.h> | |||
#include "p448.h" | |||
#define FIELD_BITS 448 | |||
#define FIELD_BYTES (1+(FIELD_BITS-1)/8) | |||
#define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t)) | |||
/** | |||
* @brief For GMP tests: little-endian representation of the field modulus. | |||
*/ | |||
extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; | |||
#define field_t p448_t | |||
#define field_mul p448_mul | |||
#define field_sqr p448_sqr | |||
#define field_sqrn p448_sqrn | |||
#define field_add p448_add | |||
#define field_sub p448_sub | |||
#define field_mulw p448_mulw | |||
@@ -32,15 +24,80 @@ extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; | |||
#define field_neg p448_neg | |||
#define field_set_ui p448_set_ui | |||
#define field_bias p448_bias | |||
#define field_copy p448_copy | |||
#define field_mask p448_mask | |||
#define field_weak_reduce p448_weak_reduce | |||
#define field_strong_reduce p448_strong_reduce | |||
#define field_cond_swap p448_cond_swap | |||
#define field_cond_neg p448_cond_neg | |||
#define field_serialize p448_serialize | |||
#define field_deserialize p448_deserialize | |||
#define field_eq p448_eq | |||
#define field_is_zero p448_is_zero | |||
/** @brief Bytes in a field element */ | |||
#define FIELD_BYTES (1+(FIELD_BITS-1)/8) | |||
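/**
 * @brief Words in a field element.
 *
 * NOTE(review): the expression below divides a bit count by
 * sizeof(word_t), which is a byte count, so it yields more than the
 * number of words needed to hold FIELD_BITS bits.  Dividing by
 * (8*sizeof(word_t)) would match this description; confirm what callers
 * expect before changing the value.
 */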
#define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t)) | |||
/** | |||
* @brief For GMP tests: little-endian representation of the field modulus. | |||
*/ | |||
extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; | |||
/** | |||
* Copy one field element to another. | |||
*/ | |||
static inline void | |||
__attribute__((unused,always_inline)) | |||
field_copy ( | |||
struct field_t *__restrict__ a, | |||
const struct field_t *__restrict__ b | |||
) { | |||
memcpy(a,b,sizeof(*a)); | |||
} | |||
/** | |||
* Returns 1/sqrt(+- x). | |||
* | |||
* The Legendre symbol of the result is the same as that of the | |||
* input. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
field_isr ( | |||
struct field_t* a, | |||
const struct field_t* x | |||
); | |||
/**
 * Batch inverts out[i] = 1/in[i]
 *
 * If any input is zero, all the outputs will be zero.
 *
 * Declared in terms of field_t like the rest of this header (field_t is
 * a macro for the concrete field type, so the signature is unchanged).
 *
 * @param out Receives the inverses; must not alias in.
 * @param in  Elements to invert.
 * @param n   Presumably the number of elements in both arrays -- confirm
 *            against the definition.
 */
void
field_simultaneous_invert (
    struct field_t *__restrict__ out,
    const struct field_t *in,
    unsigned int n
);
/** | |||
* Returns 1/x. | |||
* | |||
* If x=0, returns 0. | |||
*/ | |||
void | |||
field_inverse ( | |||
struct field_t* a, | |||
const struct field_t* x | |||
); | |||
/** | |||
* Returns -1 if a==b, 0 otherwise. | |||
*/ | |||
mask_t | |||
field_eq ( | |||
const struct field_t *a, | |||
const struct field_t *b | |||
); | |||
#endif /* __FIELD_H__ */ |
@@ -143,6 +143,15 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); | |||
return (big_register_t)x; | |||
} | |||
#endif | |||
/** | |||
* Return -1 if x==0, and 0 otherwise. | |||
*/ | |||
static __inline__ mask_t | |||
__attribute__((always_inline,unused)) | |||
word_is_zero(word_t x) { | |||
return (mask_t)((((dword_t)(x)) - 1)>>WORD_BITS); | |||
} | |||
#if __AVX2__ | |||
static __inline__ big_register_t | |||
@@ -11,6 +11,7 @@ | |||
#include "intrinsics.h" | |||
#include "scalarmul.h" | |||
#include "barrett_field.h" | |||
#include "constant_time.h" | |||
mask_t | |||
montgomery_ladder ( | |||
@@ -29,15 +30,15 @@ montgomery_ladder ( | |||
word_t w = scalar[j]; | |||
for (i=n; i>=0; i--) { | |||
mask_t flip = -((w>>i)&1); | |||
field_cond_swap(&mont.xa,&mont.xd,flip^pflip); | |||
field_cond_swap(&mont.za,&mont.zd,flip^pflip); | |||
constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),flip^pflip); | |||
constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),flip^pflip); | |||
montgomery_step(&mont); | |||
pflip = flip; | |||
} | |||
n = WORD_BITS-1; | |||
} | |||
field_cond_swap(&mont.xa,&mont.xd,pflip); | |||
field_cond_swap(&mont.za,&mont.zd,pflip); | |||
constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),pflip); | |||
constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),pflip); | |||
assert(n_extra_doubles < INT_MAX); | |||
for (j=0; j<(int)n_extra_doubles; j++) { | |||
@@ -47,6 +48,29 @@ montgomery_ladder ( | |||
return serialize_montgomery(out, &mont, in); | |||
} | |||
static __inline__ void | |||
__attribute__((unused,always_inline)) | |||
constant_time_lookup_tw_pniels ( | |||
struct tw_pniels_t *out, | |||
const struct tw_pniels_t *in, | |||
int nin, | |||
int idx | |||
) { | |||
constant_time_lookup(out,in,sizeof(*out),nin,idx); | |||
} | |||
static __inline__ void | |||
__attribute__((unused,always_inline)) | |||
constant_time_lookup_tw_niels ( | |||
struct tw_niels_t *out, | |||
const struct tw_niels_t *in, | |||
int nin, | |||
int idx | |||
) { | |||
constant_time_lookup(out,in,sizeof(*out),nin,idx); | |||
} | |||
/* | |||
static __inline__ void | |||
constant_time_lookup_tw_pniels ( | |||
struct tw_pniels_t *out, | |||
@@ -90,6 +114,7 @@ constant_time_lookup_tw_niels ( | |||
} | |||
} | |||
} | |||
*/ | |||
static void | |||
convert_to_signed_window_form ( | |||