diff --git a/src/arch_32/p448.h b/src/arch_32/p448.h index 60ecac5..a3b575b 100644 --- a/src/arch_32/p448.h +++ b/src/arch_32/p448.h @@ -22,13 +22,6 @@ p448_set_ui ( p448_t *out, uint64_t x ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t do_swap -) __attribute__((unused,always_inline)); static __inline__ void p448_add ( @@ -114,13 +107,6 @@ p448_sqr ( p448_t *__restrict__ out, const p448_t *a ); - -static __inline__ void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) __attribute__((unused,always_inline)); void p448_serialize ( @@ -133,24 +119,6 @@ p448_deserialize ( p448_t *x, const uint8_t serial[56] ); - -static __inline__ void -p448_mask( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) __attribute__((unused,always_inline)); - -/** -* Returns 1/x. -* -* If x=0, returns 0. -*/ -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -); static inline mask_t p448_eq ( @@ -172,24 +140,6 @@ p448_set_ui ( out->limb[i] = 0; } } - -void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t doswap -) { - big_register_t *aa = (big_register_t*)a; - big_register_t *bb = (big_register_t*)b; - big_register_t m = br_set_to_mask(doswap); - - unsigned int i; - for (i=0; ilimb[0] = (a->limb[0] & mask) + tmp; } -void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) { - p448_t tmp; - assert(n>0); - if (n&1) { - p448_sqr(y,x); - n--; - } else { - p448_sqr(&tmp,x); - p448_sqr(y,&tmp); - n-=2; - } - for (; n; n-=2) { - p448_sqr(&tmp,y); - p448_sqr(y,&tmp); - } -} - mask_t p448_eq ( const struct p448_t *a, @@ -352,18 +280,6 @@ p448_eq ( return p448_is_zero(&ra); } -void -p448_mask ( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) { - unsigned int i; - for (i=0; ilimb[0]); i++) { - a->limb[i] = b->limb[i] & mask; - } -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arch_arm_32/p448.h b/src/arch_arm_32/p448.h index 0878e9c..8419d8b 100644 --- a/src/arch_arm_32/p448.h +++ b/src/arch_arm_32/p448.h @@ -22,13 +22,6 @@ p448_set_ui ( p448_t *out, uint64_t x ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t do_swap -) __attribute__((unused,always_inline)); static __inline__ void p448_add ( @@ -114,13 +107,6 @@ p448_sqr ( p448_t *__restrict__ out, const p448_t *a ); - -static __inline__ void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) __attribute__((unused,always_inline)); void p448_serialize ( @@ -133,24 +119,6 @@ p448_deserialize ( p448_t *x, const uint8_t serial[56] ); - -static __inline__ void -p448_mask( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) __attribute__((unused,always_inline)); - -/** -* Returns 1/x. -* -* If x=0, returns 0. -*/ -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -); static inline mask_t p448_eq ( @@ -172,28 +140,6 @@ p448_set_ui ( out->limb[i] = 0; } } - -void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t doswap -) { - big_register_t *aa = (big_register_t*)a; - big_register_t *bb = (big_register_t*)b; -#if __ARM_NEON__ - big_register_t m = vdupq_n_u32(doswap); -#else - big_register_t m = doswap; -#endif - - unsigned int i; - for (i=0; ilimb[0] = (a->limb[0] & mask) + tmp; } -void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) { - p448_t tmp; - assert(n>0); - if (n&1) { - p448_sqr(y,x); - n--; - } else { - p448_sqr(&tmp,x); - p448_sqr(y,&tmp); - n-=2; - } - for (; n; n-=2) { - p448_sqr(&tmp,y); - p448_sqr(y,&tmp); - } -} - mask_t p448_eq ( const struct p448_t *a, @@ -360,18 +284,6 @@ p448_eq ( return p448_is_zero(&ra); } -void -p448_mask ( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) { - unsigned int i; - for (i=0; ilimb[0]); i++) { - a->limb[i] = b->limb[i] & mask; - } -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arch_neon/p448.h b/src/arch_neon/p448.h index 60ecac5..a3b575b 100644 --- a/src/arch_neon/p448.h +++ b/src/arch_neon/p448.h @@ -22,13 +22,6 @@ p448_set_ui ( p448_t *out, uint64_t x ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t do_swap -) __attribute__((unused,always_inline)); static __inline__ void p448_add ( @@ -114,13 +107,6 @@ p448_sqr ( p448_t *__restrict__ out, const p448_t *a ); - -static __inline__ void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) __attribute__((unused,always_inline)); void p448_serialize ( @@ -133,24 +119,6 @@ p448_deserialize ( p448_t *x, const uint8_t serial[56] ); - -static __inline__ void -p448_mask( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) __attribute__((unused,always_inline)); - -/** -* Returns 1/x. -* -* If x=0, returns 0. -*/ -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -); static inline mask_t p448_eq ( @@ -172,24 +140,6 @@ p448_set_ui ( out->limb[i] = 0; } } - -void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t doswap -) { - big_register_t *aa = (big_register_t*)a; - big_register_t *bb = (big_register_t*)b; - big_register_t m = br_set_to_mask(doswap); - - unsigned int i; - for (i=0; ilimb[0] = (a->limb[0] & mask) + tmp; } -void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) { - p448_t tmp; - assert(n>0); - if (n&1) { - p448_sqr(y,x); - n--; - } else { - p448_sqr(&tmp,x); - p448_sqr(y,&tmp); - n-=2; - } - for (; n; n-=2) { - p448_sqr(&tmp,y); - p448_sqr(y,&tmp); - } -} - mask_t p448_eq ( const struct p448_t *a, @@ -352,18 +280,6 @@ p448_eq ( return p448_is_zero(&ra); } -void -p448_mask ( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) { - unsigned int i; - for (i=0; ilimb[0]); i++) { - a->limb[i] = b->limb[i] & mask; - } -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arch_neon_experimental/p448.h b/src/arch_neon_experimental/p448.h index e2a034d..90c58b4 100644 --- a/src/arch_neon_experimental/p448.h +++ b/src/arch_neon_experimental/p448.h @@ -25,13 +25,6 @@ p448_set_ui ( p448_t *out, uint64_t x ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t do_swap -) __attribute__((unused,always_inline)); static __inline__ void p448_add ( @@ -117,13 +110,6 @@ p448_sqr ( p448_t *__restrict__ out, const p448_t *a ); - -static __inline__ void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) __attribute__((unused,always_inline)); void p448_serialize ( @@ -136,24 +122,6 @@ p448_deserialize ( p448_t *x, const uint8_t serial[56] ); - -static __inline__ void -p448_mask( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) __attribute__((unused,always_inline)); - -/** -* Returns 1/x. -* -* If x=0, returns 0. -*/ -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -); static inline mask_t p448_eq ( @@ -175,24 +143,6 @@ p448_set_ui ( out->limb[0] = x & ((1<<28)-1); out->limb[2] = x>>28; } - -void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t doswap -) { - big_register_t *aa = (big_register_t*)a; - big_register_t *bb = (big_register_t*)b; - big_register_t m = br_set_to_mask(doswap); - - unsigned int i; - for (i=0; i0); - if (n&1) { - p448_sqr(y,x); - n--; - } else { - p448_sqr(&tmp,x); - p448_sqr(y,&tmp); - n-=2; - } - for (; n; n-=2) { - p448_sqr(&tmp,y); - p448_sqr(y,&tmp); - } -} - mask_t p448_eq ( const struct p448_t *a, @@ -350,18 +278,6 @@ p448_eq ( return p448_is_zero(&ra); } -void -p448_mask ( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) { - unsigned int i; - for (i=0; ilimb[0]); i++) { - a->limb[i] = b->limb[i] & mask; - } -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arch_ref64/p448.h b/src/arch_ref64/p448.h index 5fb28b4..58a3012 100644 --- a/src/arch_ref64/p448.h +++ b/src/arch_ref64/p448.h @@ -23,13 +23,6 @@ p448_set_ui ( p448_t *out, uint64_t x ) __attribute__((unused)); - -static __inline__ void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t do_swap -) __attribute__((unused)); static __inline__ void p448_add ( @@ -121,13 +114,6 @@ p448_sqr ( p448_t *__restrict__ out, const p448_t *a ); - -static __inline__ void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) __attribute__((unused)); void p448_serialize ( @@ -140,24 +126,6 @@ p448_deserialize ( p448_t *x, const uint8_t serial[56] ); - -static __inline__ void -p448_mask( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) __attribute__((unused)); - -/** -* Returns 1/x. -* -* If x=0, returns 0. -*/ -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -); static inline mask_t p448_eq ( @@ -178,20 +146,6 @@ p448_set_ui ( out->limb[i] = 0; } } - -void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t doswap -) { - unsigned int i; - for (i=0; i<8; i++) { - uint64_t x = doswap & (a->limb[i]^b->limb[i]); - a->limb[i] ^= x; - b->limb[i] ^= x; - } -} void p448_add ( @@ -313,28 +267,6 @@ p448_weak_reduce ( a->limb[0] = (a->limb[0] & mask) + tmp; } -void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) { - p448_t tmp; - assert(n>0); - if (n&1) { - p448_sqr(y,x); - n--; - } else { - p448_sqr(&tmp,x); - p448_sqr(y,&tmp); - n-=2; - } - for (; n; n-=2) { - p448_sqr(&tmp,y); - p448_sqr(y,&tmp); - } -} - mask_t p448_eq ( const struct p448_t *a, @@ -347,18 +279,6 @@ p448_eq ( return p448_is_zero(&ra); } -void -p448_mask ( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) { - unsigned int i; - for (i=0; i<8; i++) { - a->limb[i] = b->limb[i] & mask; - } -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arch_x86_64/p448.h b/src/arch_x86_64/p448.h index 4b04445..e928b00 100644 --- a/src/arch_x86_64/p448.h +++ b/src/arch_x86_64/p448.h @@ -22,13 +22,6 @@ p448_set_ui ( p448_t *out, uint64_t x ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t do_swap -) __attribute__((unused,always_inline)); static __inline__ void p448_add ( @@ -114,13 +107,6 @@ p448_sqr ( p448_t *__restrict__ out, const p448_t *a ); - -static __inline__ void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) __attribute__((unused,always_inline)); void p448_serialize ( @@ -133,24 +119,6 @@ p448_deserialize ( p448_t *x, const uint8_t serial[56] ); - -static __inline__ void -p448_mask( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) __attribute__((unused,always_inline)); - -/** -* Returns 1/x. -* -* If x=0, returns 0. -*/ -void -p448_inverse ( - struct p448_t* a, - const struct p448_t* x -); static inline mask_t p448_eq ( @@ -171,24 +139,6 @@ p448_set_ui ( out->limb[i] = 0; } } - -void -p448_cond_swap ( - p448_t *a, - p448_t *b, - mask_t doswap -) { - big_register_t *aa = (big_register_t*)a; - big_register_t *bb = (big_register_t*)b; - big_register_t m = br_set_to_mask(doswap); - - unsigned int i; - for (i=0; ilimb[0] = (a->limb[0] & mask) + tmp; } -void -p448_sqrn ( - p448_t *__restrict__ y, - const p448_t *x, - int n -) { - p448_t tmp; - assert(n>0); - if (n&1) { - p448_sqr(y,x); - n--; - } else { - p448_sqr(&tmp,x); - p448_sqr(y,&tmp); - n-=2; - } - for (; n; n-=2) { - p448_sqr(&tmp,y); - p448_sqr(y,&tmp); - } -} - -mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) { - struct p448_t ra, rb; - p448_copy(&ra, a); - p448_copy(&rb, b); - p448_weak_reduce(&ra); - p448_weak_reduce(&rb); - p448_sub(&ra, &ra, &rb); - p448_bias(&ra, 2); - return p448_is_zero(&ra); -} - -void -p448_mask ( - struct p448_t *a, - const struct p448_t *b, - mask_t mask -) { - unsigned int i; - for (i=0; ilimb[0]); i++) { - a->limb[i] = b->limb[i] & mask; - } -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arithmetic.c b/src/arithmetic.c index de463fe..add3b49 100644 --- a/src/arithmetic.c +++ b/src/arithmetic.c @@ -11,6 +11,21 @@ #include "field.h" #include "ec_point.h" // TODO +mask_t +field_eq ( + const struct field_t *a, + const struct field_t *b +) { + struct field_t ra, rb; + field_copy(&ra, a); + field_copy(&rb, b); + field_weak_reduce(&ra); + field_weak_reduce(&rb); + field_sub(&ra, &ra, &rb); + field_bias(&ra, 2); + return field_is_zero(&ra); +} + void field_inverse ( struct field_t* a, diff --git a/src/ec_point.c b/src/ec_point.c index a08a618..eabc3a3 100644 --- a/src/ec_point.c +++ b/src/ec_point.c @@ -52,8 +52,30 @@ field_mulw_scc_wr ( field_weak_reduce(out); } -void -field_isr ( +static __inline__ void +field_sqrn ( + field_t *__restrict__ y, + const field_t *x, + int n +) { + field_t tmp; + assert(n>0); + if (n&1) { + field_sqr(y,x); + n--; + } else { + field_sqr(&tmp,x); + field_sqr(y,&tmp); + n-=2; + } + for (; n; n-=2) { + field_sqr(&tmp,y); + field_sqr(y,&tmp); + } +} + +void +field_isr ( /* TODO: MAGIC */ struct field_t* a, const struct field_t* x ) { @@ -433,7 +455,7 @@ serialize_montgomery ( field_mul ( &L0, &a->xd, &L2 ); L5 = field_is_zero( &a->zd ); L6 = - L5; - field_mask ( &L1, &L0, L5 ); + constant_time_mask ( &L1, &L0, sizeof(L1), L5 ); field_add ( &L2, &L1, &a->zd ); L4 = ~ L5; field_mul ( &L1, sbz, &L3 ); @@ -446,7 +468,7 @@ serialize_montgomery ( field_mul ( &L2, &L1, &L0 ); field_sqr ( &L1, &L0 ); field_mul ( &L0, &L3, &L1 ); - field_mask ( b, &L2, L4 ); + constant_time_mask ( b, &L2, sizeof(L1), L4 ); field_subw ( &L0, 1 ); field_bias ( &L0, 1 ); L5 = field_is_zero( &L0 ); diff --git a/src/include/constant_time.h b/src/include/constant_time.h new file mode 100644 index 0000000..cf19da0 --- /dev/null +++ b/src/include/constant_time.h @@ -0,0 +1,230 @@ +/** + * @file constant_time.h + * @copyright + * Copyright (c) 2014 Cryptography Research, Inc. \n + * Released under the MIT License. See LICENSE.txt for license information. + * @author Mike Hamburg + * + * @brief Constant-time routines. + */ + +#ifndef __CONSTANT_TIME_H__ +#define __CONSTANT_TIME_H__ 1 + +#include "word.h" + +/* + * Constant-time operations on hopefully-compile-time-sized memory + * regions. Needed for flexibility / demagication: not all fields + * have sizes which are multiples of the vector width, necessitating + * a change from the Ed448 versions. + * + * These routines would be much simpler to define at the byte level, + * but if not vectorized they would be a significant fraction of the + * runtime. Eg on NEON-less ARM, constant_time_lookup is like 15% of + * signing time, vs 6% on Haswell with its fancy AVX2 vectors. + * + * If the compiler could do a good job of autovectorizing the code, + * we could just leave it with the byte definition. But that's unlikely + * on most deployed compilers, especially if you consider that pcmpeq[size] + * is much faster than moving a scalar to the vector unit (which is what + * a naive autovectorizer will do with constant_time_lookup on Intel). + * + * Instead, we're putting our trust in the loop unroller and unswitcher. + * + * TODO: verify correctness and performance on each platform, to make sure + * that there are no regressions. + */ + + +/** + * Unaligned big (vector?) register. + */ +typedef struct { + big_register_t unaligned; +} __attribute__((packed)) unaligned_br_t; + +/** + * Unaligned word register, for architectures where that matters. + */ +typedef struct { + word_t unaligned; +} __attribute__((packed)) unaligned_word_t; + +/** + * @brief Constant-time conditional swap. + * + * If doswap, then swap elem_bytes between *a and *b. + * + * *a and *b must not alias. Also, they must be at least as aligned + * as their sizes, if the CPU cares about that sort of thing. + */ +static __inline__ void +__attribute__((unused,always_inline)) +constant_time_cond_swap ( + void *__restrict__ a_, + void *__restrict__ b_, + word_t elem_bytes, + mask_t doswap +) { + word_t k; + unsigned char *a = (unsigned char *)a_; + unsigned char *b = (unsigned char *)b_; + + big_register_t br_mask = br_set_to_mask(doswap); + for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) { + if (elem_bytes % sizeof(big_register_t)) { + /* unaligned */ + big_register_t xor = + ((unaligned_br_t*)(&a[k]))->unaligned + ^ ((unaligned_br_t*)(&b[k]))->unaligned; + xor &= br_mask; + ((unaligned_br_t*)(&a[k]))->unaligned ^= xor; + ((unaligned_br_t*)(&b[k]))->unaligned ^= xor; + } else { + /* aligned */ + big_register_t xor = + *((big_register_t*)(&a[k])) + ^ *((big_register_t*)(&b[k])); + xor &= br_mask; + *((big_register_t*)(&a[k])) ^= xor; + *((big_register_t*)(&b[k])) ^= xor; + } + } + + if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { + for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { + if (elem_bytes % sizeof(word_t)) { + /* unaligned */ + word_t xor = + ((unaligned_word_t*)(&a[k]))->unaligned + ^ ((unaligned_word_t*)(&b[k]))->unaligned; + xor &= doswap; + ((unaligned_word_t*)(&a[k]))->unaligned ^= xor; + ((unaligned_word_t*)(&b[k]))->unaligned ^= xor; + } else { + /* aligned */ + word_t xor = + *((word_t*)(&a[k])) + ^ *((word_t*)(&b[k])); + xor &= doswap; + *((word_t*)(&a[k])) ^= xor; + *((word_t*)(&b[k])) ^= xor; + } + } + } + + if (elem_bytes % sizeof(word_t)) { + for (; kunaligned; + } else { + /* aligned */ + *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&table[k+j*elem_bytes]); + } + } + + word_t mask = word_is_zero(idx^j); + if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { + for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { + if (elem_bytes % sizeof(word_t)) { + /* input unaligned, output aligned */ + *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&table[k+j*elem_bytes]))->unaligned; + } else { + /* aligned */ + *(word_t *)(out+k) |= mask & *(const word_t*)(&table[k+j*elem_bytes]); + } + } + } + + if (elem_bytes % sizeof(word_t)) { + for (; kunaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned; + } else { + /* aligned */ + *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]); + } + } + + if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { + for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { + if (elem_bytes % sizeof(word_t)) { + /* unaligned */ + ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned; + } else { + /* aligned */ + *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]); + } + } + } + + if (elem_bytes % sizeof(word_t)) { + for (; ka, &n->b, doNegate); + constant_time_cond_swap(&n->a, &n->b, sizeof(n->a), doNegate); field_cond_neg(&n->c, doNegate); } diff --git a/src/include/field.h b/src/include/field.h index 06fd93f..9da18d4 100644 --- a/src/include/field.h +++ b/src/include/field.h @@ -9,21 +9,13 @@ #ifndef __FIELD_H__ #define __FIELD_H__ -#include "p448.h" +#include +#include "p448.h" #define FIELD_BITS 448 -#define FIELD_BYTES (1+(FIELD_BITS-1)/8) -#define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t)) - -/** - * @brief For GMP tests: little-endian representation of the field modulus. - */ -extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; - #define field_t p448_t #define field_mul p448_mul #define field_sqr p448_sqr -#define field_sqrn p448_sqrn #define field_add p448_add #define field_sub p448_sub #define field_mulw p448_mulw @@ -32,15 +24,80 @@ extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; #define field_neg p448_neg #define field_set_ui p448_set_ui #define field_bias p448_bias -#define field_copy p448_copy -#define field_mask p448_mask #define field_weak_reduce p448_weak_reduce #define field_strong_reduce p448_strong_reduce -#define field_cond_swap p448_cond_swap #define field_cond_neg p448_cond_neg #define field_serialize p448_serialize #define field_deserialize p448_deserialize -#define field_eq p448_eq #define field_is_zero p448_is_zero +/** @brief Bytes in a field element */ +#define FIELD_BYTES (1+(FIELD_BITS-1)/8) + +/** @brief Words in a field element */ +#define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t)) + +/** + * @brief For GMP tests: little-endian representation of the field modulus. + */ +extern const uint8_t FIELD_MODULUS[FIELD_BYTES]; + +/** + * Copy one field element to another. + */ +static inline void +__attribute__((unused,always_inline)) +field_copy ( + struct field_t *__restrict__ a, + const struct field_t *__restrict__ b +) { + memcpy(a,b,sizeof(*a)); +} + +/** + * Returns 1/sqrt(+- x). + * + * The Legendre symbol of the result is the same as that of the + * input. + * + * If x=0, returns 0. + */ +void +field_isr ( + struct field_t* a, + const struct field_t* x +); + +/** + * Batch inverts out[i] = 1/in[i] + * + * If any input is zero, all the outputs will be zero. + */ +void +field_simultaneous_invert ( + struct p448_t *__restrict__ out, + const struct p448_t *in, + unsigned int n +); + +/** + * Returns 1/x. + * + * If x=0, returns 0. + */ +void +field_inverse ( + struct field_t* a, + const struct field_t* x +); + +/** + * Returns -1 if a==b, 0 otherwise. + */ +mask_t +field_eq ( + const struct field_t *a, + const struct field_t *b +); + #endif /* __FIELD_H__ */ diff --git a/src/include/word.h b/src/include/word.h index f493ecc..ddc8d36 100644 --- a/src/include/word.h +++ b/src/include/word.h @@ -143,6 +143,15 @@ typedef word_t vecmask_t __attribute__((vector_size(32))); return (big_register_t)x; } #endif + +/** + * Return -1 if x==0, and 0 otherwise. + */ +static __inline__ mask_t +__attribute__((always_inline,unused)) +word_is_zero(word_t x) { + return (mask_t)((((dword_t)(x)) - 1)>>WORD_BITS); +} #if __AVX2__ static __inline__ big_register_t diff --git a/src/scalarmul.c b/src/scalarmul.c index e67112c..502dd3f 100644 --- a/src/scalarmul.c +++ b/src/scalarmul.c @@ -11,6 +11,7 @@ #include "intrinsics.h" #include "scalarmul.h" #include "barrett_field.h" +#include "constant_time.h" mask_t montgomery_ladder ( @@ -29,15 +30,15 @@ montgomery_ladder ( word_t w = scalar[j]; for (i=n; i>=0; i--) { mask_t flip = -((w>>i)&1); - field_cond_swap(&mont.xa,&mont.xd,flip^pflip); - field_cond_swap(&mont.za,&mont.zd,flip^pflip); + constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),flip^pflip); + constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),flip^pflip); montgomery_step(&mont); pflip = flip; } n = WORD_BITS-1; } - field_cond_swap(&mont.xa,&mont.xd,pflip); - field_cond_swap(&mont.za,&mont.zd,pflip); + constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),pflip); + constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),pflip); assert(n_extra_doubles < INT_MAX); for (j=0; j<(int)n_extra_doubles; j++) { @@ -47,6 +48,29 @@ montgomery_ladder ( return serialize_montgomery(out, &mont, in); } +static __inline__ void +__attribute__((unused,always_inline)) +constant_time_lookup_tw_pniels ( + struct tw_pniels_t *out, + const struct tw_pniels_t *in, + int nin, + int idx +) { + constant_time_lookup(out,in,sizeof(*out),nin,idx); +} + +static __inline__ void +__attribute__((unused,always_inline)) +constant_time_lookup_tw_niels ( + struct tw_niels_t *out, + const struct tw_niels_t *in, + int nin, + int idx +) { + constant_time_lookup(out,in,sizeof(*out),nin,idx); +} + +/* static __inline__ void constant_time_lookup_tw_pniels ( struct tw_pniels_t *out, @@ -90,6 +114,7 @@ constant_time_lookup_tw_niels ( } } } +*/ static void convert_to_signed_window_form (