From edc6afe496689f72a0513bbccdd74e0f206d7536 Mon Sep 17 00:00:00 2001 From: Mike Hamburg Date: Mon, 20 Oct 2014 18:20:35 -0700 Subject: [PATCH] no perf regression on haswell. Also, factored out field_cond_neg; restored p448_ prefixes in case of multiple fields in the same lib --- src/arch_32/p448.h | 46 ------------------------ src/arch_arm_32/p448.h | 50 -------------------------- src/arch_neon/p448.h | 46 ------------------------ src/arch_neon_experimental/p448.h | 46 ------------------------ src/arch_ref64/p448.h | 41 --------------------- src/arch_x86_64/p448.h | 31 ---------------- src/include/constant_time.h | 60 +++++++++++++++++++++++++++++++ src/include/field.h | 26 ++++++++++++-- 8 files changed, 83 insertions(+), 263 deletions(-) diff --git a/src/arch_32/p448.h b/src/arch_32/p448.h index a3b575b..cf90611 100644 --- a/src/arch_32/p448.h +++ b/src/arch_32/p448.h @@ -42,12 +42,6 @@ p448_neg ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_neg ( - p448_t *a, - mask_t doNegate -) __attribute__((unused,always_inline)); static __inline__ void p448_addw ( @@ -120,12 +114,6 @@ p448_deserialize ( const uint8_t serial[56] ); -static inline mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) __attribute__((always_inline,unused)); - /* -------------- Inline functions begin here -------------- */ void @@ -194,25 +182,6 @@ p448_neg ( */ } -void -p448_cond_neg( - p448_t *a, - mask_t doNegate -) { - unsigned int i; - struct p448_t negated; - big_register_t *aa = (big_register_t *)a; - big_register_t *nn = (big_register_t*)&negated; - big_register_t m = br_set_to_mask(doNegate); - - p448_neg(&negated, a); - p448_bias(&negated, 2); - - for (i=0; ilimb[0] = (a->limb[0] & mask) + tmp; } -mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) { - struct p448_t ra, rb; - p448_copy(&ra, a); - p448_copy(&rb, b); - p448_weak_reduce(&ra); - p448_weak_reduce(&rb); - p448_sub(&ra, &ra, &rb); - p448_bias(&ra, 2); - return p448_is_zero(&ra); -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arch_arm_32/p448.h b/src/arch_arm_32/p448.h index 8419d8b..cf90611 100644 --- a/src/arch_arm_32/p448.h +++ b/src/arch_arm_32/p448.h @@ -42,12 +42,6 @@ p448_neg ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_neg ( - p448_t *a, - mask_t doNegate -) __attribute__((unused,always_inline)); static __inline__ void p448_addw ( @@ -119,12 +113,6 @@ p448_deserialize ( p448_t *x, const uint8_t serial[56] ); - -static inline mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) __attribute__((always_inline,unused)); /* -------------- Inline functions begin here -------------- */ @@ -194,29 +182,6 @@ p448_neg ( */ } -void -p448_cond_neg( - p448_t *a, - mask_t doNegate -) { - unsigned int i; - struct p448_t negated; - big_register_t *aa = (big_register_t *)a; - big_register_t *nn = (big_register_t*)&negated; -#if __ARM_NEON__ - big_register_t m = vdupq_n_u32(doNegate); -#else - big_register_t m = doNegate; -#endif - - p448_neg(&negated, a); - p448_bias(&negated, 2); - - for (i=0; ilimb[0] = (a->limb[0] & mask) + tmp; } -mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) { - struct p448_t ra, rb; - p448_copy(&ra, a); - p448_copy(&rb, b); - p448_weak_reduce(&ra); - p448_weak_reduce(&rb); - p448_sub(&ra, &ra, &rb); - p448_bias(&ra, 2); - return p448_is_zero(&ra); -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arch_neon/p448.h b/src/arch_neon/p448.h index a3b575b..cf90611 100644 --- a/src/arch_neon/p448.h +++ b/src/arch_neon/p448.h @@ -42,12 +42,6 @@ p448_neg ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_neg ( - p448_t *a, - mask_t doNegate -) __attribute__((unused,always_inline)); static __inline__ void p448_addw ( @@ -120,12 +114,6 @@ p448_deserialize ( const uint8_t serial[56] ); -static inline mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) __attribute__((always_inline,unused)); - /* -------------- Inline functions begin here -------------- */ void @@ -194,25 +182,6 @@ p448_neg ( */ } -void -p448_cond_neg( - p448_t *a, - mask_t doNegate -) { - unsigned int i; - struct p448_t negated; - big_register_t *aa = (big_register_t *)a; - big_register_t *nn = (big_register_t*)&negated; - big_register_t m = br_set_to_mask(doNegate); - - p448_neg(&negated, a); - p448_bias(&negated, 2); - - for (i=0; ilimb[0] = (a->limb[0] & mask) + tmp; } -mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) { - struct p448_t ra, rb; - p448_copy(&ra, a); - p448_copy(&rb, b); - p448_weak_reduce(&ra); - p448_weak_reduce(&rb); - p448_sub(&ra, &ra, &rb); - p448_bias(&ra, 2); - return p448_is_zero(&ra); -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arch_neon_experimental/p448.h b/src/arch_neon_experimental/p448.h index 90c58b4..144d86c 100644 --- a/src/arch_neon_experimental/p448.h +++ b/src/arch_neon_experimental/p448.h @@ -45,12 +45,6 @@ p448_neg ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_neg ( - p448_t *a, - mask_t doNegate -) __attribute__((unused,always_inline)); static __inline__ void p448_addw ( @@ -123,12 +117,6 @@ p448_deserialize ( const uint8_t serial[56] ); -static inline mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) __attribute__((always_inline,unused)); - /* -------------- Inline functions begin here -------------- */ void @@ -191,25 +179,6 @@ p448_neg ( */ } -void -p448_cond_neg( - p448_t *a, - mask_t doNegate -) { - unsigned int i; - struct p448_t negated; - big_register_t *aa = (big_register_t *)a; - big_register_t *nn = (big_register_t*)&negated; - big_register_t m = br_set_to_mask(doNegate); - - p448_neg(&negated, a); - p448_bias(&negated, 2); - - for (i=0; ilimb[i] = ( a->limb[i] & ~doNegate ) - | ( negated.limb[i] & doNegate ); - } -} - void p448_addw ( p448_t *a, @@ -267,18 +238,6 @@ p448_weak_reduce ( a->limb[0] = (a->limb[0] & mask) + tmp; } -mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) { - struct p448_t ra, rb; - p448_copy(&ra, a); - p448_copy(&rb, b); - p448_sub(&ra, &ra, &rb); - return p448_is_zero(&ra); -} - #ifdef __cplusplus }; /* extern "C" */ #endif diff --git a/src/arch_x86_64/p448.h b/src/arch_x86_64/p448.h index e928b00..0772d23 100644 --- a/src/arch_x86_64/p448.h +++ b/src/arch_x86_64/p448.h @@ -42,12 +42,6 @@ p448_neg ( p448_t *out, const p448_t *a ) __attribute__((unused,always_inline)); - -static __inline__ void -p448_cond_neg ( - p448_t *a, - mask_t doNegate -) __attribute__((unused,always_inline)); static __inline__ void p448_addw ( @@ -120,12 +114,6 @@ p448_deserialize ( const uint8_t serial[56] ); -static inline mask_t -p448_eq ( - const struct p448_t *a, - const struct p448_t *b -) __attribute__((always_inline,unused)); - /* -------------- Inline functions begin here -------------- */ void @@ -193,25 +181,6 @@ p448_neg ( */ } -void -p448_cond_neg( - struct p448_t *a, - mask_t doNegate -) { - unsigned int i; - struct p448_t negated; - big_register_t *aa = (big_register_t *)a; - big_register_t *nn = (big_register_t*)&negated; - big_register_t m = br_set_to_mask(doNegate); - - p448_neg(&negated, a); - p448_bias(&negated, 2); - - for (i=0; iunaligned = + ( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned) + | (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned); + } else { + /* aligned */ + *(big_register_t *)(a+k) = + ( br_mask & *(const big_register_t*)(&bTrue [k])) + | (~br_mask & *(const big_register_t*)(&bFalse[k])); + } + } + + if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) { + for (; k<=elem_bytes-sizeof(word_t); k+=sizeof(word_t)) { + if (elem_bytes % sizeof(word_t)) { + /* unaligned */ + ((unaligned_word_t*)(&a[k]))->unaligned = + ( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned) + | (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned); + } else { + /* aligned */ + *(word_t *)(a+k) = + ( mask & *(const word_t*)(&bTrue [k])) + | (~mask & *(const word_t*)(&bFalse[k])); + } + } + } + + if (elem_bytes % sizeof(word_t)) { + for (; k +#include "constant_time.h" #include "p448.h" #define FIELD_BITS 448 @@ -24,9 +25,13 @@ #define field_neg p448_neg #define field_set_ui p448_set_ui #define field_bias p448_bias +#define field_cond_neg p448_cond_neg +#define field_inverse p448_inverse +#define field_eq p448_eq +#define field_isr p448_isr +#define field_simultaneous_invert p448_simultaneous_invert #define field_weak_reduce p448_weak_reduce #define field_strong_reduce p448_strong_reduce -#define field_cond_neg p448_cond_neg #define field_serialize p448_serialize #define field_deserialize p448_deserialize #define field_is_zero p448_is_zero @@ -54,6 +59,21 @@ field_copy ( memcpy(a,b,sizeof(*a)); } +/** + * Negate a in place if doNegate. + */ +static inline void +__attribute__((unused,always_inline)) +field_cond_neg( + field_t *a, + mask_t doNegate +) { + struct field_t negated; + field_neg(&negated, a); + field_bias(&negated, 2); + constant_time_select(a, &negated, a, sizeof(negated), doNegate); +} + /** * Returns 1/sqrt(+- x). * @@ -75,8 +95,8 @@ field_isr ( */ void field_simultaneous_invert ( - struct p448_t *__restrict__ out, - const struct p448_t *in, + struct field_t *__restrict__ out, + const struct field_t *in, unsigned int n );