Browse Source

No performance regression on Haswell. Also factored out field_cond_neg, and restored the p448_ prefixes in case multiple fields are built into the same library.

master
Mike Hamburg 10 years ago
parent
commit
edc6afe496
8 changed files with 83 additions and 263 deletions
  1. +0
    -46
      src/arch_32/p448.h
  2. +0
    -50
      src/arch_arm_32/p448.h
  3. +0
    -46
      src/arch_neon/p448.h
  4. +0
    -46
      src/arch_neon_experimental/p448.h
  5. +0
    -41
      src/arch_ref64/p448.h
  6. +0
    -31
      src/arch_x86_64/p448.h
  7. +60
    -0
      src/include/constant_time.h
  8. +23
    -3
      src/include/field.h

+ 0
- 46
src/arch_32/p448.h View File

@@ -42,12 +42,6 @@ p448_neg (
p448_t *out,
const p448_t *a
) __attribute__((unused,always_inline));
/*
 * Prototype for the conditional in-place negation: *a is replaced by its
 * biased negation where the doNegate mask selects it, and is left unchanged
 * otherwise.
 */
static __inline__ void
p448_cond_neg (
p448_t *a,
mask_t doNegate
) __attribute__((unused,always_inline));

static __inline__ void
p448_addw (
@@ -120,12 +114,6 @@ p448_deserialize (
const uint8_t serial[56]
);

/*
 * Prototype for the field-element comparison: the difference of a and b is
 * formed and the result of p448_is_zero on it is returned as a mask_t.
 */
static inline mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) __attribute__((always_inline,unused));

/* -------------- Inline functions begin here -------------- */

void
@@ -194,25 +182,6 @@ p448_neg (
*/
}

/**
 * Conditionally negate *a in place.
 *
 * The biased negation of *a is always computed; each big_register_t-sized
 * chunk of *a is then overwritten with a mask-selected blend of the old and
 * negated values, so no branch depends on doNegate.
 */
void
p448_cond_neg(
    p448_t *a,
    mask_t doNegate
) {
    struct p448_t negated;
    big_register_t *dst = (big_register_t *)a;
    big_register_t *neg = (big_register_t *)&negated;
    big_register_t select = br_set_to_mask(doNegate);
    unsigned int idx = 0;

    p448_neg(&negated, a);
    p448_bias(&negated, 2);

    while (idx < sizeof(*a) / sizeof(*dst)) {
        dst[idx] = (neg[idx] & select) | (dst[idx] & ~select);
        idx++;
    }
}

void
p448_addw (
p448_t *a,
@@ -265,21 +234,6 @@ p448_weak_reduce (
a->limb[0] = (a->limb[0] & mask) + tmp;
}

mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) {
struct p448_t ra, rb;
p448_copy(&ra, a);
p448_copy(&rb, b);
p448_weak_reduce(&ra);
p448_weak_reduce(&rb);
p448_sub(&ra, &ra, &rb);
p448_bias(&ra, 2);
return p448_is_zero(&ra);
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 50
src/arch_arm_32/p448.h View File

@@ -42,12 +42,6 @@ p448_neg (
p448_t *out,
const p448_t *a
) __attribute__((unused,always_inline));
/*
 * Prototype for the conditional in-place negation: *a is replaced by its
 * biased negation where the doNegate mask selects it, and is left unchanged
 * otherwise.
 */
static __inline__ void
p448_cond_neg (
p448_t *a,
mask_t doNegate
) __attribute__((unused,always_inline));

static __inline__ void
p448_addw (
@@ -119,12 +113,6 @@ p448_deserialize (
p448_t *x,
const uint8_t serial[56]
);
/*
 * Prototype for the field-element comparison: the difference of a and b is
 * formed and the result of p448_is_zero on it is returned as a mask_t.
 */
static inline mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) __attribute__((always_inline,unused));

/* -------------- Inline functions begin here -------------- */

@@ -194,29 +182,6 @@ p448_neg (
*/
}

/**
 * Conditionally negate *a in place.
 *
 * The biased negation of *a is always computed; each big_register_t-sized
 * chunk of *a is then overwritten with a mask-selected blend of the old and
 * negated values, so no branch depends on doNegate.
 */
void
p448_cond_neg(
    p448_t *a,
    mask_t doNegate
) {
    struct p448_t negated;
    big_register_t *dst = (big_register_t *)a;
    big_register_t *neg = (big_register_t *)&negated;
    unsigned int idx;
    /* Build a big_register_t-wide mask from doNegate. */
#if __ARM_NEON__
    big_register_t select = vdupq_n_u32(doNegate);
#else
    big_register_t select = doNegate;
#endif

    p448_neg(&negated, a);
    p448_bias(&negated, 2);

    for (idx = 0; idx < sizeof(*a) / sizeof(*dst); idx++) {
        dst[idx] = (neg[idx] & select) | (dst[idx] & ~select);
    }
}

void
p448_addw (
p448_t *a,
@@ -269,21 +234,6 @@ p448_weak_reduce (
a->limb[0] = (a->limb[0] & mask) + tmp;
}

mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) {
struct p448_t ra, rb;
p448_copy(&ra, a);
p448_copy(&rb, b);
p448_weak_reduce(&ra);
p448_weak_reduce(&rb);
p448_sub(&ra, &ra, &rb);
p448_bias(&ra, 2);
return p448_is_zero(&ra);
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 46
src/arch_neon/p448.h View File

@@ -42,12 +42,6 @@ p448_neg (
p448_t *out,
const p448_t *a
) __attribute__((unused,always_inline));
/*
 * Prototype for the conditional in-place negation: *a is replaced by its
 * biased negation where the doNegate mask selects it, and is left unchanged
 * otherwise.
 */
static __inline__ void
p448_cond_neg (
p448_t *a,
mask_t doNegate
) __attribute__((unused,always_inline));

static __inline__ void
p448_addw (
@@ -120,12 +114,6 @@ p448_deserialize (
const uint8_t serial[56]
);

/*
 * Prototype for the field-element comparison: the difference of a and b is
 * formed and the result of p448_is_zero on it is returned as a mask_t.
 */
static inline mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) __attribute__((always_inline,unused));

/* -------------- Inline functions begin here -------------- */

void
@@ -194,25 +182,6 @@ p448_neg (
*/
}

/**
 * Conditionally negate *a in place.
 *
 * The biased negation of *a is always computed; each big_register_t-sized
 * chunk of *a is then overwritten with a mask-selected blend of the old and
 * negated values, so no branch depends on doNegate.
 */
void
p448_cond_neg(
    p448_t *a,
    mask_t doNegate
) {
    struct p448_t negated;
    big_register_t *dst = (big_register_t *)a;
    big_register_t *neg = (big_register_t *)&negated;
    big_register_t select = br_set_to_mask(doNegate);
    unsigned int idx = 0;

    p448_neg(&negated, a);
    p448_bias(&negated, 2);

    while (idx < sizeof(*a) / sizeof(*dst)) {
        dst[idx] = (neg[idx] & select) | (dst[idx] & ~select);
        idx++;
    }
}

void
p448_addw (
p448_t *a,
@@ -265,21 +234,6 @@ p448_weak_reduce (
a->limb[0] = (a->limb[0] & mask) + tmp;
}

mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) {
struct p448_t ra, rb;
p448_copy(&ra, a);
p448_copy(&rb, b);
p448_weak_reduce(&ra);
p448_weak_reduce(&rb);
p448_sub(&ra, &ra, &rb);
p448_bias(&ra, 2);
return p448_is_zero(&ra);
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 46
src/arch_neon_experimental/p448.h View File

@@ -45,12 +45,6 @@ p448_neg (
p448_t *out,
const p448_t *a
) __attribute__((unused,always_inline));
/*
 * Prototype for the conditional in-place negation: *a is replaced by its
 * biased negation where the doNegate mask selects it, and is left unchanged
 * otherwise.
 */
static __inline__ void
p448_cond_neg (
p448_t *a,
mask_t doNegate
) __attribute__((unused,always_inline));

static __inline__ void
p448_addw (
@@ -123,12 +117,6 @@ p448_deserialize (
const uint8_t serial[56]
);

/*
 * Prototype for the field-element comparison: the difference of a and b is
 * formed and the result of p448_is_zero on it is returned as a mask_t.
 */
static inline mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) __attribute__((always_inline,unused));

/* -------------- Inline functions begin here -------------- */

void
@@ -191,25 +179,6 @@ p448_neg (
*/
}

/**
 * Conditionally negate *a in place.
 *
 * The biased negation of *a is always computed; each big_register_t-sized
 * chunk of *a is then overwritten with a mask-selected blend of the old and
 * negated values, so no branch depends on doNegate.
 */
void
p448_cond_neg(
    p448_t *a,
    mask_t doNegate
) {
    struct p448_t negated;
    big_register_t *dst = (big_register_t *)a;
    big_register_t *neg = (big_register_t *)&negated;
    big_register_t select = br_set_to_mask(doNegate);
    unsigned int idx = 0;

    p448_neg(&negated, a);
    p448_bias(&negated, 2);

    while (idx < sizeof(*a) / sizeof(*dst)) {
        dst[idx] = (neg[idx] & select) | (dst[idx] & ~select);
        idx++;
    }
}

void
p448_addw (
p448_t *a,
@@ -263,21 +232,6 @@ p448_weak_reduce (
aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2);
}

mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) {
struct p448_t ra, rb;
p448_copy(&ra, a);
p448_copy(&rb, b);
p448_weak_reduce(&ra);
p448_weak_reduce(&rb);
p448_sub(&ra, &ra, &rb);
p448_bias(&ra, 2);
return p448_is_zero(&ra);
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 41
src/arch_ref64/p448.h View File

@@ -43,12 +43,6 @@ p448_neg (
p448_t *out,
const p448_t *a
) __attribute__((unused));
/*
 * Prototype for the conditional in-place negation: each limb of *a is
 * replaced by the corresponding limb of its biased negation where the
 * doNegate mask selects it, and is left unchanged otherwise.
 */
static __inline__ void
p448_cond_neg (
p448_t *a,
mask_t doNegate
) __attribute__((unused));

static __inline__ void
p448_addw (
@@ -127,12 +121,6 @@ p448_deserialize (
const uint8_t serial[56]
);

/*
 * Prototype for the field-element comparison: b is subtracted from a and
 * the result of p448_is_zero on the difference is returned as a mask_t.
 */
static inline mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) __attribute__((always_inline,unused));

/* -------------- Inline functions begin here -------------- */

void
@@ -187,23 +175,6 @@ p448_neg (
p448_weak_reduce(out);
}

/**
 * Conditionally negate *a in place.
 *
 * The biased negation is always computed; each of the 8 limbs is then
 * rewritten as a mask-selected blend of its old and negated value, so no
 * branch depends on doNegate.
 */
void
p448_cond_neg(
    struct p448_t *a,
    mask_t doNegate
) {
    struct p448_t negated;
    unsigned int limb_idx = 0;

    p448_neg(&negated, a);
    p448_bias(&negated, 2);

    while (limb_idx < 8) {
        a->limb[limb_idx] = ( negated.limb[limb_idx] & doNegate )
                          | ( a->limb[limb_idx] & ~doNegate );
        limb_idx++;
    }
}

void
p448_addw (
p448_t *a,
@@ -267,18 +238,6 @@ p448_weak_reduce (
a->limb[0] = (a->limb[0] & mask) + tmp;
}

mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) {
struct p448_t ra, rb;
p448_copy(&ra, a);
p448_copy(&rb, b);
p448_sub(&ra, &ra, &rb);
return p448_is_zero(&ra);
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 31
src/arch_x86_64/p448.h View File

@@ -42,12 +42,6 @@ p448_neg (
p448_t *out,
const p448_t *a
) __attribute__((unused,always_inline));
/*
 * Prototype for the conditional in-place negation: *a is replaced by its
 * biased negation where the doNegate mask selects it, and is left unchanged
 * otherwise.
 */
static __inline__ void
p448_cond_neg (
p448_t *a,
mask_t doNegate
) __attribute__((unused,always_inline));

static __inline__ void
p448_addw (
@@ -120,12 +114,6 @@ p448_deserialize (
const uint8_t serial[56]
);

/*
 * Prototype for p448_eq: takes two field elements and returns a mask_t.
 * NOTE(review): presumably the equality test; the definition is not part of
 * this excerpt -- confirm against the implementation.
 */
static inline mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) __attribute__((always_inline,unused));

/* -------------- Inline functions begin here -------------- */

void
@@ -193,25 +181,6 @@ p448_neg (
*/
}

/**
 * Conditionally negate *a in place.
 *
 * The biased negation of *a is always computed; each big_register_t-sized
 * chunk of *a is then overwritten with a mask-selected blend of the old and
 * negated values, so no branch depends on doNegate.
 */
void
p448_cond_neg(
    struct p448_t *a,
    mask_t doNegate
) {
    struct p448_t negated;
    big_register_t *dst = (big_register_t *)a;
    big_register_t *neg = (big_register_t *)&negated;
    big_register_t select = br_set_to_mask(doNegate);
    unsigned int idx = 0;

    p448_neg(&negated, a);
    p448_bias(&negated, 2);

    while (idx < sizeof(*a) / sizeof(*dst)) {
        dst[idx] = (neg[idx] & select) | (dst[idx] & ~select);
        idx++;
    }
}

void
p448_addw (
p448_t *a,


+ 60
- 0
src/include/constant_time.h View File

@@ -227,4 +227,64 @@ constant_time_mask (
}
}

/**
 * @brief Constant-time a = mask ? bTrue : bFalse.
 *
 * Writes elem_bytes bytes to a_, taking each bit from bTrue_ where the
 * corresponding mask bit is set and from bFalse_ where it is clear.  The
 * data is processed in big_register_t-sized chunks first, then word_t-sized
 * chunks, then single bytes.  Loop bounds depend only on elem_bytes, never
 * on mask.
 *
 * Any elem_bytes is accepted, including values smaller than
 * sizeof(big_register_t) or sizeof(word_t); such inputs simply skip the
 * wider loops.
 *
 * The input and output must be at least as aligned as elem_bytes.
 *
 * Note that the output is not __restrict__, but if it overlaps either
 * input, it must be equal and not partially overlap.
 *
 * @param a_         Destination buffer of elem_bytes bytes.
 * @param bTrue_     Source used where mask bits are set.
 * @param bFalse_    Source used where mask bits are clear.
 * @param elem_bytes Number of bytes to select.
 * @param mask       Selection mask.  NOTE(review): presumably all-ones or
 *                   all-zeros -- confirm against callers.
 */
static __inline__ void
__attribute__((unused,always_inline))
constant_time_select (
    void *a_,
    const void *bTrue_,
    const void *bFalse_,
    word_t elem_bytes,
    mask_t mask
) {
    unsigned char *a = (unsigned char *)a_;
    const unsigned char *bTrue = (const unsigned char *)bTrue_;
    const unsigned char *bFalse = (const unsigned char *)bFalse_;
    word_t k;
    big_register_t br_mask = br_set_to_mask(mask);

    /*
     * k never exceeds elem_bytes, so "elem_bytes - k" cannot wrap.  The
     * former bound "k <= elem_bytes - sizeof(big_register_t)" wrapped around
     * for elem_bytes < sizeof(big_register_t) and ran off the buffers.
     */
    for (k=0; elem_bytes-k >= sizeof(big_register_t); k+=sizeof(big_register_t)) {
        if (elem_bytes % sizeof(big_register_t)) {
            /* unaligned */
            ((unaligned_br_t*)(&a[k]))->unaligned =
                  ( br_mask & ((const unaligned_br_t*)(&bTrue [k]))->unaligned)
                | (~br_mask & ((const unaligned_br_t*)(&bFalse[k]))->unaligned);
        } else {
            /* aligned */
            *(big_register_t *)(a+k) =
                  ( br_mask & *(const big_register_t*)(&bTrue [k]))
                | (~br_mask & *(const big_register_t*)(&bFalse[k]));
        }
    }

    /* Tail that is too short for a big register but holds whole words. */
    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
        for (; elem_bytes-k >= sizeof(word_t); k+=sizeof(word_t)) {
            if (elem_bytes % sizeof(word_t)) {
                /* unaligned */
                ((unaligned_word_t*)(&a[k]))->unaligned =
                      ( mask & ((const unaligned_word_t*)(&bTrue [k]))->unaligned)
                    | (~mask & ((const unaligned_word_t*)(&bFalse[k]))->unaligned);
            } else {
                /* aligned */
                *(word_t *)(a+k) =
                      ( mask & *(const word_t*)(&bTrue [k]))
                    | (~mask & *(const word_t*)(&bFalse[k]));
            }
        }
    }

    /* Remaining bytes, if elem_bytes is not a multiple of the word size. */
    if (elem_bytes % sizeof(word_t)) {
        for (; k<elem_bytes; k+=1) {
            a[k] = ( mask & bTrue[k]) | (~mask & bFalse[k]);
        }
    }
}

#endif /* __CONSTANT_TIME_H__ */

+ 23
- 3
src/include/field.h View File

@@ -10,6 +10,7 @@
#define __FIELD_H__

#include <string.h>
#include "constant_time.h"

#include "p448.h"
#define FIELD_BITS 448
@@ -24,9 +25,13 @@
#define field_neg p448_neg
#define field_set_ui p448_set_ui
#define field_bias p448_bias
#define field_cond_neg p448_cond_neg
#define field_inverse p448_inverse
#define field_eq p448_eq
#define field_isr p448_isr
#define field_simultaneous_invert p448_simultaneous_invert
#define field_weak_reduce p448_weak_reduce
#define field_strong_reduce p448_strong_reduce
#define field_cond_neg p448_cond_neg
#define field_serialize p448_serialize
#define field_deserialize p448_deserialize
#define field_is_zero p448_is_zero
@@ -54,6 +59,21 @@ field_copy (
memcpy(a,b,sizeof(*a));
}

/**
 * Negate a in place if doNegate.
 *
 * The biased negation is computed unconditionally into a temporary;
 * constant_time_select then writes either that temporary or the original
 * value back into a, depending on the mask.
 */
static inline void
__attribute__((unused,always_inline))
field_cond_neg(
    field_t *a,
    mask_t doNegate
) {
    struct field_t minus_a;

    field_neg(&minus_a, a);
    field_bias(&minus_a, 2);

    /* Destination and the "false" source are the same object (allowed). */
    constant_time_select(a, &minus_a, a, sizeof(minus_a), doNegate);
}

/**
* Returns 1/sqrt(+- x).
*
@@ -75,8 +95,8 @@ field_isr (
*/
void
field_simultaneous_invert (
struct p448_t *__restrict__ out,
const struct p448_t *in,
struct field_t *__restrict__ out,
const struct field_t *in,
unsigned int n
);



Loading…
Cancel
Save