Browse Source

WARNING: This commit is largely untested.

Continuing demagication and factoring of field code.

Removing high-level ops from p448.h and putting them in field.h.  That way they
won't need rewriting for new fields and architectures.

Create constant_time.h which contains constant-time lookups, condswaps, etc.
That way the code is the same on all architectures, instead of varying depending
on whether the field size is a multiple of the vector register size.  I should
still add a constant_time_select to factor out field_cond_negate.

TODO: I need to test this for correctness and performance on various platforms.
It works on my Mac, but since Yosemite the timing is totally unpredictable
(background tasks?  variable boost?).
master
Michael Hamburg 10 years ago
parent
commit
c6d69dec2e
13 changed files with 382 additions and 579 deletions
  1. +0
    -84
      src/arch_32/p448.h
  2. +0
    -88
      src/arch_arm_32/p448.h
  3. +0
    -84
      src/arch_neon/p448.h
  4. +0
    -84
      src/arch_neon_experimental/p448.h
  5. +0
    -80
      src/arch_ref64/p448.h
  6. +0
    -99
      src/arch_x86_64/p448.h
  7. +15
    -0
      src/arithmetic.c
  8. +26
    -4
      src/ec_point.c
  9. +230
    -0
      src/include/constant_time.h
  10. +2
    -38
      src/include/ec_point.h
  11. +71
    -14
      src/include/field.h
  12. +9
    -0
      src/include/word.h
  13. +29
    -4
      src/scalarmul.c

+ 0
- 84
src/arch_32/p448.h View File

@@ -22,13 +22,6 @@ p448_set_ui (
p448_t *out,
uint64_t x
) __attribute__((unused,always_inline));
static __inline__ void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t do_swap
) __attribute__((unused,always_inline));

static __inline__ void
p448_add (
@@ -114,13 +107,6 @@ p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
);
static __inline__ void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) __attribute__((unused,always_inline));

void
p448_serialize (
@@ -133,24 +119,6 @@ p448_deserialize (
p448_t *x,
const uint8_t serial[56]
);
static __inline__ void
p448_mask(
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) __attribute__((unused,always_inline));

/**
* Returns 1/x.
*
* If x=0, returns 0.
*/
void
p448_inverse (
struct p448_t* a,
const struct p448_t* x
);

static inline mask_t
p448_eq (
@@ -172,24 +140,6 @@ p448_set_ui (
out->limb[i] = 0;
}
}
void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t doswap
) {
big_register_t *aa = (big_register_t*)a;
big_register_t *bb = (big_register_t*)b;
big_register_t m = br_set_to_mask(doswap);

unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
big_register_t x = m & (aa[i]^bb[i]);
aa[i] ^= x;
bb[i] ^= x;
}
}

void
p448_add (
@@ -315,28 +265,6 @@ p448_weak_reduce (
a->limb[0] = (a->limb[0] & mask) + tmp;
}

void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) {
p448_t tmp;
assert(n>0);
if (n&1) {
p448_sqr(y,x);
n--;
} else {
p448_sqr(&tmp,x);
p448_sqr(y,&tmp);
n-=2;
}
for (; n; n-=2) {
p448_sqr(&tmp,y);
p448_sqr(y,&tmp);
}
}

mask_t
p448_eq (
const struct p448_t *a,
@@ -352,18 +280,6 @@ p448_eq (
return p448_is_zero(&ra);
}

void
p448_mask (
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) {
unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
a->limb[i] = b->limb[i] & mask;
}
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 88
src/arch_arm_32/p448.h View File

@@ -22,13 +22,6 @@ p448_set_ui (
p448_t *out,
uint64_t x
) __attribute__((unused,always_inline));
static __inline__ void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t do_swap
) __attribute__((unused,always_inline));

static __inline__ void
p448_add (
@@ -114,13 +107,6 @@ p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
);
static __inline__ void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) __attribute__((unused,always_inline));

void
p448_serialize (
@@ -133,24 +119,6 @@ p448_deserialize (
p448_t *x,
const uint8_t serial[56]
);
static __inline__ void
p448_mask(
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) __attribute__((unused,always_inline));

/**
* Returns 1/x.
*
* If x=0, returns 0.
*/
void
p448_inverse (
struct p448_t* a,
const struct p448_t* x
);
static inline mask_t
p448_eq (
@@ -172,28 +140,6 @@ p448_set_ui (
out->limb[i] = 0;
}
}
void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t doswap
) {
big_register_t *aa = (big_register_t*)a;
big_register_t *bb = (big_register_t*)b;
#if __ARM_NEON__
big_register_t m = vdupq_n_u32(doswap);
#else
big_register_t m = doswap;
#endif

unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
big_register_t x = m & (aa[i]^bb[i]);
aa[i] ^= x;
bb[i] ^= x;
}
}

void
p448_add (
@@ -323,28 +269,6 @@ p448_weak_reduce (
a->limb[0] = (a->limb[0] & mask) + tmp;
}

void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) {
p448_t tmp;
assert(n>0);
if (n&1) {
p448_sqr(y,x);
n--;
} else {
p448_sqr(&tmp,x);
p448_sqr(y,&tmp);
n-=2;
}
for (; n; n-=2) {
p448_sqr(&tmp,y);
p448_sqr(y,&tmp);
}
}

mask_t
p448_eq (
const struct p448_t *a,
@@ -360,18 +284,6 @@ p448_eq (
return p448_is_zero(&ra);
}

void
p448_mask (
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) {
unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
a->limb[i] = b->limb[i] & mask;
}
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 84
src/arch_neon/p448.h View File

@@ -22,13 +22,6 @@ p448_set_ui (
p448_t *out,
uint64_t x
) __attribute__((unused,always_inline));
static __inline__ void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t do_swap
) __attribute__((unused,always_inline));

static __inline__ void
p448_add (
@@ -114,13 +107,6 @@ p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
);
static __inline__ void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) __attribute__((unused,always_inline));

void
p448_serialize (
@@ -133,24 +119,6 @@ p448_deserialize (
p448_t *x,
const uint8_t serial[56]
);
static __inline__ void
p448_mask(
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) __attribute__((unused,always_inline));

/**
* Returns 1/x.
*
* If x=0, returns 0.
*/
void
p448_inverse (
struct p448_t* a,
const struct p448_t* x
);

static inline mask_t
p448_eq (
@@ -172,24 +140,6 @@ p448_set_ui (
out->limb[i] = 0;
}
}
void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t doswap
) {
big_register_t *aa = (big_register_t*)a;
big_register_t *bb = (big_register_t*)b;
big_register_t m = br_set_to_mask(doswap);

unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
big_register_t x = m & (aa[i]^bb[i]);
aa[i] ^= x;
bb[i] ^= x;
}
}

void
p448_add (
@@ -315,28 +265,6 @@ p448_weak_reduce (
a->limb[0] = (a->limb[0] & mask) + tmp;
}

void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) {
p448_t tmp;
assert(n>0);
if (n&1) {
p448_sqr(y,x);
n--;
} else {
p448_sqr(&tmp,x);
p448_sqr(y,&tmp);
n-=2;
}
for (; n; n-=2) {
p448_sqr(&tmp,y);
p448_sqr(y,&tmp);
}
}

mask_t
p448_eq (
const struct p448_t *a,
@@ -352,18 +280,6 @@ p448_eq (
return p448_is_zero(&ra);
}

void
p448_mask (
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) {
unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
a->limb[i] = b->limb[i] & mask;
}
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 84
src/arch_neon_experimental/p448.h View File

@@ -25,13 +25,6 @@ p448_set_ui (
p448_t *out,
uint64_t x
) __attribute__((unused,always_inline));
static __inline__ void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t do_swap
) __attribute__((unused,always_inline));

static __inline__ void
p448_add (
@@ -117,13 +110,6 @@ p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
);
static __inline__ void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) __attribute__((unused,always_inline));

void
p448_serialize (
@@ -136,24 +122,6 @@ p448_deserialize (
p448_t *x,
const uint8_t serial[56]
);
static __inline__ void
p448_mask(
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) __attribute__((unused,always_inline));

/**
* Returns 1/x.
*
* If x=0, returns 0.
*/
void
p448_inverse (
struct p448_t* a,
const struct p448_t* x
);

static inline mask_t
p448_eq (
@@ -175,24 +143,6 @@ p448_set_ui (
out->limb[0] = x & ((1<<28)-1);
out->limb[2] = x>>28;
}
void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t doswap
) {
big_register_t *aa = (big_register_t*)a;
big_register_t *bb = (big_register_t*)b;
big_register_t m = br_set_to_mask(doswap);

unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
big_register_t x = m & (aa[i]^bb[i]);
aa[i] ^= x;
bb[i] ^= x;
}
}

void
p448_add (
@@ -313,28 +263,6 @@ p448_weak_reduce (
aa[0] = (aa[0] & vmask) + vrev64_u32(tmp) + (tmp&vm2);
}

void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) {
p448_t tmp;
assert(n>0);
if (n&1) {
p448_sqr(y,x);
n--;
} else {
p448_sqr(&tmp,x);
p448_sqr(y,&tmp);
n-=2;
}
for (; n; n-=2) {
p448_sqr(&tmp,y);
p448_sqr(y,&tmp);
}
}

mask_t
p448_eq (
const struct p448_t *a,
@@ -350,18 +278,6 @@ p448_eq (
return p448_is_zero(&ra);
}

void
p448_mask (
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) {
unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
a->limb[i] = b->limb[i] & mask;
}
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 80
src/arch_ref64/p448.h View File

@@ -23,13 +23,6 @@ p448_set_ui (
p448_t *out,
uint64_t x
) __attribute__((unused));
static __inline__ void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t do_swap
) __attribute__((unused));

static __inline__ void
p448_add (
@@ -121,13 +114,6 @@ p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
);
static __inline__ void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) __attribute__((unused));

void
p448_serialize (
@@ -140,24 +126,6 @@ p448_deserialize (
p448_t *x,
const uint8_t serial[56]
);
static __inline__ void
p448_mask(
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) __attribute__((unused));

/**
* Returns 1/x.
*
* If x=0, returns 0.
*/
void
p448_inverse (
struct p448_t* a,
const struct p448_t* x
);

static inline mask_t
p448_eq (
@@ -178,20 +146,6 @@ p448_set_ui (
out->limb[i] = 0;
}
}
void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t doswap
) {
unsigned int i;
for (i=0; i<8; i++) {
uint64_t x = doswap & (a->limb[i]^b->limb[i]);
a->limb[i] ^= x;
b->limb[i] ^= x;
}
}

void
p448_add (
@@ -313,28 +267,6 @@ p448_weak_reduce (
a->limb[0] = (a->limb[0] & mask) + tmp;
}

void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) {
p448_t tmp;
assert(n>0);
if (n&1) {
p448_sqr(y,x);
n--;
} else {
p448_sqr(&tmp,x);
p448_sqr(y,&tmp);
n-=2;
}
for (; n; n-=2) {
p448_sqr(&tmp,y);
p448_sqr(y,&tmp);
}
}

mask_t
p448_eq (
const struct p448_t *a,
@@ -347,18 +279,6 @@ p448_eq (
return p448_is_zero(&ra);
}

void
p448_mask (
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) {
unsigned int i;
for (i=0; i<8; i++) {
a->limb[i] = b->limb[i] & mask;
}
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 0
- 99
src/arch_x86_64/p448.h View File

@@ -22,13 +22,6 @@ p448_set_ui (
p448_t *out,
uint64_t x
) __attribute__((unused,always_inline));
static __inline__ void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t do_swap
) __attribute__((unused,always_inline));

static __inline__ void
p448_add (
@@ -114,13 +107,6 @@ p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
);
static __inline__ void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) __attribute__((unused,always_inline));

void
p448_serialize (
@@ -133,24 +119,6 @@ p448_deserialize (
p448_t *x,
const uint8_t serial[56]
);
static __inline__ void
p448_mask(
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) __attribute__((unused,always_inline));

/**
* Returns 1/x.
*
* If x=0, returns 0.
*/
void
p448_inverse (
struct p448_t* a,
const struct p448_t* x
);

static inline mask_t
p448_eq (
@@ -171,24 +139,6 @@ p448_set_ui (
out->limb[i] = 0;
}
}
void
p448_cond_swap (
p448_t *a,
p448_t *b,
mask_t doswap
) {
big_register_t *aa = (big_register_t*)a;
big_register_t *bb = (big_register_t*)b;
big_register_t m = br_set_to_mask(doswap);

unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(*aa); i++) {
big_register_t x = m & (aa[i]^bb[i]);
aa[i] ^= x;
bb[i] ^= x;
}
}

void
p448_add (
@@ -331,55 +281,6 @@ p448_weak_reduce (
a->limb[0] = (a->limb[0] & mask) + tmp;
}

void
p448_sqrn (
p448_t *__restrict__ y,
const p448_t *x,
int n
) {
p448_t tmp;
assert(n>0);
if (n&1) {
p448_sqr(y,x);
n--;
} else {
p448_sqr(&tmp,x);
p448_sqr(y,&tmp);
n-=2;
}
for (; n; n-=2) {
p448_sqr(&tmp,y);
p448_sqr(y,&tmp);
}
}

mask_t
p448_eq (
const struct p448_t *a,
const struct p448_t *b
) {
struct p448_t ra, rb;
p448_copy(&ra, a);
p448_copy(&rb, b);
p448_weak_reduce(&ra);
p448_weak_reduce(&rb);
p448_sub(&ra, &ra, &rb);
p448_bias(&ra, 2);
return p448_is_zero(&ra);
}

void
p448_mask (
struct p448_t *a,
const struct p448_t *b,
mask_t mask
) {
unsigned int i;
for (i=0; i<sizeof(*a)/sizeof(a->limb[0]); i++) {
a->limb[i] = b->limb[i] & mask;
}
}

#ifdef __cplusplus
}; /* extern "C" */
#endif


+ 15
- 0
src/arithmetic.c View File

@@ -11,6 +11,21 @@
#include "field.h"
#include "ec_point.h" // TODO

mask_t
field_eq (
const struct field_t *a,
const struct field_t *b
) {
struct field_t ra, rb;
field_copy(&ra, a);
field_copy(&rb, b);
field_weak_reduce(&ra);
field_weak_reduce(&rb);
field_sub(&ra, &ra, &rb);
field_bias(&ra, 2);
return field_is_zero(&ra);
}

void
field_inverse (
struct field_t* a,


+ 26
- 4
src/ec_point.c View File

@@ -52,8 +52,30 @@ field_mulw_scc_wr (
field_weak_reduce(out);
}

void
field_isr (
/*
 * Repeated squaring: applies field_sqr n times in total, starting from x and
 * leaving the result in y (i.e. y = x^(2^n)).
 *
 * The first step consumes one squaring when n is odd and two when n is even,
 * so the remaining count is always even and can be worked off two squarings
 * at a time, ping-ponging between y and a scratch element.
 *
 * Precondition (asserted): n > 0.  y and x are __restrict__ and must not
 * alias.
 */
static __inline__ void
field_sqrn (
    field_t *__restrict__ y,
    const field_t *x,
    int n
) {
    field_t scratch;

    assert(n>0);

    if (n&1) {
        /* Odd count: a single squaring lands directly in y. */
        field_sqr(y, x);
        n -= 1;
    } else {
        /* Even count: two squarings, through the scratch element. */
        field_sqr(&scratch, x);
        field_sqr(y, &scratch);
        n -= 2;
    }

    /* n is now even; each pass performs two more squarings. */
    while (n) {
        field_sqr(&scratch, y);
        field_sqr(y, &scratch);
        n -= 2;
    }
}

void
field_isr ( /* TODO: MAGIC */
struct field_t* a,
const struct field_t* x
) {
@@ -433,7 +455,7 @@ serialize_montgomery (
field_mul ( &L0, &a->xd, &L2 );
L5 = field_is_zero( &a->zd );
L6 = - L5;
field_mask ( &L1, &L0, L5 );
constant_time_mask ( &L1, &L0, sizeof(L1), L5 );
field_add ( &L2, &L1, &a->zd );
L4 = ~ L5;
field_mul ( &L1, sbz, &L3 );
@@ -446,7 +468,7 @@ serialize_montgomery (
field_mul ( &L2, &L1, &L0 );
field_sqr ( &L1, &L0 );
field_mul ( &L0, &L3, &L1 );
field_mask ( b, &L2, L4 );
constant_time_mask ( b, &L2, sizeof(L1), L4 );
field_subw ( &L0, 1 );
field_bias ( &L0, 1 );
L5 = field_is_zero( &L0 );


+ 230
- 0
src/include/constant_time.h View File

@@ -0,0 +1,230 @@
/**
* @file constant_time.h
* @copyright
* Copyright (c) 2014 Cryptography Research, Inc. \n
* Released under the MIT License. See LICENSE.txt for license information.
* @author Mike Hamburg
*
* @brief Constant-time routines.
*/

#ifndef __CONSTANT_TIME_H__
#define __CONSTANT_TIME_H__ 1

#include "word.h"

/*
* Constant-time operations on hopefully-compile-time-sized memory
* regions. Needed for flexibility / demagication: not all fields
* have sizes which are multiples of the vector width, necessitating
* a change from the Ed448 versions.
*
* These routines would be much simpler to define at the byte level,
* but if not vectorized they would be a significant fraction of the
* runtime. Eg on NEON-less ARM, constant_time_lookup is like 15% of
* signing time, vs 6% on Haswell with its fancy AVX2 vectors.
*
* If the compiler could do a good job of autovectorizing the code,
* we could just leave it with the byte definition. But that's unlikely
* on most deployed compilers, especially if you consider that pcmpeq[size]
* is much faster than moving a scalar to the vector unit (which is what
* a naive autovectorizer will do with constant_time_lookup on Intel).
*
* Instead, we're putting our trust in the loop unroller and unswitcher.
*
* TODO: verify correctness and performance on each platform, to make sure
* that there are no regressions.
*/


/**
 * Unaligned big (vector?) register.
 *
 * A big_register_t wrapped in a packed struct.  The routines in this header
 * cast byte pointers to this type and access the `unaligned` member in their
 * "unaligned" branches, i.e. when the element size is not a multiple of
 * sizeof(big_register_t).
 */
typedef struct {
big_register_t unaligned;
} __attribute__((packed)) unaligned_br_t;

/**
 * Unaligned word register, for architectures where that matters.
 *
 * A word_t wrapped in a packed struct.  The routines in this header access
 * memory through this type in their word-sized "unaligned" branches, i.e.
 * when the element size is not a multiple of sizeof(word_t).
 */
typedef struct {
word_t unaligned;
} __attribute__((packed)) unaligned_word_t;

/**
 * @brief Constant-time conditional swap.
 *
 * If doswap, then swap elem_bytes between *a and *b.
 *
 * The swap is done without branching on doswap: for every chunk the routine
 * computes (a ^ b) & mask and XORs that into both sides, so the sequence of
 * loads and stores depends only on elem_bytes.  The buffers are processed in
 * big_register_t-sized chunks first, then word_t-sized chunks, then single
 * bytes for whatever is left.  When elem_bytes is not a multiple of the chunk
 * size, accesses go through the packed unaligned_br_t / unaligned_word_t
 * wrappers.
 *
 * *a and *b must not alias.  Also, they must be at least as aligned
 * as their sizes, if the CPU cares about that sort of thing.
 *
 * @param a_         First buffer (elem_bytes bytes).
 * @param b_         Second buffer (elem_bytes bytes).
 * @param elem_bytes Number of bytes to swap.  May be smaller than
 *                   sizeof(big_register_t); the tail loops then do all
 *                   the work.
 * @param doswap     Mask ANDed into the XOR difference.  The callers in this
 *                   change pass 0 (no swap) or all-ones (swap).
 */
static __inline__ void
__attribute__((unused,always_inline))
constant_time_cond_swap (
    void *__restrict__ a_,
    void *__restrict__ b_,
    word_t elem_bytes,
    mask_t doswap
) {
    word_t k;
    unsigned char *a = (unsigned char *)a_;
    unsigned char *b = (unsigned char *)b_;
    big_register_t br_mask = br_set_to_mask(doswap);

    /*
     * Bound written as k+size <= elem_bytes: the subtraction form
     * elem_bytes-size wraps around (word_t is unsigned) whenever elem_bytes
     * is smaller than a big register, which would send the loop far past
     * the end of both buffers.
     */
    for (k=0; k+sizeof(big_register_t)<=elem_bytes; k+=sizeof(big_register_t)) {
        if (elem_bytes % sizeof(big_register_t)) {
            /* unaligned */
            big_register_t delta =
                  ((unaligned_br_t*)(&a[k]))->unaligned
                ^ ((unaligned_br_t*)(&b[k]))->unaligned;
            delta &= br_mask;
            ((unaligned_br_t*)(&a[k]))->unaligned ^= delta;
            ((unaligned_br_t*)(&b[k]))->unaligned ^= delta;
        } else {
            /* aligned */
            big_register_t delta =
                  *((big_register_t*)(&a[k]))
                ^ *((big_register_t*)(&b[k]));
            delta &= br_mask;
            *((big_register_t*)(&a[k])) ^= delta;
            *((big_register_t*)(&b[k])) ^= delta;
        }
    }

    /* Word-sized chunks, only if the leftover holds at least one word. */
    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
        for (; k+sizeof(word_t)<=elem_bytes; k+=sizeof(word_t)) {
            if (elem_bytes % sizeof(word_t)) {
                /* unaligned */
                word_t delta =
                      ((unaligned_word_t*)(&a[k]))->unaligned
                    ^ ((unaligned_word_t*)(&b[k]))->unaligned;
                delta &= doswap;
                ((unaligned_word_t*)(&a[k]))->unaligned ^= delta;
                ((unaligned_word_t*)(&b[k]))->unaligned ^= delta;
            } else {
                /* aligned */
                word_t delta =
                      *((word_t*)(&a[k]))
                    ^ *((word_t*)(&b[k]));
                delta &= doswap;
                *((word_t*)(&a[k])) ^= delta;
                *((word_t*)(&b[k])) ^= delta;
            }
        }
    }

    /* Trailing bytes when elem_bytes is not a whole number of words. */
    if (elem_bytes % sizeof(word_t)) {
        for (; k<elem_bytes; k+=1) {
            unsigned char delta = a[k] ^ b[k];
            delta &= doswap;
            a[k] ^= delta;
            b[k] ^= delta;
        }
    }
}

/**
 * @brief Constant-time equivalent of memcpy(out, table + elem_bytes*idx, elem_bytes);
 *
 * Every one of the n_table entries is read; entry j is ANDed with a mask
 * derived from comparing j against idx and ORed into the output, so the
 * memory access pattern does not depend on idx.  The output is cleared with
 * really_memset first; consequently, if idx >= n_table no entry matches and
 * the output stays all-zero.
 *
 * Each entry is processed in big_register_t-sized chunks, then word_t-sized
 * chunks, then single bytes.
 *
 * The table must be at least as aligned as elem_bytes.  The output must be vector aligned.
 * The table and output must not alias.
 *
 * @param out_       Destination, elem_bytes bytes.
 * @param table_     n_table consecutive entries of elem_bytes bytes each.
 * @param elem_bytes Size of one entry.  May be smaller than
 *                   sizeof(big_register_t).
 * @param n_table    Number of entries in the table.
 * @param idx        Index of the entry to extract.
 */
static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup (
    void *__restrict__ out_,
    const void *table_,
    word_t elem_bytes,
    word_t n_table,
    word_t idx
) {
    /* big_i starts at idx and is decremented by one per entry, so it is
     * zero exactly on the iteration where j == idx. */
    big_register_t big_one = br_set_to_mask(1), big_i = br_set_to_mask(idx);
    /* Can't do pointer arithmetic on void* */
    unsigned char *out = (unsigned char *)out_;
    const unsigned char *table = (const unsigned char *)table_;
    word_t j,k;

    really_memset(out, 0, elem_bytes);

    for (j=0; j<n_table; j++, big_i-=big_one) {
        const unsigned char *entry = &table[j*elem_bytes];
        big_register_t br_mask = br_is_zero(big_i);
        word_t mask;

        /*
         * Bound written as k+size <= elem_bytes: the subtraction form
         * elem_bytes-size wraps around (word_t is unsigned) when an entry
         * is smaller than a big register, causing out-of-bounds accesses.
         */
        for (k=0; k+sizeof(big_register_t)<=elem_bytes; k+=sizeof(big_register_t)) {
            if (elem_bytes % sizeof(big_register_t)) {
                /* input unaligned, output aligned */
                *(big_register_t *)(out+k) |= br_mask & ((const unaligned_br_t*)(&entry[k]))->unaligned;
            } else {
                /* aligned */
                *(big_register_t *)(out+k) |= br_mask & *(const big_register_t*)(&entry[k]);
            }
        }

        /* Scalar mask for the word- and byte-sized tails. */
        mask = word_is_zero(idx^j);

        if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
            for (; k+sizeof(word_t)<=elem_bytes; k+=sizeof(word_t)) {
                if (elem_bytes % sizeof(word_t)) {
                    /* input unaligned, output aligned */
                    *(word_t *)(out+k) |= mask & ((const unaligned_word_t*)(&entry[k]))->unaligned;
                } else {
                    /* aligned */
                    *(word_t *)(out+k) |= mask & *(const word_t*)(&entry[k]);
                }
            }
        }

        if (elem_bytes % sizeof(word_t)) {
            for (; k<elem_bytes; k+=1) {
                out[k] |= mask & entry[k];
            }
        }
    }
}

/**
 * @brief Constant-time a = b&mask.
 *
 * Writes elem_bytes bytes to a_, each chunk being the corresponding chunk of
 * b_ ANDed with the mask.  The buffer is processed in big_register_t-sized
 * chunks, then word_t-sized chunks, then single bytes; the access pattern
 * depends only on elem_bytes.
 *
 * The input and output must be at least as aligned as elem_bytes.
 * The output pointer is __restrict__-qualified, so a_ and b_ must not alias.
 *
 * @param a_         Destination, elem_bytes bytes.
 * @param b_         Source, elem_bytes bytes.
 * @param elem_bytes Number of bytes to process.  May be smaller than
 *                   sizeof(big_register_t).
 * @param mask       Mask ANDed into every chunk.  The callers in this change
 *                   pass 0 or all-ones.
 */
static __inline__ void
__attribute__((unused,always_inline))
constant_time_mask (
    void *__restrict__ a_,
    const void *b_,
    word_t elem_bytes,
    mask_t mask
) {
    unsigned char *a = (unsigned char *)a_;
    const unsigned char *b = (const unsigned char *)b_;
    word_t k;
    big_register_t br_mask = br_set_to_mask(mask);

    /*
     * Bound written as k+size <= elem_bytes: the subtraction form
     * elem_bytes-size wraps around (word_t is unsigned) when elem_bytes is
     * smaller than a big register, causing out-of-bounds accesses.
     */
    for (k=0; k+sizeof(big_register_t)<=elem_bytes; k+=sizeof(big_register_t)) {
        if (elem_bytes % sizeof(big_register_t)) {
            /* unaligned */
            ((unaligned_br_t*)(&a[k]))->unaligned = br_mask & ((const unaligned_br_t*)(&b[k]))->unaligned;
        } else {
            /* aligned */
            *(big_register_t *)(a+k) = br_mask & *(const big_register_t*)(&b[k]);
        }
    }

    /* Word-sized chunks, only if the leftover holds at least one word. */
    if (elem_bytes % sizeof(big_register_t) >= sizeof(word_t)) {
        for (; k+sizeof(word_t)<=elem_bytes; k+=sizeof(word_t)) {
            if (elem_bytes % sizeof(word_t)) {
                /* unaligned */
                ((unaligned_word_t*)(&a[k]))->unaligned = mask & ((const unaligned_word_t*)(&b[k]))->unaligned;
            } else {
                /* aligned */
                *(word_t *)(a+k) = mask & *(const word_t*)(&b[k]);
            }
        }
    }

    /* Trailing bytes when elem_bytes is not a whole number of words. */
    if (elem_bytes % sizeof(word_t)) {
        for (; k<elem_bytes; k+=1) {
            a[k] = mask & b[k];
        }
    }
}

#endif /* __CONSTANT_TIME_H__ */

+ 2
- 38
src/include/ec_point.h View File

@@ -11,6 +11,7 @@
#define __CC_INCLUDED_EC_POINT_H__

#include "field.h"
#include "constant_time.h"

#ifdef __cplusplus
extern "C" {
@@ -150,43 +151,6 @@ copy_tw_pniels (
const struct tw_pniels_t* ds
) __attribute__((unused,always_inline));

/**
* Returns 1/sqrt(+- x).
*
* The Legendre symbol of the result is the same as that of the
* input.
*
* If x=0, returns 0.
*/
void
field_isr (
struct field_t* a,
const struct field_t* x
);
/**
* Batch inverts out[i] = 1/in[i]
*
* If any input is zero, all the outputs will be zero.
*/
void
field_simultaneous_invert (
struct p448_t *__restrict__ out,
const struct p448_t *in,
unsigned int n
);

/**
* Returns 1/x.
*
* If x=0, returns 0.
*/
void
field_inverse (
struct field_t* a,
const struct field_t* x
);

/**
* Add two points on a twisted Edwards curve, one in Extensible form
* and the other in half-Niels form.
@@ -490,7 +454,7 @@ cond_negate_tw_niels (
struct tw_niels_t *n,
mask_t doNegate
) {
field_cond_swap(&n->a, &n->b, doNegate);
constant_time_cond_swap(&n->a, &n->b, sizeof(n->a), doNegate);
field_cond_neg(&n->c, doNegate);
}



+ 71
- 14
src/include/field.h View File

@@ -9,21 +9,13 @@
#ifndef __FIELD_H__
#define __FIELD_H__

#include "p448.h"
#include <string.h>

#include "p448.h"
#define FIELD_BITS 448
#define FIELD_BYTES (1+(FIELD_BITS-1)/8)
#define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t))

/**
* @brief For GMP tests: little-endian representation of the field modulus.
*/
extern const uint8_t FIELD_MODULUS[FIELD_BYTES];

#define field_t p448_t
#define field_mul p448_mul
#define field_sqr p448_sqr
#define field_sqrn p448_sqrn
#define field_add p448_add
#define field_sub p448_sub
#define field_mulw p448_mulw
@@ -32,15 +24,80 @@ extern const uint8_t FIELD_MODULUS[FIELD_BYTES];
#define field_neg p448_neg
#define field_set_ui p448_set_ui
#define field_bias p448_bias
#define field_copy p448_copy
#define field_mask p448_mask
#define field_weak_reduce p448_weak_reduce
#define field_strong_reduce p448_strong_reduce
#define field_cond_swap p448_cond_swap
#define field_cond_neg p448_cond_neg
#define field_serialize p448_serialize
#define field_deserialize p448_deserialize
#define field_eq p448_eq
#define field_is_zero p448_is_zero

/** @brief Bytes in a field element */
#define FIELD_BYTES (1+(FIELD_BITS-1)/8)

/**
 * @brief Words in a field element
 *
 * NOTE(review): FIELD_BITS is a bit count but the divisor sizeof(word_t) is
 * a byte count, so this evaluates to more than the number of word_t needed
 * to hold FIELD_BITS bits (e.g. 56 rather than 7 when FIELD_BITS is 448 and
 * sizeof(word_t) is 8).  Confirm whether a bits-per-word divisor was
 * intended before relying on this value.
 */
#define FIELD_WORDS (1+(FIELD_BITS-1)/sizeof(word_t))

/**
* @brief For GMP tests: little-endian representation of the field modulus.
*/
extern const uint8_t FIELD_MODULUS[FIELD_BYTES];

/**
 * Copy one field element to another.
 *
 * Byte-wise copy of the whole struct from b into a.  Both pointers are
 * __restrict__-qualified, so source and destination must not overlap.
 */
static inline void
__attribute__((unused,always_inline))
field_copy (
    struct field_t *__restrict__ a,
    const struct field_t *__restrict__ b
) {
    const size_t nbytes = sizeof(struct field_t);
    memcpy(a, b, nbytes);
}

/**
* Returns 1/sqrt(+- x).
*
* The Legendre symbol of the result is the same as that of the
* input.
*
* If x=0, returns 0.
*/
void
field_isr (
struct field_t* a,
const struct field_t* x
);
/**
 * Batch inverts out[i] = 1/in[i]
 *
 * If any input is zero, all the outputs will be zero.
 *
 * @param out Receives the inverses.  __restrict__-qualified, so it must not
 *            alias in.
 * @param in  Elements to invert.
 * @param n   Number of elements to process (presumably the length of both
 *            arrays).
 */
void
field_simultaneous_invert (
    struct field_t *__restrict__ out,
    const struct field_t *in,
    unsigned int n
);

/**
* Returns 1/x.
*
* If x=0, returns 0.
*/
void
field_inverse (
struct field_t* a,
const struct field_t* x
);

/**
* Returns -1 if a==b, 0 otherwise.
*/
mask_t
field_eq (
const struct field_t *a,
const struct field_t *b
);

#endif /* __FIELD_H__ */

+ 9
- 0
src/include/word.h View File

@@ -143,6 +143,15 @@ typedef word_t vecmask_t __attribute__((vector_size(32)));
return (big_register_t)x;
}
#endif
/**
 * Return -1 if x==0, and 0 otherwise.
 *
 * Branch-free: x is widened to dword_t, decremented, and the part above
 * WORD_BITS is returned as the mask.  (This relies on dword_t being an
 * unsigned type wider than WORD_BITS, so that only x==0 wraps into the
 * upper half -- confirm against the dword_t definition.)
 */
static __inline__ mask_t
__attribute__((always_inline,unused))
word_is_zero(word_t x) {
    dword_t widened = (dword_t)(x);
    widened = widened - 1;
    return (mask_t)(widened >> WORD_BITS);
}

#if __AVX2__
static __inline__ big_register_t


+ 29
- 4
src/scalarmul.c View File

@@ -11,6 +11,7 @@
#include "intrinsics.h"
#include "scalarmul.h"
#include "barrett_field.h"
#include "constant_time.h"

mask_t
montgomery_ladder (
@@ -29,15 +30,15 @@ montgomery_ladder (
word_t w = scalar[j];
for (i=n; i>=0; i--) {
mask_t flip = -((w>>i)&1);
field_cond_swap(&mont.xa,&mont.xd,flip^pflip);
field_cond_swap(&mont.za,&mont.zd,flip^pflip);
constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),flip^pflip);
constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),flip^pflip);
montgomery_step(&mont);
pflip = flip;
}
n = WORD_BITS-1;
}
field_cond_swap(&mont.xa,&mont.xd,pflip);
field_cond_swap(&mont.za,&mont.zd,pflip);
constant_time_cond_swap(&mont.xa,&mont.xd,sizeof(mont.xd),pflip);
constant_time_cond_swap(&mont.za,&mont.zd,sizeof(mont.xd),pflip);
assert(n_extra_doubles < INT_MAX);
for (j=0; j<(int)n_extra_doubles; j++) {
@@ -47,6 +48,29 @@ montgomery_ladder (
return serialize_montgomery(out, &mont, in);
}

/*
 * Typed wrapper around constant_time_lookup for tables of tw_pniels_t.
 *
 * Forwards to constant_time_lookup with `in` as the table, sizeof(*out) as
 * the element size, `nin` as the number of table entries and `idx` as the
 * index to extract into *out.
 *
 * nin and idx are ints here but are passed to word_t parameters;
 * presumably callers only pass non-negative values -- TODO confirm.
 */
static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup_tw_pniels (
struct tw_pniels_t *out,
const struct tw_pniels_t *in,
int nin,
int idx
) {
constant_time_lookup(out,in,sizeof(*out),nin,idx);
}

/*
 * Typed wrapper around constant_time_lookup for tables of tw_niels_t.
 *
 * Forwards to constant_time_lookup with `in` as the table, sizeof(*out) as
 * the element size, `nin` as the number of table entries and `idx` as the
 * index to extract into *out.
 *
 * nin and idx are ints here but are passed to word_t parameters;
 * presumably callers only pass non-negative values -- TODO confirm.
 */
static __inline__ void
__attribute__((unused,always_inline))
constant_time_lookup_tw_niels (
struct tw_niels_t *out,
const struct tw_niels_t *in,
int nin,
int idx
) {
constant_time_lookup(out,in,sizeof(*out),nin,idx);
}

/*
static __inline__ void
constant_time_lookup_tw_pniels (
struct tw_pniels_t *out,
@@ -90,6 +114,7 @@ constant_time_lookup_tw_niels (
}
}
}
*/

static void
convert_to_signed_window_form (


Loading…
Cancel
Save