Browse Source

Separate API word size from arch word size. This enables compiling

a 32-bit arch on a 64-bit platform, eg NEON on AARCH64.  It's probably
more useful for cross-platform testing, though.

The breakdown is as follows:
* decaf_bool_t, decaf_word_t and decaf_error_t are as defined in the API.
* DECAF_WORD_BITS is the size of a decaf_word_t.
* decaf_word_t is used for scalars, so on every curve the scalar impls are the same
(i.e. they follow the API's word size).
* SC_LIMB macro always takes a 64-bit word.

* non-prefixed word_t, mask_t, etc are as defined by the per-curve arch.
* ARCH_WORD_BITS is the size of a word_t.
* word_t is used for gf elements, so the curves may have different guts.
master
Mike Hamburg 9 years ago
parent
commit
8d6c51ee28
8 changed files with 92 additions and 60 deletions
  1. +73
    -52
      src/decaf.c
  2. +1
    -1
      src/include/arch_32/arch_intrinsics.h
  3. +1
    -1
      src/include/arch_arm_32/arch_intrinsics.h
  4. +1
    -1
      src/include/arch_neon/arch_intrinsics.h
  5. +1
    -1
      src/include/arch_ref64/arch_intrinsics.h
  6. +1
    -1
      src/include/arch_x86_64/arch_intrinsics.h
  7. +10
    -3
      src/include/word.h
  8. +4
    -0
      src/public_include/decaf/common.h

+ 73
- 52
src/decaf.c View File

@@ -38,10 +38,7 @@
extern const gf SQRT_MINUS_ONE;
#endif

/* FIXME: this can be different from DECAF_WORD_BITS, and word_t can be different from decaf_word_t,
* eg when mixing and matching implementations for different curves. Homogenize this.
*/
#define WBITS WORD_BITS
#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */

const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}};
extern const scalar_t API_NS(sc_r2);
@@ -65,15 +62,41 @@ const size_t API_NS2(alignof,precomputed_s) = 32;
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<NLIMBS; i++) { op; }}
#define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<NLIMBS; i++) { op; }}

/* The plan on booleans:
*
* The external interface uses decaf_bool_t, but this might be a different
* size than our particular arch's word_t (and thus mask_t). Also, the caller
* isn't guaranteed to pass it as nonzero. So bool_to_mask converts word sizes
* and checks nonzero.
*
* On the flip side, mask_t is always -1 or 0, but it might be a different size
* than decaf_bool_t.
*
* On the third hand, we have success vs boolean types, but that's handled in
* common.h: it converts between decaf_bool_t and decaf_error_t.
*/
static INLINE decaf_bool_t mask_to_bool (mask_t m) {
return (decaf_sword_t)(sword_t)m;
}

static INLINE mask_t bool_to_mask (decaf_bool_t m) {
/* On most arches this will be optimized to a simple cast. */
mask_t ret = 0;
for (unsigned int i=0; i<1 || i<sizeof(decaf_bool_t)/sizeof(mask_t); i++) {
ret |= ~ word_is_zero(m >> (i*8*sizeof(word_t)));
}
return ret;
}

/** Constant time, x = is_z ? z : y */
static INLINE void
cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) {
cond_sel(gf x, const gf y, const gf z, mask_t is_z) {
constant_time_select(x,z,y,sizeof(gf),is_z,0);
}

/** Constant time, if (neg) x=-x; */
static void
cond_neg(gf x, decaf_bool_t neg) {
cond_neg(gf x, mask_t neg) {
gf y;
gf_sub(y,ZERO,x);
cond_sel(x,x,y,neg);
@@ -81,7 +104,7 @@ cond_neg(gf x, decaf_bool_t neg) {

/** Constant time, if (swap) (x,y) = (y,x); */
static INLINE void
cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) {
cond_swap(gf x, gf_s *__restrict__ y, mask_t swap) {
UNROLL for (unsigned int i=0; i<sizeof(x->limb)/sizeof(x->limb[0]); i++) {
decaf_word_t s = (x->limb[i] ^ y->limb[i]) & swap;
x->limb[i] ^= s;
@@ -90,8 +113,8 @@ cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) {
}

/** Inverse square root using addition chain. */
static decaf_bool_t
gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) {
static mask_t
gf_isqrt_chk(gf y, const gf x, mask_t allow_zero) {
gf tmp0, tmp1;
gf_isr((gf_s *)y, (const gf_s *)x);
gf_sqr(tmp0,y);
@@ -151,14 +174,14 @@ sc_subx(
const scalar_t p,
decaf_word_t extra
) {
dsword_t chain = 0;
decaf_dsword_t chain = 0;
unsigned int i;
for (i=0; i<SCALAR_LIMBS; i++) {
chain = (chain + accum[i]) - sub->limb[i];
out->limb[i] = chain;
chain >>= WBITS;
}
decaf_bool_t borrow = chain+extra; /* = 0 or -1 */
decaf_word_t borrow = chain+extra; /* = 0 or -1 */
chain = 0;
for (i=0; i<SCALAR_LIMBS; i++) {
@@ -343,7 +366,7 @@ API_NS(scalar_eq) (
for (i=0; i<SCALAR_LIMBS; i++) {
diff |= a->limb[i] ^ b->limb[i];
}
return word_is_zero(diff);
return mask_to_bool(word_is_zero(diff));
}

/** identity = (0,1) */
@@ -354,9 +377,9 @@ deisogenize (
gf_s *__restrict__ s,
gf_s *__restrict__ minus_t_over_s,
const point_t p,
decaf_bool_t toggle_hibit_s,
decaf_bool_t toggle_hibit_t_over_s,
decaf_bool_t toggle_rotation
mask_t toggle_hibit_s,
mask_t toggle_hibit_t_over_s,
mask_t toggle_rotation
) {
#if COFACTOR == 4 && !IMAGINE_TWIST
(void) toggle_rotation;
@@ -372,7 +395,7 @@ deisogenize (
gf_sub ( b, p->z, p->y );
gf_mul ( c, b, a );
gf_mulw_sgn ( b, c, -EDWARDS_D ); /* (a-d)(Z+Y)(Z-Y) */
decaf_bool_t ok = gf_isqrt_chk ( a, b, DECAF_TRUE ); /* r in the paper */
mask_t ok = gf_isqrt_chk ( a, b, DECAF_TRUE ); /* r in the paper */
(void)ok; assert(ok);
gf_mulw_sgn ( b, a, -EDWARDS_D ); /* u in the paper */
gf_mul ( c, b, a ); /* ur */
@@ -413,13 +436,13 @@ deisogenize (
gf_mul ( a, p->z, t ); /* "tz" = T*Z */
gf_sqr ( b, a );
gf_mul ( d, b, c ); /* (TZ)^2 * (Z^2-aX^2) */
decaf_bool_t ok = gf_isqrt_chk ( b, d, DECAF_TRUE );
mask_t ok = gf_isqrt_chk ( b, d, DECAF_TRUE );
(void)ok; assert(ok);
gf_mul ( d, b, a ); /* "osx" = 1 / sqrt(z^2-ax^2) */
gf_mul ( a, b, c );
gf_mul ( b, a, d ); /* 1/tz */

decaf_bool_t rotate;
mask_t rotate;
#if (COFACTOR == 8)
{
gf e;
@@ -439,7 +462,7 @@ deisogenize (
gf_mul ( c, a, d ); // new "osx"
gf_mul ( a, c, p->z );
gf_add ( a, a, a ); // 2 * "osx" * Z
decaf_bool_t tg1 = rotate ^ toggle_hibit_t_over_s ^~ hibit(a);
mask_t tg1 = rotate ^ toggle_hibit_t_over_s ^~ hibit(a);
cond_neg ( c, tg1 );
cond_neg ( a, rotate ^ tg1 );
gf_mul ( d, b, p->z );
@@ -462,9 +485,9 @@ decaf_error_t API_NS(point_decode) (
decaf_bool_t allow_identity
) {
gf s, a, b, c, d, e, f;
decaf_bool_t succ = gf_deserialize(s, ser), zero = gf_eq(s, ZERO);
allow_identity = ~word_is_zero(allow_identity);
succ &= allow_identity | ~zero;
mask_t succ = gf_deserialize(s, ser);
mask_t zero = gf_eq(s, ZERO);
succ &= bool_to_mask(allow_identity) | ~zero;
succ &= ~hibit(s);
gf_sqr ( a, s );
#if IMAGINE_TWIST
@@ -484,7 +507,7 @@ decaf_error_t API_NS(point_decode) (
gf_mul ( b, e, d ); /* 1/t */
gf_mul ( d, e, c ); /* d = t / (s(1-as^2)) */
gf_mul ( e, d, f ); /* t/s */
decaf_bool_t negtos = hibit(e);
mask_t negtos = hibit(e);
cond_neg(b, negtos);
cond_neg(d, negtos);

@@ -513,7 +536,7 @@ decaf_error_t API_NS(point_decode) (
assert(API_NS(point_valid)(p) | ~succ);
return decaf_succeed_if(succ);
return decaf_succeed_if(mask_to_bool(succ));
}

#if IMAGINE_TWIST
@@ -596,7 +619,7 @@ static NOINLINE void
point_double_internal (
point_t p,
const point_t q,
decaf_bool_t before_double
int before_double
) {
gf a, b, c, d;
gf_sqr ( c, q->x );
@@ -651,7 +674,7 @@ decaf_error_t API_NS(scalar_decode)(
) {
unsigned int i;
scalar_decode_short(s, ser, SER_BYTES);
dsword_t accum = 0;
decaf_dsword_t accum = 0;
for (i=0; i<SCALAR_LIMBS; i++) {
accum = (accum + s->limb[i] - sc_p->limb[i]) >> WBITS;
}
@@ -659,7 +682,7 @@ decaf_error_t API_NS(scalar_decode)(
API_NS(scalar_mul)(s,s,API_NS(scalar_one)); /* ham-handed reduce */
return decaf_succeed_if(accum);
return decaf_succeed_if(~word_is_zero(accum));
}

void API_NS(scalar_destroy) (
@@ -726,7 +749,7 @@ void API_NS(scalar_encode)(
static INLINE void
cond_neg_niels (
niels_t n,
decaf_bool_t neg
mask_t neg
) {
cond_swap(n->a, n->b, neg);
cond_neg(n->c, neg);
@@ -770,7 +793,7 @@ static NOINLINE void
add_niels_to_pt (
point_t d,
const niels_t e,
decaf_bool_t before_double
int before_double
) {
gf a, b, c;
gf_sub_nr ( b, d->y, d->x );
@@ -792,7 +815,7 @@ static NOINLINE void
sub_niels_from_pt (
point_t d,
const niels_t e,
decaf_bool_t before_double
int before_double
) {
gf a, b, c;
gf_sub_nr ( b, d->y, d->x );
@@ -814,7 +837,7 @@ static void
add_pniels_to_pt (
point_t p,
const pniels_t pn,
decaf_bool_t before_double
int before_double
) {
gf L0;
gf_mul ( L0, p->z, pn->z );
@@ -826,7 +849,7 @@ static void
sub_pniels_from_pt (
point_t p,
const pniels_t pn,
decaf_bool_t before_double
int before_double
) {
gf L0;
gf_mul ( L0, p->z, pn->z );
@@ -1111,7 +1134,7 @@ decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
gf a, b;
gf_mul ( a, p->y, q->x );
gf_mul ( b, q->y, p->x );
decaf_bool_t succ = gf_eq(a,b);
mask_t succ = gf_eq(a,b);
#if (COFACTOR == 8) && IMAGINE_TWIST
gf_mul ( a, p->y, q->y );
@@ -1129,7 +1152,7 @@ decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
succ |= gf_eq(a,b);
#endif
return succ;
return mask_to_bool(succ);
}

void API_NS(point_from_hash_nonuniform) (
@@ -1166,7 +1189,7 @@ void API_NS(point_from_hash_nonuniform) (
gf_mul(rN,r,N);
gf_mul(a,rN,D);
decaf_bool_t square = gf_isqrt_chk(e,a,DECAF_FALSE);
mask_t square = gf_isqrt_chk(e,a,DECAF_FALSE);
/* b <- t/s */
cond_sel(c,r0,r,square); /* r? = sqr ? r : 1 */
@@ -1192,7 +1215,7 @@ void API_NS(point_from_hash_nonuniform) (
gf_mul(c,a,b);
/* Normalize/negate */
decaf_bool_t neg_s = hibit(a) ^ ~square;
mask_t neg_s = hibit(a) ^ ~square;
cond_neg(a,neg_s); /* ends up negative if ~square */
/* b <- t */
@@ -1222,8 +1245,8 @@ API_NS(invert_elligator_nonuniform) (
const point_t p,
uint16_t hint_
) {
decaf_bool_t hint = hint_;
decaf_bool_t sgn_s = -(hint & 1),
mask_t hint = hint_;
mask_t sgn_s = -(hint & 1),
sgn_t_over_s = -(hint>>1 & 1),
sgn_r0 = -(hint>>2 & 1),
sgn_ed_T = -(hint>>3 & 1);
@@ -1234,7 +1257,7 @@ API_NS(invert_elligator_nonuniform) (
gf_mul(b,c,a);
gf_sub(b,ONE,b); /* t+1 */
gf_sqr(c,a); /* s^2 */
decaf_bool_t is_identity = gf_eq(p->t,ZERO);
mask_t is_identity = gf_eq(p->t,ZERO);
{
/* identity adjustments */
/* in case of identity, currently c=0, t=0, b=1, will encode to 1 */
@@ -1253,7 +1276,7 @@ API_NS(invert_elligator_nonuniform) (
#else
gf_sub(d,ZERO,b);
#endif
decaf_bool_t succ = gf_isqrt_chk(c,d,DECAF_TRUE);
mask_t succ = gf_isqrt_chk(c,d,DECAF_TRUE);
gf_mul(b,a,c);
cond_neg(b, sgn_r0^hibit(b));
@@ -1264,7 +1287,7 @@ API_NS(invert_elligator_nonuniform) (
gf_serialize(recovered_hash, b);
/* TODO: deal with overflow flag */
return decaf_succeed_if(succ);
return decaf_succeed_if(mask_to_bool(succ));
}

void API_NS(point_from_hash_uniform) (
@@ -1295,7 +1318,7 @@ decaf_bool_t API_NS(point_valid) (
gf a,b,c;
gf_mul(a,p->x,p->y);
gf_mul(b,p->z,p->t);
decaf_bool_t out = gf_eq(a,b);
mask_t out = gf_eq(a,b);
gf_sqr(a,p->x);
gf_sqr(b,p->y);
gf_sub(a,b,a);
@@ -1305,7 +1328,7 @@ decaf_bool_t API_NS(point_valid) (
gf_add(b,b,c);
out &= gf_eq(a,b);
out &= ~gf_eq(p->z,ZERO);
return out;
return mask_to_bool(out);
}

void API_NS(point_debugging_torque) (
@@ -1502,7 +1525,7 @@ void API_NS(precomputed_scalarmul) (
}
}
decaf_bool_t invert = (tab>>(t-1))-1;
mask_t invert = (tab>>(t-1))-1;
tab ^= invert;
tab &= (1<<(t-1)) - 1;

@@ -1527,8 +1550,7 @@ void API_NS(point_cond_sel) (
const point_t b,
decaf_bool_t pick_b
) {
pick_b = ~word_is_zero(pick_b);
constant_time_select(out,b,a,sizeof(point_t),pick_b,0);
constant_time_select(out,b,a,sizeof(point_t),bool_to_mask(pick_b),0);
}

void API_NS(scalar_cond_sel) (
@@ -1537,8 +1559,7 @@ void API_NS(scalar_cond_sel) (
const scalar_t b,
decaf_bool_t pick_b
) {
pick_b = ~word_is_zero(pick_b);
constant_time_select(out,b,a,sizeof(scalar_t),pick_b,sizeof(out->limb[0]));
constant_time_select(out,b,a,sizeof(scalar_t),bool_to_mask(pick_b),sizeof(out->limb[0]));
}

/* FUTURE: restore Curve25519 Montgomery ladder? */
@@ -1550,13 +1571,13 @@ decaf_error_t API_NS(direct_scalarmul) (
decaf_bool_t short_circuit
) {
point_t basep;
decaf_bool_t succ = decaf_successful(API_NS(point_decode)(basep, base, allow_identity));
if (short_circuit && ~succ) return DECAF_FAILURE;
decaf_error_t succ = API_NS(point_decode)(basep, base, allow_identity);
if (short_circuit && succ != DECAF_SUCCESS) return succ;
API_NS(point_cond_sel)(basep, API_NS(point_base), basep, succ);
API_NS(point_scalarmul)(basep, basep, scalar);
API_NS(point_encode)(scaled, basep);
API_NS(point_destroy)(basep);
return decaf_succeed_if(succ);
return succ;
}

/**
@@ -1580,7 +1601,7 @@ static int recode_wnaf (
* PERF MINOR: not technically WNAF, since last digits can be adjacent. Could be rtl.
*/
for (i=SCALAR_BITS-1; i >= 0; i--) {
int bit = (scalar->limb[i/WORD_BITS] >> (i%WORD_BITS)) & 1;
int bit = (scalar->limb[i/WBITS] >> (i%WBITS)) & 1;
current = 2*current + bit;

/*


+ 1
- 1
src/include/arch_32/arch_intrinsics.h View File

@@ -5,7 +5,7 @@
#ifndef __ARCH_ARCH_32_ARCH_INTRINSICS_H__
#define __ARCH_ARCH_32_ARCH_INTRINSICS_H__

#define WORD_BITS 32
#define ARCH_WORD_BITS 32

static __inline__ __attribute((always_inline,unused))
uint32_t word_is_zero(uint32_t a) {


+ 1
- 1
src/include/arch_arm_32/arch_intrinsics.h View File

@@ -5,7 +5,7 @@
#ifndef __ARCH_ARM_32_ARCH_INTRINSICS_H__
#define __ARCH_ARM_32_ARCH_INTRINSICS_H__

#define WORD_BITS 32
#define ARCH_WORD_BITS 32

static __inline__ __attribute((always_inline,unused))
uint32_t word_is_zero(uint32_t a) {


+ 1
- 1
src/include/arch_neon/arch_intrinsics.h View File

@@ -5,7 +5,7 @@
#ifndef __ARCH_NEON_ARCH_INTRINSICS_H__
#define __ARCH_NEON_ARCH_INTRINSICS_H__

#define WORD_BITS 32
#define ARCH_WORD_BITS 32

static __inline__ __attribute((always_inline,unused))
uint32_t word_is_zero(uint32_t a) {


+ 1
- 1
src/include/arch_ref64/arch_intrinsics.h View File

@@ -5,7 +5,7 @@
#ifndef __ARCH_REF64_ARCH_INTRINSICS_H__
#define __ARCH_REF64_ARCH_INTRINSICS_H__

#define WORD_BITS 64
#define ARCH_WORD_BITS 64

static __inline__ __attribute((always_inline,unused))
uint64_t word_is_zero(uint64_t a) {


+ 1
- 1
src/include/arch_x86_64/arch_intrinsics.h View File

@@ -5,7 +5,7 @@
#ifndef __ARCH_X86_64_ARCH_INTRINSICS_H__
#define __ARCH_X86_64_ARCH_INTRINSICS_H__

#define WORD_BITS 64
#define ARCH_WORD_BITS 64

#include <stdint.h>



+ 10
- 3
src/include/word.h View File

@@ -31,7 +31,7 @@
#include <immintrin.h>
#endif

#if (WORD_BITS == 64)
#if (ARCH_WORD_BITS == 64)
typedef uint64_t word_t, mask_t;
typedef __uint128_t dword_t;
typedef int32_t hsword_t;
@@ -47,8 +47,7 @@
#define U56LE(x) x##ull
#define U60LE(x) x##ull
#define letohWORD letoh64
#define SC_LIMB(x) (x##ull)
#elif (WORD_BITS == 32)
#elif (ARCH_WORD_BITS == 32)
typedef uint32_t word_t, mask_t;
typedef uint64_t dword_t;
typedef int16_t hsword_t;
@@ -63,6 +62,14 @@
#define U56LE(x) (x##ull)&((1ull<<28)-1), (x##ull)>>28
#define U60LE(x) (x##ull)&((1ull<<30)-1), (x##ull)>>30
#define letohWORD letoh32
#else
#error "For now, libdecaf only supports 32- and 64-bit architectures."
#endif
/* Scalar limbs are keyed off of the API word size instead of the arch word size. */
#if DECAF_WORD_BITS == 64
#define SC_LIMB(x) (x##ull)
#elif DECAF_WORD_BITS == 32
#define SC_LIMB(x) ((uint32_t)x##ull),(x##ull>>32)
#else
#error "For now, libdecaf only supports 32- and 64-bit architectures."


+ 4
- 0
src/public_include/decaf/common.h View File

@@ -49,13 +49,17 @@ extern "C" {
&& !defined(DECAF_FORCE_32_BIT)
#define DECAF_WORD_BITS 64 /**< The number of bits in a word */
typedef uint64_t decaf_word_t; /**< Word size for internal computations */
typedef int64_t decaf_sword_t; /**< Signed word size for internal computations */
typedef uint64_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
typedef __uint128_t decaf_dword_t; /**< Double-word size for internal computations */
typedef __int128_t decaf_dsword_t; /**< Signed double-word size for internal computations */
#else
#define DECAF_WORD_BITS 32 /**< The number of bits in a word */
typedef uint32_t decaf_word_t; /**< Word size for internal computations */
typedef int32_t decaf_sword_t; /**< Signed word size for internal computations */
typedef uint32_t decaf_bool_t; /**< "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
typedef uint64_t decaf_dword_t; /**< Double-word size for internal computations */
typedef uint64_t decaf_dsword_t; /**< Signed double-word size for internal computations */
#endif
/** DECAF_TRUE = -1 so that DECAF_TRUE & x = x */


Loading…
Cancel
Save