Browse Source

factor out elligator, decaf, scalar code. scalar_halve is now public

master
Michael Hamburg 9 years ago
parent
commit
a585d7f148
17 changed files with 628 additions and 579 deletions
  1. +9
    -2
      Makefile
  2. +40
    -2
      src/include/field.h
  3. +35
    -0
      src/include/word.h
  4. +1
    -1
      src/p25519/arch_32/f_impl.c
  5. +1
    -1
      src/p25519/arch_ref64/f_impl.c
  6. +1
    -1
      src/p25519/arch_x86_64/f_impl.c
  7. +1
    -1
      src/p448/arch_32/f_impl.c
  8. +1
    -1
      src/p448/arch_arm_32/f_impl.c
  9. +1
    -1
      src/p448/arch_neon/f_impl.c
  10. +1
    -1
      src/p448/arch_ref64/f_impl.c
  11. +1
    -1
      src/p448/arch_x86_64/f_impl.c
  12. +44
    -565
      src/per_curve/decaf.tmpl.c
  13. +10
    -0
      src/per_curve/decaf.tmpl.h
  14. +3
    -0
      src/per_curve/decaf.tmpl.hxx
  15. +149
    -0
      src/per_curve/elligator.tmpl.c
  16. +328
    -0
      src/per_curve/scalar.tmpl.c
  17. +2
    -2
      src/per_field/f_field.tmpl.h

+ 9
- 2
Makefile View File

@@ -165,7 +165,8 @@ endef
################################################################
define define_curve

LIBCOMPONENTS += $$(BUILD_OBJ)/$(1)/decaf.o $$(BUILD_OBJ)/$(1)/crypto.o $$(BUILD_OBJ)/$(1)/decaf_tables.o
LIBCOMPONENTS += $$(BUILD_OBJ)/$(1)/decaf.o $$(BUILD_OBJ)/$(1)/elligator.o $$(BUILD_OBJ)/$(1)/scalar.o \
$$(BUILD_OBJ)/$(1)/crypto.o $$(BUILD_OBJ)/$(1)/decaf_tables.o
PER_OBJ_DIRS += $$(BUILD_OBJ)/$(1)
GLOBAL_HEADERS_OF_$(1) = $(BUILD_INC)/decaf/decaf_$(3).h $(BUILD_INC)/decaf/decaf_$(3).hxx \
$(BUILD_INC)/decaf/crypto_$(3).h $(BUILD_INC)/decaf/crypto_$(3).hxx
@@ -181,11 +182,17 @@ $$(BUILD_H)/$(1)/%.h: src/per_curve/%.tmpl.h src/gen_headers/* $$(HEADERS_OF_$(2
$$(BUILD_INC)/decaf/decaf_$(3).%: src/per_curve/decaf.tmpl.% src/gen_headers/* $$(HEADERS_OF_$(2))
python -B src/gen_headers/template.py --per=curve --item=$(1) --guard=$$(@:$(BUILD_INC)/%=%) -o $$@ $$<
$$(BUILD_INC)/decaf/elligator_$(3).%: src/per_curve/elligator.tmpl.% src/gen_headers/* $$(HEADERS_OF_$(2))
python -B src/gen_headers/template.py --per=curve --item=$(1) --guard=$$(@:$(BUILD_INC)/%=%) -o $$@ $$<
$$(BUILD_INC)/decaf/scalar_$(3).%: src/per_curve/scalar.tmpl.% src/gen_headers/* $$(HEADERS_OF_$(2))
python -B src/gen_headers/template.py --per=curve --item=$(1) --guard=$$(@:$(BUILD_INC)/%=%) -o $$@ $$<
$$(BUILD_INC)/decaf/crypto_$(3).%: src/per_curve/crypto.tmpl.% src/gen_headers/* $$(HEADERS_OF_$(2))
python -B src/gen_headers/template.py --per=curve --item=$(1) --guard=$$(@:$(BUILD_INC)/%=%) -o $$@ $$<

$$(BUILD_IBIN)/decaf_gen_tables_$(1): $$(BUILD_OBJ)/$(1)/decaf_gen_tables.o \
$$(BUILD_OBJ)/$(1)/decaf.o $$(BUILD_OBJ)/utils.o \
$$(BUILD_OBJ)/$(1)/decaf.o $$(BUILD_OBJ)/$(1)/scalar.o $$(BUILD_OBJ)/utils.o \
$$(COMPONENTS_OF_$(2))
$$(LD) $$(LDFLAGS) -o $$@ $$^



+ 40
- 2
src/include/field.h View File

@@ -15,8 +15,7 @@
#include <string.h>
/** Square x, n times. */
static INLINE UNUSED void
gf_sqrn (
static INLINE UNUSED void gf_sqrn (
gf_s *__restrict__ y,
const gf x,
int n
@@ -58,5 +57,44 @@ static inline void gf_subx_nr ( gf c, const gf a, const gf b, int amt ) {
if (sizeof(word_t)==4) gf_weak_reduce(c); // HACK PERF MAGIC
}

/**
 * Multiply a field element by a small signed integer: c = a * w.
 *
 * Not constant-time with respect to the sign of w (the branch below depends
 * on it), so w must not be secret.
 *
 * @param [out] c Receives a * w.
 * @param [in] a The field element to scale.
 * @param [in] w The signed multiplier.
 */
static inline void gf_mulw(gf c, const gf a, int32_t w) {
    if (w>0) {
        gf_mulw_unsigned(c, a, (uint32_t)w);
    } else {
        /* Take the magnitude in unsigned arithmetic: "-w" on an int32_t is
         * signed overflow (undefined behavior) for w == INT32_MIN, whereas
         * 0 - (uint32_t)w is well defined and equals |w| for every w <= 0. */
        gf_mulw_unsigned(c, a, (uint32_t)0 - (uint32_t)w);
        gf_sub(c,ZERO,c); /* c = -(a * |w|) */
    }
}

/**
 * Constant-time select between two field elements: x = is_z ? z : y.
 *
 * @param [out] x Destination.
 * @param [in] y Value chosen when is_z is clear.
 * @param [in] z Value chosen when is_z is set.
 * @param [in] is_z Selection mask.
 */
static inline void gf_cond_sel(gf x, const gf y, const gf z, mask_t is_z) {
    constant_time_select(
        x,
        y,
        z,
        sizeof(gf),
        is_z,
        0
    );
}

/**
 * Constant-time conditional negation: if (neg) x = -x.
 *
 * @param [in,out] x Field element to negate in place.
 * @param [in] neg Mask; when set, x is replaced by ZERO - x.
 */
static inline void gf_cond_neg(gf x, mask_t neg) {
    gf negated;
    gf_sub(negated, ZERO, x);           /* always computed, whatever neg is */
    gf_cond_sel(x, x, negated, neg);    /* keep x, or take -x when neg is set */
}

/**
 * Constant-time conditional swap: if (swap) (x,y) = (y,x).
 *
 * @param [in,out] x First field element.
 * @param [in,out] y Second field element; declared __restrict__, so it must
 *                   not alias x.
 * @param [in] swap Mask; when set, the contents of x and y are exchanged.
 */
static inline void gf_cond_swap(gf x, gf_s *__restrict__ y, mask_t swap) {
    constant_time_cond_swap(x, y, sizeof(gf_s), swap);
}

/**
 * Multiply a field element by a fixed constant: out = QNR * x.
 *
 * The constant depends on the field: SQRT_MINUS_ONE when p == 5 (mod 8), and
 * -1 when p == 3 or 7 (mod 8). Other fields are rejected at compile time.
 * (The in-code name "QNR" suggests a quadratic non-residue.)
 *
 * @param [out] out Receives QNR * x. Declared __restrict__: must not alias x.
 * @param [in] x The field element to multiply; it is only read.
 */
static INLINE void gf_mul_qnr(gf_s *__restrict__ out, const gf x) {
#if P_MOD_8 == 5
    /* out = sqrt(-1) * x */
    gf_mul(out,x,SQRT_MINUS_ONE);
#elif P_MOD_8 == 3 || P_MOD_8 == 7
    /* out = -x */
    gf_sub(out,ZERO,x);
#else
    #error "Only supporting p=3,5,7 mod 8"
#endif
}


#endif // __GF_H__

+ 35
- 0
src/include/word.h View File

@@ -7,7 +7,10 @@

/* for posix_memalign */
#define _XOPEN_SOURCE 600
#define __STDC_WANT_LIB_EXT1__ 1 /* for memset_s */
#include <string.h>

#include <assert.h>
#include <stdint.h>
#include "arch_intrinsics.h"

@@ -240,4 +243,36 @@ malloc_vector(size_t size) {
#define UNROLL
#endif

/* The plan on booleans:
*
* The external interface uses decaf_bool_t, but this might be a different
* size than our particular arch's word_t (and thus mask_t). Also, the caller
* isn't guaranteed to pass it as nonzero. So bool_to_mask converts word sizes
* and checks nonzero.
*
* On the flip side, mask_t is always -1 or 0, but it might be a different size
* than decaf_bool_t.
*
* On the third hand, we have success vs boolean types, but that's handled in
* common.h: it converts between decaf_bool_t and decaf_error_t.
*/
/**
 * Convert an internal mask to the external boolean type.
 * The mask is first reinterpreted as signed and then converted to the signed
 * decaf word type, so an all-ones mask stays all-ones even if the two word
 * sizes differ.
 */
static INLINE decaf_bool_t mask_to_bool (mask_t m) {
    sword_t as_signed = (sword_t)m;
    decaf_sword_t resized = (decaf_sword_t)as_signed;
    return resized;
}

/**
 * Convert an external boolean to an internal mask.
 * The boolean is examined one word_t-sized chunk at a time; the result has
 * ~word_is_zero(chunk) OR-ed in for every chunk, so any nonzero chunk sets it.
 * On most arches this will be optimized to a simple cast.
 */
static INLINE mask_t bool_to_mask (decaf_bool_t m) {
    unsigned int chunks = sizeof(decaf_bool_t)/sizeof(mask_t);
    if (chunks == 0) chunks = 1; /* decaf_bool_t narrower than mask_t */

    mask_t nonzero = 0;
    unsigned int idx = 0;
    while (idx < chunks) {
        nonzero |= ~ word_is_zero(m >> (idx*8*sizeof(word_t)));
        idx++;
    }
    return nonzero;
}

/** Deliberately discard a boolean result; the body does nothing else. */
static INLINE void ignore_result (decaf_bool_t boo) {
    (void) boo;
}

#endif /* __WORD_H__ */

+ 1
- 1
src/p25519/arch_32/f_impl.c View File

@@ -51,7 +51,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[1] += accum;
}

void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint32_t *a = as->limb, maske = ((1<<26)-1), masko = ((1<<25)-1);
uint32_t blo = b & maske, bhi = b>>26, bhi2 = 2*bhi;
uint32_t *c = cs->limb;


+ 1
- 1
src/p25519/arch_ref64/f_impl.c View File

@@ -34,7 +34,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[1] += accum;
}

void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
int i;


+ 1
- 1
src/p25519/arch_x86_64/f_impl.c View File

@@ -140,7 +140,7 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
c[1] = c1 + (accum1>>51);
}

void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint64_t *a = as->limb, mask = ((1ull<<51)-1);
uint64_t *c = cs->limb;



+ 1
- 1
src/p448/arch_32/f_impl.c View File

@@ -60,7 +60,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[1] += ((uint32_t)(accum1));
}

void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
assert(b<1<<28);
const uint32_t *a = as->limb;


+ 1
- 1
src/p448/arch_arm_32/f_impl.c View File

@@ -721,7 +721,7 @@ void gf_sqr (gf_s *__restrict__ cs, const gf as) {
c[1] += ((uint32_t)(accum1));
}

void gf_mulw (
void gf_mulw_unsigned (
gf_s *__restrict__ cs,
const gf as,
uint32_t b


+ 1
- 1
src/p448/arch_neon/f_impl.c View File

@@ -549,7 +549,7 @@ void gf_sqr (gf_s *__restrict__ cs, const gf bs) {
);
}

void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
uint32x2_t vmask = {(1<<28) - 1, (1<<28)-1};
assert(b<(1<<28));


+ 1
- 1
src/p448/arch_ref64/f_impl.c View File

@@ -165,7 +165,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[1] += ((uint64_t)(accum1));
}

void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;



+ 1
- 1
src/p448/arch_x86_64/f_impl.c View File

@@ -139,7 +139,7 @@ void gf_mul (gf_s *__restrict__ cs, const gf as, const gf bs) {
c[0] += ((uint64_t)(accum1));
}

void gf_mulw (gf_s *__restrict__ cs, const gf as, uint32_t b) {
void gf_mulw_unsigned (gf_s *__restrict__ cs, const gf as, uint32_t b) {
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;



+ 44
- 565
src/per_curve/decaf.tmpl.c View File

@@ -1,9 +1,6 @@
/** @brief Decaf high-level functions. */

#define _XOPEN_SOURCE 600 /* for posix_memalign */
#define __STDC_WANT_LIB_EXT1__ 1 /* for memset_s */
#include <string.h>

#include "word.h"
#include "field.h"

@@ -29,16 +26,11 @@
#define DECAF_WNAF_VAR_TABLE_BITS $(wnaf.var)

static const int EDWARDS_D = $(d);
static const scalar_t sc_p = {{{
$(ser(q,64,"SC_LIMB"))
}}}, sc_r2 = {{{
$(ser(((2**128)**((scalar_bits+63)/64))%q,64,"SC_LIMB"))
}}}, point_scalarmul_adjustment = {{{
static const scalar_t point_scalarmul_adjustment = {{{
$(ser((2**(scalar_bits-1+window_bits - ((scalar_bits-1)%window_bits)) - 1) % q,64,"SC_LIMB"))
}}}, precomputed_scalarmul_adjustment = {{{
$(ser((2**(combs.n*combs.t*combs.s) - 1) % q,64,"SC_LIMB"))
}}};
static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t)0x$("%x" % pow(-q,2**64-1,2**64))ull;

const uint8_t API_NS(x_base_point)[X_SER_BYTES] = { $(ser(mont_base,8)) };

@@ -50,27 +42,26 @@ const uint8_t API_NS(x_base_point)[X_SER_BYTES] = { $(ser(mont_base,8)) };

/* End of template stuff */

/* Sanity */
#if (COFACTOR == 8) && !IMAGINE_TWIST
/* FUTURE: Curve41417 doesn't have these properties. */
#error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8"
#error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8"
#endif

#if IMAGINE_TWIST && (P_MOD_8 != 5)
#error "Cannot use IMAGINE_TWIST except for p == 5 mod 8"
#error "Cannot use IMAGINE_TWIST except for p == 5 mod 8"
#endif

#if (COFACTOR != 8) && (COFACTOR != 4)
#error "COFACTOR must be 4 or 8"
#error "COFACTOR must be 4 or 8"
#endif
#if IMAGINE_TWIST
extern const gf SQRT_MINUS_ONE;
extern const gf SQRT_MINUS_ONE;
#endif

#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */

const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}};
extern const point_t API_NS(point_base);

/* Projective Niels coordinates */
@@ -88,57 +79,6 @@ const precomputed_s *API_NS(precomputed_base) =
const size_t API_NS(sizeof_precomputed_s) = sizeof(precomputed_s);
const size_t API_NS(alignof_precomputed_s) = sizeof(big_register_t);

#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<NLIMBS; i++) { op; }}
#define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<NLIMBS; i++) { op; }}

/* The plan on booleans:
*
* The external interface uses decaf_bool_t, but this might be a different
* size than our particular arch's word_t (and thus mask_t). Also, the caller
* isn't guaranteed to pass it as nonzero. So bool_to_mask converts word sizes
* and checks nonzero.
*
* On the flip side, mask_t is always -1 or 0, but it might be a different size
* than decaf_bool_t.
*
* On the third hand, we have success vs boolean types, but that's handled in
* common.h: it converts between decaf_bool_t and decaf_error_t.
*/
/** Widen/narrow an internal mask to the external boolean type, going through
 * the signed word types so that an all-ones mask remains all-ones. */
static INLINE decaf_bool_t mask_to_bool (mask_t m) {
    sword_t signed_mask = (sword_t)m;
    return (decaf_sword_t)signed_mask;
}

/** Collapse an external boolean into an internal mask by OR-ing together
 * ~word_is_zero() of each word_t-sized chunk of the input.
 * On most arches this will be optimized to a simple cast. */
static INLINE mask_t bool_to_mask (decaf_bool_t m) {
    unsigned int nwords = sizeof(decaf_bool_t)/sizeof(mask_t);
    unsigned int w;
    mask_t acc = 0;

    if (nwords < 1) nwords = 1;
    for (w=0; w<nwords; w++) {
        acc |= ~ word_is_zero(m >> (w*8*sizeof(word_t)));
    }
    return acc;
}

/** Constant-time select: x receives z when is_z is set, otherwise y. */
static INLINE void cond_sel(gf x, const gf y, const gf z, mask_t is_z) {
    constant_time_select(x, y, z, sizeof(gf), is_z, 0);
}

/** Constant-time conditional negation: x becomes ZERO - x when neg is set. */
static void cond_neg(gf x, mask_t neg) {
    gf minus_x;
    gf_sub(minus_x, ZERO, x);      /* computed unconditionally */
    cond_sel(x, x, minus_x, neg);  /* pick -x only when neg is set */
}

/** Constant-time conditional swap: exchange x and y when swap is set.
 * y is __restrict__-qualified, so it must not alias x. */
static INLINE void cond_swap(gf x, gf_s *__restrict__ y, mask_t swap) {
    constant_time_cond_swap(x, y, sizeof(gf_s), swap);
}

/** Inverse. */
static void
gf_invert(gf y, const gf x) {
@@ -151,17 +91,6 @@ gf_invert(gf y, const gf x) {
gf_copy(y, t2);
}

/**
 * Multiply a field element by a small signed integer: c = a * w.
 * Not constant-time with respect to the sign of w.
 *
 * @param [out] c Receives a * w.
 * @param [in] a The field element to scale.
 * @param [in] w The signed multiplier.
 */
static INLINE void
gf_mulw_sgn(gf c, const gf a, int32_t w) {
    if (w>0) {
        gf_mulw(c, a, (uint32_t)w);
    } else {
        /* Compute |w| without signed overflow: "-w" is undefined behavior
         * for w == INT32_MIN, but the unsigned subtraction is well defined. */
        gf_mulw(c, a, (uint32_t)0 - (uint32_t)w);
        gf_sub(c,ZERO,c); /* c = -(a * |w|) */
    }
}

#if COFACTOR==8
/** Return high bit of x = low bit of 2x mod p */
static mask_t gf_lobit(const gf x) {
@@ -172,221 +101,10 @@ static mask_t gf_lobit(const gf x) {
}
#endif

/**
 * Compute out = {extra,accum} - sub, then add p back if that went negative.
 * Must have extra <= 1.
 *
 * @param [out] out Result limbs. accum may point at out->limb: each input
 *                  limb is read before the matching output limb is written.
 * @param [in] accum Low SCALAR_LIMBS limbs of the minuend.
 * @param [in] sub The subtrahend.
 * @param [in] p Added back (masked) when the subtraction borrows.
 * @param [in] extra Extra high limb of the minuend.
 */
static NOINLINE void
sc_subx(
    scalar_t out,
    const decaf_word_t accum[SCALAR_LIMBS],
    const scalar_t sub,
    const scalar_t p,
    decaf_word_t extra
) {
    unsigned int k;

    /* Pass 1: limb-wise subtraction with a signed running borrow. */
    decaf_dsword_t diff = 0;
    for (k=0; k<SCALAR_LIMBS; k++) {
        diff = (diff + accum[k]) - sub->limb[k];
        out->limb[k] = diff;
        diff >>= WBITS;
    }

    /* Combine the final borrow with the extra high limb: 0 or -1. */
    decaf_word_t borrow = diff+extra;

    /* Pass 2: add (p & borrow), i.e. p if we went negative, else nothing. */
    decaf_dsword_t carry = 0;
    for (k=0; k<SCALAR_LIMBS; k++) {
        carry = (carry + out->limb[k]) + (p->limb[k] & borrow);
        out->limb[k] = carry;
        carry >>= WBITS;
    }
}

/**
 * Montgomery-style multiplication of two scalars modulo sc_p.
 *
 * For each limb of a, the loop adds a->limb[i] * b into accum, then adds a
 * multiple of sc_p chosen (via MONTGOMERY_FACTOR) from accum[0], and shifts
 * the accumulator down by one limb. A final sc_subx call subtracts sc_p and
 * adds it back if that underflows.
 * NOTE(review): the result is presumably a*b/R mod sc_p with
 * R = 2^(WBITS*SCALAR_LIMBS); confirm against the scalar documentation.
 *
 * @param [out] out Receives the product.
 * @param [in] a First factor.
 * @param [in] b Second factor.
 */
static NOINLINE void
sc_montmul (
scalar_t out,
const scalar_t a,
const scalar_t b
) {
unsigned int i,j;
decaf_word_t accum[SCALAR_LIMBS+1] = {0};
decaf_word_t hi_carry = 0;
for (i=0; i<SCALAR_LIMBS; i++) {
/* accum += a->limb[i] * b */
decaf_word_t mand = a->limb[i];
const decaf_word_t *mier = b->limb;
decaf_dword_t chain = 0;
for (j=0; j<SCALAR_LIMBS; j++) {
chain += ((decaf_dword_t)mand)*mier[j] + accum[j];
accum[j] = chain;
chain >>= WBITS;
}
accum[j] = chain;
/* accum += mand * sc_p, with mand derived from the low limb */
mand = accum[0] * MONTGOMERY_FACTOR;
chain = 0;
mier = sc_p->limb;
for (j=0; j<SCALAR_LIMBS; j++) {
chain += (decaf_dword_t)mand*mier[j] + accum[j];
/* Store one limb lower: the j==0 limb is dropped, not stored. */
if (j) accum[j-1] = chain;
chain >>= WBITS;
}
chain += accum[j];
chain += hi_carry;
accum[j-1] = chain;
/* Carry out of the top limb, folded back in on the next iteration. */
hi_carry = chain >> WBITS;
}
/* Final conditional correction: {hi_carry,accum} - sc_p, +sc_p on borrow. */
sc_subx(out, accum, sc_p, sc_p, hi_carry);
}

/**
 * Multiply two scalars: out = a * b.
 * Implemented as two sc_montmul calls: the product of a and b, followed by a
 * multiplication of that result with the constant sc_r2.
 */
void API_NS(scalar_mul) (scalar_t out, const scalar_t a, const scalar_t b) {
    sc_montmul(out, a, b);        /* first pass: a times b */
    sc_montmul(out, out, sc_r2);  /* second pass: times sc_r2 */
}

/**
 * Montgomery squaring, currently just sc_montmul of a with itself.
 * PERF: a dedicated squaring routine could be implemented here.
 */
static INLINE void sc_montsqr (scalar_t out, const scalar_t a) {
    sc_montmul(out, a, a);
}

/**
 * Invert a scalar by exponentiation.
 *
 * The exponent bits are read from sc_p with 2 subtracted from its low limb
 * (i.e. sc_p - 2), scanned from the top bit down with a sliding window over
 * a table of odd powers of a.
 *
 * @param [out] out Receives the inverse.
 * @param [in] a The scalar to invert.
 * @return Success iff the result is not equal to scalar_zero.
 */
decaf_error_t API_NS(scalar_invert) (
scalar_t out,
const scalar_t a
) {
/* Fermat's little theorem, sliding window.
* Sliding window is fine here because the modulus isn't secret.
*/
const int SCALAR_WINDOW_BITS = 3;
scalar_t precmp[1<<SCALAR_WINDOW_BITS];
const int LAST = (1<<SCALAR_WINDOW_BITS)-1;

/* Precompute precmp = [a^1,a^3,...] */
sc_montmul(precmp[0],a,sc_r2);
/* precmp[LAST] temporarily holds a^2, the step between odd powers. */
if (LAST > 0) sc_montmul(precmp[LAST],precmp[0],precmp[0]);

int i;
for (i=1; i<=LAST; i++) {
sc_montmul(precmp[i],precmp[i-1],precmp[LAST]);
}
/* Sliding window */
unsigned residue = 0, trailing = 0, started = 0;
/* Runs SCALAR_WINDOW_BITS steps past bit 0 (w = 0 there) to flush the window. */
for (i=SCALAR_BITS-1; i>=-SCALAR_WINDOW_BITS; i--) {
if (started) sc_montsqr(out,out);
decaf_word_t w = (i>=0) ? sc_p->limb[i/WBITS] : 0;
/* Within the lowest limb, use sc_p's limb minus 2. */
if (i >= 0 && i<WBITS) {
assert(w >= 2);
w-=2;
}
residue = (residue<<1) | ((w>>(i%WBITS))&1);
if (residue>>SCALAR_WINDOW_BITS != 0) {
assert(trailing == 0);
trailing = residue;
residue = 0;
}
if (trailing > 0 && (trailing & ((1<<SCALAR_WINDOW_BITS)-1)) == 0) {
if (started) {
sc_montmul(out,out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
} else {
/* First window: initialize out instead of multiplying into it. */
API_NS(scalar_copy)(out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
started = 1;
}
trailing = 0;
}
trailing <<= 1;
}
assert(residue==0);
assert(trailing==0);
/* Demontgomerize */
sc_montmul(out,out,API_NS(scalar_one));
/* Wipe the table of powers of a. */
decaf_bzero(precmp, sizeof(precmp));
return decaf_succeed_if(~API_NS(scalar_eq)(out,API_NS(scalar_zero)));
}

/**
 * Subtract two scalars: out = a - b, with sc_p added back on underflow
 * (handled inside sc_subx).
 */
void API_NS(scalar_sub) (scalar_t out, const scalar_t a, const scalar_t b) {
    sc_subx(out, a->limb, b, sc_p, 0);
}

/**
 * Add two scalars: out = a + b, followed by one conditional subtraction of
 * sc_p (performed by sc_subx, which subtracts sc_p and adds it back if the
 * subtraction underflows).
 */
void API_NS(scalar_add) (scalar_t out, const scalar_t a, const scalar_t b) {
    decaf_dword_t carry = 0;
    unsigned int k;

    /* Plain multi-limb addition; the final carry is passed on as "extra". */
    for (k=0; k<SCALAR_LIMBS; k++) {
        carry = (carry + a->limb[k]) + b->limb[k];
        out->limb[k] = carry;
        carry >>= WBITS;
    }

    sc_subx(out, out->limb, sc_p, sc_p, carry);
}

/**
 * Halve a scalar modulo p: if a is odd, p is added first so the sum is even,
 * and the (SCALAR_LIMBS+1)-limb sum is then shifted right by one bit.
 *
 * @param [out] out Receives a/2.
 * @param [in] a The scalar to halve.
 * @param [in] p The modulus added when a is odd.
 */
static NOINLINE void
sc_halve (
    scalar_t out,
    const scalar_t a,
    const scalar_t p
) {
    /* All-ones when a is odd, zero otherwise. */
    decaf_word_t odd_mask = -(a->limb[0] & 1);
    decaf_dword_t carry = 0;
    unsigned int k;

    /* out = a + (p & odd_mask); carry keeps the bit above the top limb. */
    for (k=0; k<SCALAR_LIMBS; k++) {
        carry = (carry + a->limb[k]) + (p->limb[k] & odd_mask);
        out->limb[k] = carry;
        carry >>= WBITS;
    }

    /* Shift right by one, pulling each limb's new top bit from the limb above. */
    for (k=0; k<SCALAR_LIMBS-1; k++) {
        out->limb[k] = out->limb[k]>>1 | out->limb[k+1]<<(WBITS-1);
    }
    /* The top limb takes its new top bit from the addition's carry. */
    out->limb[SCALAR_LIMBS-1] = out->limb[SCALAR_LIMBS-1]>>1 | carry<<(WBITS-1);
}

/**
 * Set a scalar from an unsigned 64-bit integer.
 * The scalar is zeroed, then the low limbs are filled from w, least
 * significant limb first.
 */
void
API_NS(scalar_set_unsigned) (scalar_t out, uint64_t w) {
    memset(out,0,sizeof(scalar_t));

    unsigned int k = 0;
    while (k < sizeof(uint64_t)/sizeof(decaf_word_t)) {
        out->limb[k] = w;
        /* Shift by a whole limb, or by 0 when one limb already holds 64 bits
         * (a shift by the full width of w is avoided). */
        w >>= (sizeof(uint64_t) > sizeof(decaf_word_t)) ? 8*sizeof(decaf_word_t) : 0;
        k++;
    }
}

/**
 * Compare two scalars limb by limb without early exit.
 * @return A true decaf_bool_t iff every limb of a equals the matching limb of b.
 */
decaf_bool_t
API_NS(scalar_eq) (const scalar_t a, const scalar_t b) {
    decaf_word_t acc = 0;
    unsigned int k;

    /* OR together the XOR of every limb pair; acc stays 0 only if all match. */
    for (k=0; k<SCALAR_LIMBS; k++) {
        acc |= a->limb[k] ^ b->limb[k];
    }

    return mask_to_bool(word_is_zero(acc));
}

/** identity = (0,1) */
const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}};

static void
deisogenize (
void API_NS(deisogenize) (
gf_s *__restrict__ s,
gf_s *__restrict__ minus_t_over_s,
const point_t p,
@@ -399,28 +117,28 @@ deisogenize (
gf b, d;
gf_s *c = s, *a = minus_t_over_s;
gf_mulw_sgn(a, p->y, 1-EDWARDS_D);
gf_mulw(a, p->y, 1-EDWARDS_D);
gf_mul(c, a, p->t); /* -dYT, with EDWARDS_D = d-1 */
gf_mul(a, p->x, p->z);
gf_sub(d, c, a); /* aXZ-dYT with a=-1 */
gf_add(a, p->z, p->y);
gf_sub(b, p->z, p->y);
gf_mul(c, b, a);
gf_mulw_sgn(b, c, -EDWARDS_D); /* (a-d)(Z+Y)(Z-Y) */
gf_mulw(b, c, -EDWARDS_D); /* (a-d)(Z+Y)(Z-Y) */
mask_t ok = gf_isr (a,b); /* r in the paper */
(void)ok; assert(ok | gf_eq(b,ZERO));
gf_mulw_sgn (b, a, -EDWARDS_D); /* u in the paper */
gf_mulw (b, a, -EDWARDS_D); /* u in the paper */

gf_mul(c,a,d); /* r(aZX-dYT) */
gf_mul(a,b,p->z); /* uZ */
gf_add(a,a,a); /* 2uZ */
cond_neg(c, toggle_hibit_t_over_s ^ ~gf_hibit(a)); /* u <- -u if negative. */
cond_neg(a, toggle_hibit_t_over_s ^ ~gf_hibit(a)); /* t/s <-? -t/s */
gf_cond_neg(c, toggle_hibit_t_over_s ^ ~gf_hibit(a)); /* u <- -u if negative. */
gf_cond_neg(a, toggle_hibit_t_over_s ^ ~gf_hibit(a)); /* t/s <-? -t/s */
gf_add(d,c,p->y);
gf_mul(s,b,d);
cond_neg(s, toggle_hibit_s ^ gf_hibit(s));
gf_cond_neg(s, toggle_hibit_s ^ gf_hibit(s));
#else
/* More complicated because of rotation */
/* MAGIC This code is wrong for certain non-Curve25519 curves;
@@ -441,7 +159,7 @@ deisogenize (
gf_mul ( c, a, b ); /* "zx" = Z^2 - aX^2 = Z^2 - X^2 */
#else
const gf_s *x = p->x, *t = p->t;
/* Won't hit the cond_sel below because COFACTOR==8 requires IMAGINE_TWIST for now. */
/* Won't hit the gf_cond_sel below because COFACTOR==8 requires IMAGINE_TWIST for now. */
gf_sqr ( a, p->z );
gf_sqr ( b, p->x );
@@ -465,8 +183,8 @@ deisogenize (
rotate = gf_hibit(a) ^ toggle_rotation;
/* Curve25519: cond select between zx * 1/tz or sqrt(1-d); y=-x */
gf_mul ( a, b, c );
cond_sel ( a, a, SQRT_ONE_MINUS_D, rotate );
cond_sel ( x, p->y, x, rotate );
gf_cond_sel ( a, a, SQRT_ONE_MINUS_D, rotate );
gf_cond_sel ( x, p->y, x, rotate );
#else
(void)toggle_rotation;
rotate = 0;
@@ -476,19 +194,19 @@ deisogenize (
gf_mul ( a, c, p->z );
gf_add ( a, a, a ); // 2 * "osx" * Z
mask_t tg1 = rotate ^ toggle_hibit_t_over_s ^~ gf_hibit(a);
cond_neg ( c, tg1 );
cond_neg ( a, rotate ^ tg1 );
gf_cond_neg ( c, tg1 );
gf_cond_neg ( a, rotate ^ tg1 );
gf_mul ( d, b, p->z );
gf_add ( d, d, c );
gf_mul ( b, d, x ); /* here "x" = y unless rotate */
cond_neg ( b, toggle_hibit_s ^ gf_hibit(b) );
gf_cond_neg ( b, toggle_hibit_s ^ gf_hibit(b) );
#endif
}

void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
gf s, mtos;
deisogenize(s,mtos,p,0,0,0);
API_NS(deisogenize)(s,mtos,p,0,0,0);
gf_serialize(ser,s,0);
}

@@ -509,7 +227,7 @@ decaf_error_t API_NS(point_decode) (
#endif
succ &= ~ gf_eq( f, ZERO );
gf_sqr ( b, f );
gf_mulw_sgn ( c, a, 4*IMAGINE_TWIST-4*EDWARDS_D );
gf_mulw ( c, a, 4*IMAGINE_TWIST-4*EDWARDS_D );
gf_add ( c, c, b ); /* t^2 */
gf_mul ( d, f, s ); /* s(1-as^2) for denoms */
gf_sqr ( e, d );
@@ -520,8 +238,8 @@ decaf_error_t API_NS(point_decode) (
gf_mul ( d, e, c ); /* d = t / (s(1-as^2)) */
gf_mul ( e, d, f ); /* t/s */
mask_t negtos = gf_hibit(e);
cond_neg(b, negtos);
cond_neg(d, negtos);
gf_cond_neg(b, negtos);
gf_cond_neg(d, negtos);

#if IMAGINE_TWIST
gf_add ( p->z, ONE, a); /* Z = 1+as^2 = 1-s^2 */
@@ -578,7 +296,7 @@ void API_NS(point_sub) (
gf_add_nr ( b, q->y, q->x );
gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t );
gf_mulw_sgn ( p->x, b, 2*EFF_D );
gf_mulw ( p->x, b, 2*EFF_D );
gf_add_nr ( b, a, p->y );
gf_sub_nr ( c, p->y, a );
gf_mul ( a, q->z, r->z );
@@ -609,7 +327,7 @@ void API_NS(point_add) (
gf_add_nr ( b, q->y, q->x );
gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t );
gf_mulw_sgn ( p->x, b, 2*EFF_D );
gf_mulw ( p->x, b, 2*EFF_D );
gf_add_nr ( b, a, p->y );
gf_sub_nr ( c, p->y, a );
gf_mul ( a, q->z, r->z );
@@ -664,107 +382,14 @@ void API_NS(point_negate) (
gf_sub(nega->t, ZERO, a->t);
}

/**
 * Load up to nbytes little-endian bytes into the limbs of s, with no
 * reduction. Limbs (or parts of limbs) beyond nbytes are set to zero.
 *
 * @param [out] s Scalar whose SCALAR_LIMBS limbs are all overwritten.
 * @param [in] ser Source bytes, least significant first.
 * @param [in] nbytes Number of bytes available in ser.
 */
static INLINE void
scalar_decode_short (
    scalar_t s,
    const unsigned char *ser,
    unsigned int nbytes
) {
    unsigned int limb_idx, byte_idx, pos = 0;

    for (limb_idx=0; limb_idx<SCALAR_LIMBS; limb_idx++) {
        decaf_word_t word = 0;
        for (byte_idx=0; byte_idx<sizeof(decaf_word_t) && pos<nbytes; byte_idx++,pos++) {
            word |= ((decaf_word_t)ser[pos])<<(8*byte_idx);
        }
        s->limb[limb_idx] = word;
    }
}

/**
 * Decode a SCALAR_SER_BYTES-long little-endian scalar and reduce it.
 *
 * @param [out] s The decoded, reduced scalar.
 * @param [in] ser The serialized scalar.
 * @return Success iff the borrow chain of (decoded value - sc_p) is nonzero,
 *         i.e. the input was already below sc_p.
 */
decaf_error_t API_NS(scalar_decode)(
    scalar_t s,
    const unsigned char ser[SCALAR_SER_BYTES]
) {
    scalar_decode_short(s, ser, SCALAR_SER_BYTES);

    /* Track only the borrow of s - sc_p; the difference itself is unused. */
    decaf_dsword_t borrow = 0;
    unsigned int k;
    for (k=0; k<SCALAR_LIMBS; k++) {
        borrow = (borrow + s->limb[k] - sc_p->limb[k]) >> WBITS;
    }
    /* Here borrow == 0 or -1 */

    API_NS(scalar_mul)(s,s,API_NS(scalar_one)); /* ham-handed reduce */
    return decaf_succeed_if(~word_is_zero(borrow));
}

/** Overwrite a scalar with zeros using decaf_bzero. */
void API_NS(scalar_destroy) (scalar_t scalar) {
    decaf_bzero(scalar, sizeof(scalar_t));
}

/** Explicitly discard a boolean result; has no other effect. */
static INLINE void ignore_result (decaf_bool_t boo) {
    (void) boo;
}

/**
 * Decode a little-endian byte string of arbitrary length into a scalar,
 * reducing it modulo the scalar modulus.
 *
 * The input is consumed in SCALAR_SER_BYTES-sized chunks from the most
 * significant end: the (possibly short) top chunk is loaded first, and each
 * lower chunk is folded in by multiplying the accumulator by sc_r2 with
 * sc_montmul and adding the decoded chunk.
 *
 * @param [out] s The resulting scalar.
 * @param [in] ser The serialized bytes, least significant first.
 * @param [in] ser_len Length of ser in bytes; 0 yields scalar_zero.
 */
void API_NS(scalar_decode_long)(
    scalar_t s,
    const unsigned char *ser,
    size_t ser_len
) {
    if (ser_len == 0) {
        API_NS(scalar_copy)(s, API_NS(scalar_zero));
        return;
    }

    size_t i;
    scalar_t t1, t2;

    /* i = offset of the top chunk; a length that is an exact multiple of
     * SCALAR_SER_BYTES makes the top chunk a full one. */
    i = ser_len - (ser_len%SCALAR_SER_BYTES);
    if (i==ser_len) i -= SCALAR_SER_BYTES;
    scalar_decode_short(t1, &ser[i], ser_len-i);

    /* Exactly one full-width chunk: it may exceed the modulus and the loop
     * below will not run, so reduce it explicitly. The comparison is against
     * the serialized width (SCALAR_SER_BYTES), not sizeof(scalar_t): the two
     * coincide only when the limbs pack the serialization exactly, and only
     * the serialized width guarantees i == 0 here. */
    if (ser_len == SCALAR_SER_BYTES) {
        assert(i==0);
        /* ham-handed reduce */
        API_NS(scalar_mul)(s,t1,API_NS(scalar_one));
        API_NS(scalar_destroy)(t1);
        return;
    }

    while (i) {
        i -= SCALAR_SER_BYTES;
        sc_montmul(t1,t1,sc_r2);
        /* The chunk is reduced by scalar_decode; its range verdict is moot. */
        ignore_result( API_NS(scalar_decode)(t2, ser+i) );
        API_NS(scalar_add)(t1, t1, t2);
    }

    API_NS(scalar_copy)(s, t1);
    API_NS(scalar_destroy)(t1);
    API_NS(scalar_destroy)(t2);
}

/**
 * Serialize a scalar as little-endian bytes: every limb is written out least
 * significant byte first, lowest limb first.
 *
 * @param [out] ser Destination buffer.
 * @param [in] s The scalar to serialize.
 */
void API_NS(scalar_encode)(
    unsigned char ser[SCALAR_SER_BYTES],
    const scalar_t s
) {
    unsigned int limb_idx, byte_idx, pos = 0;

    for (limb_idx=0; limb_idx<SCALAR_LIMBS; limb_idx++) {
        for (byte_idx=0; byte_idx<sizeof(decaf_word_t); byte_idx++,pos++) {
            ser[pos] = s->limb[limb_idx] >> (8*byte_idx);
        }
    }
}

/* Operations on [p]niels */
static INLINE void
cond_neg_niels (
niels_t n,
mask_t neg
) {
cond_swap(n->a, n->b, neg);
cond_neg(n->c, neg);
gf_cond_swap(n->a, n->b, neg);
gf_cond_neg(n->c, neg);
}

static NOINLINE void pt_to_pniels (
@@ -773,7 +398,7 @@ static NOINLINE void pt_to_pniels (
) {
gf_sub ( b->n->a, a->y, a->x );
gf_add ( b->n->b, a->x, a->y );
gf_mulw_sgn ( b->n->c, a->t, 2*TWISTED_D );
gf_mulw ( b->n->c, a->t, 2*TWISTED_D );
gf_add ( b->z, a->z, a->z );
}

@@ -915,7 +540,7 @@ void API_NS(point_scalarmul) (
scalar_t scalar1x;
API_NS(scalar_add)(scalar1x, scalar, point_scalarmul_adjustment);
sc_halve(scalar1x,scalar1x,sc_p);
API_NS(scalar_halve)(scalar1x,scalar1x);
/* Set up a precomputed table with odd multiples of b. */
pniels_t pn, multiples[NTABLE];
@@ -977,9 +602,9 @@ void API_NS(point_double_scalarmul) (
scalar_t scalar1x, scalar2x;
API_NS(scalar_add)(scalar1x, scalarb, point_scalarmul_adjustment);
sc_halve(scalar1x,scalar1x,sc_p);
API_NS(scalar_halve)(scalar1x,scalar1x);
API_NS(scalar_add)(scalar2x, scalarc, point_scalarmul_adjustment);
sc_halve(scalar2x,scalar2x,sc_p);
API_NS(scalar_halve)(scalar2x,scalar2x);
/* Set up a precomputed table with odd multiples of b. */
pniels_t pn, multiples1[NTABLE], multiples2[NTABLE];
@@ -1053,9 +678,9 @@ void API_NS(point_dual_scalarmul) (
scalar_t scalar1x, scalar2x;
API_NS(scalar_add)(scalar1x, scalar1, point_scalarmul_adjustment);
sc_halve(scalar1x,scalar1x,sc_p);
API_NS(scalar_halve)(scalar1x,scalar1x);
API_NS(scalar_add)(scalar2x, scalar2, point_scalarmul_adjustment);
sc_halve(scalar2x,scalar2x,sc_p);
API_NS(scalar_halve)(scalar2x,scalar2x);
/* Set up a precomputed table with odd multiples of b. */
point_t multiples1[NTABLE], multiples2[NTABLE], working, tmp;
@@ -1165,144 +790,6 @@ decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) {
return mask_to_bool(succ);
}

/**
 * Map SER_BYTES of input to a curve point (Elligator-style map).
 *
 * The input is deserialized to a field element r0 (the deserialization
 * result flag is ignored) and strongly reduced; r = QNR * r0^2 then drives
 * the computation of s and t, which are finally "isogenized" into the
 * extended coordinates (x,y,z,t) of p.
 *
 * @param [out] p The resulting point; checked with point_valid in an assert.
 * @param [in] ser The input bytes.
 */
void API_NS(point_from_hash_nonuniform) (
point_t p,
const unsigned char ser[SER_BYTES]
) {
gf r0,r,a,b,c,N,e;
ignore_result(gf_deserialize(r0,ser,0));
gf_strong_reduce(r0);
gf_sqr(a,r0);
#if P_MOD_8 == 5
/* r = QNR * r0^2 */
gf_mul(r,a,SQRT_MINUS_ONE);
#elif P_MOD_8 == 3 || P_MOD_8 == 7
/* QNR is -1 here: r = -r0^2 */
gf_sub(r,ZERO,a);
#else
#error "Only supporting p=3,5,7 mod 8"
#endif

/* Compute D@c := (dr+a-d)(dr-ar-d) with a=1 */
gf_sub(a,r,ONE);
gf_mulw_sgn(b,a,EDWARDS_D); /* dr-d */
gf_add(a,b,ONE);
gf_sub(b,b,r);
gf_mul(c,a,b);
/* compute N := (r+1)(a-2d) */
gf_add(a,r,ONE);
gf_mulw_sgn(N,a,1-2*EDWARDS_D);
/* e = +-sqrt(1/ND) or +-r0 * sqrt(qnr/ND) */
gf_mul(a,c,N);
mask_t square = gf_isr(b,a);
cond_sel(c,r0,ONE,square); /* r? = square ? 1 : r0 */
gf_mul(e,b,c);
/* s@a = +-|N.e| */
gf_mul(a,N,e);
cond_neg(a,gf_hibit(a)^square); /* NB this is - what is listed in the paper */
/* t@b = -+ cN(r-1)((a-2d)e)^2 - 1 */
gf_mulw_sgn(c,e,1-2*EDWARDS_D); /* (a-2d)e */
gf_sqr(b,c);
gf_sub(e,r,ONE);
gf_mul(c,b,e);
gf_mul(b,c,N);
cond_neg(b,square);
gf_sub(b,b,ONE);

/* isogenize */
#if IMAGINE_TWIST
/* s <- s * sqrt(-1) */
gf_mul(c,a,SQRT_MINUS_ONE);
gf_copy(a,c);
#endif
gf_sqr(c,a); /* s^2 */
gf_add(a,a,a); /* 2s */
gf_add(e,c,ONE);
gf_mul(p->t,a,e); /* 2s(1+s^2) */
gf_mul(p->x,a,b); /* 2st */
gf_sub(a,ONE,c);
gf_mul(p->y,e,a); /* (1+s^2)(1-s^2) */
gf_mul(p->z,a,b); /* (1-s^2)t */
assert(API_NS(point_valid)(p));
}

/**
 * Attempt to invert point_from_hash_nonuniform: recover a SER_BYTES-long
 * input that maps to p, using hint to choose among the candidate preimages.
 *
 * Hint bits, as decoded below: bit 0 -> sgn_s, bit 1 -> sgn_t_over_s,
 * bit 2 -> sgn_r0, bit 3 -> sgn_ed_T. Each is expanded to an all-ones or
 * all-zeros mask.
 *
 * @param [out] recovered_hash Receives the serialized candidate; it is
 *                             written whether or not inversion succeeds.
 * @param [in] p The point to invert.
 * @param [in] hint_ Selector bits described above.
 * @return Success iff the success mask accumulated below is set.
 */
decaf_error_t
API_NS(invert_elligator_nonuniform) (
unsigned char recovered_hash[SER_BYTES],
const point_t p,
uint16_t hint_
) {
mask_t hint = hint_;
mask_t sgn_s = -(hint & 1),
sgn_t_over_s = -(hint>>1 & 1),
sgn_r0 = -(hint>>2 & 1), /* FIXME: but it's SER_BYTES ... */
sgn_ed_T = -(hint>>3 & 1);
gf a, b, c, d;
deisogenize(a,c,p,sgn_s,sgn_t_over_s,sgn_ed_T);
/* ok, a = s; c = -t/s */
gf_mul(b,c,a);
gf_sub(b,ONE,b); /* t+1 */
gf_sqr(c,a); /* s^2 */
mask_t is_identity = gf_eq(p->t,ZERO);
{
/* identity adjustments */
/* in case of identity, currently c=0, t=0, b=1, will encode to 1 */
/* if hint is 0, -> 0 */
/* if hint is to neg t/s, then go to infinity, effectively set s to 1 */
cond_sel(c,c,ONE,is_identity & sgn_t_over_s);
cond_sel(b,b,ZERO,is_identity & ~sgn_t_over_s & ~sgn_s); /* identity adjust */
}
gf_mulw_sgn(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */
gf_add(a,b,d); /* num? */
gf_sub(d,d,b); /* den? */
gf_mul(b,a,d); /* n*d */
cond_sel(a,d,a,sgn_s);
/* d = QNR * n*d, using the same per-field constant as the forward map */
#if P_MOD_8 == 5
gf_mul(d,b,SQRT_MINUS_ONE);
#else
gf_sub(d,ZERO,b);
#endif
/* Succeeds if d has an inverse square root, or is zero. */
mask_t succ = gf_isr(c,d)|gf_eq(d,ZERO);
gf_mul(b,a,c);
cond_neg(b, sgn_r0^gf_hibit(b));
/* Fail when the result is zero but the hint asked for the other sign. */
succ &= ~(gf_eq(b,ZERO) & sgn_r0);
#if COFACTOR == 8
succ &= ~(is_identity & sgn_ed_T); /* NB: there are no preimages of rotated identity. */
#endif
gf_serialize(recovered_hash,b,1); /* FIXME: ,0 */
/* TODO: deal with overflow flag */
return decaf_succeed_if(mask_to_bool(succ));
}

/**
 * Map 2*SER_BYTES of input to a curve point by applying the nonuniform map
 * to each half of the input and adding the two resulting points.
 *
 * @param [out] pt The sum of the two mapped points.
 * @param [in] hashed_data The input bytes.
 */
void API_NS(point_from_hash_uniform) (
    point_t pt,
    const unsigned char hashed_data[2*SER_BYTES]
) {
    point_t second_half;

    API_NS(point_from_hash_nonuniform)(pt, hashed_data);
    API_NS(point_from_hash_nonuniform)(second_half, &hashed_data[SER_BYTES]);
    API_NS(point_add)(pt, pt, second_half);
}

/**
 * Partially invert point_from_hash_uniform.
 *
 * The second half of partial_hash is taken as given and mapped to a point;
 * that point is subtracted from p, and the nonuniform inverse of the
 * difference is written into the first half of partial_hash.
 *
 * @param [in,out] partial_hash Second SER_BYTES are read; first SER_BYTES
 *                              are overwritten.
 * @param [in] p The target point.
 * @param [in] hint Passed through to invert_elligator_nonuniform.
 * @return The result of invert_elligator_nonuniform.
 */
decaf_error_t
API_NS(invert_elligator_uniform) (
    unsigned char partial_hash[2*SER_BYTES],
    const point_t p,
    uint16_t hint
) {
    point_t remainder;

    API_NS(point_from_hash_nonuniform)(remainder, &partial_hash[SER_BYTES]);
    API_NS(point_sub)(remainder, p, remainder);
    return API_NS(invert_elligator_nonuniform)(partial_hash, remainder, hint);
}

decaf_bool_t API_NS(point_valid) (
const point_t p
) {
@@ -1314,7 +801,7 @@ decaf_bool_t API_NS(point_valid) (
gf_sqr(b,p->y);
gf_sub(a,b,a);
gf_sqr(b,p->t);
gf_mulw_sgn(c,b,TWISTED_D);
gf_mulw(c,b,TWISTED_D);
gf_sqr(b,p->z);
gf_add(b,b,c);
out &= gf_eq(a,b);
@@ -1349,7 +836,7 @@ void API_NS(point_debugging_pscale) (
gf gfac,tmp;
/* NB this means you'll never pscale by negative numbers for p521 */
ignore_result(gf_deserialize(gfac,factor,0));
cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO));
gf_cond_sel(gfac,gfac,ONE,gf_eq(gfac,ZERO));
gf_mul(tmp,p->x,gfac);
gf_copy(q->x,tmp);
gf_mul(tmp,p->y,gfac);
@@ -1498,7 +985,7 @@ void API_NS(precomputed_scalarmul) (
scalar_t scalar1x;
API_NS(scalar_add)(scalar1x, scalar, precomputed_scalarmul_adjustment);
sc_halve(scalar1x,scalar1x,sc_p);
API_NS(scalar_halve)(scalar1x,scalar1x);
niels_t ni;
@@ -1543,15 +1030,6 @@ void API_NS(point_cond_sel) (
constant_time_select(out,a,b,sizeof(point_t),bool_to_mask(pick_b),0);
}

/**
 * Constant-time select between two scalars: out = pick_b ? b : a.
 * The external boolean is converted to an internal mask before selecting.
 */
void API_NS(scalar_cond_sel) (
    scalar_t out,
    const scalar_t a,
    const scalar_t b,
    decaf_bool_t pick_b
) {
    constant_time_select(
        out,
        a,
        b,
        sizeof(scalar_t),
        bool_to_mask(pick_b),
        sizeof(out->limb[0])
    );
}

/* FUTURE: restore Curve25519 Montgomery ladder? */
decaf_error_t API_NS(direct_scalarmul) (
uint8_t scaled[SER_BYTES],
@@ -1596,8 +1074,8 @@ decaf_error_t API_NS(x_direct_scalarmul) (
k_t = -k_t; /* set to all 0s or all 1s */
swap ^= k_t;
cond_swap(x2,x3,swap);
cond_swap(z2,z3,swap);
gf_cond_swap(x2,x3,swap);
gf_cond_swap(z2,z3,swap);
swap = k_t;
gf_add_nr(t1,x2,z2); /* A = x2 + z2 */
@@ -1617,14 +1095,14 @@ decaf_error_t API_NS(x_direct_scalarmul) (
gf_mul(x2,z2,t1); /* x2 = AA*BB */
gf_sub_nr(t2,z2,t1); /* E = AA-BB */
gf_mulw_sgn(t1,t2,-EDWARDS_D); /* E*-d = a24*E */
gf_mulw(t1,t2,-EDWARDS_D); /* E*-d = a24*E */
gf_add_nr(t1,t1,z2); /* AA + a24*E */
gf_mul(z2,t2,t1); /* z2 = E(AA+a24*E) */
}
/* Finish */
cond_swap(x2,x3,swap);
cond_swap(z2,z3,swap);
gf_cond_swap(x2,x3,swap);
gf_cond_swap(z2,z3,swap);
gf_invert(z2,z2);
gf_mul(x1,x2,z2);
gf_serialize(out,x1,1);
@@ -1668,13 +1146,13 @@ void API_NS(x_base_scalarmul) (
* Jacobi -> Edwards -> Jacobi -> Montgomery,
* we pick up only a factor of 2 over Jacobi -> Montgomery.
*/
sc_halve(the_scalar,the_scalar,sc_p);
API_NS(scalar_halve)(the_scalar,the_scalar);
#if COFACTOR==8
/* If the base point isn't in the prime-order subgroup (PERF:
* guarantee that it is?) then a 4-isogeny isn't necessarily
* enough to clear the cofactor. So add another doubling.
*/
sc_halve(the_scalar,the_scalar,sc_p);
API_NS(scalar_halve)(the_scalar,the_scalar);
#endif
point_t p;
API_NS(precomputed_scalarmul)(p,API_NS(precomputed_base),the_scalar);
@@ -1781,6 +1259,7 @@ prepare_wnaf_table(
}
API_NS(point_destroy)(tmp);
decaf_bzero(twop,sizeof(twop));
}

extern const gf API_NS(precomputed_wnaf_as_fe)[];


+ 10
- 0
src/per_curve/decaf.tmpl.h View File

@@ -167,6 +167,16 @@ void $(c_ns)_scalar_mul (
const $(c_ns)_scalar_t a,
const $(c_ns)_scalar_t b
) API_VIS NONNULL3 NOINLINE;
/**
* @brief Halve a scalar modulo the scalar modulus (the modulus is added
* first when the input is odd). The scalars may use the same memory.
* @param [in] a A scalar.
* @param [out] out a/2.
*/
void $(c_ns)_scalar_halve (
$(c_ns)_scalar_t out,
const $(c_ns)_scalar_t a
) API_VIS NONNULL2 NOINLINE;

/**
* @brief Invert a scalar. When passed zero, return 0. The input and output may alias.


+ 3
- 0
src/per_curve/decaf.tmpl.hxx View File

@@ -193,6 +193,9 @@ public:
/** Divide by inverting q. If q == 0, return 0. */
inline Scalar &operator/=(const Scalar &q) throw(CryptoException) { return *this *= q.inverse(); }

/** Return half this scalar, computed with $(c_ns)_scalar_halve. Much faster than /2, which inverts its divisor. */
inline Scalar half() const { Scalar out; $(c_ns)_scalar_halve(out.s,s); return out; }

/** Compare in constant time */
inline bool operator!=(const Scalar &q) const NOEXCEPT { return !(*this == q); }



+ 149
- 0
src/per_curve/elligator.tmpl.c View File

@@ -0,0 +1,149 @@
/** @brief Elligator high-level functions. */

#include "word.h"
#include "field.h"
#include <decaf.h>

/* Template stuff */
#define API_NS(_id) $(c_ns)_##_id
#define point_t API_NS(point_t)
#define IMAGINE_TWIST $(imagine_twist)
#define COFACTOR $(cofactor)
static const int EDWARDS_D = $(d);
/* End of template stuff */

/* Declared here but defined outside this file. It is called by
* invert_elligator_nonuniform, which treats the two outputs as s and -t/s
* for the point p.
* NOTE(review): the exact effect of the three toggle masks is not visible
* from this file; confirm against the definition.
*/
extern void API_NS(deisogenize) (
gf_s *__restrict__ s,
gf_s *__restrict__ minus_t_over_s,
const point_t p,
mask_t toggle_hibit_s,
mask_t toggle_hibit_t_over_s,
mask_t toggle_rotation
);

/**
* Map SER_BYTES of input to a curve point (Elligator-style map).
*
* The input is deserialized into a field element r0; the deserialization
* result is deliberately ignored and r0 is strongly reduced instead, so the
* function has no failure path. The remaining steps compute s (in a) and
* t (in b) without branching on the data, then convert (s,t) into the
* extended coordinates x, y, z, t of p.
*
* @param [out] p The resulting point.
* @param [in] ser SER_BYTES of input, typically hash output.
*/
void API_NS(point_from_hash_nonuniform) (
point_t p,
const unsigned char ser[SER_BYTES]
) {
gf r0,r,a,b,c,N,e;
ignore_result(gf_deserialize(r0,ser,0));
gf_strong_reduce(r0);
gf_sqr(a,r0);
gf_mul_qnr(r,a); /* r = qnr * r0^2 */

/* Compute D@c := (dr+a-d)(dr-ar-d) with a=1 */
gf_sub(a,r,ONE);
gf_mulw(b,a,EDWARDS_D); /* dr-d */
gf_add(a,b,ONE);
gf_sub(b,b,r);
gf_mul(c,a,b);
/* compute N := (r+1)(a-2d) */
gf_add(a,r,ONE);
gf_mulw(N,a,1-2*EDWARDS_D);
/* e = +-sqrt(1/ND) or +-r0 * sqrt(qnr/ND) */
gf_mul(a,c,N);
mask_t square = gf_isr(b,a);
gf_cond_sel(c,r0,ONE,square); /* r? = square ? 1 : r0 */
gf_mul(e,b,c);
/* s@a = +-|N.e| */
gf_mul(a,N,e);
gf_cond_neg(a,gf_hibit(a)^square); /* NB this is the negative of what is listed in the paper */
/* t@b = -+ cN(r-1)((a-2d)e)^2 - 1 */
gf_mulw(c,e,1-2*EDWARDS_D); /* (a-2d)e */
gf_sqr(b,c);
gf_sub(e,r,ONE);
gf_mul(c,b,e);
gf_mul(b,c,N);
gf_cond_neg(b,square);
gf_sub(b,b,ONE);

/* isogenize: convert (s,t) = (a,b) into the point's x, y, z, t */
#if IMAGINE_TWIST
gf_mul(c,a,SQRT_MINUS_ONE);
gf_copy(a,c);
#endif
gf_sqr(c,a); /* s^2 */
gf_add(a,a,a); /* 2s */
gf_add(e,c,ONE);
gf_mul(p->t,a,e); /* 2s(1+s^2) */
gf_mul(p->x,a,b); /* 2st */
gf_sub(a,ONE,c);
gf_mul(p->y,e,a); /* (1+s^2)(1-s^2) */
gf_mul(p->z,a,b); /* (1-s^2)t */
assert(API_NS(point_valid)(p));
}

/**
* Map 2*SER_BYTES of input to a point by applying the nonuniform map to
* each half and adding the two resulting points.
*
* @param [out] pt The sum of the two mapped points.
* @param [in] hashed_data 2*SER_BYTES of input; the first SER_BYTES feed the
* first map and the next SER_BYTES feed the second.
*/
void API_NS(point_from_hash_uniform) (
point_t pt,
const unsigned char hashed_data[2*SER_BYTES]
) {
point_t pt2;
API_NS(point_from_hash_nonuniform)(pt,hashed_data);
API_NS(point_from_hash_nonuniform)(pt2,&hashed_data[SER_BYTES]);
API_NS(point_add)(pt,pt,pt2);
}

/**
* Try to recover an input that point_from_hash_nonuniform would map to p.
*
* The low four bits of hint_ choose among candidate preimages; each bit is
* expanded into an all-zeros / all-ones mask:
* bit 0 -> sgn_s, bit 1 -> sgn_t_over_s, bit 2 -> sgn_r0, bit 3 -> sgn_ed_T.
*
* @param [out] recovered_hash SER_BYTES; written whether or not the call
* succeeds.
* @param [in] p The point to invert.
* @param [in] hint_ Selector bits as described above.
* @return Result of decaf_succeed_if on the accumulated success mask.
*/
decaf_error_t
API_NS(invert_elligator_nonuniform) (
unsigned char recovered_hash[SER_BYTES],
const point_t p,
uint16_t hint_
) {
mask_t hint = hint_;
mask_t sgn_s = -(hint & 1),
sgn_t_over_s = -(hint>>1 & 1),
sgn_r0 = -(hint>>2 & 1), /* FIXME: but it's SER_BYTES ... */
sgn_ed_T = -(hint>>3 & 1);
gf a, b, c, d;
API_NS(deisogenize)(a,c,p,sgn_s,sgn_t_over_s,sgn_ed_T);
/* ok, a = s; c = -t/s */
gf_mul(b,c,a);
gf_sub(b,ONE,b); /* t+1 */
gf_sqr(c,a); /* s^2 */
mask_t is_identity = gf_eq(p->t,ZERO);

/* identity adjustments */
/* in case of identity, currently c=0, t=0, b=1, will encode to 1 */
/* if hint is 0, -> 0 */
/* if hint is to neg t/s, then go to infinity, effectively set s to 1 */
gf_cond_sel(c,c,ONE,is_identity & sgn_t_over_s);
gf_cond_sel(b,b,ZERO,is_identity & ~sgn_t_over_s & ~sgn_s);
gf_mulw(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */
gf_add(a,b,d); /* num? */
gf_sub(d,d,b); /* den? */
gf_mul(b,a,d); /* n*d */
gf_cond_sel(a,d,a,sgn_s);
gf_mul_qnr(d,b);
/* Succeed if the inverse square root exists or d is zero. */
mask_t succ = gf_isr(c,d)|gf_eq(d,ZERO);
gf_mul(b,a,c);
gf_cond_neg(b, sgn_r0^gf_hibit(b));
/* Fail when the result is zero but sgn_r0 is set. */
succ &= ~(gf_eq(b,ZERO) & sgn_r0);
#if COFACTOR == 8
succ &= ~(is_identity & sgn_ed_T); /* NB: there are no preimages of rotated identity. */
#endif
gf_serialize(recovered_hash,b,1); /* FIXME: ,0 */
/* TODO: deal with overflow flag */
return decaf_succeed_if(mask_to_bool(succ));
}

/**
* Invert the uniform map given the second half of the input.
*
* partial_hash is both input and output: its second SER_BYTES are read and
* mapped to a point, that point is subtracted from p, and the nonuniform
* inverse of the difference is written into its first SER_BYTES.
*
* @param [in,out] partial_hash 2*SER_BYTES; second half read, first half
* written.
* @param [in] p The point to invert.
* @param [in] hint Selector bits, passed through to
* invert_elligator_nonuniform.
* @return The result of invert_elligator_nonuniform.
*/
decaf_error_t
API_NS(invert_elligator_uniform) (
unsigned char partial_hash[2*SER_BYTES],
const point_t p,
uint16_t hint
) {
point_t pt2;
API_NS(point_from_hash_nonuniform)(pt2,&partial_hash[SER_BYTES]);
API_NS(point_sub)(pt2,p,pt2);
return API_NS(invert_elligator_nonuniform)(partial_hash,pt2,hint);
}

+ 328
- 0
src/per_curve/scalar.tmpl.c View File

@@ -0,0 +1,328 @@
/** @brief Decaf high-level functions. */

#include "word.h"
#include "constant_time.h"
#include <decaf.h>

/* Template stuff */
#define API_NS(_id) $(c_ns)_##_id
#define SCALAR_BITS $(C_NS)_SCALAR_BITS
#define SCALAR_SER_BYTES $(C_NS)_SCALAR_BYTES
#define SCALAR_LIMBS $(C_NS)_SCALAR_LIMBS
#define scalar_t API_NS(scalar_t)

static const decaf_word_t MONTGOMERY_FACTOR = (decaf_word_t)0x$("%x" % pow(-q,2**64-1,2**64))ull;
static const scalar_t sc_p = {{{
$(ser(q,64,"SC_LIMB"))
}}}, sc_r2 = {{{
$(ser(((2**128)**((scalar_bits+63)/64))%q,64,"SC_LIMB"))
}}};
/* End of template stuff */

#define WBITS DECAF_WORD_BITS /* NB this may be different from ARCH_WORD_BITS */

const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}};

/**
* Compute out = {extra,accum} - sub, then add p back if that difference
* went negative ("+? p").
*
* @param [out] out Result limbs.
* @param [in] accum Low SCALAR_LIMBS limbs of the minuend.
* @param [in] sub Value to subtract.
* @param [in] p Modulus that is conditionally added back.
* @param [in] extra Limb of the minuend above accum. Must have extra <= 1.
*
* There are no data-dependent branches: the add-back is selected by masking
* p with the borrow word.
*/
static NOINLINE void sc_subx(
scalar_t out,
const decaf_word_t accum[SCALAR_LIMBS],
const scalar_t sub,
const scalar_t p,
decaf_word_t extra
) {
decaf_dsword_t chain = 0;
unsigned int i;
/* Pass 1: out = accum - sub; chain carries the signed borrow between limbs. */
for (i=0; i<SCALAR_LIMBS; i++) {
chain = (chain + accum[i]) - sub->limb[i];
out->limb[i] = chain;
chain >>= WBITS;
}
decaf_word_t borrow = chain+extra; /* = 0 or -1 */
chain = 0;
/* Pass 2: add (p & borrow), i.e. p when borrow is all-ones and 0 otherwise. */
for (i=0; i<SCALAR_LIMBS; i++) {
chain = (chain + out->limb[i]) + (p->limb[i] & borrow);
out->limb[i] = chain;
chain >>= WBITS;
}
}

/**
* Montgomery multiplication modulo sc_p.
*
* For each limb of a, the loop adds (limb * b) into accum, then adds the
* multiple of sc_p (chosen with MONTGOMERY_FACTOR) that is intended to clear
* the lowest limb, and shifts accum down by one limb. After SCALAR_LIMBS
* rounds the result is a*b divided by 2^(WBITS*SCALAR_LIMBS), which sc_subx
* then brings below sc_p.
*
* out may alias a and/or b: out is written only by the final sc_subx call.
*
* @param [out] out The Montgomery product.
* @param [in] a First factor.
* @param [in] b Second factor.
*/
static NOINLINE void sc_montmul (
scalar_t out,
const scalar_t a,
const scalar_t b
) {
unsigned int i,j;
decaf_word_t accum[SCALAR_LIMBS+1] = {0};
decaf_word_t hi_carry = 0;
for (i=0; i<SCALAR_LIMBS; i++) {
decaf_word_t mand = a->limb[i];
const decaf_word_t *mier = b->limb;
decaf_dword_t chain = 0;
/* accum += a->limb[i] * b */
for (j=0; j<SCALAR_LIMBS; j++) {
chain += ((decaf_dword_t)mand)*mier[j] + accum[j];
accum[j] = chain;
chain >>= WBITS;
}
accum[j] = chain;
/* Pick the multiple of sc_p for this round's reduction step. */
mand = accum[0] * MONTGOMERY_FACTOR;
chain = 0;
mier = sc_p->limb;
/* accum = (accum + mand * sc_p) >> WBITS; the j==0 limb is dropped. */
for (j=0; j<SCALAR_LIMBS; j++) {
chain += (decaf_dword_t)mand*mier[j] + accum[j];
if (j) accum[j-1] = chain;
chain >>= WBITS;
}
chain += accum[j];
chain += hi_carry;
accum[j-1] = chain;
/* Overflow past the top limb, carried into the next round or sc_subx. */
hi_carry = chain >> WBITS;
}
/* Final conditional correction: accum - sc_p, adding sc_p back on borrow. */
sc_subx(out, accum, sc_p, sc_p, hi_carry);
}

/**
* Multiply two scalars: out = a*b modulo sc_p.
*
* Built from two Montgomery multiplications: the second one, by sc_r2,
* cancels the scaling factor left over by the first.
*
* @param [out] out The product.
* @param [in] a First factor.
* @param [in] b Second factor.
*/
void API_NS(scalar_mul) (
scalar_t out,
const scalar_t a,
const scalar_t b
) {
sc_montmul(out,a,b);
sc_montmul(out,out,sc_r2);
}

/* Montgomery squaring, currently just sc_montmul(out,a,a).
* PERF: could implement a dedicated squaring routine.
*/
static INLINE void sc_montsqr (scalar_t out, const scalar_t a) {
sc_montmul(out,a,a);
}

/**
* Invert a scalar by raising it to the power (sc_p - 2).
*
* The exponent bits are read from sc_p, with 2 subtracted from its lowest
* limb. Intermediate values are kept in Montgomery form and converted back
* at the end. The precomputed table is zeroized before returning.
*
* @param [out] out The inverse of a.
* @param [in] a The scalar to invert.
* @return Success unless the result equals zero.
*/
decaf_error_t API_NS(scalar_invert) (
scalar_t out,
const scalar_t a
) {
/* Fermat's little theorem, sliding window.
* Sliding window is fine here because the modulus isn't secret.
*/
const int SCALAR_WINDOW_BITS = 3;
scalar_t precmp[1<<SCALAR_WINDOW_BITS];
const int LAST = (1<<SCALAR_WINDOW_BITS)-1;

/* Precompute precmp = [a^1,a^3,...] */
sc_montmul(precmp[0],a,sc_r2);
/* precmp[LAST] temporarily holds a^2; the final loop iteration below
* overwrites it with the last odd power. */
if (LAST > 0) sc_montmul(precmp[LAST],precmp[0],precmp[0]);

int i;
for (i=1; i<=LAST; i++) {
sc_montmul(precmp[i],precmp[i-1],precmp[LAST]);
}
/* Sliding window */
unsigned residue = 0, trailing = 0, started = 0;
/* The loop runs SCALAR_WINDOW_BITS steps past bit 0, feeding zero bits,
* so that the last window is flushed. */
for (i=SCALAR_BITS-1; i>=-SCALAR_WINDOW_BITS; i--) {
if (started) sc_montsqr(out,out);
decaf_word_t w = (i>=0) ? sc_p->limb[i/WBITS] : 0;
if (i >= 0 && i<WBITS) {
/* Lowest limb: use sc_p - 2 as the exponent. */
assert(w >= 2);
w-=2;
}
residue = (residue<<1) | ((w>>(i%WBITS))&1);
if (residue>>SCALAR_WINDOW_BITS != 0) {
assert(trailing == 0);
trailing = residue;
residue = 0;
}
if (trailing > 0 && (trailing & ((1<<SCALAR_WINDOW_BITS)-1)) == 0) {
if (started) {
sc_montmul(out,out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
} else {
/* First window: initialize out instead of multiplying into it. */
API_NS(scalar_copy)(out,precmp[trailing>>(SCALAR_WINDOW_BITS+1)]);
started = 1;
}
trailing = 0;
}
trailing <<= 1;
}
assert(residue==0);
assert(trailing==0);
/* Demontgomerize */
sc_montmul(out,out,API_NS(scalar_one));
decaf_bzero(precmp, sizeof(precmp));
return decaf_succeed_if(~API_NS(scalar_eq)(out,API_NS(scalar_zero)));
}

/**
* Subtract two scalars: out = a - b, with sc_p added back if the
* subtraction borrows (see sc_subx).
*
* @param [out] out The difference.
* @param [in] a Minuend.
* @param [in] b Subtrahend.
*/
void API_NS(scalar_sub) (
scalar_t out,
const scalar_t a,
const scalar_t b
) {
sc_subx(out, a->limb, b, sc_p, 0);
}

/**
 * Add two scalars: out = a + b, reduced by one conditional subtraction of
 * sc_p.
 *
 * out may alias a and/or b: each limb of the inputs is read before the
 * corresponding limb of out is written.
 *
 * @param [out] out The sum.
 * @param [in] a First addend.
 * @param [in] b Second addend.
 */
void API_NS(scalar_add) (
    scalar_t out,
    const scalar_t a,
    const scalar_t b
) {
    decaf_dword_t carry = 0;
    unsigned int limb = 0;

    /* Limb-wise addition; the double-width accumulator holds the carry. */
    while (limb < SCALAR_LIMBS) {
        carry += a->limb[limb];
        carry += b->limb[limb];
        out->limb[limb] = carry;
        carry >>= WBITS;
        limb++;
    }

    /* Subtract sc_p; sc_subx adds it back if that underflows. */
    sc_subx(out, out->limb, sc_p, sc_p, carry);
}

/**
 * Set a scalar from a 64-bit unsigned integer.
 *
 * All limbs are cleared, then w is stored into as many low limbs as are
 * needed to hold 64 bits, least significant limb first.
 *
 * @param [out] out The scalar to set.
 * @param [in] w The value.
 */
void
API_NS(scalar_set_unsigned) (
    scalar_t out,
    uint64_t w
) {
    /* Number of limbs a uint64_t occupies, and the per-limb shift.  The shift
     * is 0 when one limb already holds all 64 bits, which avoids shifting by
     * the full width of the type. */
    const unsigned int limbs_per_u64 = sizeof(uint64_t)/sizeof(decaf_word_t);
    const unsigned int step =
        (sizeof(uint64_t) > sizeof(decaf_word_t)) ? 8*sizeof(decaf_word_t) : 0;
    unsigned int idx;

    memset(out,0,sizeof(scalar_t));
    for (idx = 0; idx < limbs_per_u64; idx++) {
        out->limb[idx] = (decaf_word_t)w;
        w >>= step;
    }
}

/**
 * Compare two scalars for equality.
 *
 * Every limb is always examined: the limb-wise XORs are OR-ed together and
 * only the combined word is tested, so there is no early exit.
 *
 * @param [in] a First scalar.
 * @param [in] b Second scalar.
 * @return mask_to_bool of "all limbs equal".
 */
decaf_bool_t
API_NS(scalar_eq) (
    const scalar_t a,
    const scalar_t b
) {
    decaf_word_t mismatch = 0;
    unsigned int limb = 0;

    while (limb < SCALAR_LIMBS) {
        mismatch |= a->limb[limb] ^ b->limb[limb];
        limb++;
    }

    return mask_to_bool(word_is_zero(mismatch));
}

/**
 * Load up to nbytes little-endian bytes into the limbs of s, without any
 * reduction.  Limbs (and the high bytes of a partial limb) beyond nbytes
 * are set to zero.
 *
 * @param [out] s The scalar whose limbs are filled.
 * @param [in] ser Source bytes, least significant first.
 * @param [in] nbytes Number of bytes available in ser.
 */
static INLINE void scalar_decode_short (
    scalar_t s,
    const unsigned char *ser,
    unsigned int nbytes
) {
    unsigned int limb, pos = 0;

    for (limb = 0; limb < SCALAR_LIMBS; limb++) {
        decaf_word_t word = 0;
        unsigned int byte = 0;

        /* Pack bytes into this limb until it is full or input runs out. */
        while (byte < sizeof(decaf_word_t) && pos < nbytes) {
            word |= ((decaf_word_t)ser[pos]) << (8*byte);
            byte++;
            pos++;
        }
        s->limb[limb] = word;
    }
}

/**
* Decode SCALAR_SER_BYTES into a scalar and reduce it modulo sc_p.
*
* The scalar is always written (reduced), even on failure. The return value
* reports whether the raw encoded value was already below sc_p.
*
* @param [out] s The decoded, reduced scalar.
* @param [in] ser The serialized scalar.
* @return Success if the encoded value was less than sc_p, failure otherwise.
*/
decaf_error_t API_NS(scalar_decode)(
scalar_t s,
const unsigned char ser[SCALAR_SER_BYTES]
) {
unsigned int i;
scalar_decode_short(s, ser, SCALAR_SER_BYTES);
decaf_dsword_t accum = 0;
/* Borrow chain of s - sc_p; only the final borrow is kept. */
for (i=0; i<SCALAR_LIMBS; i++) {
accum = (accum + s->limb[i] - sc_p->limb[i]) >> WBITS;
}
/* Here accum == 0 or -1 */
API_NS(scalar_mul)(s,s,API_NS(scalar_one)); /* ham-handed reduce */
/* accum == -1 means s - sc_p borrowed, i.e. the encoding was in range. */
return decaf_succeed_if(~word_is_zero(accum));
}

/**
* Overwrite a scalar with zeros using decaf_bzero.
*
* @param [out] scalar The scalar to clear.
*/
void API_NS(scalar_destroy) (
scalar_t scalar
) {
decaf_bzero(scalar, sizeof(scalar_t));
}

/**
 * Decode a little-endian byte string of arbitrary length and reduce it into
 * a scalar.
 *
 * The input is processed in chunks of SCALAR_SER_BYTES, most significant
 * chunk first: the running value is scaled up with sc_montmul by sc_r2 and
 * the next lower chunk is decoded and added.  Temporaries are zeroized
 * before returning.
 *
 * @param [out] s The decoded scalar.
 * @param [in] ser The bytes to decode, least significant byte first.
 * @param [in] ser_len Number of bytes in ser.  A length of 0 yields zero.
 */
void API_NS(scalar_decode_long)(
    scalar_t s,
    const unsigned char *ser,
    size_t ser_len
) {
    if (ser_len == 0) {
        API_NS(scalar_copy)(s, API_NS(scalar_zero));
        return;
    }

    size_t i;
    scalar_t t1, t2;

    /* i = offset of the most significant chunk.  That chunk is partial
     * unless ser_len is a multiple of SCALAR_SER_BYTES, in which case it is
     * the last full chunk. */
    i = ser_len - (ser_len%SCALAR_SER_BYTES);
    if (i==ser_len) i -= SCALAR_SER_BYTES;
    scalar_decode_short(t1, &ser[i], ser_len-i);

    /* Exactly one full serialized chunk: t1 may be >= sc_p, and the loop
     * below will not run, so reduce it explicitly.  The test is on the
     * serialized size (SCALAR_SER_BYTES), the unit every other length in
     * this function is measured in, not on the in-memory size of scalar_t. */
    if (ser_len == SCALAR_SER_BYTES) {
        assert(i==0);
        /* ham-handed reduce */
        API_NS(scalar_mul)(s,t1,API_NS(scalar_one));
        API_NS(scalar_destroy)(t1);
        return;
    }

    /* Fold in the remaining chunks from high to low. */
    while (i) {
        i -= SCALAR_SER_BYTES;
        sc_montmul(t1,t1,sc_r2);
        ignore_result( API_NS(scalar_decode)(t2, ser+i) );
        API_NS(scalar_add)(t1, t1, t2);
    }

    API_NS(scalar_copy)(s, t1);
    API_NS(scalar_destroy)(t1);
    API_NS(scalar_destroy)(t2);
}

/**
 * Serialize a scalar as little-endian bytes: limbs in increasing order, and
 * within each limb the least significant byte first.
 *
 * @param [out] ser Receives SCALAR_LIMBS * sizeof(decaf_word_t) bytes.
 * @param [in] s The scalar to serialize.
 */
void API_NS(scalar_encode)(
    unsigned char ser[SCALAR_SER_BYTES],
    const scalar_t s
) {
    unsigned int limb, pos = 0;

    for (limb = 0; limb < SCALAR_LIMBS; limb++) {
        unsigned int byte = 0;
        while (byte < sizeof(decaf_word_t)) {
            ser[pos] = s->limb[limb] >> (8*byte);
            pos++;
            byte++;
        }
    }
}

/**
* Conditionally select between two scalars via constant_time_select.
*
* pick_b is converted to a mask with bool_to_mask before the call.
* NOTE(review): presumably out = pick_b ? b : a; constant_time_select is
* defined outside this file, so confirm the selection direction there.
*
* @param [out] out The selected scalar.
* @param [in] a First candidate.
* @param [in] b Second candidate.
* @param [in] pick_b Selector.
*/
void API_NS(scalar_cond_sel) (
scalar_t out,
const scalar_t a,
const scalar_t b,
decaf_bool_t pick_b
) {
constant_time_select(out,a,b,sizeof(scalar_t),bool_to_mask(pick_b),sizeof(out->limb[0]));
}

/**
 * Halve a scalar: out = a/2 modulo sc_p.
 *
 * If a is odd, sc_p is added first (selected by a mask, not a branch) so
 * that the sum is even; the sum, including its carry-out, is then shifted
 * right by one bit.  out may alias a.
 *
 * @param [out] out a/2.
 * @param [in] a The scalar to halve.
 */
void API_NS(scalar_halve) (
    scalar_t out,
    const scalar_t a
) {
    /* All-ones when a is odd, zero when it is even. */
    const decaf_word_t odd_mask = -(a->limb[0] & 1);
    decaf_dword_t carry = 0;
    unsigned int k;

    /* out = a + (sc_p & odd_mask); the carry-out stays in `carry`. */
    for (k = 0; k < SCALAR_LIMBS; k++) {
        carry = (carry + a->limb[k]) + (sc_p->limb[k] & odd_mask);
        out->limb[k] = carry;
        carry >>= WBITS;
    }

    /* Shift right by one bit, pulling each limb's new top bit from the limb
     * above it, and the top limb's from the carry-out. */
    for (k = 0; k < SCALAR_LIMBS-1; k++) {
        out->limb[k] = out->limb[k]>>1 | out->limb[k+1]<<(WBITS-1);
    }
    out->limb[k] = out->limb[k]>>1 | carry<<(WBITS-1);
}


+ 2
- 2
src/per_field/f_field.tmpl.h View File

@@ -33,7 +33,7 @@ typedef struct gf_$(gf_shortname)_s {
#define gf_strong_reduce gf_$(gf_shortname)_strong_reduce
#define gf_mul gf_$(gf_shortname)_mul
#define gf_sqr gf_$(gf_shortname)_sqr
#define gf_mulw gf_$(gf_shortname)_mulw
#define gf_mulw_unsigned gf_$(gf_shortname)_mulw_unsigned
#define gf_isr gf_$(gf_shortname)_isr
#define gf_serialize gf_$(gf_shortname)_serialize
#define gf_deserialize gf_$(gf_shortname)_deserialize
@@ -62,7 +62,7 @@ void gf_strong_reduce (gf inout);
void gf_add (gf out, const gf a, const gf b);
void gf_sub (gf out, const gf a, const gf b);
void gf_mul (gf_s *__restrict__ out, const gf a, const gf b);
void gf_mulw (gf_s *__restrict__ out, const gf a, uint32_t b);
void gf_mulw_unsigned (gf_s *__restrict__ out, const gf a, uint32_t b);
void gf_sqr (gf_s *__restrict__ out, const gf a);
mask_t gf_isr(gf a, const gf x); /** a^2 x = 1, QNR, or 0 if x=0. Return true if successful */
mask_t gf_eq (const gf x, const gf y);


Loading…
Cancel
Save