Browse Source

copying existing decaf impl to "fast" version. not that fast yet, but uses native field mul code. Also rework precomputed tables to be agnostic of underlying impl

master
Michael Hamburg 10 years ago
parent
commit
ae24f96e09
5 changed files with 801 additions and 16 deletions
  1. +3
    -1
      Makefile
  2. +10
    -8
      include/decaf.h
  3. +13
    -5
      src/decaf.c
  4. +772
    -0
      src/decaf_fast.c
  5. +3
    -2
      test/test_scalarmul.c

+ 3
- 1
Makefile View File

@@ -13,6 +13,8 @@ endif
LD = $(CC)
ASM ?= $(CC)

DECAF ?= decaf

ifneq (,$(findstring x86_64,$(MACHINE)))
ARCH ?= arch_x86_64
else
@@ -68,7 +70,7 @@ LIBCOMPONENTS= build/goldilocks.o build/barrett_field.o build/crandom.o \
build/$(FIELD).o build/ec_point.o build/scalarmul.o build/sha512.o build/magic.o \
build/f_arithmetic.o build/arithmetic.o

DECAFCOMPONENTS= build/decaf.o build/shake.o build/decaf_crypto.o
DECAFCOMPONENTS= build/$(DECAF).o build/shake.o build/decaf_crypto.o

TESTCOMPONENTS=build/test.o build/test_scalarmul.o build/test_sha512.o \
build/test_pointops.o build/test_arithmetic.o build/test_goldilocks.o build/magic.o \


+ 10
- 8
include/decaf.h View File

@@ -63,12 +63,14 @@ typedef uint32_t decaf_word_t, decaf_bool_t;
/** Twisted Edwards (-1,d-1) extended homogeneous coordinates */
typedef struct decaf_448_point_s {
decaf_word_t x[DECAF_448_LIMBS],y[DECAF_448_LIMBS],z[DECAF_448_LIMBS],t[DECAF_448_LIMBS];
} decaf_448_point_t[1];
} __attribute__((aligned(32))) decaf_448_point_t[1];

/** Precomputed table based on a point. Can be trivial implementation. */
typedef struct decaf_448_precomputed_s {
decaf_448_point_t p[1];
} decaf_448_precomputed_t[1];
struct decaf_448_precomputed_s;
typedef struct decaf_448_precomputed_s decaf_448_precomputed_s;

/** Size and alignment of precomputed point tables. */
extern const size_t sizeof_decaf_448_precomputed_s, alignof_decaf_448_precomputed_s;

/** Scalar is stored packed, because we don't need the speed. */
typedef struct decaf_448_scalar_s {
@@ -101,10 +103,10 @@ extern const decaf_448_point_t decaf_448_point_identity API_VIS;
* Equal to Ed448-Goldilocks base point defined by DJB, except of course that
* it's on the twist in this case. TODO: choose a base point with nice encoding?
*/
extern const struct decaf_448_point_s *decaf_448_point_base API_VIS;
extern const decaf_448_point_t decaf_448_point_base API_VIS;

/** Precomputed table for the base point on the curve. */
extern const decaf_448_precomputed_t decaf_448_precomputed_base API_VIS;
extern const struct decaf_448_precomputed_s *decaf_448_precomputed_base API_VIS;

#ifdef __cplusplus
extern "C" {
@@ -332,7 +334,7 @@ void decaf_448_point_scalarmul (
* @param [in] b Any point.
*/
void decaf_448_precompute (
decaf_448_precomputed_t a,
struct decaf_448_precomputed_s *a,
const decaf_448_point_t b
) API_VIS NONNULL2;

@@ -349,7 +351,7 @@ void decaf_448_precompute (
*/
void decaf_448_precomputed_scalarmul (
decaf_448_point_t scaled,
const decaf_448_precomputed_t base,
const struct decaf_448_precomputed_s *base,
const decaf_448_scalar_t scalar
) API_VIS NONNULL3;



+ 13
- 5
src/decaf.c View File

@@ -69,7 +69,7 @@ static const decaf_word_t DECAF_MONTGOMERY_FACTOR = (decaf_word_t)(0x3bd440fae91

/** base = twist of Goldilocks base point (~,19). */

const decaf_448_precomputed_t decaf_448_precomputed_base = {{{{{
const decaf_448_point_t decaf_448_point_base = {{
{ LIMB(0xb39a2d57e08c7b),LIMB(0xb38639c75ff281),
LIMB(0x2ec981082b3288),LIMB(0x99fe8607e5237c),
LIMB(0x0e33fbb1fadd1f),LIMB(0xe714f67055eb4a),
@@ -83,9 +83,17 @@ const decaf_448_precomputed_t decaf_448_precomputed_base = {{{{{
LIMB(0x0d79c0a7729a69),LIMB(0xc18d3f24aebc1c),
LIMB(0x1fbb5389b3fda5),LIMB(0xbb24f674635948),
LIMB(0x723a55709a3983),LIMB(0xe1c0107a823dd4) }
}}}}};
}};

const struct decaf_448_point_s *decaf_448_point_base = decaf_448_precomputed_base->p[0];
struct decaf_448_precomputed_s {
decaf_448_point_t p[1];
};

const struct decaf_448_precomputed_s *decaf_448_precomputed_base =
(const struct decaf_448_precomputed_s *)decaf_448_point_base;

const size_t sizeof_decaf_448_precomputed_s = sizeof(struct decaf_448_precomputed_s);
const size_t alignof_decaf_448_precomputed_s = 32;

#if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__)) || defined(DECAF_FORCE_UNROLL)
#if DECAF_448_LIMBS==8
@@ -764,7 +772,7 @@ decaf_bool_t decaf_448_point_valid (
}

void decaf_448_precompute (
decaf_448_precomputed_t a,
decaf_448_precomputed_s *a,
const decaf_448_point_t b
) {
decaf_448_point_copy(a->p[0],b);
@@ -772,7 +780,7 @@ void decaf_448_precompute (

void decaf_448_precomputed_scalarmul (
decaf_448_point_t a,
const decaf_448_precomputed_t b,
const decaf_448_precomputed_s *b,
const decaf_448_scalar_t scalar
) {
decaf_448_point_scalarmul(a,b->p[0],scalar);


+ 772
- 0
src/decaf_fast.c View File

@@ -0,0 +1,772 @@
/* Copyright (c) 2015 Cryptography Research, Inc.
* Released under the MIT License. See LICENSE.txt for license information.
*/

/**
* @file decaf_fast.c
* @author Mike Hamburg
* @brief Decaf high-level functions.
*/

#define __STDC_WANT_LIB_EXT1__ 1 /* for memset_s */
#include "decaf.h"
#include <string.h>
#include "field.h"

/* Bits per machine word, as configured by decaf.h. */
#define WBITS DECAF_WORD_BITS

/*
 * Word-size dependent configuration:
 *   LBITS           bits stored in each field-element limb
 *   decaf_dword_t   unsigned double-width type used for carry chains
 *   decaf_sdword_t  signed double-width type used for borrow chains
 *   LIMB(x)         spells a field constant as limb initializer(s)
 *   SC_LIMB(x)      spells a scalar constant as word initializer(s)
 */
#if WBITS == 64
#define LBITS 56
typedef __uint128_t decaf_dword_t;
typedef __int128_t decaf_sdword_t;
#define LIMB(x) (x##ull)
#define SC_LIMB(x) (x##ull)
#elif WBITS == 32
typedef uint64_t decaf_dword_t;
typedef int64_t decaf_sdword_t;
#define LBITS 28
/* On 32-bit words each constant expands to TWO comma-separated initializers:
 * the low LBITS bits first, then the remaining high bits. */
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS
/* Same idea for scalars: low 32 bits first, then the high 32 bits. */
#define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32
#else
#error "Only supporting 32- and 64-bit platforms right now"
#endif

/* Multiplier applied to r^2 in decaf_448_point_from_hash_nonuniform. */
static const int QUADRATIC_NONRESIDUE = -1;

/* Shorthand for file-local functions returning void. */
#define sv static void
/* Field element: DECAF_448_LIMBS words, 32-byte aligned. */
typedef decaf_word_t gf[DECAF_448_LIMBS] __attribute__((aligned(32)));
/* Small constants: only limb 0 is given, the remaining limbs are zero-initialized. */
static const gf ZERO = {0}, ONE = {1}, TWO = {2};

/* Mask covering the LBITS payload bits of a single limb. */
#define LMASK ((((decaf_word_t)1)<<LBITS)-1)
/* Limbs of P, the bound gf_deser compares deserialized values against.
 * Every limb is LMASK except the middle one (LMASK-1), so in radix 2^LBITS
 * the value is 2^448 - 2^224 - 1. */
#if WBITS == 64
static const gf P = { LMASK, LMASK, LMASK, LMASK, LMASK-1, LMASK, LMASK, LMASK };
#else
static const gf P = { LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK,
LMASK-1, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK, LMASK };
#endif
/* Curve constant used (via gf_mlw) by the point formulas in this file. */
static const int EDWARDS_D = -39081;

/* Scalar constants, least-significant word first.
 * decaf_448_scalar_p is the modulus handed as `p` to decaf_448_montmul and
 * decaf_448_subx by the scalar routines below; decaf_448_scalar_one and
 * decaf_448_scalar_zero are the scalars 1 and 0. */
const decaf_448_scalar_t decaf_448_scalar_p = {{{
SC_LIMB(0x2378c292ab5844f3),
SC_LIMB(0x216cc2728dc58f55),
SC_LIMB(0xc44edb49aed63690),
SC_LIMB(0xffffffff7cca23e9),
SC_LIMB(0xffffffffffffffff),
SC_LIMB(0xffffffffffffffff),
SC_LIMB(0x3fffffffffffffff)
}}}, decaf_448_scalar_one = {{{1}}}, decaf_448_scalar_zero = {{{0}}};

/* Operand of the second decaf_448_montmul in decaf_448_scalar_mul and of the
 * per-chunk multiply in decaf_448_scalar_decode_long.
 * NOTE(review): presumably R^2 mod p for the Montgomery radix R -- confirm. */
static const decaf_448_scalar_t decaf_448_scalar_r2 = {{{
SC_LIMB(0xe3539257049b9b60),
SC_LIMB(0x7af32c4bc1b195d9),
SC_LIMB(0x0d66de2388ea1859),
SC_LIMB(0xae17cf725ee4d838),
SC_LIMB(0x1a9cc14ba3c47c44),
SC_LIMB(0x2052bcb7e4d070af),
SC_LIMB(0x3402a939f823b729)
}}};

/* Passed as `montgomery_factor` to decaf_448_montmul. The cast keeps only
 * the low word of the literal when decaf_word_t is narrower than it. */
static const decaf_word_t DECAF_MONTGOMERY_FACTOR = (decaf_word_t)(0x3bd440fae918bc5ull);

/**
 * base = twist of Goldilocks base point (~,19).
 * The four initializer rows are x, y, z, t in the field order of
 * struct decaf_448_point_s; z is { 1 }, i.e. limb 0 = 1 and the rest zero.
 */

const decaf_448_point_t decaf_448_point_base = {{
{ LIMB(0xb39a2d57e08c7b),LIMB(0xb38639c75ff281),
LIMB(0x2ec981082b3288),LIMB(0x99fe8607e5237c),
LIMB(0x0e33fbb1fadd1f),LIMB(0xe714f67055eb4a),
LIMB(0xc9ae06d64067dd),LIMB(0xf7be45054760fa) },
{ LIMB(0xbd8715f551617f),LIMB(0x8c17fbeca8f5fc),
LIMB(0xaae0eec209c06f),LIMB(0xce41ad80cbe6b8),
LIMB(0xdf360b5c828c00),LIMB(0xaf25b6bbb40e3b),
LIMB(0x8ed37f0ce4ed31),LIMB(0x72a1c3214557b9) },
{ 1 },
{ LIMB(0x97ca9c8ed8bde9),LIMB(0xf0b780da83304c),
LIMB(0x0d79c0a7729a69),LIMB(0xc18d3f24aebc1c),
LIMB(0x1fbb5389b3fda5),LIMB(0xbb24f674635948),
LIMB(0x723a55709a3983),LIMB(0xe1c0107a823dd4) }
}};

/* Concrete layout of the precomputed-table type. In this implementation the
 * "table" holds exactly one point. */
struct decaf_448_precomputed_s {
decaf_448_point_t p[1];
};

/* Base-point table: the base point itself, viewed through the table type. */
const struct decaf_448_precomputed_s *decaf_448_precomputed_base =
(const struct decaf_448_precomputed_s *)decaf_448_point_base;

/* Size and alignment of a table, exported as data so code that does not see
 * the struct definition can still allocate one. The alignment is hard-coded. */
const size_t sizeof_decaf_448_precomputed_s = sizeof(struct decaf_448_precomputed_s);
const size_t alignof_decaf_448_precomputed_s = 32;

/*
 * FOR_LIMB(i,op): evaluate `op` once for each limb index i = 0 .. DECAF_448_LIMBS-1.
 * When optimizing for speed (or when DECAF_FORCE_UNROLL is defined) and the
 * limb count is 8 or 16, the iterations are written out explicitly; in every
 * other configuration it is an ordinary for loop. `op` is pasted textually,
 * so it is expanded once per iteration in the unrolled variants.
 */
#if (defined(__OPTIMIZE__) && !defined(__OPTIMIZE_SIZE__)) || defined(DECAF_FORCE_UNROLL)
#if DECAF_448_LIMBS==8
#define FOR_LIMB(i,op) { unsigned int i=0; \
op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
}
#elif DECAF_448_LIMBS==16
#define FOR_LIMB(i,op) { unsigned int i=0; \
op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; op;i++; \
}
#else
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<DECAF_448_LIMBS; i++) { op; }}
#endif
#else
#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<DECAF_448_LIMBS; i++) { op; }}
#endif

/** Limb-by-limb copy of a field element: x = y. */
sv gf_cpy(gf x, const gf y) {
    FOR_LIMB(k, x[k] = y[k]);
}

/** Field multiply c = a*b; thin adapter over field_mul from field.h. */
static inline void gf_mul (gf c, const gf a, const gf b) {
    field_t *product = (field_t *)c;
    const field_t *lhs = (const field_t *)a;
    const field_t *rhs = (const field_t *)b;
    field_mul(product, lhs, rhs);
}

/** Field square c = a^2; thin adapter over field_sqr from field.h. */
static inline void gf_sqr (gf c, const gf a) {
    field_t *square = (field_t *)c;
    const field_t *operand = (const field_t *)a;
    field_sqr(square, operand);
}

/** Inverse square root: y = field_isr(x), delegated to field.h. */
sv gf_isqrt(gf y, const gf x) {
    field_t *result = (field_t *)y;
    const field_t *operand = (const field_t *)x;
    field_isr(result, operand);
}

/**
 * Add mod p: c = a + b, delegated to field_add.
 * NOTE(review): the original remark says this "conservatively always
 * weak-reduces"; that depends on field_add, which is not visible here.
 */
static inline void gf_add ( gf c, const gf a, const gf b ) {
    field_t *sum = (field_t *)c;
    const field_t *lhs = (const field_t *)a;
    const field_t *rhs = (const field_t *)b;
    field_add(sum, lhs, rhs);
}

/**
 * Subtract mod p: c = a - b, delegated to field_sub.
 * NOTE(review): the original remark says this "conservatively always
 * weak-reduces"; that depends on field_sub, which is not visible here.
 */
static inline void gf_sub ( gf c, const gf a, const gf b ) {
    field_t *difference = (field_t *)c;
    const field_t *lhs = (const field_t *)a;
    const field_t *rhs = (const field_t *)b;
    field_sub(difference, lhs, rhs);
}

/**
 * Branch-free select: x = is_z ? z : y.
 * is_z is applied as a bit mask to every limb, so callers pass either 0 or
 * an all-ones word.
 */
sv cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) {
    FOR_LIMB(k, x[k] = (z[k] & is_z) | (y[k] & ~is_z) );
}

/**
 * Branch-free conditional negation: if (neg) x = -x.
 * The negation 0 - x is always computed; neg (a 0 / all-ones mask) only
 * decides which value is kept.
 */
sv cond_neg(gf x, decaf_bool_t neg) {
    gf negated;
    gf_sub(negated, ZERO, x);
    cond_sel(x, x, negated, neg);
}

/**
 * Branch-free conditional swap: if (swap) exchange x and y.
 * Uses the XOR-swap trick on each limb, with swap acting as a bit mask
 * (0 leaves both operands untouched, all-ones exchanges them).
 */
sv cond_swap(gf x, gf y, decaf_bool_t swap) {
    FOR_LIMB(k, {
        decaf_word_t delta = swap & (x[k] ^ y[k]);
        x[k] ^= delta;
        y[k] ^= delta;
    });
}

/**
 * Multiply by a signed machine integer: c = a * w.
 * field_mulw is called with the magnitude of w; for w <= 0 the product is
 * then negated. The branch depends on the sign of w, so this is not
 * constant-time with respect to that sign.
 */
static inline void gf_mlw(gf c, const gf a, int w) {
    const int negate = !(w > 0);
    field_mulw((field_t *)c, (const field_t *)a, negate ? -w : w);
    if (negate) {
        gf_sub(c, ZERO, c);
    }
}

/** Bring a to canonical form in place by calling field_strong_reduce. */
static inline void gf_canon ( gf a ) {
    field_t *elem = (field_t *)a;
    field_strong_reduce(elem);
}

/**
 * Equality test a == b.
 * Returns an all-ones word when the canonical form of a - b has every limb
 * zero, and 0 otherwise.
 */
static decaf_word_t __attribute__((noinline)) gf_eq(const gf a, const gf b) {
    gf diff;
    gf_sub(diff, a, b);
    gf_canon(diff);

    decaf_word_t any_bits = 0;
    FOR_LIMB(k, any_bits |= diff[k] );

    /* any_bits - 1 borrows into the upper half only when any_bits == 0.
     * Hope the compiler is too dumb to optimize this, thus noinline. */
    return ((decaf_dword_t)any_bits - 1) >> WBITS;
}

/**
 * "High bit" of x, defined as the low bit of the canonical form of 2x.
 * Returned as a mask: all-ones when that bit is set, 0 otherwise.
 */
static decaf_word_t hibit(const gf x) {
    gf doubled;
    gf_add(doubled, x, x);
    gf_canon(doubled);
    return -(doubled[0] & 1);
}

/**
 * Branch-free point select: a = use_c ? c : b.
 * Applies cond_sel to each of the four coordinates; use_c is a 0 / all-ones
 * mask.
 */
sv decaf_448_cond_sel (decaf_448_point_t a, const decaf_448_point_t b,
                       const decaf_448_point_t c, decaf_bool_t use_c) {
    cond_sel(a->x, b->x, c->x, use_c);
    cond_sel(a->y, b->y, c->y, use_c);
    cond_sel(a->z, b->z, c->z, use_c);
    cond_sel(a->t, b->t, c->t, use_c);
}

/**
 * out = {extra,accum} - sub, then + p if that subtraction went negative.
 * Must have extra <= 1.
 *
 * @param out    Result scalar; written in both passes.
 * @param accum  Low DECAF_448_SCALAR_LIMBS words of the minuend.
 * @param sub    Value subtracted in the first pass.
 * @param p      Value conditionally added back in the second pass.
 * @param extra  Word sitting above accum (the carry out of a previous step).
 */
sv decaf_448_subx(
decaf_448_scalar_t out,
const decaf_word_t accum[DECAF_448_SCALAR_LIMBS],
const decaf_448_scalar_t sub,
const decaf_448_scalar_t p,
decaf_word_t extra
) {
/* Pass 1: word-wise subtraction; the signed chain carries the borrow. */
decaf_sdword_t chain = 0;
unsigned int i;
for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
chain = (chain + accum[i]) - sub->limb[i];
out->limb[i] = chain;
chain >>= WBITS;
}
/* The final borrow combined with the extra high word becomes a mask. */
decaf_bool_t borrow = chain+extra; /* = 0 or -1 */
/* Pass 2: add p masked by borrow, i.e. add p only when the mask is set. */
chain = 0;
for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
chain = (chain + out->limb[i]) + (p->limb[i] & borrow);
out->limb[i] = chain;
chain >>= WBITS;
}
}

/**
 * Word-by-word Montgomery-style multiplication of a and b modulo p.
 * For each word of a, the partial product a[i]*b is added into accum, then a
 * multiple of p (chosen from accum[0] and montgomery_factor) is added while
 * the accumulator is shifted down by one word. A final decaf_448_subx
 * subtracts p and conditionally adds it back.
 * NOTE(review): the result presumably carries a factor of R^-1 for the
 * Montgomery radix R; decaf_448_scalar_mul follows up with a multiply by
 * decaf_448_scalar_r2 -- confirm.
 *
 * @param out               Result scalar.
 * @param a, b              Operands.
 * @param p                 Modulus.
 * @param montgomery_factor Per-word reduction constant matching p.
 */
sv decaf_448_montmul (
decaf_448_scalar_t out,
const decaf_448_scalar_t a,
const decaf_448_scalar_t b,
const decaf_448_scalar_t p,
decaf_word_t montgomery_factor
) {
unsigned int i,j;
decaf_word_t accum[DECAF_448_SCALAR_LIMBS+1] = {0};
decaf_word_t hi_carry = 0;
for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
/* accum += a[i] * b */
decaf_word_t mand = a->limb[i];
const decaf_word_t *mier = b->limb;
decaf_dword_t chain = 0;
for (j=0; j<DECAF_448_SCALAR_LIMBS; j++) {
chain += ((decaf_dword_t)mand)*mier[j] + accum[j];
accum[j] = chain;
chain >>= WBITS;
}
accum[j] = chain;
/* accum = (accum + mand * p) >> WBITS: the j==0 word is not stored. */
mand = accum[0] * montgomery_factor;
chain = 0;
mier = p->limb;
for (j=0; j<DECAF_448_SCALAR_LIMBS; j++) {
chain += (decaf_dword_t)mand*mier[j] + accum[j];
if (j) accum[j-1] = chain;
chain >>= WBITS;
}
/* Fold in the top word and the carry left over from the previous round. */
chain += accum[j];
chain += hi_carry;
accum[j-1] = chain;
hi_carry = chain >> WBITS;
}
/* Subtract p, adding it back if that underflows. */
decaf_448_subx(out, accum, p, p, hi_carry);
}

/**
 * Scalar multiplication out = a * b modulo decaf_448_scalar_p.
 * Implemented as two decaf_448_montmul passes: the product of a and b,
 * followed by a multiplication of that result by decaf_448_scalar_r2.
 */
void decaf_448_scalar_mul (decaf_448_scalar_t out,
                           const decaf_448_scalar_t a,
                           const decaf_448_scalar_t b) {
    /* Pass 1: combine the two operands. */
    decaf_448_montmul(out, a, b, decaf_448_scalar_p, DECAF_MONTGOMERY_FACTOR);
    /* Pass 2: multiply the intermediate by r2. */
    decaf_448_montmul(out, out, decaf_448_scalar_r2, decaf_448_scalar_p, DECAF_MONTGOMERY_FACTOR);
}

/**
 * Scalar subtraction out = a - b.
 * decaf_448_subx adds decaf_448_scalar_p back when the raw difference
 * borrows.
 */
void decaf_448_scalar_sub (decaf_448_scalar_t out,
                           const decaf_448_scalar_t a,
                           const decaf_448_scalar_t b) {
    decaf_448_subx(out, a->limb, b, decaf_448_scalar_p, 0);
}

/**
 * Scalar addition out = a + b.
 * The raw word-wise sum (with its carry-out) is passed to decaf_448_subx,
 * which subtracts decaf_448_scalar_p and adds it back if that underflows.
 */
void decaf_448_scalar_add (decaf_448_scalar_t out,
                           const decaf_448_scalar_t a,
                           const decaf_448_scalar_t b) {
    decaf_dword_t carry = 0;
    for (unsigned int k = 0; k < DECAF_448_SCALAR_LIMBS; k++) {
        carry = (carry + a->limb[k]) + b->limb[k];
        out->limb[k] = carry;
        carry >>= WBITS;
    }
    decaf_448_subx(out, out->limb, decaf_448_scalar_p, decaf_448_scalar_p, carry);
}

/** Copy a scalar word by word: out = a. */
void decaf_448_scalar_copy (decaf_448_scalar_t out, const decaf_448_scalar_t a) {
    for (unsigned int k = 0; k < DECAF_448_SCALAR_LIMBS; k++) {
        out->limb[k] = a->limb[k];
    }
}

decaf_bool_t decaf_448_scalar_eq (
const decaf_448_scalar_t a,
const decaf_448_scalar_t b
) {
decaf_word_t diff = 0;
unsigned int i;
for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
diff |= a->limb[i] ^ b->limb[i];
}
return (((decaf_dword_t)diff)-1)>>WBITS;
}

/* *** API begins here *** */

/** identity = (0,1): stored with x = 0, y = 1, z = 1, t = 0. */
const decaf_448_point_t decaf_448_point_identity = {{{0},{1},{1},{0}}};

/**
 * Serialize point p into ser.
 * A single field element is derived from (x,y,z,t), sign-normalized with
 * hibit/cond_neg, canonicalized, and written out as DECAF_448_SER_BYTES
 * bytes, least-significant byte first.
 *
 * @param ser  Output buffer of DECAF_448_SER_BYTES bytes.
 * @param p    Point to encode.
 */
void decaf_448_point_encode( unsigned char ser[DECAF_448_SER_BYTES], const decaf_448_point_t p ) {
/* Can shave off one mul here; not important but makes consistent with paper */
gf a, b, c, d;
gf_mlw ( a, p->y, 1-EDWARDS_D );
gf_mul ( c, a, p->t );
gf_mul ( a, p->x, p->z );
gf_sub ( d, c, a );
gf_add ( a, p->z, p->y );
gf_sub ( b, p->z, p->y );
gf_mul ( c, b, a );
gf_mlw ( b, c, -EDWARDS_D );
gf_isqrt ( a, b );
gf_mlw ( b, a, -EDWARDS_D );
gf_mul ( c, b, a );
gf_mul ( a, c, d );
gf_add ( d, b, b );
gf_mul ( c, d, p->z );
cond_neg ( b, ~hibit(c) );
gf_mul ( c, b, p->y );
gf_add ( a, a, c );
/* Negate a if its high bit is set. */
cond_neg ( a, hibit(a) );
gf_canon(a);
/* Repack LBITS-bit limbs into bytes: buf is a bit queue filled one limb at
 * a time and drained 8 bits at a time; after the last limb it is drained
 * until all DECAF_448_SER_BYTES bytes are written. */
int i, k=0, bits=0;
decaf_dword_t buf=0;
for (i=0; i<DECAF_448_LIMBS; i++) {
buf |= (decaf_dword_t)a[i]<<bits;
for (bits += LBITS; (bits>=8 || i==DECAF_448_LIMBS-1) && k<DECAF_448_SER_BYTES; bits-=8, buf>>=8) {
ser[k++]=buf;
}
}
}

/**
 * Deserialize a field element from DECAF_448_SER_BYTES bytes (least
 * significant byte first) into limbs of LBITS bits each.
 * The value is NOT reduced; the return value reports whether it was in range.
 *
 * @param s    Output field element.
 * @param ser  Input bytes.
 * @return All-ones mask if the decoded value is < P, 0 otherwise.
 */
static decaf_bool_t gf_deser(gf s, const unsigned char ser[DECAF_448_SER_BYTES]) {
/* buf is a bit queue filled 8 bits at a time and drained LBITS at a time. */
unsigned int i, k=0, bits=0;
decaf_dword_t buf=0;
for (i=0; i<DECAF_448_SER_BYTES; i++) {
buf |= (decaf_dword_t)ser[i]<<bits;
for (bits += 8; (bits>=LBITS || i==DECAF_448_SER_BYTES-1) && k<DECAF_448_LIMBS; bits-=LBITS, buf>>=LBITS) {
s[k++] = buf & LMASK;
}
}
/* Borrow chain of s - P: ends at -1 when s < P and at 0 otherwise. */
decaf_sdword_t accum = 0;
FOR_LIMB(i, accum = (accum + s[i] - P[i]) >> WBITS );
return accum;
}
/**
 * Constant-time add or subtract: p = do_sub ? q - r : q + r.
 * Runs the same operation sequence as decaf_448_point_add; the two
 * cond_swap calls turn it into the sequence of decaf_448_point_sub when
 * do_sub is set. do_sub is used as a bit mask (0 or all-ones).
 */
sv decaf_448_point_add_sub (
decaf_448_point_t p,
const decaf_448_point_t q,
const decaf_448_point_t r,
decaf_bool_t do_sub
) {
/* Twisted Edward formulas, complete when 4-torsion isn't involved */
gf a, b, c, d;
gf_sub ( b, q->y, q->x );
gf_sub ( c, r->y, r->x );
gf_add ( d, r->y, r->x );
/* Subtracting exchanges the roles of (r.y - r.x) and (r.y + r.x). */
cond_swap(c,d,do_sub);
gf_mul ( a, c, b );
gf_add ( b, q->y, q->x );
gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t );
gf_mlw ( p->x, b, 2-2*EDWARDS_D );
gf_add ( b, a, p->y );
gf_sub ( c, p->y, a );
gf_mul ( a, q->z, r->z );
gf_add ( a, a, a );
gf_add ( p->y, a, p->x );
gf_sub ( a, a, p->x );
/* Subtracting also exchanges the sum and difference just computed. */
cond_swap(a,p->y,do_sub);
gf_mul ( p->z, a, p->y );
gf_mul ( p->x, p->y, c );
gf_mul ( p->y, a, b );
gf_mul ( p->t, b, c );
}
/**
 * Decode a serialized point.
 * The returned mask is the AND of several checks: the encoded value is < P,
 * it is nonzero unless allow_identity is set, its high bit (hibit) is clear,
 * and the inverse-square-root step below does not hit the rejected case.
 * p is written in all cases, including failure.
 *
 * @param p               Output point.
 * @param ser             DECAF_448_SER_BYTES input bytes.
 * @param allow_identity  Mask; when set, the all-zero encoding is accepted.
 * @return All-ones mask on success, 0 on failure.
 */
decaf_bool_t decaf_448_point_decode (
decaf_448_point_t p,
const unsigned char ser[DECAF_448_SER_BYTES],
decaf_bool_t allow_identity
) {
gf s, a, b, c, d, e;
decaf_bool_t succ = gf_deser(s, ser), zero = gf_eq(s, ZERO);
succ &= allow_identity | ~zero;
succ &= ~hibit(s);
gf_sqr ( a, s );
gf_sub ( p->z, ONE, a );
gf_sqr ( b, p->z );
gf_mlw ( c, a, 4-4*EDWARDS_D );
gf_add ( c, c, b );
gf_mul ( b, c, a );
gf_isqrt ( d, b );
gf_sqr ( e, d );
gf_mul ( a, e, b );
gf_add ( a, a, ONE );
/* Reject when d^2 * b + 1 == 0. */
succ &= ~gf_eq ( a, ZERO );
gf_mul ( b, c, d );
cond_neg ( d, hibit(b) );
gf_add ( p->x, s, s );
gf_mul ( c, d, s );
gf_sub ( b, TWO, p->z );
gf_mul ( a, b, c );
gf_mul ( p->y,a,p->z );
gf_mul ( p->t,p->x,a );
/* zero is all-ones when s == 0, so this adds 1 to limb 0 of y in that case. */
p->y[0] -= zero;
/* TODO: do something safe if ~succ? */
return succ;
}

/**
 * Point subtraction p = q - r.
 * Same operation sequence as decaf_448_point_add, except that the roles of
 * (r.y - r.x) / (r.y + r.x) are exchanged and the sign of the p->x term in
 * the two lines marked below is flipped.
 * Outputs are written only after the inputs they overwrite have been read,
 * so p may alias q (the scalar-multiplication routines call it that way).
 */
void decaf_448_point_sub (
decaf_448_point_t p,
const decaf_448_point_t q,
const decaf_448_point_t r
) {
gf a, b, c, d;
gf_sub ( b, q->y, q->x );
gf_sub ( d, r->y, r->x );
gf_add ( c, r->y, r->x );
gf_mul ( a, c, b );
gf_add ( b, q->y, q->x );
gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t );
gf_mlw ( p->x, b, 2-2*EDWARDS_D );
gf_add ( b, a, p->y );
gf_sub ( c, p->y, a );
gf_mul ( a, q->z, r->z );
gf_add ( a, a, a );
/* Sign-flipped relative to decaf_448_point_add: */
gf_sub ( p->y, a, p->x );
gf_add ( a, a, p->x );
gf_mul ( p->z, a, p->y );
gf_mul ( p->x, p->y, c );
gf_mul ( p->y, a, b );
gf_mul ( p->t, b, c );
}
/**
 * Point addition p = q + r on extended (x,y,z,t) coordinates.
 * Uses the same formulas as decaf_448_point_add_sub with do_sub == 0.
 * Outputs are written only after the inputs they overwrite have been read,
 * so p may alias q and/or r (decaf_448_point_double calls it with q == r).
 */
void decaf_448_point_add (
decaf_448_point_t p,
const decaf_448_point_t q,
const decaf_448_point_t r
) {
gf a, b, c, d;
gf_sub ( b, q->y, q->x );
gf_sub ( c, r->y, r->x );
gf_add ( d, r->y, r->x );
gf_mul ( a, c, b );
gf_add ( b, q->y, q->x );
gf_mul ( p->y, d, b );
gf_mul ( b, r->t, q->t );
gf_mlw ( p->x, b, 2-2*EDWARDS_D );
gf_add ( b, a, p->y );
gf_sub ( c, p->y, a );
gf_mul ( a, q->z, r->z );
gf_add ( a, a, a );
gf_add ( p->y, a, p->x );
gf_sub ( a, a, p->x );
gf_mul ( p->z, a, p->y );
gf_mul ( p->x, p->y, c );
gf_mul ( p->y, a, b );
gf_mul ( p->t, b, c );
}

/**
 * Point doubling a = 2*b.
 * There is no dedicated doubling formula yet (PERF); this simply adds b to
 * itself.
 */
void decaf_448_point_double(decaf_448_point_t a, const decaf_448_point_t b) {
    decaf_448_point_add(a, b, b);
}

/** Copy a point coordinate by coordinate: a = b. */
void decaf_448_point_copy (decaf_448_point_t a, const decaf_448_point_t b) {
    gf_cpy(a->x, b->x);
    gf_cpy(a->y, b->y);
    gf_cpy(a->z, b->z);
    gf_cpy(a->t, b->t);
}

/**
 * Decode a scalar from bytes, least-significant byte first.
 * The words are loaded verbatim, compared against decaf_448_scalar_p, and
 * then reduced by multiplying with decaf_448_scalar_one.
 *
 * @param s    Output scalar (reduced).
 * @param ser  Input bytes; sizeof(decaf_word_t) * DECAF_448_SCALAR_LIMBS
 *             bytes are read.
 * @return All-ones mask if the encoded value was already < decaf_448_scalar_p,
 *         0 otherwise.
 */
decaf_bool_t decaf_448_scalar_decode(
    decaf_448_scalar_t s,
    const unsigned char ser[DECAF_448_SER_BYTES]
) {
    unsigned int w, b, pos = 0;

    /* Assemble each word from consecutive little-endian bytes. */
    for (w = 0; w < DECAF_448_SCALAR_LIMBS; w++) {
        decaf_word_t word = 0;
        for (b = 0; b < sizeof(decaf_word_t); b++, pos++) {
            word |= ((decaf_word_t)ser[pos]) << (8*b);
        }
        s->limb[w] = word;
    }

    /* Borrow chain of s - p: ends at -1 when s < p, at 0 otherwise. */
    decaf_sdword_t borrow = 0;
    for (w = 0; w < DECAF_448_SCALAR_LIMBS; w++) {
        borrow = (borrow + s->limb[w] - decaf_448_scalar_p->limb[w]) >> WBITS;
    }

    decaf_448_scalar_mul(s, s, decaf_448_scalar_one); /* ham-handed reduce */
    return borrow;
}

/**
 * Overwrite a buffer with zeros, for wiping sensitive data.
 *
 * Uses memset_s when the C library advertises Annex K (__STDC_LIB_EXT1__);
 * otherwise writes through a volatile pointer so the stores are not elided
 * as dead.
 *
 * @param s     Start of the buffer.
 * @param size  Number of bytes to clear; 0 is a no-op in the fallback path.
 */
void decaf_bzero (
    void *s,
    size_t size
) {
#ifdef __STDC_LIB_EXT1__
    memset_s(s, size, 0, size);
#else
    volatile uint8_t *destroy = (volatile uint8_t *)s;
    /* The index must be size_t: an `unsigned` index would wrap before
     * reaching a size above UINT_MAX and the loop would never end. */
    size_t i;
    for (i=0; i<size; i++) {
        destroy[i] = 0;
    }
#endif
}


/** Wipe a scalar by zeroing its storage with decaf_bzero. */
void decaf_448_scalar_destroy (decaf_448_scalar_t scalar) {
    /* sizeof is taken on the type, not on the (decayed) parameter. */
    decaf_bzero(scalar, sizeof(decaf_448_scalar_t));
}

/** Explicitly discard a status mask the caller has chosen not to act on. */
static inline void ignore_result ( decaf_bool_t boo ) {
    (void) boo;
}

/**
 * Decode an arbitrarily long little-endian byte string into a scalar.
 *
 * The input is processed in DECAF_448_SER_BYTES-sized chunks from the most
 * significant end: the trailing (possibly partial) chunk is decoded first,
 * then for each earlier chunk the accumulator is multiplied via
 * decaf_448_montmul with decaf_448_scalar_r2 and the chunk is added.
 * Temporaries are wiped before returning.
 *
 * @param s        Output scalar.
 * @param ser      Input bytes.
 * @param ser_len  Number of input bytes; 0 yields the zero scalar.
 */
void decaf_448_scalar_decode_long(
    decaf_448_scalar_t s,
    const unsigned char *ser,
    size_t ser_len
) {
    if (ser_len == 0) {
        decaf_448_scalar_copy(s, decaf_448_scalar_zero);
        return;
    }

    unsigned char tail[DECAF_448_SER_BYTES] = {0};
    decaf_448_scalar_t acc, chunk;

    /* Offset of the last chunk; a length that is an exact multiple of the
     * chunk size still leaves one full chunk to start from. */
    size_t off = ser_len - (ser_len % DECAF_448_SER_BYTES);
    if (off == ser_len) {
        off -= DECAF_448_SER_BYTES;
    }

    /* Zero-padded copy of the last chunk seeds the accumulator. */
    memcpy(tail, ser + off, ser_len - off);
    ignore_result( decaf_448_scalar_decode(acc, tail) );
    decaf_bzero(tail, sizeof(tail));

    while (off != 0) {
        off -= DECAF_448_SER_BYTES;
        decaf_448_montmul(acc, acc, decaf_448_scalar_r2, decaf_448_scalar_p, DECAF_MONTGOMERY_FACTOR);
        ignore_result( decaf_448_scalar_decode(chunk, ser + off) );
        decaf_448_scalar_add(acc, acc, chunk);
    }

    decaf_448_scalar_copy(s, acc);
    decaf_448_scalar_destroy(acc);
    decaf_448_scalar_destroy(chunk);
}

/**
 * Serialize a scalar: each word is written out least-significant byte
 * first, starting with word 0.
 *
 * @param ser  Output buffer; sizeof(decaf_word_t) * DECAF_448_SCALAR_LIMBS
 *             bytes are written.
 * @param s    Scalar to encode.
 */
void decaf_448_scalar_encode(
    unsigned char ser[DECAF_448_SER_BYTES],
    const decaf_448_scalar_t s
) {
    unsigned int pos = 0;
    for (unsigned int w = 0; w < DECAF_448_SCALAR_LIMBS; w++) {
        for (unsigned int b = 0; b < sizeof(decaf_word_t); b++, pos++) {
            ser[pos] = s->limb[w] >> (8*b);
        }
    }
}

/**
 * Variable-base scalar multiplication: a = scalar * b.
 * The scalar is consumed two bits at a time, from bit index
 * (DECAF_448_SCALAR_BITS & ~1) down to 2; bits 1 and 0 are handled after the
 * loop. All selections go through decaf_448_cond_sel /
 * decaf_448_point_add_sub with 0 / all-ones masks rather than branches.
 */
void decaf_448_point_scalarmul (
decaf_448_point_t a,
const decaf_448_point_t b,
const decaf_448_scalar_t scalar
) {
/* w=2 signed window uses about 1.5 adds per bit.
* I figured a few extra lines was worth the 25% speedup.
*/
decaf_448_point_t w,b3,tmp;
decaf_448_point_double(w,b);
/* b3 = b*3 */
decaf_448_point_add(b3,w,b);
int i;
for (i=DECAF_448_SCALAR_BITS &~ 1; i>0; i-=2) {
decaf_word_t bits = scalar->limb[i/WBITS]>>(i%WBITS);
/* tmp = b when the two bits differ, b3 when they are equal. */
decaf_448_cond_sel(tmp,b,b3,((bits^(bits>>1))&1)-1);
decaf_448_point_double(w,w);
/* Add tmp when the upper bit is 1, subtract it when it is 0. */
decaf_448_point_add_sub(w,w,tmp,((bits>>1)&1)-1);
decaf_448_point_double(w,w);
}
decaf_448_point_add_sub(w,w,b,((scalar->limb[0]>>1)&1)-1);
/* low bit is special because of signed window */
decaf_448_cond_sel(tmp,b,decaf_448_point_identity,-(scalar->limb[0]&1));
decaf_448_point_sub(a,w,tmp);
}

/**
 * Double-base scalar multiplication: a = scalarb * b + scalarc * c.
 * Follows the same two-bits-per-iteration signed-window schedule as
 * decaf_448_point_scalarmul, sharing the doublings between the two bases.
 */
void decaf_448_point_double_scalarmul (
decaf_448_point_t a,
const decaf_448_point_t b,
const decaf_448_scalar_t scalarb,
const decaf_448_point_t c,
const decaf_448_scalar_t scalarc
) {
/* w=2 signed window uses about 1.5 adds per bit.
* I figured a few extra lines was worth the 25% speedup.
* NB: if adapting this function to scalarmul by a
* possibly-odd number of unmasked bits, may need to mask.
*/
decaf_448_point_t w,b3,c3,tmp;
decaf_448_point_double(w,b);
decaf_448_point_double(tmp,c);
/* b3 = b*3 */
decaf_448_point_add(b3,w,b);
/* c3 = c*3 */
decaf_448_point_add(c3,tmp,c);
/* w = 2*b + 2*c */
decaf_448_point_add(w,w,tmp);
int i;
for (i=DECAF_448_SCALAR_BITS &~ 1; i>0; i-=2) {
decaf_448_point_double(w,w);
/* Contribution of scalarb for this bit pair. */
decaf_word_t bits = scalarb->limb[i/WBITS]>>(i%WBITS);
decaf_448_cond_sel(tmp,b,b3,((bits^(bits>>1))&1)-1);
decaf_448_point_add_sub(w,w,tmp,((bits>>1)&1)-1);
/* Contribution of scalarc for the same bit pair. */
bits = scalarc->limb[i/WBITS]>>(i%WBITS);
decaf_448_cond_sel(tmp,c,c3,((bits^(bits>>1))&1)-1);
decaf_448_point_add_sub(w,w,tmp,((bits>>1)&1)-1);
decaf_448_point_double(w,w);
}
decaf_448_point_add_sub(w,w,b,((scalarb->limb[0]>>1)&1)-1);
decaf_448_point_add_sub(w,w,c,((scalarc->limb[0]>>1)&1)-1);
/* low bit is special because of signed window */
decaf_448_cond_sel(tmp,b,decaf_448_point_identity,-(scalarb->limb[0]&1));
decaf_448_point_sub(w,w,tmp);
decaf_448_cond_sel(tmp,c,decaf_448_point_identity,-(scalarc->limb[0]&1));
decaf_448_point_sub(a,w,tmp);
}

/**
 * Point equality, taken modulo 2-torsion by comparing x/y.
 * The ratios are compared by cross-multiplication:
 * p.y * q.x == q.y * p.x.
 *
 * @return All-ones mask when the points compare equal, 0 otherwise.
 */
decaf_bool_t decaf_448_point_eq ( const decaf_448_point_t p, const decaf_448_point_t q ) {
    gf lhs, rhs;
    gf_mul ( lhs, p->y, q->x );
    gf_mul ( rhs, q->y, p->x );
    return gf_eq(lhs, rhs);
}

/**
 * Map DECAF_448_SER_BYTES bytes of hash output to a point.
 * The bytes are read as a field element r; out-of-range inputs are not
 * rejected (the gf_deser result is discarded) but canonicalized. The rest is
 * a fixed sequence of field operations with all data-dependent choices made
 * through cond_sel / cond_swap / cond_neg.
 * NOTE(review): per the function name the output distribution is not uniform;
 * decaf_448_point_from_hash_uniform adds two such points.
 *
 * @param p    Output point.
 * @param ser  Input bytes.
 */
void decaf_448_point_from_hash_nonuniform (
decaf_448_point_t p,
const unsigned char ser[DECAF_448_SER_BYTES]
) {
gf r,urr,a,b,c,dee,e,ur2_d,udr2_1;
(void)gf_deser(r,ser);
gf_canon(r);
gf_sqr(a,r);
/* urr = QUADRATIC_NONRESIDUE * r^2 */
gf_mlw(urr,a,QUADRATIC_NONRESIDUE);
/* dee = EDWARDS_D as a field element */
gf_mlw(dee,ONE,EDWARDS_D);
gf_add(a,urr,ONE);
gf_sub(ur2_d,dee,urr);
gf_mul(c,a,ur2_d);
gf_mlw(b,urr,-EDWARDS_D);
gf_add(udr2_1,b,ONE);
gf_mul(a,c,udr2_1);
gf_mlw(c,a,EDWARDS_D+1);
gf_isqrt(b,c); /* FIELD: if 5 mod 8, multiply result by u. */
gf_sqr(a,b);
gf_mul(e,a,c);
/* square is all-ones when b^2 * c == 1. */
decaf_bool_t square = gf_eq(e,ONE);
gf_mul(a,b,r);
cond_sel(b,a,b,square);
gf_mlw(a,b,EDWARDS_D+1);
cond_swap(ur2_d,udr2_1,~square);
gf_mul(e,ur2_d,a);
cond_neg(e,hibit(e)^square);
gf_mul(b,udr2_1,a);
gf_sqr(c,b);
gf_sqr(a,e);
gf_sub(a,ONE,a);
gf_add(e,e,e);
gf_add(b,dee,c);
gf_sub(c,dee,c);
gf_mul(p->x,e,c);
gf_mul(p->z,a,c);
gf_mul(p->y,b,a);
gf_mul(p->t,b,e);
}

/**
 * Map 2*DECAF_448_SER_BYTES bytes of hash output to a point by mapping each
 * half with decaf_448_point_from_hash_nonuniform and adding the two results.
 *
 * @param pt           Output point.
 * @param hashed_data  Input bytes; the first half feeds the first point and
 *                     the second half the second.
 */
void decaf_448_point_from_hash_uniform (
    decaf_448_point_t pt,
    const unsigned char hashed_data[2*DECAF_448_SER_BYTES]
) {
    decaf_448_point_t second;
    decaf_448_point_from_hash_nonuniform(pt, hashed_data);
    decaf_448_point_from_hash_nonuniform(second, &hashed_data[DECAF_448_SER_BYTES]);
    decaf_448_point_add(pt, pt, second);
}

/**
 * Check the internal consistency of a point's coordinates.
 * Three conditions are ANDed together:
 *   x*y == z*t,
 *   y^2 - x^2 == z^2 - (1-EDWARDS_D)*t^2,
 *   z != 0.
 *
 * @return All-ones mask when every condition holds, 0 otherwise.
 */
decaf_bool_t decaf_448_point_valid (
const decaf_448_point_t p
) {
gf a,b,c;
/* x*y == z*t */
gf_mul(a,p->x,p->y);
gf_mul(b,p->z,p->t);
decaf_bool_t out = gf_eq(a,b);
/* a = y^2 - x^2 */
gf_sqr(a,p->x);
gf_sqr(b,p->y);
gf_sub(a,b,a);
/* b = z^2 - (1-EDWARDS_D)*t^2 */
gf_sqr(b,p->t);
gf_mlw(c,b,1-EDWARDS_D);
gf_sqr(b,p->z);
gf_sub(b,b,c);
out &= gf_eq(a,b);
/* z must be nonzero */
out &= ~gf_eq(p->z,ZERO);
return out;
}

/**
 * Build the "precomputed table" for point b.
 * In this implementation the table is a single slot holding a copy of b.
 */
void decaf_448_precompute (decaf_448_precomputed_s *a, const decaf_448_point_t b) {
    decaf_448_point_copy(a->p[0], b);
}

/**
 * Scalar multiplication using a precomputed table: a = scalar * (table point).
 * The table holds just the point itself here, so this forwards to
 * decaf_448_point_scalarmul.
 */
void decaf_448_precomputed_scalarmul (
    decaf_448_point_t a,
    const decaf_448_precomputed_s *b,
    const decaf_448_scalar_t scalar
) {
    decaf_448_point_scalarmul(a, b->p[0], scalar);
}

+ 3
- 2
test/test_scalarmul.c View File

@@ -112,7 +112,8 @@ single_scalarmul_compatibility_test (
untwist_and_double_and_serialize(vt, &work);
decaf_448_point_t ed2, ed3;
decaf_448_precomputed_t dpre;
struct decaf_448_precomputed_s *dpre;
posix_memalign((void**)&dpre, alignof_decaf_448_precomputed_s, sizeof_decaf_448_precomputed_s);
tw_extended_a_t ed;
convert_tw_extensible_to_tw_extended(ed, &text);
decaf_448_point_scalarmul(
@@ -126,7 +127,7 @@ single_scalarmul_compatibility_test (
dpre,
(struct decaf_448_scalar_s *)scalar
);
free(dpre);

scalarmul_ed(ed, scalar);
field_copy(work.x, ed->x);


Loading…
Cancel
Save