Browse Source

perf improvement in keygen, sign; fix perf regression for decaf versions of these

master
Mike Hamburg 9 years ago
parent
commit
edb25d093c
6 changed files with 76 additions and 39 deletions
  1. +23
    -22
      include/decaf.h
  2. +1
    -1
      include/decaf_crypto.h
  3. +3
    -1
      src/decaf_crypto.c
  4. +46
    -13
      src/decaf_fast.c
  5. +2
    -1
      src/include/constant_time.h
  6. +1
    -1
      src/scalarmul.c

+ 23
- 22
include/decaf.h View File

@@ -33,6 +33,7 @@
#define __attribute__((x))
#endif
#define API_VIS __attribute__((visibility("default")))
#define NOINLINE __attribute__((noinline))
#define WARN_UNUSED __attribute__((warn_unused_result))
#define NONNULL1 __attribute__((nonnull(1)))
#define NONNULL2 __attribute__((nonnull(1,2)))
@@ -130,7 +131,7 @@ extern "C" {
decaf_bool_t decaf_448_scalar_decode (
decaf_448_scalar_t s,
const unsigned char ser[DECAF_448_SCALAR_BYTES]
) API_VIS WARN_UNUSED NONNULL2;
) API_VIS WARN_UNUSED NONNULL2 NOINLINE;

/**
* @brief Read a scalar from wire format or from bytes. Reduces mod
@@ -144,7 +145,7 @@ void decaf_448_scalar_decode_long (
decaf_448_scalar_t s,
const unsigned char *ser,
size_t ser_len
) API_VIS NONNULL2;
) API_VIS NONNULL2 NOINLINE;
/**
* @brief Serialize a scalar to wire format.
@@ -155,7 +156,7 @@ void decaf_448_scalar_decode_long (
void decaf_448_scalar_encode (
unsigned char ser[DECAF_448_SCALAR_BYTES],
const decaf_448_scalar_t s
) API_VIS NONNULL2;
) API_VIS NONNULL2 NOINLINE NOINLINE;
/**
* @brief Add two scalars. The scalars may use the same memory.
@@ -167,7 +168,7 @@ void decaf_448_scalar_add (
decaf_448_scalar_t out,
const decaf_448_scalar_t a,
const decaf_448_scalar_t b
) API_VIS NONNULL3;
) API_VIS NONNULL3 NOINLINE;

/**
* @brief Compare two scalars.
@@ -179,7 +180,7 @@ void decaf_448_scalar_add (
decaf_bool_t decaf_448_scalar_eq (
const decaf_448_scalar_t a,
const decaf_448_scalar_t b
) API_VIS WARN_UNUSED NONNULL2;
) API_VIS WARN_UNUSED NONNULL2 NOINLINE;

/**
* @brief Subtract two scalars. The scalars may use the same memory.
@@ -191,7 +192,7 @@ void decaf_448_scalar_sub (
decaf_448_scalar_t out,
const decaf_448_scalar_t a,
const decaf_448_scalar_t b
) API_VIS NONNULL3;
) API_VIS NONNULL3 NOINLINE;

/**
* @brief Multiply two scalars. The scalars may use the same memory.
@@ -203,7 +204,7 @@ void decaf_448_scalar_mul (
decaf_448_scalar_t out,
const decaf_448_scalar_t a,
const decaf_448_scalar_t b
) API_VIS NONNULL3;
) API_VIS NONNULL3 NOINLINE;

/**
* @brief Copy a scalar. The scalars may use the same memory, in which
@@ -225,7 +226,7 @@ void decaf_448_scalar_copy (
void decaf_448_point_encode (
uint8_t ser[DECAF_448_SER_BYTES],
const decaf_448_point_t pt
) API_VIS NONNULL2;
) API_VIS NONNULL2 NOINLINE;

/**
* @brief Decode a point from a sequence of bytes.
@@ -244,7 +245,7 @@ decaf_bool_t decaf_448_point_decode (
decaf_448_point_t pt,
const uint8_t ser[DECAF_448_SER_BYTES],
decaf_bool_t allow_identity
) API_VIS WARN_UNUSED NONNULL2;
) API_VIS WARN_UNUSED NONNULL2 NOINLINE;

/**
* @brief Copy a point. The input and output may alias,
@@ -270,7 +271,7 @@ void decaf_448_point_copy (
decaf_bool_t decaf_448_point_eq (
const decaf_448_point_t a,
const decaf_448_point_t b
) API_VIS WARN_UNUSED NONNULL2;
) API_VIS WARN_UNUSED NONNULL2 NOINLINE;

/**
* @brief Add two points to produce a third point. The
@@ -285,7 +286,7 @@ void decaf_448_point_add (
decaf_448_point_t sum,
const decaf_448_point_t a,
const decaf_448_point_t b
) API_VIS NONNULL3;
) API_VIS NONNULL3; // TODO: NOINLINE?

/**
* @brief Double a point. Equivalent to
@@ -297,7 +298,7 @@ void decaf_448_point_add (
void decaf_448_point_double (
decaf_448_point_t two_a,
const decaf_448_point_t a
) API_VIS NONNULL2;
) API_VIS NONNULL2; // TODO: NOINLINE?

/**
* @brief Subtract two points to produce a third point. The
@@ -312,7 +313,7 @@ void decaf_448_point_sub (
decaf_448_point_t diff,
const decaf_448_point_t a,
const decaf_448_point_t b
) API_VIS NONNULL3;
) API_VIS NONNULL3; // TODO: NOINLINE?

/**
* @brief Multiply a base point by a scalar: scaled = scalar*base.
@@ -325,7 +326,7 @@ void decaf_448_point_scalarmul (
decaf_448_point_t scaled,
const decaf_448_point_t base,
const decaf_448_scalar_t scalar
) API_VIS NONNULL3;
) API_VIS NONNULL3 NOINLINE;

/**
* @brief Multiply a base point by a scalar: scaled = scalar*base.
@@ -350,7 +351,7 @@ decaf_bool_t decaf_448_direct_scalarmul (
const decaf_448_scalar_t scalar,
decaf_bool_t allow_identity,
decaf_bool_t short_circuit
) API_VIS NONNULL3 WARN_UNUSED;
) API_VIS NONNULL3 WARN_UNUSED NOINLINE;

/**
* @brief Precompute a table for fast scalar multiplication.
@@ -364,7 +365,7 @@ decaf_bool_t decaf_448_direct_scalarmul (
void decaf_448_precompute (
decaf_448_precomputed_s *a,
const decaf_448_point_t b
) API_VIS NONNULL2;
) API_VIS NONNULL2 NOINLINE;

/**
* @brief Multiply a precomputed base point by a scalar:
@@ -381,7 +382,7 @@ void decaf_448_precomputed_scalarmul (
decaf_448_point_t scaled,
const decaf_448_precomputed_s *base,
const decaf_448_scalar_t scalar
) API_VIS NONNULL3;
) API_VIS NONNULL3 NOINLINE;

/**
* @brief Multiply two base points by two scalars:
@@ -405,7 +406,7 @@ void decaf_448_point_double_scalarmul (
const decaf_448_scalar_t scalar1,
const decaf_448_point_t base2,
const decaf_448_scalar_t scalar2
) API_VIS NONNULL5;
) API_VIS NONNULL5 NOINLINE;

/**
* @brief Test that a point is valid, for debugging purposes.
@@ -416,7 +417,7 @@ void decaf_448_point_double_scalarmul (
*/
decaf_bool_t decaf_448_point_valid (
const decaf_448_point_t toTest
) API_VIS WARN_UNUSED NONNULL1;
) API_VIS WARN_UNUSED NONNULL1 NOINLINE;

/**
* @brief Almost-Elligator-like hash to curve.
@@ -448,7 +449,7 @@ decaf_bool_t decaf_448_point_valid (
void decaf_448_point_from_hash_nonuniform (
decaf_448_point_t pt,
const unsigned char hashed_data[DECAF_448_SER_BYTES]
) API_VIS NONNULL2;
) API_VIS NONNULL2 NOINLINE;

/**
* @brief Indifferentiable hash function encoding to curve.
@@ -461,7 +462,7 @@ void decaf_448_point_from_hash_nonuniform (
void decaf_448_point_from_hash_uniform (
decaf_448_point_t pt,
const unsigned char hashed_data[2*DECAF_448_SER_BYTES]
) API_VIS NONNULL2;
) API_VIS NONNULL2 NOINLINE;

/**
* @brief Overwrite data with zeros. Use memset_s if available.
@@ -469,7 +470,7 @@ void decaf_448_point_from_hash_uniform (
void decaf_bzero (
void *data,
size_t size
) NONNULL1 API_VIS;
) NONNULL1 API_VIS NOINLINE;

/**
* @brief Overwrite scalar with zeros.


+ 1
- 1
include/decaf_crypto.h View File

@@ -16,7 +16,7 @@

#define DECAF_448_SYMMETRIC_KEY_BYTES 32
/** @cond internal */
#define API_VIS __attribute__((visibility("default")))
#define API_VIS __attribute__((visibility("default"))) __attribute__((noinline)) // TODO: synergize with decaf.h
#define WARN_UNUSED __attribute__((warn_unused_result))
#define NONNULL1 __attribute__((nonnull(1)))
#define NONNULL2 __attribute__((nonnull(1,2)))


+ 3
- 1
src/decaf_crypto.c View File

@@ -10,6 +10,7 @@

#include "decaf_crypto.h"
#include <string.h>
#include "sha512.h"

static const unsigned int DECAF_448_SCALAR_OVERKILL_BYTES = DECAF_448_SCALAR_BYTES + 8;

@@ -18,9 +19,10 @@ void decaf_448_derive_private_key (
const decaf_448_symmetric_key_t proto
) {
const char *magic = "decaf_448_derive_private_key";
keccak_sponge_t sponge;
uint8_t encoded_scalar[DECAF_448_SCALAR_OVERKILL_BYTES];
decaf_448_point_t pub;

keccak_sponge_t sponge;
shake256_init(sponge);
shake256_update(sponge, proto, sizeof(decaf_448_symmetric_key_t));
shake256_update(sponge, (const unsigned char *)magic, strlen(magic));


+ 46
- 13
src/decaf_fast.c View File

@@ -71,6 +71,16 @@ static const decaf_448_scalar_t decaf_448_scalar_r2 = {{{
SC_LIMB(0x3402a939f823b729)
}}};

/**
 * Fixed multiplicand passed to decaf_448_montmul (together with
 * decaf_448_scalar_p and DECAF_MONTGOMERY_FACTOR) in the "ham-handed reduce"
 * steps of decaf_448_scalar_decode and decaf_448_scalar_decode_long.
 *
 * Seven limbs are given; the last three are zero.
 *
 * NOTE(review): presumably this is R mod p (the Montgomery form of 1), the
 * companion of decaf_448_scalar_r2, so that a single montmul by it reduces
 * its other operand mod p without changing its value -- confirm against the
 * definition of decaf_448_montmul.
 */
static const decaf_448_scalar_t decaf_448_scalar_r1 = {{{
SC_LIMB(0x721cf5b5529eec34),
SC_LIMB(0x7a4cf635c8e9c2ab),
SC_LIMB(0xeec492d944a725bf),
SC_LIMB(0x000000020cd77058),
SC_LIMB(0),
SC_LIMB(0),
SC_LIMB(0)
}}};

static const decaf_word_t DECAF_MONTGOMERY_FACTOR = (decaf_word_t)(0x3bd440fae918bc5ull);

/** base = twist of Goldilocks base point (~,19). */
@@ -611,25 +621,33 @@ void decaf_448_point_copy (
gf_cpy(a->t, b->t);
}

decaf_bool_t decaf_448_scalar_decode(
siv decaf_448_scalar_decode_short (
decaf_448_scalar_t s,
const unsigned char ser[DECAF_448_SER_BYTES]
const unsigned char ser[DECAF_448_SER_BYTES],
unsigned int nbytes
) {
unsigned int i,j,k=0;
for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
decaf_word_t out = 0;
for (j=0; j<sizeof(decaf_word_t); j++,k++) {
for (j=0; j<sizeof(decaf_word_t) && k<nbytes; j++,k++) {
out |= ((decaf_word_t)ser[k])<<(8*j);
}
s->limb[i] = out;
}
}

decaf_bool_t decaf_448_scalar_decode(
decaf_448_scalar_t s,
const unsigned char ser[DECAF_448_SER_BYTES]
) {
unsigned int i;
decaf_448_scalar_decode_short(s, ser, DECAF_448_SER_BYTES);
decaf_sdword_t accum = 0;
for (i=0; i<DECAF_448_SCALAR_LIMBS; i++) {
accum = (accum + s->limb[i] - decaf_448_scalar_p->limb[i]) >> WBITS;
}
decaf_448_scalar_mul(s,s,decaf_448_scalar_one); /* ham-handed reduce */
decaf_448_montmul(s,s,decaf_448_scalar_r1,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR); /* ham-handed reduce */
return accum;
}
@@ -671,16 +689,21 @@ void decaf_448_scalar_decode_long(
}
size_t i;
unsigned char tmp[DECAF_448_SER_BYTES] = {0};
decaf_448_scalar_t t1, t2;

i = ser_len - (ser_len%DECAF_448_SER_BYTES);
if (i==ser_len) i -= DECAF_448_SER_BYTES;
memcpy(tmp, ser+i, ser_len - i);
ignore_result( decaf_448_scalar_decode(t1, tmp) );
decaf_bzero(tmp, sizeof(tmp));
decaf_448_scalar_decode_short(t1, &ser[i], ser_len-i);

if (ser_len == sizeof(*ser)) {
assert(i==0);
/* ham-handed reduce */
decaf_448_montmul(s,t1,decaf_448_scalar_r1,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
decaf_448_scalar_destroy(t1);
return;
}

while (i) {
i -= DECAF_448_SER_BYTES;
decaf_448_montmul(t1,t1,decaf_448_scalar_r2,decaf_448_scalar_p,DECAF_MONTGOMERY_FACTOR);
@@ -1075,6 +1098,15 @@ decaf_448_precompute (

extern const decaf_448_scalar_t decaf_448_precomputed_scalarmul_adjustment;

/**
 * Look up one niels-form table entry by forwarding to constant_time_lookup
 * with the element size fixed at sizeof(niels_s), so callers do not pass the
 * size themselves.
 *
 * @param [out] ni Destination for the selected entry.
 * @param [in] table Table to select from.
 * @param [in] nelts Number of entries in the table, passed through unchanged.
 * @param [in] idx Index of the entry to select, passed through unchanged.
 *
 * NOTE(review): the timing behaviour is whatever constant_time_lookup
 * (constant_time.h) provides; this wrapper adds no logic of its own.
 */
siv constant_time_lookup_niels (
niels_s *__restrict__ ni,
const niels_t *table,
int nelts,
int idx
) {
/* Element size is the struct size, not the size of a pointer or array handle. */
constant_time_lookup(ni, table, sizeof(niels_s), nelts, idx);
}

void decaf_448_precomputed_scalarmul (
decaf_448_point_t out,
const decaf_448_precomputed_s *table,
@@ -1094,7 +1126,7 @@ void decaf_448_precomputed_scalarmul (
for (j=0; j<n; j++) {
int tab = 0;
for (k=0; k<t; k++) {
unsigned int bit = (s-1-i) + k*s + j*(s*t);
if (bit < SCALAR_WORDS * WBITS) {
@@ -1105,8 +1137,9 @@ void decaf_448_precomputed_scalarmul (
decaf_bool_t invert = (tab>>(t-1))-1;
tab ^= invert;
tab &= (1<<(t-1)) - 1;
constant_time_lookup(ni, &table->table[j<<(t-1)], sizeof(ni), 1<<(t-1), tab);

constant_time_lookup_niels(ni, &table->table[j<<(t-1)], 1<<(t-1), tab);

cond_neg_niels(ni, invert);
if (i||j) {
add_niels_to_pt(out, ni, j==n-1 && i<s-1);


+ 2
- 1
src/include/constant_time.h View File

@@ -12,6 +12,7 @@
#define __CONSTANT_TIME_H__ 1

#include "word.h"
#include <string.h>

/*
* Constant-time operations on hopefully-compile-time-sized memory
@@ -148,7 +149,7 @@ constant_time_lookup (
const unsigned char *table = (const unsigned char *)table_;
word_t j,k;
really_memset(out, 0, elem_bytes);
memset(out, 0, elem_bytes);
for (j=0; j<n_table; j++, big_i-=big_one) {
big_register_t br_mask = br_is_zero(big_i);
for (k=0; k<=elem_bytes-sizeof(big_register_t); k+=sizeof(big_register_t)) {


+ 1
- 1
src/scalarmul.c View File

@@ -392,7 +392,7 @@ scalarmul_fixed_base (
for (j=0; j<n; j++) {
int tab = 0;
/*
/*
* PERF: This computation takes about 1.5µs on SBR, i.e. 2-3% of the
* time of a keygen or sign op. Surely it is possible to speed it up.
*/


Loading…
Cancel
Save