Browse Source

move (some of the?) per-field code to src/per_field.c

master
Michael Hamburg 9 years ago
parent
commit
2e23ac747b
9 changed files with 129 additions and 122 deletions
  1. +8
    -3
      Makefile
  2. +0
    -1
      src/curve_ed25519/curve_data.inc.c
  3. +0
    -1
      src/curve_ed448goldilocks/curve_data.inc.c
  4. +0
    -108
      src/decaf.c
  5. +1
    -0
      src/decaf_crypto.c
  6. +7
    -2
      src/gen_headers/f_field_h.py
  7. +11
    -0
      src/include/word.h
  8. +3
    -7
      src/p25519/f_arithmetic.c
  9. +99
    -0
      src/per_field.c

+ 8
- 3
Makefile View File

@@ -126,7 +126,7 @@ $(GEN_HEADERS): src/gen_headers/*.py src/public_include/decaf/*
################################################################
define define_field
ARCH_FOR_$(1) ?= $(2)
COMPONENTS_OF_$(1) = $$(BUILD_OBJ)/$(1)_impl.o $$(BUILD_OBJ)/$(1)_arithmetic.o
COMPONENTS_OF_$(1) = $$(BUILD_OBJ)/$(1)_impl.o $$(BUILD_OBJ)/$(1)_arithmetic.o $$(BUILD_OBJ)/$(1)_per_field.o
LIBCOMPONENTS += $$(COMPONENTS_OF_$(1))

$$(BUILD_ASM)/$(1)_arithmetic.s: src/$(1)/f_arithmetic.c $$(HEADERS)
@@ -138,6 +138,11 @@ $$(BUILD_ASM)/$(1)_impl.s: src/$(1)/$$(ARCH_FOR_$(1))/f_impl.c $$(HEADERS)
$$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$$(ARCH_FOR_$(1)) -I $(BUILD_H)/$(1) \
-I $(BUILD_H)/$(1)/$$(ARCH_FOR_$(1)) -I src/include/$$(ARCH_FOR_$(1)) \
-S -c -o $$@ $$<

$$(BUILD_ASM)/$(1)_per_field.s: src/per_field.c $$(HEADERS)
$$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$$(ARCH_FOR_$(1)) -I $(BUILD_H)/$(1) \
-I $(BUILD_H)/$(1)/$$(ARCH_FOR_$(1)) -I src/include/$$(ARCH_FOR_$(1)) \
-S -c -o $$@ $$<
endef

################################################################
@@ -171,8 +176,8 @@ $$(BUILD_ASM)/decaf_$(1).s: src/decaf.c $$(HEADERS)

$$(BUILD_ASM)/decaf_crypto_$(1).s: src/decaf_crypto.c $$(HEADERS)
$$(CC) $$(CFLAGS) \
-I src/curve_$(1)/ \
-I $(BUILD_H)/curve_$(1) \
-I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) -I src/include/$$(ARCH_FOR_$(2)) \
-I $(BUILD_H)/curve_$(1) -I $(BUILD_H)/$(2) -I $(BUILD_H)/$(2)/$$(ARCH_FOR_$(2)) \
-S -c -o $$@ $$<

LIBCOMPONENTS += $$(BUILD_OBJ)/decaf_$(1).o $$(BUILD_OBJ)/decaf_tables_$(1).o


+ 0
- 1
src/curve_ed25519/curve_data.inc.c View File

@@ -8,7 +8,6 @@
#define scalar_t decaf_255_scalar_t
#define point_t decaf_255_point_t
#define precomputed_s decaf_255_precomputed_s
#define SER_BYTES DECAF_255_SER_BYTES
#define IMAGINE_TWIST 1
#define P_MOD_8 5
#define COFACTOR 8


+ 0
- 1
src/curve_ed448goldilocks/curve_data.inc.c View File

@@ -7,7 +7,6 @@
#define scalar_t decaf_448_scalar_t
#define point_t decaf_448_point_t
#define precomputed_s decaf_448_precomputed_s
#define SER_BYTES DECAF_448_SER_BYTES
#define IMAGINE_TWIST 0
#define P_MOD_8 7
#define COFACTOR 4


+ 0
- 108
src/decaf.c View File

@@ -50,17 +50,8 @@ extern const gf SQRT_ONE_MINUS_D; /* TODO: Intern this? */
const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}};
extern const scalar_t API_NS(sc_r2);
extern const decaf_word_t API_NS(MONTGOMERY_FACTOR);

extern const point_t API_NS(point_base);

/* These are externally exposed (but private) instead of static so that
* f_arithmetic.c can use it
*/
#define ONE API_NS(ONE)
#define ZERO API_NS(ZERO)
#define gf_eq API_NS(gf_eq)
const gf ZERO = {{{0}}}, ONE = {{{1}}};

/* Projective Niels coordinates */
typedef struct { gf a, b, c; } niels_s, niels_t[1];
typedef struct { niels_t n; gf z; } __attribute__((aligned(32))) pniels_s, pniels_t[1]; /* MAGIC alignment */
@@ -75,93 +66,9 @@ const precomputed_s *API_NS(precomputed_base) =
const size_t API_NS2(sizeof,precomputed_s) = sizeof(precomputed_s);
const size_t API_NS2(alignof,precomputed_s) = 32;

/*
 * Loop helper macros.
 *
 * UNROLL is written directly in front of a loop. When compiling with clang
 * and 100*major + minor > 305 (i.e. versions newer than 3.5) it expands to a
 * pragma asking for the following loop to be fully unrolled; with any other
 * compiler it expands to nothing.
 *
 * FOR_LIMB(i,op) declares an unsigned counter named i and evaluates op once
 * for every i in [0, NLIMBS). FOR_LIMB_U is the same loop prefixed by UNROLL.
 */
/* TODO PERF: Vectorize vs unroll */
#ifdef __clang__
#if 100*__clang_major__ + __clang_minor__ > 305
#define UNROLL _Pragma("clang loop unroll(full)") // PERF TODO: vectorize?
#endif
#endif

/* Fallback: no unrolling hint available, so UNROLL is an empty token. */
#ifndef UNROLL
#define UNROLL
#endif

#define FOR_LIMB(i,op) { unsigned int i=0; for (i=0; i<NLIMBS; i++) { op; }}
#define FOR_LIMB_U(i,op) { unsigned int i=0; UNROLL for (i=0; i<NLIMBS; i++) { op; }}

/* FUTURE: move this code from per-curve to per-field header
* (like f_arithmetic.c but same for all fields)
*/
/**
 * Serialize a field element into SER_BYTES bytes.
 *
 * The input is copied and the copy is passed through gf_strong_reduce(), so
 * x itself is not modified. Limbs are visited in LIMBPERM order and packed
 * into a bit buffer; output bytes are taken from the low end of that buffer,
 * so serial[0] holds the low 8 bits of the first limb visited.
 *
 * @param [out] serial Receives SER_BYTES bytes.
 * @param [in] x Field element to encode.
 */
void gf_serialize (uint8_t serial[SER_BYTES], const gf x) {
gf red;
gf_copy(red, x);
gf_strong_reduce(red);
/* j = next limb to consume, fill = number of pending bits in buffer. */
unsigned int j=0, fill=0;
dword_t buffer = 0;
UNROLL for (unsigned int i=0; i<SER_BYTES; i++) {
/* Fewer than a byte pending and limbs remain: splice the next limb in
 * above the bits that are already buffered. */
if (fill < 8 && j < NLIMBS) {
buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill;
fill += LIMB_PLACE_VALUE(LIMBPERM(j));
j++;
}
/* Storing into a uint8_t keeps only the low 8 bits of buffer. */
serial[i] = buffer;
fill -= 8;
buffer >>= 8;
}
}

/**
 * Deserialize a field element from SER_BYTES bytes.
 *
 * Input bytes are consumed in order into a bit buffer, which is then cut
 * into limbs of LIMB_PLACE_VALUE bits each, stored in LIMBPERM order. While
 * the limbs are produced, a running borrow of (x - MODULUS) is tracked in
 * scarry so the decoded value can be range-checked.
 *
 * @param [out] x Receives the decoded limbs.
 * @param [in] serial SER_BYTES bytes of input.
 * @return word_is_zero(buffer) & ~word_is_zero(scarry). Presumably this is
 * an all-ones mask when no bits are left over and the subtraction of
 * MODULUS borrowed (value below the modulus), and 0 otherwise -- confirm
 * against the definition of word_is_zero().
 */
mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) {
/* j = next input byte, fill = number of pending bits in buffer. */
unsigned int j=0, fill=0;
dword_t buffer = 0;
dsword_t scarry = 0;
UNROLL for (unsigned int i=0; i<NLIMBS; i++) {
/* Pull in bytes until a whole limb is buffered or input is exhausted. */
UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < SER_BYTES) {
buffer |= ((dword_t)serial[j]) << fill;
fill += 8;
j++;
}
/* Every limb except the last is masked to its place value; the last
 * one takes whatever remains in the buffer. */
x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer;
fill -= LIMB_PLACE_VALUE(LIMBPERM(i));
buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i));
/* Keep only what is shifted out above a full word of (limb - modulus
 * limb + previous borrow). NOTE(review): right-shifting a negative
 * signed value is implementation-defined in C; this presumably relies
 * on an arithmetic shift. */
scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t));
}
return word_is_zero(buffer) & ~word_is_zero(scarry);
}

/**
 * Reduce a field element to canonical form, in place.
 *
 * gf_weak_reduce() is applied first; per the original author's note the
 * value is then below 2p, so a single conditional subtraction of p is
 * enough. p is subtracted unconditionally and then added back under a mask
 * if the subtraction borrowed.
 *
 * Limbs are read and written through LIMBPERM(i). Earlier code wrote
 * a->limb[i] while reading a->limb[LIMBPERM(i)], which only agrees when
 * LIMBPERM is the identity permutation.
 *
 * @param [in,out] a Element to reduce.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high */
    gf_weak_reduce(a); /* PERF: only really need one step of this, but whatevs */

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<NLIMBS; i++) {
        scarry = scarry + a->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)];
        a->limb[LIMBPERM(i)] = scarry & LIMB_MASK(LIMBPERM(i));
        scarry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^B,
     * where B is the sum of the limb place values (2^255 for p25519),
     * so let's add back in p.  will carry back off the top for 2^B.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* scarry is 0 or -1 here, so scarry_0 acts as a select mask for p. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back */
    for (unsigned int i=0; i<NLIMBS; i++) {
        carry = carry + a->limb[LIMBPERM(i)] + (scarry_0 & MODULUS->limb[LIMBPERM(i)]);
        a->limb[LIMBPERM(i)] = carry & LIMB_MASK(LIMBPERM(i));
        carry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
    }

    /* The carry out of the top must cancel the earlier borrow. */
    assert(word_is_zero(carry + scarry_0));
}

/** Constant time, x = is_z ? z : y */
static INLINE void
cond_sel(gf x, const gf y, const gf z, decaf_bool_t is_z) {
@@ -186,21 +93,6 @@ cond_swap(gf x, gf_s *__restrict__ y, decaf_bool_t swap) {
}
}

/**
 * Test two field elements for equality.
 *
 * The difference a-b is brought to canonical form with gf_strong_reduce()
 * and all of its limbs are OR-ed together; the result is whatever
 * word_is_zero() reports for that accumulated word.
 */
/* Kept non-static: the inverse square root code calls it. */
decaf_word_t gf_eq(const gf a, const gf b);
decaf_word_t gf_eq(const gf a, const gf b) {
    gf diff;
    gf_sub(diff, a, b);
    gf_strong_reduce(diff);

    /* Any set bit in any limb makes the accumulator nonzero. */
    decaf_word_t acc = 0;
    unsigned int idx = 0;
    while (idx < sizeof(diff->limb)/sizeof(diff->limb[0])) {
        acc |= diff->limb[idx];
        idx++;
    }

    return word_is_zero(acc);
}

/** Inverse square root using addition chain. */
static decaf_bool_t
gf_isqrt_chk(gf y, const gf x, decaf_bool_t allow_zero) {


+ 1
- 0
src/decaf_crypto.c View File

@@ -8,6 +8,7 @@
* @brief Example Decaf crypto routines
*/

#include "f_field.h" /* for SER_BYTES; FUTURE: find a better way to do this? */
#include <decaf/crypto.h>
#include <string.h>



+ 7
- 2
src/gen_headers/f_field_h.py View File

@@ -14,14 +14,19 @@ f_field_h = gen_file(

#define __DECAF_%(gf_shortname)s_GF_DEFINED__ 1
#define NLIMBS (%(gf_impl_bits)d/sizeof(word_t)/8)
#define SER_BYTES ((%(gf_bits)d-1)/8 + 1)
typedef struct gf_%(gf_shortname)s_s {
word_t limb[NLIMBS];
} __attribute__((aligned(32))) gf_%(gf_shortname)s_s, gf_%(gf_shortname)s_t[1];

#define GF_LIT_LIMB_BITS %(gf_lit_limb_bits)d
#define GF_BITS %(gf_bits)d
#define ZERO gf_%(gf_shortname)s_ZERO
#define ONE gf_%(gf_shortname)s_ONE
#define MODULUS gf_%(gf_shortname)s_MODULUS
#define gf gf_%(gf_shortname)s_t
#define gf_s gf_%(gf_shortname)s_s
#define gf_eq gf_%(gf_shortname)s_eq
#define gf_copy gf_%(gf_shortname)s_copy
#define gf_add_RAW gf_%(gf_shortname)s_add_RAW
#define gf_sub_RAW gf_%(gf_shortname)s_sub_RAW
@@ -34,7 +39,6 @@ typedef struct gf_%(gf_shortname)s_s {
#define gf_isr gf_%(gf_shortname)s_isr
#define gf_serialize gf_%(gf_shortname)s_serialize
#define gf_deserialize gf_%(gf_shortname)s_deserialize
#define MODULUS gf_%(gf_shortname)s_MODULUS

#define SQRT_MINUS_ONE P%(gf_shortname)s_SQRT_MINUS_ONE /* might not be defined */

@@ -44,7 +48,7 @@ typedef struct gf_%(gf_shortname)s_s {
extern "C" {
#endif

const gf MODULUS;
const gf MODULUS, ZERO, ONE;

/* Defined below in f_impl.h */
static INLINE_UNUSED void gf_copy (gf out, const gf a) { *out = *a; }
@@ -58,6 +62,7 @@ void gf_mul (gf_s *__restrict__ out, const gf a, const gf b);
void gf_mulw (gf_s *__restrict__ out, const gf a, uint64_t b);
void gf_sqr (gf_s *__restrict__ out, const gf a);
void gf_serialize (uint8_t *serial, const gf x);
mask_t gf_eq (const gf x, const gf y);
mask_t gf_deserialize (gf x, const uint8_t serial[(GF_BITS-1)/8+1]);

#ifdef __cplusplus


+ 11
- 0
src/include/word.h View File

@@ -241,4 +241,15 @@ malloc_vector(size_t size) {
}
}

/*
 * UNROLL is written directly in front of a loop. When compiling with clang
 * and 100*major + minor > 305 (i.e. versions newer than 3.5) it expands to a
 * pragma asking for the following loop to be fully unrolled; with any other
 * compiler it expands to nothing.
 */
/* PERF: vectorize vs unroll */
#ifdef __clang__
#if 100*__clang_major__ + __clang_minor__ > 305
#define UNROLL _Pragma("clang loop unroll(full)") // PERF TODO: vectorize?
#endif
#endif

/* Fallback: no unrolling hint available, so UNROLL is an empty token. */
#ifndef UNROLL
#define UNROLL
#endif

#endif /* __WORD_H__ */

+ 3
- 7
src/p25519/f_arithmetic.c View File

@@ -11,7 +11,7 @@
#include "field.h"
#include "constant_time.h"

const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
const gf_25519_t SQRT_MINUS_ONE = {FIELD_LITERAL(
0x61b274a0ea0b0,
0x0d5a5fc8f189d,
0x7ef5e9cbd0c60,
@@ -22,10 +22,6 @@ const gf_25519_t P25519_SQRT_MINUS_ONE = {FIELD_LITERAL(
const gf MODULUS = {FIELD_LITERAL(
0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff
)};
/* TODO put in header */
extern const gf_25519_t decaf_255_ONE;
extern mask_t decaf_255_gf_eq(const gf_25519_t a, const gf_25519_t b);

/* Guarantee: a^2 x = 0 if x = 0; else a^2 x = 1 or SQRT_MINUS_ONE; */
void gf_isr (
@@ -44,8 +40,8 @@ void gf_isr (
st[i&1][0] = tmp2[0];
}
mask_t mask = decaf_255_gf_eq(st[1],decaf_255_ONE) | decaf_255_gf_eq(st[1],SQRT_MINUS_ONE);
mask_t mask = gf_eq(st[1],ONE) | gf_eq(st[1],SQRT_MINUS_ONE);
constant_time_select(tmp1, decaf_255_ONE, SQRT_MINUS_ONE, sizeof(tmp1), mask, 0);
constant_time_select(tmp1, ONE, SQRT_MINUS_ONE, sizeof(tmp1), mask, 0);
gf_mul(a,tmp1,st[0]);
}

+ 99
- 0
src/per_field.c View File

@@ -0,0 +1,99 @@
/**
* @cond internal
* @file per_field.c
* @copyright
* Copyright (c) 2015-2016 Cryptography Research, Inc. \n
* Released under the MIT License. See LICENSE.txt for license information.
* @author Mike Hamburg
* @brief Generic arithmetic which has to be compiled per field.
*/

#include "field.h"

const gf ZERO = {{{0}}}, ONE = {{{1}}};

/**
 * Serialize a field element to its SER_BYTES-byte wire format.
 *
 * The input is copied and the copy is passed through gf_strong_reduce(), so
 * x itself is not modified. Limbs are visited in LIMBPERM order and packed
 * into a bit buffer; output bytes are taken from the low end of that buffer,
 * so serial[0] holds the low 8 bits of the first limb visited.
 *
 * @param [out] serial Receives SER_BYTES bytes.
 * @param [in] x Field element to encode.
 */
void gf_serialize (uint8_t serial[SER_BYTES], const gf x) {
gf red;
gf_copy(red, x);
gf_strong_reduce(red);
/* j = next limb to consume, fill = number of pending bits in buffer. */
unsigned int j=0, fill=0;
dword_t buffer = 0;
UNROLL for (unsigned int i=0; i<SER_BYTES; i++) {
/* Fewer than a byte pending and limbs remain: splice the next limb in
 * above the bits that are already buffered. */
if (fill < 8 && j < NLIMBS) {
buffer |= ((dword_t)red->limb[LIMBPERM(j)]) << fill;
fill += LIMB_PLACE_VALUE(LIMBPERM(j));
j++;
}
/* Storing into a uint8_t keeps only the low 8 bits of buffer. */
serial[i] = buffer;
fill -= 8;
buffer >>= 8;
}
}

/**
 * Deserialize from wire format; return -1 on success and 0 on failure.
 *
 * Input bytes are consumed in order into a bit buffer, which is then cut
 * into limbs of LIMB_PLACE_VALUE bits each, stored in LIMBPERM order. While
 * the limbs are produced, a running borrow of (x - MODULUS) is tracked in
 * scarry so the decoded value can be range-checked.
 *
 * @param [out] x Receives the decoded limbs.
 * @param [in] serial SER_BYTES bytes of input.
 * @return word_is_zero(buffer) & ~word_is_zero(scarry). Presumably success
 * means no bits are left over and the subtraction of MODULUS borrowed
 * (value below the modulus) -- confirm against word_is_zero().
 */
mask_t gf_deserialize (gf x, const uint8_t serial[SER_BYTES]) {
/* j = next input byte, fill = number of pending bits in buffer. */
unsigned int j=0, fill=0;
dword_t buffer = 0;
dsword_t scarry = 0;
UNROLL for (unsigned int i=0; i<NLIMBS; i++) {
/* Pull in bytes until a whole limb is buffered or input is exhausted. */
UNROLL while (fill < LIMB_PLACE_VALUE(LIMBPERM(i)) && j < SER_BYTES) {
buffer |= ((dword_t)serial[j]) << fill;
fill += 8;
j++;
}
/* Every limb except the last is masked to its place value; the last
 * one takes whatever remains in the buffer. */
x->limb[LIMBPERM(i)] = (i<NLIMBS-1) ? buffer & LIMB_MASK(LIMBPERM(i)) : buffer;
fill -= LIMB_PLACE_VALUE(LIMBPERM(i));
buffer >>= LIMB_PLACE_VALUE(LIMBPERM(i));
/* Keep only what is shifted out above a full word of (limb - modulus
 * limb + previous borrow). NOTE(review): right-shifting a negative
 * signed value is implementation-defined in C; this presumably relies
 * on an arithmetic shift. */
scarry = (scarry + x->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)]) >> (8*sizeof(word_t));
}
return word_is_zero(buffer) & ~word_is_zero(scarry);
}

/**
 * Reduce a field element to canonical form, in place.
 *
 * gf_weak_reduce() is applied first; per the original author's note the
 * value is then below 2p, so a single conditional subtraction of p is
 * enough. p is subtracted unconditionally and then added back under a mask
 * if the subtraction borrowed.
 *
 * Limbs are read and written through LIMBPERM(i). Earlier code wrote
 * a->limb[i] while reading a->limb[LIMBPERM(i)], which only agrees when
 * LIMBPERM is the identity permutation.
 *
 * @param [in,out] a Element to reduce.
 */
void gf_strong_reduce (gf a) {
    /* first, clear high */
    gf_weak_reduce(a); /* Determined to have negligible perf impact. */

    /* now the total is less than 2p */

    /* compute total_value - p.  No need to reduce mod p. */
    dsword_t scarry = 0;
    for (unsigned int i=0; i<NLIMBS; i++) {
        scarry = scarry + a->limb[LIMBPERM(i)] - MODULUS->limb[LIMBPERM(i)];
        a->limb[LIMBPERM(i)] = scarry & LIMB_MASK(LIMBPERM(i));
        scarry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
    }

    /* uncommon case: it was >= p, so now scarry = 0 and this = x
     * common case: it was < p, so now scarry = -1 and this = x - p + 2^B,
     * where B is the sum of the limb place values (2^255 for p25519),
     * so let's add back in p.  will carry back off the top for 2^B.
     */
    assert(word_is_zero(scarry) | word_is_zero(scarry+1));

    /* scarry is 0 or -1 here, so scarry_0 acts as a select mask for p. */
    word_t scarry_0 = scarry;
    dword_t carry = 0;

    /* add it back */
    for (unsigned int i=0; i<NLIMBS; i++) {
        carry = carry + a->limb[LIMBPERM(i)] + (scarry_0 & MODULUS->limb[LIMBPERM(i)]);
        a->limb[LIMBPERM(i)] = carry & LIMB_MASK(LIMBPERM(i));
        carry >>= LIMB_PLACE_VALUE(LIMBPERM(i));
    }

    /* The carry out of the top must cancel the earlier borrow. */
    assert(word_is_zero(carry + scarry_0));
}

/**
 * Test two field elements for equality.
 *
 * The difference a-b is brought to canonical form with gf_strong_reduce()
 * and all of its limbs are OR-ed together; the result is whatever
 * word_is_zero() reports for that accumulated word.
 */
mask_t gf_eq(const gf a, const gf b) {
    gf diff;
    gf_sub(diff, a, b);
    gf_strong_reduce(diff);

    /* Any set bit in any limb makes the accumulator nonzero. */
    mask_t acc = 0;
    unsigned int idx = 0;
    while (idx < sizeof(diff->limb)/sizeof(diff->limb[0])) {
        acc |= diff->limb[idx];
        idx++;
    }

    return word_is_zero(acc);
}

Loading…
Cancel
Save