Browse Source

Cross-curve compilation working! Still a bunch of FIXMEs though

master
Michael Hamburg 9 years ago
parent
commit
cdab495338
26 changed files with 551 additions and 427 deletions
  1. +76
    -70
      Makefile
  2. +20
    -5
      src/curve_ed25519/curve_data.inc.c
  3. +0
    -1
      src/curve_ed25519/field
  4. +25
    -4
      src/curve_ed448goldilocks/curve_data.inc.c
  5. +0
    -1
      src/curve_ed448goldilocks/field
  6. +0
    -16
      src/decaf.c
  7. +70
    -29
      src/decaf_fast.c
  8. +6
    -7
      src/decaf_gen_tables.c
  9. +1
    -1
      src/p25519/f_field.h
  10. +14
    -14
      src/p448/arch_32/f_impl.c
  11. +32
    -32
      src/p448/arch_32/f_impl.h
  12. +14
    -14
      src/p448/arch_arm_32/f_impl.c
  13. +32
    -32
      src/p448/arch_arm_32/f_impl.h
  14. +14
    -14
      src/p448/arch_neon_experimental/f_impl.c
  15. +32
    -32
      src/p448/arch_neon_experimental/f_impl.h
  16. +16
    -16
      src/p448/arch_ref64/f_impl.c
  17. +32
    -32
      src/p448/arch_ref64/f_impl.h
  18. +16
    -16
      src/p448/arch_x86_64/f_impl.c
  19. +35
    -32
      src/p448/arch_x86_64/f_impl.h
  20. +2
    -1
      src/p448/f_field.h
  21. +1
    -0
      src/public_include/decaf.hxx
  22. +10
    -4
      src/public_include/decaf/decaf_255.hxx
  23. +16
    -1
      src/public_include/decaf/decaf_448.h
  24. +7
    -1
      src/public_include/decaf/decaf_448.hxx
  25. +68
    -47
      test/bench_decaf.cxx
  26. +12
    -5
      test/test_decaf.cxx

+ 76
- 70
Makefile View File

@@ -39,8 +39,6 @@ else
ARCH ?= arch_ref32
endif

FIELD ?= p25519

WARNFLAGS = -pedantic -Wall -Wextra -Werror -Wunreachable-code \
-Wmissing-declarations -Wunused-function -Wno-overlength-strings $(EXWARN)

@@ -79,22 +77,13 @@ SAGES= $(shell ls test/*.sage)
BUILDPYS= $(SAGES:test/%.sage=$(BUILD_PY)/%.py)

.PHONY: clean all test bench todo doc lib bat sage sagetest
.PRECIOUS: $(BUILD_ASM)/%.s $(BUILD_ASM)/%_impl.s $(BUILD_ASM)/$(DECAF)_%.s $(BUILD_ASM)/decaf_tables_%.c \
$(BUILD_IBIN)/decaf_gen_tables_%

HEADERS= Makefile $(shell find src test -name "*.h") $(shell find . -name "*.hxx") $(BUILD_OBJ)/timestamp
.PRECIOUS: $(BUILD_ASM)/%.s $(BUILD_C)/%.c $(BUILD_IBIN)/%

# components needed by the table generators
GENCOMPONENTS= \
$(BUILD_OBJ)/$(DECAF)_ed25519.o $(BUILD_OBJ)/p25519_impl.o $(BUILD_OBJ)/p25519_arithmetic.o \
$(BUILD_OBJ)/utils.o \
#$(BUILD_OBJ)/p448_impl.o $(BUILD_OBJ)/p448_arithmetic.o
HEADERS= Makefile $(shell find src test -name "*.h") $(BUILD_OBJ)/timestamp
HEADERSXX = $(HEADERS) $(shell find . -name "*.hxx")

# components needed by the lib
DECAFCOMPONENTS= $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/decaf_crypto.o $(GENCOMPONENTS)
ifeq ($(DECAF),decaf_fast)
DECAFCOMPONENTS += $(BUILD_OBJ)/decaf_tables_ed25519.o
endif
LIBCOMPONENTS = $(BUILD_OBJ)/utils.o $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/decaf_crypto.o # and per-field components

BENCHCOMPONENTS = $(BUILD_OBJ)/bench.o $(BUILD_OBJ)/shake.o

@@ -105,26 +94,7 @@ scan: clean
-enable-checker deadcode -enable-checker llvm \
-enable-checker osx -enable-checker security -enable-checker unix \
make all
# The shakesum utility is in the public bin directory.
$(BUILD_BIN)/shakesum: $(BUILD_OBJ)/shakesum.o $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/utils.o
$(LD) $(LDFLAGS) -o $@ $^

# The main decaf library, and its symlinks.
lib: $(BUILD_LIB)/libdecaf.so

$(BUILD_LIB)/libdecaf.so: $(BUILD_LIB)/libdecaf.so.1
ln -sf `basename $^` $@

$(BUILD_LIB)/libdecaf.so.1: $(DECAFCOMPONENTS)
rm -f $@
ifeq ($(UNAME),Darwin)
libtool -macosx_version_min 10.6 -dynamic -dead_strip -lc -x -o $@ \
$(DECAFCOMPONENTS)
else
$(LD) $(LDFLAGS) -shared -Wl,-soname,`basename $@` -Wl,--gc-sections -o $@ $(DECAFCOMPONENTS)
strip --discard-all $@
endif

# Internal test programs, which are not part of the final build/bin directory.
$(BUILD_IBIN)/test: $(BUILD_OBJ)/test_decaf.o lib
@@ -150,50 +120,86 @@ $(BUILD_OBJ)/timestamp:
$(BUILD_OBJ)/%.o: $(BUILD_ASM)/%.s
$(ASM) $(ASFLAGS) -c -o $@ $<

# I don't know why this rule is necessary... bug in make, or obscure pattern matching rule?
$(BUILD_OBJ)/decaf_gen_tables_%.o: $(BUILD_ASM)/decaf_gen_tables_%.s
$(ASM) $(ASFLAGS) -c -o $@ $<
################################################################
# Per-field code: call with field, arch
#
# Usage: $(eval $(call define_field,<field>,<arch>))
#   arg 1: field name; sources are read from src/<field>/
#   arg 2: architecture directory; f_impl.c is read from src/<field>/<arch>/
#
# Effects of one expansion:
#   - records the chosen architecture in ARCH_FOR_<field>
#   - lists the field's two objects (<field>_impl.o, <field>_arithmetic.o)
#     in COMPONENTS_OF_<field> and appends them to LIBCOMPONENTS
#   - emits explicit rules that compile f_arithmetic.c and f_impl.c to
#     assembly, with both src/<field> and src/<field>/<arch> on the
#     include path
#
# Doubled dollar signs survive the call expansion, so those references
# are only expanded when eval parses the generated text.
################################################################
define define_field
ARCH_FOR_$(1) = $(2)
COMPONENTS_OF_$(1) = $$(BUILD_OBJ)/$(1)_impl.o $$(BUILD_OBJ)/$(1)_arithmetic.o
LIBCOMPONENTS += $$(COMPONENTS_OF_$(1))

$$(BUILD_ASM)/$(1)_arithmetic.s: src/$(1)/f_arithmetic.c $$(HEADERS)
$$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$(2) -S -c -o $$@ $$<

$$(BUILD_ASM)/$(1)_impl.s: src/$(1)/$(2)/f_impl.c $$(HEADERS)
$$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$(2) -S -c -o $$@ $$<
endef

################################################################
# Per-field, per-curve code: call with curve, field
#
# Usage: $(eval $(call define_curve,<curve>,<field>))
#   arg 1: curve name; curve headers are read from src/curve_<curve>/
#   arg 2: field name, as previously passed to define_field
#
# Effects of one expansion:
#   - links the table generator decaf_gen_tables_<curve> from its own
#     object, decaf_fast_<curve>.o, utils.o and COMPONENTS_OF_<field>
#   - runs the generator to write decaf_tables_<curve>.c, deleting the
#     output and failing if the generator exits non-zero
#   - compiles the generated tables, decaf_gen_tables.c and decaf_fast.c
#     to per-curve assembly with src/curve_<curve>/, src/<field> and
#     src/<field>/<ARCH_FOR_<field>> on the include path
#   - appends decaf_fast_<curve>.o and decaf_tables_<curve>.o to
#     LIBCOMPONENTS
#
# COMPONENTS_OF_<field> appears in a prerequisite list, which make expands
# when the rule is parsed, so define_field for that field has to be
# evaluated before this template.
#
# NOTE(review): the recipe for decaf_tables_<curve>.s ends with a
# line-continuation backslash followed by a blank line; this looks like a
# leftover from reordering the flags -- confirm it is harmless or drop it.
################################################################
define define_curve
$$(BUILD_IBIN)/decaf_gen_tables_$(1): $$(BUILD_OBJ)/decaf_gen_tables_$(1).o $$(BUILD_OBJ)/decaf_fast_$(1).o $$(BUILD_OBJ)/utils.o \
$$(COMPONENTS_OF_$(2))
$$(LD) $$(LDFLAGS) -o $$@ $$^

$$(BUILD_C)/decaf_tables_$(1).c: $$(BUILD_IBIN)/decaf_gen_tables_$(1)
./$$< > $$@ || (rm $$@; exit 1)

$$(BUILD_ASM)/decaf_tables_$(1).s: $$(BUILD_C)/decaf_tables_$(1).c $$(HEADERS)
$$(CC) $$(CFLAGS) -S -c -o $$@ $$< \
-I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \

$$(BUILD_ASM)/decaf_gen_tables_$(1).s: src/decaf_gen_tables.c $$(HEADERS)
$$(CC) $$(CFLAGS) \
-I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \
-S -c -o $$@ $$<

$$(BUILD_ASM)/decaf_fast_$(1).s: src/decaf_fast.c $$(HEADERS)
$$(CC) $$(CFLAGS) \
-I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \
-S -c -o $$@ $$<

LIBCOMPONENTS += $$(BUILD_OBJ)/decaf_fast_$(1).o $$(BUILD_OBJ)/decaf_tables_$(1).o
endef

################################################################
# call code above to generate curves and fields
$(eval $(call define_field,p25519,arch_x86_64))
$(eval $(call define_curve,ed25519,p25519))
$(eval $(call define_field,p448,arch_x86_64))
$(eval $(call define_curve,ed448goldilocks,p448))

$(BUILD_IBIN)/decaf_gen_tables_%: $(BUILD_OBJ)/decaf_gen_tables_%.o $(GENCOMPONENTS)
# The shakesum utility is in the public bin directory.
$(BUILD_BIN)/shakesum: $(BUILD_OBJ)/shakesum.o $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/utils.o
$(LD) $(LDFLAGS) -o $@ $^
$(BUILD_C)/decaf_tables_%.c: $(BUILD_IBIN)/decaf_gen_tables_%
./$< > $@
$(BUILD_ASM)/decaf_tables_%.s: $(BUILD_C)/decaf_tables_%.c $(HEADERS)
$(CC) $(CFLAGS) -S -c -o $@ $< \
-I src/curve_$*/ -I src/curve_$*/field -I src/curve_$*/field/$(ARCH) \
$(BUILD_ASM)/decaf_gen_tables_%.s: src/decaf_gen_tables.c $(HEADERS)
$(CC) $(CFLAGS) \
-I src/curve_$*/ -I src/curve_$*/field -I src/curve_$*/field/$(ARCH) \
-S -c -o $@ $<
$(BUILD_ASM)/decaf_fast_%.s: src/decaf_fast.c $(HEADERS)
$(CC) $(CFLAGS) \
-I src/curve_$*/ -I src/curve_$*/field -I src/curve_$*/field/$(ARCH) \
-S -c -o $@ $<
$(BUILD_ASM)/%_arithmetic.s: src/%/f_arithmetic.c $(HEADERS)
$(CC) $(CFLAGS) \
-I src/$* -I src/$*/$(ARCH) \
-S -c -o $@ $<
$(BUILD_ASM)/%_impl.s: src/%/$(ARCH)/f_impl.c $(HEADERS)
$(CC) $(CFLAGS) \
-I src/$* -I src/$*/$(ARCH) \
-S -c -o $@ $<

# The main decaf library, and its symlinks.
lib: $(BUILD_LIB)/libdecaf.so

$(BUILD_LIB)/libdecaf.so: $(BUILD_LIB)/libdecaf.so.1
ln -sf `basename $^` $@

$(BUILD_LIB)/libdecaf.so.1: $(LIBCOMPONENTS)
rm -f $@
ifeq ($(UNAME),Darwin)
libtool -macosx_version_min 10.6 -dynamic -dead_strip -lc -x -o $@ \
$(LIBCOMPONENTS)
else
$(LD) $(LDFLAGS) -shared -Wl,-soname,`basename $@` -Wl,--gc-sections -o $@ $(LIBCOMPONENTS)
strip --discard-all $@
endif



$(BUILD_ASM)/%.s: src/%.c $(HEADERS)
$(CC) $(CFLAGS) -S -c -o $@ $<
$(BUILD_ASM)/%.s: src/%.cxx $(HEADERS)
$(CXX) $(CXXFLAGS) -S -c -o $@ $<

$(BUILD_ASM)/%.s: test/%.c $(HEADERS)
$(CC) $(CFLAGS) -S -c -o $@ $<

$(BUILD_ASM)/%.s: test/%.cxx $(HEADERS)
$(BUILD_ASM)/%.s: test/%.cxx $(HEADERSXX)
$(CXX) $(CXXFLAGS) -S -c -o $@ $<

# The sage test scripts


+ 20
- 5
src/curve_ed25519/curve_data.inc.c View File

@@ -1,9 +1,22 @@
/* Rename table for eventual factoring into .c.inc, MSR ECC style */
// FIXME move to arch or something
#define WBITS DECAF_WORD_BITS

#if WBITS == 64
#define LBITS 51
typedef __int128_t decaf_sdword_t;
#define LIMB(x) (x##ull)
#define SC_LIMB(x) (x##ull)
#else
#error "Only supporting 64-bit platforms right now"
#endif

#define API_NAME "decaf_255"
#define API_NS(_id) decaf_255_##_id
#define API_NS2(_pref,_id) _pref##_decaf_255_##_id

#define SCALAR_LIMBS DECAF_255_SCALAR_LIMBS
#define SCALAR_BITS DECAF_255_SCALAR_BITS
#define NLIMBS DECAF_255_LIMBS
#define API_NS(_id) decaf_255_##_id
#define API_NS2(_pref,_id) _pref##_decaf_255_##_id
#define scalar_t decaf_255_scalar_t
#define point_t decaf_255_point_t
#define precomputed_s decaf_255_precomputed_s
@@ -21,12 +34,14 @@ static const scalar_t sc_p = {{{
SC_LIMB(0x1000000000000000)
}}};

#ifdef GEN_TABLES
/* sqrt(9) = 3 from the curve spec. Not exported, but used by pregen tool. */
const unsigned char base_point_ser_for_pregen[SER_BYTES] = {
static const unsigned char base_point_ser_for_pregen[SER_BYTES] = {
3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
};
#endif

const gf SQRT_ONE_MINUS_D = {FIELD_LITERAL(
static const gf SQRT_ONE_MINUS_D = {FIELD_LITERAL(
0x6db8831bbddec,
0x38d7b56c9c165,
0x016b221394bdc,


+ 0
- 1
src/curve_ed25519/field View File

@@ -1 +0,0 @@
../p25519/

+ 25
- 4
src/curve_ed448goldilocks/curve_data.inc.c View File

@@ -1,8 +1,27 @@
// FIXME move to arch or something
#define WBITS DECAF_WORD_BITS

#if WBITS == 64
#define LBITS 56
typedef __int128_t decaf_sdword_t;
#define LIMB(x) (x##ull)
#define SC_LIMB(x) (x##ull)
#elif WBITS == 32
typedef int64_t decaf_sdword_t;
#define LBITS 28
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS
#define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32
#else
#error "Only supporting 32- and 64-bit platforms right now"
#endif

#define API_NAME "decaf_448"
#define API_NS(_id) decaf_448_##_id
#define API_NS2(_pref,_id) _pref##_decaf_448_##_id

#define SCALAR_LIMBS DECAF_448_SCALAR_LIMBS
#define SCALAR_BITS DECAF_448_SCALAR_BITS
#define NLIMBS DECAF_448_LIMBS
#define API_NS(_id) decaf_448_##_id
#define API_NS2(_pref,_id) _pref##_decaf_448_##_id
#define scalar_t decaf_448_scalar_t
#define point_t decaf_448_point_t
#define precomputed_s decaf_448_precomputed_s
@@ -22,8 +41,10 @@ static const scalar_t sc_p = {{{
SC_LIMB(0xffffffffffffffff),
SC_LIMB(0x3fffffffffffffff)
}}};

#ifdef GEN_TABLES
/* sqrt(5) = 2phi-1 from the curve spec. Not exported, but used by pregen tool. */
const unsigned char base_point_ser_for_pregen[SER_BYTES] = {
static const unsigned char base_point_ser_for_pregen[SER_BYTES] = {
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1
};
#endif

+ 0
- 1
src/curve_ed448goldilocks/field View File

@@ -1 +0,0 @@
../p448/

+ 0
- 16
src/decaf.c View File

@@ -13,22 +13,6 @@
#include <string.h>
#include <assert.h>

#define WBITS DECAF_WORD_BITS

#if WBITS == 64
#define LBITS 56
typedef __int128_t decaf_sdword_t;
#define LIMB(x) (x##ull)
#define SC_LIMB(x) (x##ull)
#elif WBITS == 32
typedef int64_t decaf_sdword_t;
#define LBITS 28
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS
#define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32
#else
#error "Only supporting 32- and 64-bit platforms right now"
#endif

#define sv static void
#define snv static void __attribute__((noinline))
#define siv static inline void __attribute__((always_inline))


+ 70
- 29
src/decaf_fast.c View File

@@ -15,21 +15,13 @@
#include "field.h"
#include "decaf_config.h"

#define WBITS DECAF_WORD_BITS
#if WBITS == 64
typedef __int128_t decaf_sdword_t;
#define SC_LIMB(x) (x##ull)
#elif WBITS == 32
typedef int64_t decaf_sdword_t;
#define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32
#else
#error "Only supporting 32- and 64-bit platforms right now"
#endif


/* Include the curve data here */
#include "curve_data.inc.c"

#if (COFACTOR == 8) && !IMAGINE_TWIST
/* FUTURE: Curve41417 doesn't have these properties. */
#error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8"
#endif

#if IMAGINE_TWIST && (P_MOD_8 != 5)
#error "Cannot use IMAGINE_TWIST except for p == 5 mod 8"
@@ -162,6 +154,7 @@ static decaf_word_t hibit(const gf x) {
return -(y->limb[0]&1);
}

#if COFACTOR==8
/** Return high bit of x = low bit of 2x mod p */
static decaf_word_t lobit(const gf x) {
gf y;
@@ -169,6 +162,7 @@ static decaf_word_t lobit(const gf x) {
gf_strong_reduce(y);
return -(y->limb[0]&1);
}
#endif

/** {extra,accum} - sub +? p
* Must have extra <= 1
@@ -408,27 +402,64 @@ static void deisogenize (
decaf_bool_t toggle_hibit_t_over_s,
decaf_bool_t toggle_rotation
) {
gf c, d, x, t;
#if COFACTOR == 4 && !IMAGINE_TWIST
(void) toggle_rotation;
/* TODO: Can shave off one mul here; not important but makes consistent with paper */
gf b, d;
gf_s *a = s, *c = minus_t_over_s;
gf_mulw_sgn ( a, p->y, 1-EDWARDS_D );
gf_mul ( c, a, p->t ); /* -dYT, with EDWARDS_D = d-1 */
gf_mul ( a, p->x, p->z );
gf_sub ( d, c, a ); /* aXZ-dYT with a=-1 */
gf_add ( a, p->z, p->y );
gf_sub ( b, p->z, p->y );
gf_mul ( c, b, a );
gf_mulw_sgn ( b, c, -EDWARDS_D ); /* (a-d)(Z+Y)(Z-Y) */
decaf_bool_t ok = gf_isqrt_chk ( a, b, DECAF_TRUE ); /* r in the paper */
(void)ok; assert(ok);
gf_mulw_sgn ( b, a, -EDWARDS_D ); /* u in the paper */
gf_mul ( c, b, a ); /* ur */
gf_mul ( a, c, d ); /* ur (aZX-dYT) */
gf_add ( d, b, b ); /* 2u = -2au since a=-1 */
gf_mul ( c, d, p->z ); /* 2uZ */
cond_neg ( b, toggle_hibit_t_over_s ^ ~hibit(c) ); /* u <- -u if negative. */
cond_neg ( c, toggle_hibit_t_over_s ^ ~hibit(c) ); /* u <- -u if negative. */
gf_mul ( d, b, p->y );
gf_add ( s, a, d );
cond_neg ( s, toggle_hibit_s ^ hibit(s) );
#else
/* More complicated because of rotation */
/* FIXME This code is wrong for certain non-Curve25519 curves; check if it's because of COFACTOR==8 or IMAGINE_TWIST */
gf c, d;
gf_s *b = s, *a = minus_t_over_s;

#if IMAGINE_TWIST
gf x, t;
gf_mul ( x, p->x, SQRT_MINUS_ONE);
gf_mul ( t, p->t, SQRT_MINUS_ONE);
gf_sub ( x, ZERO, x );
gf_sub ( t, ZERO, t );
#endif
gf DEBUG;
gf_add ( a, p->z, x );
gf_sub ( b, p->z, x );
gf_mul ( c, a, b ); /* "zx" = Z^2 - X^2 */
gf_cpy(DEBUG,c);
gf_mul ( c, a, b ); /* "zx" = Z^2 - aX^2 = Z^2 - X^2 */
#else
const gf_s *x = p->x, *t = p->t;
/* Won't hit the cond_sel below because COFACTOR==8 requires IMAGINE_TWIST for now. */
gf_sqr ( a, p->z );
gf_sqr ( b, p->x );
gf_add ( c, a, b ); /* "zx" = Z^2 - aX^2 = Z^2 + X^2 */
#endif
gf_mul ( a, p->z, t ); /* "tz" = T*Z */
gf_sqr ( b, a );
gf_mul ( d, b, c ); /* (TZ)^2 * (Z^2-X^2) */
gf_mul ( d, b, c ); /* (TZ)^2 * (Z^2-aX^2) */
decaf_bool_t ok = gf_isqrt_chk ( b, d, DECAF_TRUE );
(void)ok; assert(ok);
gf_mul ( d, b, a ); /* "osx" = 1 / sqrt(z^2-x^2) */
gf_mul ( d, b, a ); /* "osx" = 1 / sqrt(z^2-ax^2) */
gf_mul ( a, b, c );
gf_mul ( b, a, d ); /* 1/tz */

@@ -445,6 +476,7 @@ static void deisogenize (
cond_sel ( x, p->y, x, rotate );
}
#else
(void)toggle_rotation;
rotate = 0;
#endif
@@ -458,6 +490,8 @@ static void deisogenize (
gf_add ( d, d, c );
gf_mul ( b, d, x ); /* here "x" = y unless rotate */
cond_neg ( b, toggle_hibit_s ^ hibit(b) );
#endif
}

void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
@@ -472,7 +506,7 @@ void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) {
static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) {
return gf_deserialize((gf_s *)s, ser);
}
decaf_bool_t API_NS(point_decode) (
point_t p,
const unsigned char ser[SER_BYTES],
@@ -483,25 +517,32 @@ decaf_bool_t API_NS(point_decode) (
succ &= allow_identity | ~zero;
succ &= ~hibit(s);
gf_sqr ( a, s );
gf_sub ( f, ONE, a ); /* f = 1-s^2 = 1-as^2 since a=1 */
#if IMAGINE_TWIST
gf_sub ( f, ONE, a ); /* f = 1-as^2 = 1-s^2*/
#else
gf_add ( f, ONE, a ); /* f = 1-as^2 = 1+s^2 */
#endif
succ &= ~ gf_eq( f, ZERO );
gf_sqr ( b, f );
gf_mulw_sgn ( c, a, 4-4*EDWARDS_D );
gf_mulw_sgn ( c, a, 4*IMAGINE_TWIST-4*EDWARDS_D );
gf_add ( c, c, b ); /* t^2 */
gf_mul ( d, f, s ); /* s(1-s^2) for denoms */
gf_mul ( d, f, s ); /* s(1-as^2) for denoms */
gf_sqr ( e, d );
gf_mul ( b, c, e );
succ &= gf_isqrt_chk ( e, b, DECAF_TRUE ); /* e = 1/(t s (1-s^2)) */
succ &= gf_isqrt_chk ( e, b, DECAF_TRUE ); /* e = 1/(t s (1-as^2)) */
gf_mul ( b, e, d ); /* 1/t */
gf_mul ( d, e, c ); /* d = t / (s(1-s^2)) */
gf_mul ( d, e, c ); /* d = t / (s(1-as^2)) */
gf_mul ( e, d, f ); /* t/s */
decaf_bool_t negtos = hibit(e);
cond_neg(b, negtos);
cond_neg(d, negtos);
gf_add ( p->z, ONE, a); /* Z = 1+s^2 */
succ &= ~gf_eq( p->z, ZERO ); /* FUTURE: unnecessary? */

#if IMAGINE_TWIST
gf_add ( p->z, ONE, a); /* Z = 1+as^2 = 1+s^2 */
#else
gf_sub ( p->z, ONE, a); /* Z = 1+as^2 = 1-s^2 */
#endif

#if COFACTOR == 8
gf_mul ( a, p->z, d); /* t(1+s^2) / s(1-s^2) = 2/xy */
@@ -745,7 +786,7 @@ static void pt_to_pniels (
) {
gf_sub ( b->n->a, a->y, a->x );
gf_add ( b->n->b, a->x, a->y );
gf_mulw_sgn ( b->n->c, a->t, 2*EFF_D );
gf_mulw_sgn ( b->n->c, a->t, 2*TWISTED_D );
gf_add ( b->z, a->z, a->z );
}



+ 6
- 7
src/decaf_gen_tables.c View File

@@ -15,8 +15,8 @@
#include "decaf_config.h"
#include "field.h"

#define API_NS(_id) decaf_255_##_id
#define API_NS2(_pref,_id) _pref##_decaf_255_##_id
#define GEN_TABLES
#include "curve_data.inc.c"

/* To satisfy linker. */
const gf API_NS(precomputed_base_as_fe)[1];
@@ -24,7 +24,6 @@ const API_NS(scalar_t) API_NS(precomputed_scalarmul_adjustment);
const API_NS(scalar_t) API_NS(point_scalarmul_adjustment);
const API_NS(scalar_t) API_NS(sc_r2) = {{{0}}};
const decaf_word_t API_NS(MONTGOMERY_FACTOR) = 0;
const unsigned char base_point_ser_for_pregen[DECAF_255_SER_BYTES];

const API_NS(point_t) API_NS(point_base);

@@ -94,8 +93,8 @@ int main(int argc, char **argv) {
printf("/** @warning: this file was automatically generated. */\n");
printf("#include <decaf.h>\n\n");
printf("#include \"field.h\"\n\n");
printf("#define API_NS(_id) decaf_255_##_id\n");
printf("#define API_NS2(_pref,_id) _pref##_decaf_255_##_id\n");
printf("#define API_NS(_id) %s_##_id\n", API_NAME);
printf("#define API_NS2(_pref,_id) _pref##_%s_##_id\n", API_NAME);
output = (const gf_s *)real_point_base;
printf("const API_NS(point_t) API_NS(point_base) = {{\n");
@@ -136,8 +135,8 @@ int main(int argc, char **argv) {
scalar_print("API_NS(precomputed_scalarmul_adjustment)", smadj);
API_NS(scalar_copy)(smadj,API_NS(scalar_one));
for (i=0; i<DECAF_255_SCALAR_BITS-1 + DECAF_WINDOW_BITS
- ((DECAF_255_SCALAR_BITS-1)%DECAF_WINDOW_BITS); i++) {
for (i=0; i<SCALAR_BITS-1 + DECAF_WINDOW_BITS
- ((SCALAR_BITS-1) % DECAF_WINDOW_BITS); i++) {
API_NS(scalar_add)(smadj,smadj,smadj);
}
API_NS(scalar_sub)(smadj, smadj, API_NS(scalar_one));


+ 1
- 1
src/p25519/f_field.h View File

@@ -15,7 +15,7 @@
#include "f_impl.h"
#define GF_LIT_LIMB_BITS 51
#define GF_BITS 255
#define gf gf_25519_t
#define gf gf_25519_t
#define gf_s gf_25519_s
#define gf_mul gf_25519_mul
#define gf_sqr gf_25519_sqr


+ 14
- 14
src/p448/arch_32/f_impl.c View File

@@ -23,9 +23,9 @@ static uint64_t widemul_32 (

void
p448_mul (
p448_t *__restrict__ cs,
const p448_t *as,
const p448_t *bs
gf_448_s *__restrict__ cs,
const gf_448_t as,
const gf_448_t bs
) {
const uint32_t *a = as->limb, *b = bs->limb;
uint32_t *c = cs->limb;
@@ -84,8 +84,8 @@ p448_mul (

void
p448_mulw (
p448_t *__restrict__ cs,
const p448_t *as,
gf_448_s *__restrict__ cs,
const gf_448_t as,
uint64_t b
) {
const uint32_t bhi = b>>28, blo = b & ((1<<28)-1);
@@ -128,15 +128,15 @@ p448_mulw (

void
p448_sqr (
p448_t *__restrict__ cs,
const p448_t *as
gf_448_s *__restrict__ cs,
const gf_448_t as
) {
p448_mul(cs,as,as); /* PERF */
}

void
p448_strong_reduce (
p448_t *a
gf_448_t a
) {
word_t mask = (1ull<<28)-1;

@@ -180,14 +180,14 @@ p448_strong_reduce (
void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
) {
int i,j;
p448_t red;
p448_copy(&red, x);
p448_strong_reduce(&red);
gf_448_t red;
p448_copy(red, x);
p448_strong_reduce(red);
for (i=0; i<8; i++) {
uint64_t limb = red.limb[2*i] + (((uint64_t)red.limb[2*i+1])<<28);
uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28);
for (j=0; j<7; j++) {
serial[7*i+j] = limb;
limb >>= 8;
@@ -198,7 +198,7 @@ p448_serialize (

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
) {
int i,j;


+ 32
- 32
src/p448/arch_32/f_impl.h View File

@@ -9,9 +9,9 @@
#include <stdint.h>
#include <assert.h>

typedef struct p448_t {
typedef struct gf_448_s {
uint32_t limb[16];
} __attribute__((aligned(32))) p448_t;
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];

#define LBITS 28
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS
@@ -24,69 +24,69 @@ extern "C" {

static __inline__ void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) __attribute__((unused,always_inline));
static __inline__ void
p448_weak_reduce (
p448_t *inout
gf_448_t inout
) __attribute__((unused,always_inline));
void
p448_strong_reduce (
p448_t *inout
gf_448_t inout
);
static __inline__ void
p448_bias (
p448_t *inout,
gf_448_t inout,
int amount
) __attribute__((unused,always_inline));

void
p448_mul (
p448_t *__restrict__ out,
const p448_t *a,
const p448_t *b
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
p448_mulw (
p448_t *__restrict__ out,
const p448_t *a,
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
);

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
);

@@ -94,9 +94,9 @@ p448_deserialize (

void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
@@ -112,9 +112,9 @@ p448_add_RAW (

void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
@@ -130,15 +130,15 @@ p448_sub_RAW (

void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) {
*out = *a;
}

void
p448_bias (
p448_t *a,
gf_448_t a,
int amt
) {
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
@@ -152,7 +152,7 @@ p448_bias (

void
p448_weak_reduce (
p448_t *a
gf_448_t a
) {
uint64_t mask = (1ull<<28) - 1;
uint64_t tmp = a->limb[15] >> 28;


+ 14
- 14
src/p448/arch_arm_32/f_impl.c View File

@@ -100,9 +100,9 @@ smull2 (

void
p448_mul (
p448_t *__restrict__ cs,
const p448_t *as,
const p448_t *bs
gf_448_s *__restrict__ cs,
const gf_448_t as,
const gf_448_t bs
) {
const uint32_t *a = as->limb, *b = bs->limb;
@@ -451,8 +451,8 @@ p448_mul (

void
p448_sqr (
p448_t *__restrict__ cs,
const p448_t *as
gf_448_s *__restrict__ cs,
const gf_448_t as
) {
const uint32_t *a = as->limb;
uint32_t *c = cs->limb;
@@ -749,8 +749,8 @@ p448_sqr (

void
p448_mulw (
p448_t *__restrict__ cs,
const p448_t *as,
gf_448_s *__restrict__ cs,
const gf_448_t as,
uint64_t b
) {
uint32_t mask = (1ull<<28)-1;
@@ -863,7 +863,7 @@ p448_mulw (

void
p448_strong_reduce (
p448_t *a
gf_448_t a
) {
word_t mask = (1ull<<28)-1;

@@ -907,14 +907,14 @@ p448_strong_reduce (
void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
) {
int i,j;
p448_t red;
p448_copy(&red, x);
p448_strong_reduce(&red);
gf_448_t red;
p448_copy(red, x);
p448_strong_reduce(red);
for (i=0; i<8; i++) {
uint64_t limb = red.limb[2*i] + (((uint64_t)red.limb[2*i+1])<<28);
uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28);
for (j=0; j<7; j++) {
serial[7*i+j] = limb;
limb >>= 8;
@@ -925,7 +925,7 @@ p448_serialize (

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
) {
int i,j;


+ 32
- 32
src/p448/arch_arm_32/f_impl.h View File

@@ -9,9 +9,9 @@
#include <stdint.h>
#include <assert.h>

typedef struct p448_t {
typedef struct gf_448_s {
uint32_t limb[16];
} __attribute__((aligned(32))) p448_t;
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];

#define LBITS 28
#define LIMB(x) (x##ull)&((1ull<<LBITS)-1), (x##ull)>>LBITS
@@ -24,69 +24,69 @@ extern "C" {

static __inline__ void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) __attribute__((unused,always_inline));
static __inline__ void
p448_weak_reduce (
p448_t *inout
gf_448_t inout
) __attribute__((unused,always_inline));
void
p448_strong_reduce (
p448_t *inout
gf_448_t inout
);
static __inline__ void
p448_bias (
p448_t *inout,
gf_448_t inout,
int amount
) __attribute__((unused,always_inline));

void
p448_mul (
p448_t *__restrict__ out,
const p448_t *a,
const p448_t *b
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
p448_mulw (
p448_t *__restrict__ out,
const p448_t *a,
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
);

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
);

@@ -94,9 +94,9 @@ p448_deserialize (

void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
@@ -112,9 +112,9 @@ p448_add_RAW (

void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
@@ -130,15 +130,15 @@ p448_sub_RAW (

void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) {
*out = *a;
}

void
p448_bias (
p448_t *a,
gf_448_t a,
int amt
) {
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
@@ -152,7 +152,7 @@ p448_bias (

void
p448_weak_reduce (
p448_t *a
gf_448_t a
) {
uint64_t mask = (1ull<<28) - 1;
uint64_t tmp = a->limb[15] >> 28;


+ 14
- 14
src/p448/arch_neon_experimental/f_impl.c View File

@@ -70,9 +70,9 @@ smull2 (

void
p448_mul (
p448_t *__restrict__ cs,
const p448_t *as,
const p448_t *bs
gf_448_s *__restrict__ cs,
const gf_448_t as,
const gf_448_t bs
) {
#define _bl0 "q0"
#define _bl0_0 "d0"
@@ -369,8 +369,8 @@ p448_mul (

void
p448_sqr (
p448_t *__restrict__ cs,
const p448_t *bs
gf_448_s *__restrict__ cs,
const gf_448_t bs
) {
int32x2_t *vc = (int32x2_t*) cs->limb;

@@ -570,8 +570,8 @@ p448_sqr (

void
p448_mulw (
p448_t *__restrict__ cs,
const p448_t *as,
gf_448_s *__restrict__ cs,
const gf_448_t as,
uint64_t b
) {
uint32x2_t vmask = {(1<<28) - 1, (1<<28)-1};
@@ -621,7 +621,7 @@ p448_mulw (
/* PERF: vectorize? */
void
p448_strong_reduce (
p448_t *a
gf_448_t a
) {
word_t mask = (1ull<<28)-1;

@@ -665,15 +665,15 @@ p448_strong_reduce (
void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
) {
int i,j;
p448_t red;
p448_copy(&red, x);
p448_strong_reduce(&red);
gf_448_t red;
p448_copy(red, x);
p448_strong_reduce(red);
for (i=0; i<8; i++) {
uint64_t limb = red.limb[LIMBPERM(2*i)] + (((uint64_t)red.limb[LIMBPERM(2*i+1)])<<28);
uint64_t limb = red->limb[LIMBPERM(2*i)] + (((uint64_t)red->limb[LIMBPERM(2*i+1)])<<28);
for (j=0; j<7; j++) {
serial[7*i+j] = limb;
limb >>= 8;
@@ -684,7 +684,7 @@ p448_serialize (

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
) {
int i,j;


+ 32
- 32
src/p448/arch_neon_experimental/f_impl.h View File

@@ -9,9 +9,9 @@
#include <stdint.h>
#include <assert.h>

typedef struct p448_t {
typedef struct gf_448_s {
uint32_t limb[16];
} __attribute__((aligned(32))) p448_t;
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];

#define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15)
#define USE_NEON_PERM 1
@@ -30,69 +30,69 @@ extern "C" {

static __inline__ void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) __attribute__((unused,always_inline));
static __inline__ void
p448_weak_reduce (
p448_t *inout
gf_448_t inout
) __attribute__((unused,always_inline));
void
p448_strong_reduce (
p448_t *inout
gf_448_t inout
);
static __inline__ void
p448_bias (
p448_t *inout,
gf_448_t inout,
int amount
) __attribute__((unused,always_inline));

void
p448_mul (
p448_t *__restrict__ out,
const p448_t *a,
const p448_t *b
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
p448_mulw (
p448_t *__restrict__ out,
const p448_t *a,
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
);

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
);

@@ -100,9 +100,9 @@ p448_deserialize (

void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
@@ -112,9 +112,9 @@ p448_add_RAW (

void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint32xn_t); i++) {
@@ -130,15 +130,15 @@ p448_sub_RAW (

void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) {
*out = *a;
}

void
p448_bias (
p448_t *a,
gf_448_t a,
int amt
) {
uint32_t co1 = ((1ull<<28)-1)*amt, co2 = co1-amt;
@@ -152,7 +152,7 @@ p448_bias (

void
p448_weak_reduce (
p448_t *a
gf_448_t a
) {

uint32x2_t *aa = (uint32x2_t*) a, vmask = {(1ull<<28)-1, (1ull<<28)-1}, vm2 = {0,-1},


+ 16
- 16
src/p448/arch_ref64/f_impl.c View File

@@ -18,9 +18,9 @@ static __inline__ uint64_t is_zero(uint64_t a) {

void
p448_mul (
p448_t *__restrict__ cs,
const p448_t *as,
const p448_t *bs
gf_448_s *__restrict__ cs,
const gf_448_t as,
const gf_448_t bs
) {
const uint64_t *a = as->limb, *b = bs->limb;
uint64_t *c = cs->limb;
@@ -184,8 +184,8 @@ p448_mul (

void
p448_mulw (
p448_t *__restrict__ cs,
const p448_t *as,
gf_448_s *__restrict__ cs,
const gf_448_t as,
uint64_t b
) {
const uint64_t *a = as->limb;
@@ -213,8 +213,8 @@ p448_mulw (

void
p448_sqr (
p448_t *__restrict__ cs,
const p448_t *as
gf_448_s *__restrict__ cs,
const gf_448_t as
) {
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;
@@ -328,7 +328,7 @@ p448_sqr (

void
p448_strong_reduce (
p448_t *a
gf_448_t a
) {
uint64_t mask = (1ull<<56)-1;

@@ -372,24 +372,24 @@ p448_strong_reduce (
void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
) {
int i,j;
p448_t red;
p448_copy(&red, x);
p448_strong_reduce(&red);
gf_448_t red;
p448_copy(red, x);
p448_strong_reduce(red);
for (i=0; i<8; i++) {
for (j=0; j<7; j++) {
serial[7*i+j] = red.limb[i];
red.limb[i] >>= 8;
serial[7*i+j] = red->limb[i];
red->limb[i] >>= 8;
}
assert(red.limb[i] == 0);
assert(red->limb[i] == 0);
}
}

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
) {
int i,j;


+ 32
- 32
src/p448/arch_ref64/f_impl.h View File

@@ -10,9 +10,9 @@

#include "word.h"

typedef struct p448_t {
typedef struct gf_448_s {
uint64_t limb[8];
} __attribute__((aligned(32))) p448_t;
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];

#define LBITS 56
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
@@ -23,69 +23,69 @@ extern "C" {

static __inline__ void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused));
static __inline__ void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused));
static __inline__ void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) __attribute__((unused));
static __inline__ void
p448_weak_reduce (
p448_t *inout
gf_448_t inout
) __attribute__((unused));
void
p448_strong_reduce (
p448_t *inout
gf_448_t inout
);

static __inline__ void
p448_bias (
p448_t *inout,
gf_448_t inout,
int amount
) __attribute__((unused));
void
p448_mul (
p448_t *__restrict__ out,
const p448_t *a,
const p448_t *b
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
p448_mulw (
p448_t *__restrict__ out,
const p448_t *a,
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
);

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
);

@@ -93,9 +93,9 @@ p448_deserialize (

void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
for (i=0; i<8; i++) {
@@ -106,9 +106,9 @@ p448_add_RAW (

void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2;
@@ -120,15 +120,15 @@ p448_sub_RAW (

void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) {
memcpy(out,a,sizeof(*a));
}

void
p448_bias (
p448_t *a,
gf_448_t a,
int amt
) {
(void) a;
@@ -137,7 +137,7 @@ p448_bias (

void
p448_weak_reduce (
p448_t *a
gf_448_t a
) {
uint64_t mask = (1ull<<56) - 1;
uint64_t tmp = a->limb[7] >> 56;


+ 16
- 16
src/p448/arch_x86_64/f_impl.c View File

@@ -7,9 +7,9 @@

void
p448_mul (
p448_t *__restrict__ cs,
const p448_t *as,
const p448_t *bs
gf_448_s *__restrict__ cs,
const gf_448_t as,
const gf_448_t bs
) {
const uint64_t *a = as->limb, *b = bs->limb;
uint64_t *c = cs->limb;
@@ -147,8 +147,8 @@ p448_mul (

void
p448_mulw (
p448_t *__restrict__ cs,
const p448_t *as,
gf_448_s *__restrict__ cs,
const gf_448_t as,
uint64_t b
) {
const uint64_t *a = as->limb;
@@ -192,8 +192,8 @@ p448_mulw (

void
p448_sqr (
p448_t *__restrict__ cs,
const p448_t *as
gf_448_s *__restrict__ cs,
const gf_448_t as
) {
const uint64_t *a = as->limb;
uint64_t *c = cs->limb;
@@ -307,7 +307,7 @@ p448_sqr (

void
p448_strong_reduce (
p448_t *a
gf_448_t a
) {
uint64_t mask = (1ull<<56)-1;

@@ -351,24 +351,24 @@ p448_strong_reduce (
void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
) {
int i,j;
p448_t red;
p448_copy(&red, x);
p448_strong_reduce(&red);
gf_448_t red;
p448_copy(red, x);
p448_strong_reduce(red);
for (i=0; i<8; i++) {
for (j=0; j<7; j++) {
serial[7*i+j] = red.limb[i];
red.limb[i] >>= 8;
serial[7*i+j] = red->limb[i];
red->limb[i] >>= 8;
}
assert(red.limb[i] == 0);
assert(red->limb[i] == 0);
}
}

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
) {
int i,j;


+ 35
- 32
src/p448/arch_x86_64/f_impl.h View File

@@ -9,9 +9,12 @@

#include "word.h"

typedef struct p448_t {
#ifndef __DECAF_448_H__ // HACK FIXME
#define DECAF_WORD_BITS 64
typedef struct gf_448_s {
uint64_t limb[8];
} __attribute__((aligned(32))) p448_t;
} __attribute__((aligned(32))) gf_448_s, gf_448_t[1];
#endif

#define LBITS 56
#define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
@@ -22,69 +25,69 @@ extern "C" {

static __inline__ void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) __attribute__((unused,always_inline));
static __inline__ void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) __attribute__((unused,always_inline));
static __inline__ void
p448_weak_reduce (
p448_t *inout
gf_448_t inout
) __attribute__((unused,always_inline));
void
p448_strong_reduce (
p448_t *inout
gf_448_t inout
);

static __inline__ void
p448_bias (
p448_t *inout,
gf_448_t inout,
int amount
) __attribute__((unused,always_inline));
void
p448_mul (
p448_t *__restrict__ out,
const p448_t *a,
const p448_t *b
gf_448_s *__restrict__ out,
const gf_448_t a,
const gf_448_t b
);

void
p448_mulw (
p448_t *__restrict__ out,
const p448_t *a,
gf_448_s *__restrict__ out,
const gf_448_t a,
uint64_t b
);

void
p448_sqr (
p448_t *__restrict__ out,
const p448_t *a
gf_448_s *__restrict__ out,
const gf_448_t a
);

void
p448_serialize (
uint8_t *serial,
const struct p448_t *x
const gf_448_t x
);

mask_t
p448_deserialize (
p448_t *x,
gf_448_t x,
const uint8_t serial[56]
);

@@ -92,9 +95,9 @@ p448_deserialize (

void
p448_add_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
@@ -110,9 +113,9 @@ p448_add_RAW (

void
p448_sub_RAW (
p448_t *out,
const p448_t *a,
const p448_t *b
gf_448_t out,
const gf_448_t a,
const gf_448_t b
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(uint64xn_t); i++) {
@@ -128,8 +131,8 @@ p448_sub_RAW (

void
p448_copy (
p448_t *out,
const p448_t *a
gf_448_t out,
const gf_448_t a
) {
unsigned int i;
for (i=0; i<sizeof(*out)/sizeof(big_register_t); i++) {
@@ -139,7 +142,7 @@ p448_copy (

void
p448_bias (
p448_t *a,
gf_448_t a,
int amt
) {
uint64_t co1 = ((1ull<<56)-1)*amt, co2 = co1-amt;
@@ -166,7 +169,7 @@ p448_bias (

void
p448_weak_reduce (
p448_t *a
gf_448_t a
) {
/* PERF: use pshufb/palignr if anyone cares about speed of this */
uint64_t mask = (1ull<<56) - 1;


+ 2
- 1
src/p448/f_field.h View File

@@ -15,7 +15,8 @@
#include "f_impl.h"
#define GF_LIT_LIMB_BITS 56
#define GF_BITS 448
#define gf p448_t
#define gf gf_448_t
#define gf_s gf_448_s
#define gf_mul p448_mul
#define gf_sqr p448_sqr
#define gf_add_RAW p448_add_RAW


+ 1
- 0
src/public_include/decaf.hxx View File

@@ -3,6 +3,7 @@
#define __DECAF_HXX__ 1

#include <decaf/decaf_255.hxx> // MAGIC
#include <decaf/decaf_448.hxx> // MAGIC

#endif /* __DECAF_HXX__ */


+ 10
- 4
src/public_include/decaf/decaf_255.hxx View File

@@ -46,7 +46,13 @@ namespace decaf {
/**
* @brief Curve25519/Decaf instantiation of group.
*/
struct Ed255 {
struct IsoEd25519 {
/** The name of the curve */
static inline const char *name() { return "IsoEd25519"; }

/** The curve's cofactor (removed, but useful for testing) */
static const int REMOVED_COFACTOR = 8;

/** @cond internal */
class Point;
@@ -533,17 +539,17 @@ public:
/** @endcond */
};

}; /* struct Ed255 */
}; /* struct IsoEd25519 */



/** @cond internal */
inline SecureBuffer Ed255::Scalar::direct_scalarmul (
inline SecureBuffer IsoEd25519::Scalar::direct_scalarmul (
const Block &in,
decaf_bool_t allow_identity,
decaf_bool_t short_circuit
) const throw(CryptoException) {
SecureBuffer out(Ed255::Point::SER_BYTES);
SecureBuffer out(IsoEd25519::Point::SER_BYTES);
if (!decaf_255_direct_scalarmul(out, in.data(), s, allow_identity, short_circuit))
throw CryptoException();
return out;


+ 16
- 1
src/public_include/decaf/decaf_448.h View File

@@ -426,7 +426,7 @@ decaf_bool_t decaf_448_point_valid (
) API_VIS WARN_UNUSED NONNULL1 NOINLINE;

/**
* @brief 2-torque a point, for debugging purposes.
* @brief Torque a point, for debugging purposes.
*
* @param [out] q The point to torque.
* @param [in] p The point to torque.
@@ -436,6 +436,21 @@ void decaf_448_point_debugging_torque (
const decaf_448_point_t p
) API_VIS NONNULL2 NOINLINE;

/**
* @brief Projectively scale a point, for debugging purposes.
* The output will be equal to the input, and will be valid
* even if the factor is zero.
*
* @param [out] q The scaled point.
* @param [in] p The point to scale.
* @param [in] factor Serialized GF factor to scale by
* (DECAF_448_SER_BYTES bytes).
*/
void decaf_448_point_debugging_pscale (
decaf_448_point_t q,
const decaf_448_point_t p,
const unsigned char factor[DECAF_448_SER_BYTES]
) API_VIS NONNULL2 NOINLINE;

/**
* @brief Almost-Elligator-like hash to curve.
*


+ 7
- 1
src/public_include/decaf/decaf_448.hxx View File

@@ -46,7 +46,13 @@ namespace decaf {
/**
* @brief Ed448-Goldilocks/Decaf instantiation of group.
*/
struct Ed448 {
struct Ed448Goldilocks {
/** The name of the curve */
static inline const char *name() { return "Ed448-Goldilocks"; }

/** The curve's cofactor (removed, but useful for testing) */
static const int REMOVED_COFACTOR = 4;

/** @cond internal */
class Point;


+ 68
- 47
test/bench_decaf.cxx View File

@@ -20,9 +20,6 @@
#include <algorithm>

using namespace decaf;
typedef Ed255::Scalar Scalar;
typedef Ed255::Point Point;
typedef Ed255::Precomputed Precomputed;


static __inline__ void __attribute__((unused)) ignore_result ( int result ) { (void)result; }
@@ -140,6 +137,13 @@ public:

double Benchmark::totalCy = 0, Benchmark::totalS = 0;


template<typename Group> struct Benches {

typedef typename Group::Scalar Scalar;
typedef typename Group::Point Point;
typedef typename Group::Precomputed Precomputed;

static void tdh (
SpongeRng &clientRng,
SpongeRng &serverRng,
@@ -274,6 +278,62 @@ static void spake2ee(
server.respec(STROBE_KEYED_128);
}

/**
* Run the protocol-level ("macro") benchmarks for the curve Group.
*
* Prints a header containing Group::name(), then times the spake2ee,
* fhmqv and tdh helpers, each inside its own Benchmark loop.
*/
static void macro() {
printf("\nMacro-benchmarks for %s:\n", Group::name());
printf("Protocol benchmarks:\n");
/* Client and server RNGs are seeded from fixed strings. */
SpongeRng clientRng(Block("client rng seed"));
SpongeRng serverRng(Block("server rng seed"));
SecureBuffer hashedPassword("hello world");
/* NOTE(review): the meaning of Benchmark's second argument (0.1) is not visible here. */
for (Benchmark b("Spake2ee c+s",0.1); b.iter(); ) {
spake2ee(clientRng, serverRng, hashedPassword,false);
}
/* Same call with the last argument true; presumably the augmented variant - confirm against spake2ee. */
for (Benchmark b("Spake2ee c+s aug",0.1); b.iter(); ) {
spake2ee(clientRng, serverRng, hashedPassword,true);
}
/* Inputs shared by the FHMQV and TripleDH runs: one scalar per side, built from
that side's RNG, and Precomputed::base() times that scalar held as a SecureBuffer. */
Scalar x(clientRng);
SecureBuffer gx(Precomputed::base() * x);
Scalar y(serverRng);
SecureBuffer gy(Precomputed::base() * y);
for (Benchmark b("FHMQV c+s",0.1); b.iter(); ) {
fhmqv(clientRng, serverRng,x,gx,y,gy);
}
for (Benchmark b("TripleDH anon c+s",0.1); b.iter(); ) {
tdh(clientRng, serverRng, x,gx,y,gy);
}
}

/**
* Run the per-curve micro-benchmarks for the curve Group.
*
* Prints a header containing Group::name(), then times individual Scalar
* and Point operations, each inside its own Benchmark loop. The operands
* are default-constructed and several loops update them in place, so later
* loops operate on whatever values earlier loops left behind.
*/
static void micro() {
SpongeRng rng(Block("per-curve-benchmarks"));
Precomputed pBase;
Point p,q;
Scalar s,t;
/* ep starts empty and is first assigned by the "Point encode" loop below;
ep2 is constructed with Point::SER_BYTES*2 (presumably a buffer of that length - confirm). */
SecureBuffer ep, ep2(Point::SER_BYTES*2);
printf("\nMicro-benchmarks for %s:\n", Group::name());
/* Scalar arithmetic; += and *= update s in place, the inverse() result is discarded. */
for (Benchmark b("Scalar add", 1000); b.iter(); ) { s+=t; }
for (Benchmark b("Scalar times", 100); b.iter(); ) { s*=t; }
for (Benchmark b("Scalar inv", 1); b.iter(); ) { s.inverse(); }
/* Point arithmetic; the scalarmul result is discarded. */
for (Benchmark b("Point add", 100); b.iter(); ) { p += q; }
for (Benchmark b("Point double", 100); b.iter(); ) { p.double_in_place(); }
for (Benchmark b("Point scalarmul"); b.iter(); ) { p * s; }
/* Conversion between Point and SecureBuffer; decode consumes the ep produced by encode. */
for (Benchmark b("Point encode"); b.iter(); ) { ep = SecureBuffer(p); }
for (Benchmark b("Point decode"); b.iter(); ) { p = Point(ep); }
for (Benchmark b("Point create/destroy"); b.iter(); ) { Point r; }
/* from_hash / invert_elligator: the "nonuniform" runs use ep, the "uniform" runs use ep2. */
for (Benchmark b("Point hash nonuniform"); b.iter(); ) { Point::from_hash(ep); }
for (Benchmark b("Point hash uniform"); b.iter(); ) { Point::from_hash(ep2); }
for (Benchmark b("Point unhash nonuniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep,0)); }
for (Benchmark b("Point unhash uniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep2,0)); }
for (Benchmark b("Point steg"); b.iter(); ) { p.steg_encode(rng); }
/* Multi-operand and precomputed-table scalar multiplications; results are discarded. */
for (Benchmark b("Point double scalarmul"); b.iter(); ) { Point::double_scalarmul(p,s,q,t); }
for (Benchmark b("Point precmp scalarmul"); b.iter(); ) { pBase * s; }
}

}; /* template<typename Group> struct Benches */

int main(int argc, char **argv) {
bool micro = false;
if (argc >= 2 && !strcmp(argv[1], "--micro"))
@@ -293,10 +353,6 @@ int main(int argc, char **argv) {


if (micro) {
Precomputed pBase;
Point p,q;
Scalar s,t;
SecureBuffer ep, ep2(Point::SER_BYTES*2);
SpongeRng rng(Block("micro-benchmarks"));
printf("\nMicro-benchmarks:\n");
@@ -325,25 +381,12 @@ int main(int argc, char **argv) {
for (Benchmark b("STROBEk256 1kiB", 10); b.iter(); ) {
strobe.encrypt_no_auth(TmpBuffer(b1024,1024),TmpBuffer(b1024,1024),b.i>1);
}
for (Benchmark b("Scalar add", 1000); b.iter(); ) { s+=t; }
for (Benchmark b("Scalar times", 100); b.iter(); ) { s*=t; }
for (Benchmark b("Scalar inv", 1); b.iter(); ) { s.inverse(); }
for (Benchmark b("Point add", 100); b.iter(); ) { p += q; }
for (Benchmark b("Point double", 100); b.iter(); ) { p.double_in_place(); }
for (Benchmark b("Point scalarmul"); b.iter(); ) { p * s; }
for (Benchmark b("Point encode"); b.iter(); ) { ep = SecureBuffer(p); }
for (Benchmark b("Point decode"); b.iter(); ) { p = Point(ep); }
for (Benchmark b("Point create/destroy"); b.iter(); ) { Point r; }
for (Benchmark b("Point hash nonuniform"); b.iter(); ) { Point::from_hash(ep); }
for (Benchmark b("Point hash uniform"); b.iter(); ) { Point::from_hash(ep2); }
for (Benchmark b("Point unhash nonuniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep,0)); }
for (Benchmark b("Point unhash uniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep2,0)); }
for (Benchmark b("Point steg"); b.iter(); ) { p.steg_encode(rng); }
for (Benchmark b("Point double scalarmul"); b.iter(); ) { Point::double_scalarmul(p,s,q,t); }
for (Benchmark b("Point precmp scalarmul"); b.iter(); ) { pBase * s; }
/* TODO: scalarmul for verif, etc */
Benches<IsoEd25519>::micro();
Benches<Ed448Goldilocks>::micro();
}

/* TODO: 255->448 */
printf("\nMacro-benchmarks:\n");
for (Benchmark b("Keygen"); b.iter(); ) {
decaf_255_derive_private_key(s1,r1);
@@ -369,31 +412,9 @@ int main(int argc, char **argv) {
umessage[1]^=umessage[0];
ignore_result(ret);
}

printf("\nProtocol benchmarks:\n");
SpongeRng clientRng(Block("client rng seed"));
SpongeRng serverRng(Block("server rng seed"));
SecureBuffer hashedPassword("hello world");
for (Benchmark b("Spake2ee c+s",0.1); b.iter(); ) {
spake2ee(clientRng, serverRng, hashedPassword,false);
}
for (Benchmark b("Spake2ee c+s aug",0.1); b.iter(); ) {
spake2ee(clientRng, serverRng, hashedPassword,true);
}
Scalar x(clientRng);
SecureBuffer gx(Precomputed::base() * x);
Scalar y(serverRng);
SecureBuffer gy(Precomputed::base() * y);
for (Benchmark b("FHMQV c+s",0.1); b.iter(); ) {
fhmqv(clientRng, serverRng,x,gx,y,gy);
}
for (Benchmark b("TripleDH anon c+s",0.1); b.iter(); ) {
tdh(clientRng, serverRng, x,gx,y,gy);
}
Benches<IsoEd25519>::macro();
Benches<Ed448Goldilocks>::macro();
printf("\n");
Benchmark::calib();


+ 12
- 5
test/test_decaf.cxx View File

@@ -164,7 +164,7 @@ static void test_elligator() {
decaf::SpongeRng rng(decaf::Block("test_elligator"));
Test test("Elligator");
const int NHINTS = 1<<4;
const int NHINTS = Group::REMOVED_COFACTOR * 2;
decaf::SecureBuffer *alts[NHINTS];
bool successes[NHINTS];
decaf::SecureBuffer *alts2[NHINTS];
@@ -312,7 +312,7 @@ static void test_ec() {

}; // template<decaf::GroupId GROUP>

// FIXME cross-field
static void test_decaf() {
Test test("Sample crypto");
decaf::SpongeRng rng(decaf::Block("test_decaf"));
@@ -350,11 +350,18 @@ static void test_decaf() {
int main(int argc, char **argv) {
(void) argc; (void) argv;
Tests<decaf::Ed255>::test_arithmetic();
Tests<decaf::Ed255>::test_elligator();
Tests<decaf::Ed255>::test_ec();
printf("Testing %s:\n", decaf::IsoEd25519::name());
Tests<decaf::IsoEd25519>::test_arithmetic();
Tests<decaf::IsoEd25519>::test_elligator();
Tests<decaf::IsoEd25519>::test_ec();
test_decaf();
printf("\n");
printf("Testing %s:\n", decaf::Ed448Goldilocks::name());
Tests<decaf::Ed448Goldilocks>::test_arithmetic();
Tests<decaf::Ed448Goldilocks>::test_elligator();
Tests<decaf::Ed448Goldilocks>::test_ec();
if (passing) printf("Passed all tests.\n");
return passing ? 0 : 1;


Loading…
Cancel
Save