From cdab4953388af472369616fa32c4285dbee1a499 Mon Sep 17 00:00:00 2001 From: Michael Hamburg Date: Tue, 14 Jul 2015 18:39:28 -0700 Subject: [PATCH] Cross-curve compilation working! Still a bunch of FIXMEs though --- Makefile | 146 +++++++++++---------- src/curve_ed25519/curve_data.inc.c | 25 +++- src/curve_ed25519/field | 1 - src/curve_ed448goldilocks/curve_data.inc.c | 29 +++- src/curve_ed448goldilocks/field | 1 - src/decaf.c | 16 --- src/decaf_fast.c | 99 ++++++++++---- src/decaf_gen_tables.c | 13 +- src/p25519/f_field.h | 2 +- src/p448/arch_32/f_impl.c | 28 ++-- src/p448/arch_32/f_impl.h | 64 ++++----- src/p448/arch_arm_32/f_impl.c | 28 ++-- src/p448/arch_arm_32/f_impl.h | 64 ++++----- src/p448/arch_neon_experimental/f_impl.c | 28 ++-- src/p448/arch_neon_experimental/f_impl.h | 64 ++++----- src/p448/arch_ref64/f_impl.c | 32 ++--- src/p448/arch_ref64/f_impl.h | 64 ++++----- src/p448/arch_x86_64/f_impl.c | 32 ++--- src/p448/arch_x86_64/f_impl.h | 67 +++++----- src/p448/f_field.h | 3 +- src/public_include/decaf.hxx | 1 + src/public_include/decaf/decaf_255.hxx | 14 +- src/public_include/decaf/decaf_448.h | 17 ++- src/public_include/decaf/decaf_448.hxx | 8 +- test/bench_decaf.cxx | 115 +++++++++------- test/test_decaf.cxx | 17 ++- 26 files changed, 551 insertions(+), 427 deletions(-) delete mode 120000 src/curve_ed25519/field delete mode 120000 src/curve_ed448goldilocks/field diff --git a/Makefile b/Makefile index 3d34a49..b9221f7 100644 --- a/Makefile +++ b/Makefile @@ -39,8 +39,6 @@ else ARCH ?= arch_ref32 endif -FIELD ?= p25519 - WARNFLAGS = -pedantic -Wall -Wextra -Werror -Wunreachable-code \ -Wmissing-declarations -Wunused-function -Wno-overlength-strings $(EXWARN) @@ -79,22 +77,13 @@ SAGES= $(shell ls test/*.sage) BUILDPYS= $(SAGES:test/%.sage=$(BUILD_PY)/%.py) .PHONY: clean all test bench todo doc lib bat sage sagetest -.PRECIOUS: $(BUILD_ASM)/%.s $(BUILD_ASM)/%_impl.s $(BUILD_ASM)/$(DECAF)_%.s $(BUILD_ASM)/decaf_tables_%.c \ - $(BUILD_IBIN)/decaf_gen_tables_% - -HEADERS= Makefile $(shell find src test -name "*.h") $(shell find . -name "*.hxx") $(BUILD_OBJ)/timestamp +.PRECIOUS: $(BUILD_ASM)/%.s $(BUILD_C)/%.c $(BUILD_IBIN)/% -# components needed by the table generators -GENCOMPONENTS= \ - $(BUILD_OBJ)/$(DECAF)_ed25519.o $(BUILD_OBJ)/p25519_impl.o $(BUILD_OBJ)/p25519_arithmetic.o \ - $(BUILD_OBJ)/utils.o \ - #$(BUILD_OBJ)/p448_impl.o $(BUILD_OBJ)/p448_arithmetic.o +HEADERS= Makefile $(shell find src test -name "*.h") $(BUILD_OBJ)/timestamp +HEADERSXX = $(HEADERS) $(shell find . -name "*.hxx") # components needed by the lib -DECAFCOMPONENTS= $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/decaf_crypto.o $(GENCOMPONENTS) -ifeq ($(DECAF),decaf_fast) -DECAFCOMPONENTS += $(BUILD_OBJ)/decaf_tables_ed25519.o -endif +LIBCOMPONENTS = $(BUILD_OBJ)/utils.o $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/decaf_crypto.o # and per-field components BENCHCOMPONENTS = $(BUILD_OBJ)/bench.o $(BUILD_OBJ)/shake.o @@ -105,26 +94,7 @@ scan: clean -enable-checker deadcode -enable-checker llvm \ -enable-checker osx -enable-checker security -enable-checker unix \ make all - -# The shakesum utility is in the public bin directory. -$(BUILD_BIN)/shakesum: $(BUILD_OBJ)/shakesum.o $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/utils.o - $(LD) $(LDFLAGS) -o $@ $^ - -# The main decaf library, and its symlinks. -lib: $(BUILD_LIB)/libdecaf.so - -$(BUILD_LIB)/libdecaf.so: $(BUILD_LIB)/libdecaf.so.1 - ln -sf `basename $^` $@ -$(BUILD_LIB)/libdecaf.so.1: $(DECAFCOMPONENTS) - rm -f $@ -ifeq ($(UNAME),Darwin) - libtool -macosx_version_min 10.6 -dynamic -dead_strip -lc -x -o $@ \ - $(DECAFCOMPONENTS) -else - $(LD) $(LDFLAGS) -shared -Wl,-soname,`basename $@` -Wl,--gc-sections -o $@ $(DECAFCOMPONENTS) - strip --discard-all $@ -endif # Internal test programs, which are not part of the final build/bin directory. $(BUILD_IBIN)/test: $(BUILD_OBJ)/test_decaf.o lib @@ -150,50 +120,86 @@ $(BUILD_OBJ)/timestamp: $(BUILD_OBJ)/%.o: $(BUILD_ASM)/%.s $(ASM) $(ASFLAGS) -c -o $@ $< -# I don't know why this rule is necessary... bug in make, or obscure pattern matching rule? -$(BUILD_OBJ)/decaf_gen_tables_%.o: $(BUILD_ASM)/decaf_gen_tables_%.s - $(ASM) $(ASFLAGS) -c -o $@ $< +################################################################ +# Per-field code: call with field, arch +################################################################ +define define_field +ARCH_FOR_$(1) = $(2) +COMPONENTS_OF_$(1) = $$(BUILD_OBJ)/$(1)_impl.o $$(BUILD_OBJ)/$(1)_arithmetic.o +LIBCOMPONENTS += $$(COMPONENTS_OF_$(1)) + +$$(BUILD_ASM)/$(1)_arithmetic.s: src/$(1)/f_arithmetic.c $$(HEADERS) + $$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$(2) -S -c -o $$@ $$< + +$$(BUILD_ASM)/$(1)_impl.s: src/$(1)/$(2)/f_impl.c $$(HEADERS) + $$(CC) $$(CFLAGS) -I src/$(1) -I src/$(1)/$(2) -S -c -o $$@ $$< +endef + +################################################################ +# Per-field, per-curve code: call with curve, field +################################################################ +define define_curve +$$(BUILD_IBIN)/decaf_gen_tables_$(1): $$(BUILD_OBJ)/decaf_gen_tables_$(1).o $$(BUILD_OBJ)/decaf_fast_$(1).o $$(BUILD_OBJ)/utils.o \ + $$(COMPONENTS_OF_$(2)) + $$(LD) $$(LDFLAGS) -o $$@ $$^ + +$$(BUILD_C)/decaf_tables_$(1).c: $$(BUILD_IBIN)/decaf_gen_tables_$(1) + ./$$< > $$@ || (rm $$@; exit 1) + +$$(BUILD_ASM)/decaf_tables_$(1).s: $$(BUILD_C)/decaf_tables_$(1).c $$(HEADERS) + $$(CC) $$(CFLAGS) -S -c -o $$@ $$< \ + -I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \ + +$$(BUILD_ASM)/decaf_gen_tables_$(1).s: src/decaf_gen_tables.c $$(HEADERS) + $$(CC) $$(CFLAGS) \ + -I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \ + -S -c -o $$@ $$< + +$$(BUILD_ASM)/decaf_fast_$(1).s: src/decaf_fast.c $$(HEADERS) + $$(CC) $$(CFLAGS) \ + -I src/curve_$(1)/ -I src/$(2) -I src/$(2)/$$(ARCH_FOR_$(2)) \ + -S -c -o $$@ $$< + +LIBCOMPONENTS += $$(BUILD_OBJ)/decaf_fast_$(1).o $$(BUILD_OBJ)/decaf_tables_$(1).o +endef + +################################################################ +# call code above to generate curves and fields +$(eval $(call define_field,p25519,arch_x86_64)) +$(eval $(call define_curve,ed25519,p25519)) +$(eval $(call define_field,p448,arch_x86_64)) +$(eval $(call define_curve,ed448goldilocks,p448)) -$(BUILD_IBIN)/decaf_gen_tables_%: $(BUILD_OBJ)/decaf_gen_tables_%.o $(GENCOMPONENTS) + +# The shakesum utility is in the public bin directory. +$(BUILD_BIN)/shakesum: $(BUILD_OBJ)/shakesum.o $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/utils.o $(LD) $(LDFLAGS) -o $@ $^ - -$(BUILD_C)/decaf_tables_%.c: $(BUILD_IBIN)/decaf_gen_tables_% - ./$< > $@ - -$(BUILD_ASM)/decaf_tables_%.s: $(BUILD_C)/decaf_tables_%.c $(HEADERS) - $(CC) $(CFLAGS) -S -c -o $@ $< \ - -I src/curve_$*/ -I src/curve_$*/field -I src/curve_$*/field/$(ARCH) \ - -$(BUILD_ASM)/decaf_gen_tables_%.s: src/decaf_gen_tables.c $(HEADERS) - $(CC) $(CFLAGS) \ - -I src/curve_$*/ -I src/curve_$*/field -I src/curve_$*/field/$(ARCH) \ - -S -c -o $@ $< - -$(BUILD_ASM)/decaf_fast_%.s: src/decaf_fast.c $(HEADERS) - $(CC) $(CFLAGS) \ - -I src/curve_$*/ -I src/curve_$*/field -I src/curve_$*/field/$(ARCH) \ - -S -c -o $@ $< - -$(BUILD_ASM)/%_arithmetic.s: src/%/f_arithmetic.c $(HEADERS) - $(CC) $(CFLAGS) \ - -I src/$* -I src/$*/$(ARCH) \ - -S -c -o $@ $< - -$(BUILD_ASM)/%_impl.s: src/%/$(ARCH)/f_impl.c $(HEADERS) - $(CC) $(CFLAGS) \ - -I src/$* -I src/$*/$(ARCH) \ - -S -c -o $@ $< - + +# The main decaf library, and its symlinks. +lib: $(BUILD_LIB)/libdecaf.so + +$(BUILD_LIB)/libdecaf.so: $(BUILD_LIB)/libdecaf.so.1 + ln -sf `basename $^` $@ + +$(BUILD_LIB)/libdecaf.so.1: $(LIBCOMPONENTS) + rm -f $@ +ifeq ($(UNAME),Darwin) + libtool -macosx_version_min 10.6 -dynamic -dead_strip -lc -x -o $@ \ + $(LIBCOMPONENTS) +else + $(LD) $(LDFLAGS) -shared -Wl,-soname,`basename $@` -Wl,--gc-sections -o $@ $(LIBCOMPONENTS) + strip --discard-all $@ +endif + + + $(BUILD_ASM)/%.s: src/%.c $(HEADERS) $(CC) $(CFLAGS) -S -c -o $@ $< -$(BUILD_ASM)/%.s: src/%.cxx $(HEADERS) - $(CXX) $(CXXFLAGS) -S -c -o $@ $< - $(BUILD_ASM)/%.s: test/%.c $(HEADERS) $(CC) $(CFLAGS) -S -c -o $@ $< -$(BUILD_ASM)/%.s: test/%.cxx $(HEADERS) +$(BUILD_ASM)/%.s: test/%.cxx $(HEADERSXX) $(CXX) $(CXXFLAGS) -S -c -o $@ $< # The sage test scripts diff --git a/src/curve_ed25519/curve_data.inc.c b/src/curve_ed25519/curve_data.inc.c index b669fb0..a9b1a0c 100644 --- a/src/curve_ed25519/curve_data.inc.c +++ b/src/curve_ed25519/curve_data.inc.c @@ -1,9 +1,22 @@ -/* Rename table for eventual factoring into .c.inc, MSR ECC style */ +// FIXME move to arch or something +#define WBITS DECAF_WORD_BITS + +#if WBITS == 64 +#define LBITS 51 +typedef __int128_t decaf_sdword_t; +#define LIMB(x) (x##ull) +#define SC_LIMB(x) (x##ull) +#else +#error "Only supporting 64-bit platforms right now" +#endif + +#define API_NAME "decaf_255" +#define API_NS(_id) decaf_255_##_id +#define API_NS2(_pref,_id) _pref##_decaf_255_##_id + #define SCALAR_LIMBS DECAF_255_SCALAR_LIMBS #define SCALAR_BITS DECAF_255_SCALAR_BITS #define NLIMBS DECAF_255_LIMBS -#define API_NS(_id) decaf_255_##_id -#define API_NS2(_pref,_id) _pref##_decaf_255_##_id #define scalar_t decaf_255_scalar_t #define point_t decaf_255_point_t #define precomputed_s decaf_255_precomputed_s @@ -21,12 +34,14 @@ static const scalar_t sc_p = {{{ SC_LIMB(0x1000000000000000) }}}; +#ifdef GEN_TABLES /* sqrt(9) = 3 from the curve spec. Not exported, but used by pregen tool. */ -const unsigned char base_point_ser_for_pregen[SER_BYTES] = { +static const unsigned char base_point_ser_for_pregen[SER_BYTES] = { 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 }; +#endif -const gf SQRT_ONE_MINUS_D = {FIELD_LITERAL( +static const gf SQRT_ONE_MINUS_D = {FIELD_LITERAL( 0x6db8831bbddec, 0x38d7b56c9c165, 0x016b221394bdc, diff --git a/src/curve_ed25519/field b/src/curve_ed25519/field deleted file mode 120000 index 5333fc7..0000000 --- a/src/curve_ed25519/field +++ /dev/null @@ -1 +0,0 @@ -../p25519/ \ No newline at end of file diff --git a/src/curve_ed448goldilocks/curve_data.inc.c b/src/curve_ed448goldilocks/curve_data.inc.c index 80a674d..77ba847 100644 --- a/src/curve_ed448goldilocks/curve_data.inc.c +++ b/src/curve_ed448goldilocks/curve_data.inc.c @@ -1,8 +1,27 @@ +// FIXME move to arch or something +#define WBITS DECAF_WORD_BITS + +#if WBITS == 64 +#define LBITS 56 +typedef __int128_t decaf_sdword_t; +#define LIMB(x) (x##ull) +#define SC_LIMB(x) (x##ull) +#elif WBITS == 32 +typedef int64_t decaf_sdword_t; +#define LBITS 28 +#define LIMB(x) (x##ull)&((1ull<>LBITS +#define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32 +#else +#error "Only supporting 32- and 64-bit platforms right now" +#endif + +#define API_NAME "decaf_448" +#define API_NS(_id) decaf_448_##_id +#define API_NS2(_pref,_id) _pref##_decaf_448_##_id + #define SCALAR_LIMBS DECAF_448_SCALAR_LIMBS #define SCALAR_BITS DECAF_448_SCALAR_BITS #define NLIMBS DECAF_448_LIMBS -#define API_NS(_id) decaf_448_##_id -#define API_NS2(_pref,_id) _pref##_decaf_448_##_id #define scalar_t decaf_448_scalar_t #define point_t decaf_448_point_t #define precomputed_s decaf_448_precomputed_s @@ -22,8 +41,10 @@ static const scalar_t sc_p = {{{ SC_LIMB(0xffffffffffffffff), SC_LIMB(0x3fffffffffffffff) }}}; - + +#ifdef GEN_TABLES /* sqrt(5) = 2phi-1 from the curve spec. Not exported, but used by pregen tool. */ -const unsigned char base_point_ser_for_pregen[SER_BYTES] = { +static const unsigned char base_point_ser_for_pregen[SER_BYTES] = { -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1 }; +#endif diff --git a/src/curve_ed448goldilocks/field b/src/curve_ed448goldilocks/field deleted file mode 120000 index 7efdcab..0000000 --- a/src/curve_ed448goldilocks/field +++ /dev/null @@ -1 +0,0 @@ -../p448/ \ No newline at end of file diff --git a/src/decaf.c b/src/decaf.c index 199497a..9b342f2 100644 --- a/src/decaf.c +++ b/src/decaf.c @@ -13,22 +13,6 @@ #include #include -#define WBITS DECAF_WORD_BITS - -#if WBITS == 64 -#define LBITS 56 -typedef __int128_t decaf_sdword_t; -#define LIMB(x) (x##ull) -#define SC_LIMB(x) (x##ull) -#elif WBITS == 32 -typedef int64_t decaf_sdword_t; -#define LBITS 28 -#define LIMB(x) (x##ull)&((1ull<>LBITS -#define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32 -#else -#error "Only supporting 32- and 64-bit platforms right now" -#endif - #define sv static void #define snv static void __attribute__((noinline)) #define siv static inline void __attribute__((always_inline)) diff --git a/src/decaf_fast.c b/src/decaf_fast.c index a7d2b93..9ee3d14 100644 --- a/src/decaf_fast.c +++ b/src/decaf_fast.c @@ -15,21 +15,13 @@ #include "field.h" #include "decaf_config.h" -#define WBITS DECAF_WORD_BITS -#if WBITS == 64 - typedef __int128_t decaf_sdword_t; - #define SC_LIMB(x) (x##ull) -#elif WBITS == 32 - typedef int64_t decaf_sdword_t; - #define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32 -#else - #error "Only supporting 32- and 64-bit platforms right now" -#endif - - /* Include the curve data here */ #include "curve_data.inc.c" +#if (COFACTOR == 8) && !IMAGINE_TWIST +/* FUTURE: Curve41417 doesn't have these properties. */ +#error "Currently require IMAGINE_TWIST (and thus p=5 mod 8) for cofactor 8" +#endif #if IMAGINE_TWIST && (P_MOD_8 != 5) #error "Cannot use IMAGINE_TWIST except for p == 5 mod 8" @@ -162,6 +154,7 @@ static decaf_word_t hibit(const gf x) { return -(y->limb[0]&1); } +#if COFACTOR==8 /** Return high bit of x = low bit of 2x mod p */ static decaf_word_t lobit(const gf x) { gf y; @@ -169,6 +162,7 @@ static decaf_word_t lobit(const gf x) { gf_strong_reduce(y); return -(y->limb[0]&1); } +#endif /** {extra,accum} - sub +? p * Must have extra <= 1 @@ -408,27 +402,64 @@ static void deisogenize ( decaf_bool_t toggle_hibit_t_over_s, decaf_bool_t toggle_rotation ) { - gf c, d, x, t; +#if COFACTOR == 4 && !IMAGINE_TWIST + (void) toggle_rotation; + + /* TODO: Can shave off one mul here; not important but makes consistent with paper */ + gf b, d; + gf_s *a = s, *c = minus_t_over_s; + gf_mulw_sgn ( a, p->y, 1-EDWARDS_D ); + gf_mul ( c, a, p->t ); /* -dYT, with EDWARDS_D = d-1 */ + gf_mul ( a, p->x, p->z ); + gf_sub ( d, c, a ); /* aXZ-dYT with a=-1 */ + gf_add ( a, p->z, p->y ); + gf_sub ( b, p->z, p->y ); + gf_mul ( c, b, a ); + gf_mulw_sgn ( b, c, -EDWARDS_D ); /* (a-d)(Z+Y)(Z-Y) */ + decaf_bool_t ok = gf_isqrt_chk ( a, b, DECAF_TRUE ); /* r in the paper */ + (void)ok; assert(ok); + gf_mulw_sgn ( b, a, -EDWARDS_D ); /* u in the paper */ + gf_mul ( c, b, a ); /* ur */ + gf_mul ( a, c, d ); /* ur (aZX-dYT) */ + gf_add ( d, b, b ); /* 2u = -2au since a=-1 */ + gf_mul ( c, d, p->z ); /* 2uZ */ + cond_neg ( b, toggle_hibit_t_over_s ^ ~hibit(c) ); /* u <- -u if negative. */ + cond_neg ( c, toggle_hibit_t_over_s ^ ~hibit(c) ); /* u <- -u if negative. */ + gf_mul ( d, b, p->y ); + gf_add ( s, a, d ); + cond_neg ( s, toggle_hibit_s ^ hibit(s) ); +#else + /* More complicated because of rotation */ + /* FIXME This code is wrong for certain non-Curve25519 curves; check if it's because of Cofactor==8 or IMAGINE_ROTATION */ + + gf c, d; gf_s *b = s, *a = minus_t_over_s; #if IMAGINE_TWIST + gf x, t; gf_mul ( x, p->x, SQRT_MINUS_ONE); gf_mul ( t, p->t, SQRT_MINUS_ONE); gf_sub ( x, ZERO, x ); gf_sub ( t, ZERO, t ); -#endif - gf DEBUG; gf_add ( a, p->z, x ); gf_sub ( b, p->z, x ); - gf_mul ( c, a, b ); /* "zx" = Z^2 - X^2 */ - gf_cpy(DEBUG,c); + gf_mul ( c, a, b ); /* "zx" = Z^2 - aX^2 = Z^2 - X^2 */ +#else + const gf_s *x = p->x, *t = p->t; + /* Won't hit the cond_sel below because COFACTOR==8 requires IMAGINE_TWIST for now. */ + + gf_sqr ( a, p->z ); + gf_sqr ( b, p->x ); + gf_add ( c, a, b ); /* "zx" = Z^2 - aX^2 = Z^2 + X^2 */ +#endif + gf_mul ( a, p->z, t ); /* "tz" = T*Z */ gf_sqr ( b, a ); - gf_mul ( d, b, c ); /* (TZ)^2 * (Z^2-X^2) */ + gf_mul ( d, b, c ); /* (TZ)^2 * (Z^2-aX^2) */ decaf_bool_t ok = gf_isqrt_chk ( b, d, DECAF_TRUE ); (void)ok; assert(ok); - gf_mul ( d, b, a ); /* "osx" = 1 / sqrt(z^2-x^2) */ + gf_mul ( d, b, a ); /* "osx" = 1 / sqrt(z^2-ax^2) */ gf_mul ( a, b, c ); gf_mul ( b, a, d ); /* 1/tz */ @@ -445,6 +476,7 @@ static void deisogenize ( cond_sel ( x, p->y, x, rotate ); } #else + (void)toggle_rotation; rotate = 0; #endif @@ -458,6 +490,8 @@ static void deisogenize ( gf_add ( d, d, c ); gf_mul ( b, d, x ); /* here "x" = y unless rotate */ cond_neg ( b, toggle_hibit_s ^ hibit(b) ); + +#endif } void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) { @@ -472,7 +506,7 @@ void API_NS(point_encode)( unsigned char ser[SER_BYTES], const point_t p ) { static decaf_bool_t gf_deser(gf s, const unsigned char ser[SER_BYTES]) { return gf_deserialize((gf_s *)s, ser); } - + decaf_bool_t API_NS(point_decode) ( point_t p, const unsigned char ser[SER_BYTES], @@ -483,25 +517,32 @@ decaf_bool_t API_NS(point_decode) ( succ &= allow_identity | ~zero; succ &= ~hibit(s); gf_sqr ( a, s ); - gf_sub ( f, ONE, a ); /* f = 1-s^2 = 1-as^2 since a=1 */ +#if IMAGINE_TWIST + gf_sub ( f, ONE, a ); /* f = 1-as^2 = 1-s^2*/ +#else + gf_add ( f, ONE, a ); /* f = 1-as^2 = 1+s^2 */ +#endif succ &= ~ gf_eq( f, ZERO ); gf_sqr ( b, f ); - gf_mulw_sgn ( c, a, 4-4*EDWARDS_D ); + gf_mulw_sgn ( c, a, 4*IMAGINE_TWIST-4*EDWARDS_D ); gf_add ( c, c, b ); /* t^2 */ - gf_mul ( d, f, s ); /* s(1-s^2) for denoms */ + gf_mul ( d, f, s ); /* s(1-as^2) for denoms */ gf_sqr ( e, d ); gf_mul ( b, c, e ); - succ &= gf_isqrt_chk ( e, b, DECAF_TRUE ); /* e = 1/(t s (1-s^2)) */ + succ &= gf_isqrt_chk ( e, b, DECAF_TRUE ); /* e = 1/(t s (1-as^2)) */ gf_mul ( b, e, d ); /* 1/t */ - gf_mul ( d, e, c ); /* d = t / (s(1-s^2)) */ + gf_mul ( d, e, c ); /* d = t / (s(1-as^2)) */ gf_mul ( e, d, f ); /* t/s */ decaf_bool_t negtos = hibit(e); cond_neg(b, negtos); cond_neg(d, negtos); - - gf_add ( p->z, ONE, a); /* Z = 1+s^2 */ - succ &= ~gf_eq( p->z, ZERO ); /* FUTURE: unnecessary? */ + +#if IMAGINE_TWIST + gf_add ( p->z, ONE, a); /* Z = 1+as^2 = 1-s^2 */ +#else + gf_sub ( p->z, ONE, a); /* Z = 1+as^2 = 1-s^2 */ +#endif #if COFACTOR == 8 gf_mul ( a, p->z, d); /* t(1+s^2) / s(1-s^2) = 2/xy */ @@ -745,7 +786,7 @@ static void pt_to_pniels ( ) { gf_sub ( b->n->a, a->y, a->x ); gf_add ( b->n->b, a->x, a->y ); - gf_mulw_sgn ( b->n->c, a->t, 2*EFF_D ); + gf_mulw_sgn ( b->n->c, a->t, 2*TWISTED_D ); gf_add ( b->z, a->z, a->z ); } diff --git a/src/decaf_gen_tables.c b/src/decaf_gen_tables.c index c0aaa29..1db5aa6 100644 --- a/src/decaf_gen_tables.c +++ b/src/decaf_gen_tables.c @@ -15,8 +15,8 @@ #include "decaf_config.h" #include "field.h" -#define API_NS(_id) decaf_255_##_id -#define API_NS2(_pref,_id) _pref##_decaf_255_##_id +#define GEN_TABLES +#include "curve_data.inc.c" /* To satisfy linker. */ const gf API_NS(precomputed_base_as_fe)[1]; @@ -24,7 +24,6 @@ const API_NS(scalar_t) API_NS(precomputed_scalarmul_adjustment); const API_NS(scalar_t) API_NS(point_scalarmul_adjustment); const API_NS(scalar_t) API_NS(sc_r2) = {{{0}}}; const decaf_word_t API_NS(MONTGOMERY_FACTOR) = 0; -const unsigned char base_point_ser_for_pregen[DECAF_255_SER_BYTES]; const API_NS(point_t) API_NS(point_base); @@ -94,8 +93,8 @@ int main(int argc, char **argv) { printf("/** @warning: this file was automatically generated. */\n"); printf("#include \n\n"); printf("#include \"field.h\"\n\n"); - printf("#define API_NS(_id) decaf_255_##_id\n"); - printf("#define API_NS2(_pref,_id) _pref##_decaf_255_##_id\n"); + printf("#define API_NS(_id) %s_##_id\n", API_NAME); + printf("#define API_NS2(_pref,_id) _pref##_%s_##_id\n", API_NAME); output = (const gf_s *)real_point_base; printf("const API_NS(point_t) API_NS(point_base) = {{\n"); @@ -136,8 +135,8 @@ int main(int argc, char **argv) { scalar_print("API_NS(precomputed_scalarmul_adjustment)", smadj); API_NS(scalar_copy)(smadj,API_NS(scalar_one)); - for (i=0; ilimb, *b = bs->limb; uint32_t *c = cs->limb; @@ -84,8 +84,8 @@ p448_mul ( void p448_mulw ( - p448_t *__restrict__ cs, - const p448_t *as, + gf_448_s *__restrict__ cs, + const gf_448_t as, uint64_t b ) { const uint32_t bhi = b>>28, blo = b & ((1<<28)-1); @@ -128,15 +128,15 @@ p448_mulw ( void p448_sqr ( - p448_t *__restrict__ cs, - const p448_t *as + gf_448_s *__restrict__ cs, + const gf_448_t as ) { p448_mul(cs,as,as); /* PERF */ } void p448_strong_reduce ( - p448_t *a + gf_448_t a ) { word_t mask = (1ull<<28)-1; @@ -180,14 +180,14 @@ p448_strong_reduce ( void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ) { int i,j; - p448_t red; - p448_copy(&red, x); - p448_strong_reduce(&red); + gf_448_t red; + p448_copy(red, x); + p448_strong_reduce(red); for (i=0; i<8; i++) { - uint64_t limb = red.limb[2*i] + (((uint64_t)red.limb[2*i+1])<<28); + uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28); for (j=0; j<7; j++) { serial[7*i+j] = limb; limb >>= 8; @@ -198,7 +198,7 @@ p448_serialize ( mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ) { int i,j; diff --git a/src/p448/arch_32/f_impl.h b/src/p448/arch_32/f_impl.h index 89bf763..d1f6f72 100644 --- a/src/p448/arch_32/f_impl.h +++ b/src/p448/arch_32/f_impl.h @@ -9,9 +9,9 @@ #include #include -typedef struct p448_t { +typedef struct gf_448_s { uint32_t limb[16]; -} __attribute__((aligned(32))) p448_t; +} __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; #define LBITS 28 #define LIMB(x) (x##ull)&((1ull<>LBITS @@ -24,69 +24,69 @@ extern "C" { static __inline__ void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused,always_inline)); static __inline__ void p448_sub_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused,always_inline)); static __inline__ void p448_copy ( - p448_t *out, - const p448_t *a + gf_448_t out, + const gf_448_t a ) __attribute__((unused,always_inline)); static __inline__ void p448_weak_reduce ( - p448_t *inout + gf_448_t inout ) __attribute__((unused,always_inline)); void p448_strong_reduce ( - p448_t *inout + gf_448_t inout ); static __inline__ void p448_bias ( - p448_t *inout, + gf_448_t inout, int amount ) __attribute__((unused,always_inline)); void p448_mul ( - p448_t *__restrict__ out, - const p448_t *a, - const p448_t *b + gf_448_s *__restrict__ out, + const gf_448_t a, + const gf_448_t b ); void p448_mulw ( - p448_t *__restrict__ out, - const p448_t *a, + gf_448_s *__restrict__ out, + const gf_448_t a, uint64_t b ); void p448_sqr ( - p448_t *__restrict__ out, - const p448_t *a + gf_448_s *__restrict__ out, + const gf_448_t a ); void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ); mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ); @@ -94,9 +94,9 @@ p448_deserialize ( void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) { unsigned int i; for (i=0; ilimb[15] >> 28; diff --git a/src/p448/arch_arm_32/f_impl.c b/src/p448/arch_arm_32/f_impl.c index e0edfb9..068774a 100644 --- a/src/p448/arch_arm_32/f_impl.c +++ b/src/p448/arch_arm_32/f_impl.c @@ -100,9 +100,9 @@ smull2 ( void p448_mul ( - p448_t *__restrict__ cs, - const p448_t *as, - const p448_t *bs + gf_448_s *__restrict__ cs, + const gf_448_t as, + const gf_448_t bs ) { const uint32_t *a = as->limb, *b = bs->limb; @@ -451,8 +451,8 @@ p448_mul ( void p448_sqr ( - p448_t *__restrict__ cs, - const p448_t *as + gf_448_s *__restrict__ cs, + const gf_448_t as ) { const uint32_t *a = as->limb; uint32_t *c = cs->limb; @@ -749,8 +749,8 @@ p448_sqr ( void p448_mulw ( - p448_t *__restrict__ cs, - const p448_t *as, + gf_448_s *__restrict__ cs, + const gf_448_t as, uint64_t b ) { uint32_t mask = (1ull<<28)-1; @@ -863,7 +863,7 @@ p448_mulw ( void p448_strong_reduce ( - p448_t *a + gf_448_t a ) { word_t mask = (1ull<<28)-1; @@ -907,14 +907,14 @@ p448_strong_reduce ( void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ) { int i,j; - p448_t red; - p448_copy(&red, x); - p448_strong_reduce(&red); + gf_448_t red; + p448_copy(red, x); + p448_strong_reduce(red); for (i=0; i<8; i++) { - uint64_t limb = red.limb[2*i] + (((uint64_t)red.limb[2*i+1])<<28); + uint64_t limb = red->limb[2*i] + (((uint64_t)red->limb[2*i+1])<<28); for (j=0; j<7; j++) { serial[7*i+j] = limb; limb >>= 8; @@ -925,7 +925,7 @@ p448_serialize ( mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ) { int i,j; diff --git a/src/p448/arch_arm_32/f_impl.h b/src/p448/arch_arm_32/f_impl.h index 89bf763..d1f6f72 100644 --- a/src/p448/arch_arm_32/f_impl.h +++ b/src/p448/arch_arm_32/f_impl.h @@ -9,9 +9,9 @@ #include #include -typedef struct p448_t { +typedef struct gf_448_s { uint32_t limb[16]; -} __attribute__((aligned(32))) p448_t; +} __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; #define LBITS 28 #define LIMB(x) (x##ull)&((1ull<>LBITS @@ -24,69 +24,69 @@ extern "C" { static __inline__ void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused,always_inline)); static __inline__ void p448_sub_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused,always_inline)); static __inline__ void p448_copy ( - p448_t *out, - const p448_t *a + gf_448_t out, + const gf_448_t a ) __attribute__((unused,always_inline)); static __inline__ void p448_weak_reduce ( - p448_t *inout + gf_448_t inout ) __attribute__((unused,always_inline)); void p448_strong_reduce ( - p448_t *inout + gf_448_t inout ); static __inline__ void p448_bias ( - p448_t *inout, + gf_448_t inout, int amount ) __attribute__((unused,always_inline)); void p448_mul ( - p448_t *__restrict__ out, - const p448_t *a, - const p448_t *b + gf_448_s *__restrict__ out, + const gf_448_t a, + const gf_448_t b ); void p448_mulw ( - p448_t *__restrict__ out, - const p448_t *a, + gf_448_s *__restrict__ out, + const gf_448_t a, uint64_t b ); void p448_sqr ( - p448_t *__restrict__ out, - const p448_t *a + gf_448_s *__restrict__ out, + const gf_448_t a ); void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ); mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ); @@ -94,9 +94,9 @@ p448_deserialize ( void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) { unsigned int i; for (i=0; ilimb[15] >> 28; diff --git a/src/p448/arch_neon_experimental/f_impl.c b/src/p448/arch_neon_experimental/f_impl.c index 6e57b8f..371e668 100644 --- a/src/p448/arch_neon_experimental/f_impl.c +++ b/src/p448/arch_neon_experimental/f_impl.c @@ -70,9 +70,9 @@ smull2 ( void p448_mul ( - p448_t *__restrict__ cs, - const p448_t *as, - const p448_t *bs + gf_448_s *__restrict__ cs, + const gf_448_t as, + const gf_448_t bs ) { #define _bl0 "q0" #define _bl0_0 "d0" @@ -369,8 +369,8 @@ p448_mul ( void p448_sqr ( - p448_t *__restrict__ cs, - const p448_t *bs + gf_448_s *__restrict__ cs, + const gf_448_t bs ) { int32x2_t *vc = (int32x2_t*) cs->limb; @@ -570,8 +570,8 @@ p448_sqr ( void p448_mulw ( - p448_t *__restrict__ cs, - const p448_t *as, + gf_448_s *__restrict__ cs, + const gf_448_t as, uint64_t b ) { uint32x2_t vmask = {(1<<28) - 1, (1<<28)-1}; @@ -621,7 +621,7 @@ p448_mulw ( /* PERF: vectorize? */ void p448_strong_reduce ( - p448_t *a + gf_448_t a ) { word_t mask = (1ull<<28)-1; @@ -665,15 +665,15 @@ p448_strong_reduce ( void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ) { int i,j; - p448_t red; - p448_copy(&red, x); - p448_strong_reduce(&red); + gf_448_t red; + p448_copy(red, x); + p448_strong_reduce(red); for (i=0; i<8; i++) { - uint64_t limb = red.limb[LIMBPERM(2*i)] + (((uint64_t)red.limb[LIMBPERM(2*i+1)])<<28); + uint64_t limb = red->limb[LIMBPERM(2*i)] + (((uint64_t)red->limb[LIMBPERM(2*i+1)])<<28); for (j=0; j<7; j++) { serial[7*i+j] = limb; limb >>= 8; @@ -684,7 +684,7 @@ p448_serialize ( mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ) { int i,j; diff --git a/src/p448/arch_neon_experimental/f_impl.h b/src/p448/arch_neon_experimental/f_impl.h index 75bd92e..6a26a6f 100644 --- a/src/p448/arch_neon_experimental/f_impl.h +++ b/src/p448/arch_neon_experimental/f_impl.h @@ -9,9 +9,9 @@ #include #include -typedef struct p448_t { +typedef struct gf_448_s { uint32_t limb[16]; -} __attribute__((aligned(32))) p448_t; +} __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; #define LIMBPERM(x) (((x)<<1 | (x)>>3) & 15) #define USE_NEON_PERM 1 @@ -30,69 +30,69 @@ extern "C" { static __inline__ void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused,always_inline)); static __inline__ void p448_sub_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused,always_inline)); static __inline__ void p448_copy ( - p448_t *out, - const p448_t *a + gf_448_t out, + const gf_448_t a ) __attribute__((unused,always_inline)); static __inline__ void p448_weak_reduce ( - p448_t *inout + gf_448_t inout ) __attribute__((unused,always_inline)); void p448_strong_reduce ( - p448_t *inout + gf_448_t inout ); static __inline__ void p448_bias ( - p448_t *inout, + gf_448_t inout, int amount ) __attribute__((unused,always_inline)); void p448_mul ( - p448_t *__restrict__ out, - const p448_t *a, - const p448_t *b + gf_448_s *__restrict__ out, + const gf_448_t a, + const gf_448_t b ); void p448_mulw ( - p448_t *__restrict__ out, - const p448_t *a, + gf_448_s *__restrict__ out, + const gf_448_t a, uint64_t b ); void p448_sqr ( - p448_t *__restrict__ out, - const p448_t *a + gf_448_s *__restrict__ out, + const gf_448_t a ); void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ); mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ); @@ -100,9 +100,9 @@ p448_deserialize ( void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) { unsigned int i; for (i=0; ilimb, *b = bs->limb; uint64_t *c = cs->limb; @@ -184,8 +184,8 @@ p448_mul ( void p448_mulw ( - p448_t *__restrict__ cs, - const p448_t *as, + gf_448_s *__restrict__ cs, + const gf_448_t as, uint64_t b ) { const uint64_t *a = as->limb; @@ -213,8 +213,8 @@ p448_mulw ( void p448_sqr ( - p448_t *__restrict__ cs, - const p448_t *as + gf_448_s *__restrict__ cs, + const gf_448_t as ) { const uint64_t *a = as->limb; uint64_t *c = cs->limb; @@ -328,7 +328,7 @@ p448_sqr ( void p448_strong_reduce ( - p448_t *a + gf_448_t a ) { uint64_t mask = (1ull<<56)-1; @@ -372,24 +372,24 @@ p448_strong_reduce ( void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ) { int i,j; - p448_t red; - p448_copy(&red, x); - p448_strong_reduce(&red); + gf_448_t red; + p448_copy(red, x); + p448_strong_reduce(red); for (i=0; i<8; i++) { for (j=0; j<7; j++) { - serial[7*i+j] = red.limb[i]; - red.limb[i] >>= 8; + serial[7*i+j] = red->limb[i]; + red->limb[i] >>= 8; } - assert(red.limb[i] == 0); + assert(red->limb[i] == 0); } } mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ) { int i,j; diff --git a/src/p448/arch_ref64/f_impl.h b/src/p448/arch_ref64/f_impl.h index b7ff50d..5fe6590 100644 --- a/src/p448/arch_ref64/f_impl.h +++ b/src/p448/arch_ref64/f_impl.h @@ -10,9 +10,9 @@ #include "word.h" -typedef struct p448_t { +typedef struct gf_448_s { uint64_t limb[8]; -} __attribute__((aligned(32))) p448_t; +} __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; #define LBITS 56 #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} @@ -23,69 +23,69 @@ extern "C" { static __inline__ void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused)); static __inline__ void p448_sub_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused)); static __inline__ void p448_copy ( - p448_t *out, - const p448_t *a + gf_448_t out, + const gf_448_t a ) __attribute__((unused)); static __inline__ void p448_weak_reduce ( - p448_t *inout + gf_448_t inout ) __attribute__((unused)); void p448_strong_reduce ( - p448_t *inout + gf_448_t inout ); static __inline__ void p448_bias ( - p448_t *inout, + gf_448_t inout, int amount ) __attribute__((unused)); void p448_mul ( - p448_t *__restrict__ out, - const p448_t *a, - const p448_t *b + gf_448_s *__restrict__ out, + const gf_448_t a, + const gf_448_t b ); void p448_mulw ( - p448_t *__restrict__ out, - const p448_t *a, + gf_448_s *__restrict__ out, + const gf_448_t a, uint64_t b ); void p448_sqr ( - p448_t *__restrict__ out, - const p448_t *a + gf_448_s *__restrict__ out, + const gf_448_t a ); void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ); mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ); @@ -93,9 +93,9 @@ p448_deserialize ( void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) { unsigned int i; for (i=0; i<8; i++) { @@ -106,9 +106,9 @@ p448_add_RAW ( void p448_sub_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) { unsigned int i; uint64_t co1 = ((1ull<<56)-1)*2, co2 = co1-2; @@ -120,15 +120,15 @@ p448_sub_RAW ( void p448_copy ( - p448_t *out, - const p448_t *a + gf_448_t out, + const gf_448_t a ) { memcpy(out,a,sizeof(*a)); } void p448_bias ( - p448_t *a, + gf_448_t a, int amt ) { (void) a; @@ -137,7 +137,7 @@ p448_bias ( void p448_weak_reduce ( - p448_t *a + gf_448_t a ) { uint64_t mask = (1ull<<56) - 1; uint64_t tmp = a->limb[7] >> 56; diff --git a/src/p448/arch_x86_64/f_impl.c b/src/p448/arch_x86_64/f_impl.c index 9df771e..e959dbc 100644 --- a/src/p448/arch_x86_64/f_impl.c +++ b/src/p448/arch_x86_64/f_impl.c @@ -7,9 +7,9 @@ void p448_mul ( - p448_t *__restrict__ cs, - const p448_t *as, - const p448_t *bs + gf_448_s *__restrict__ cs, + const gf_448_t as, + const gf_448_t bs ) { const uint64_t *a = as->limb, *b = bs->limb; uint64_t *c = cs->limb; @@ -147,8 +147,8 @@ p448_mul ( void p448_mulw ( - p448_t *__restrict__ cs, - const p448_t *as, + gf_448_s *__restrict__ cs, + const gf_448_t as, uint64_t b ) { const uint64_t *a = as->limb; @@ -192,8 +192,8 @@ p448_mulw ( void p448_sqr ( - p448_t *__restrict__ cs, - const p448_t *as + gf_448_s *__restrict__ cs, + const gf_448_t as ) { const uint64_t *a = as->limb; uint64_t *c = cs->limb; @@ -307,7 +307,7 @@ p448_sqr ( void p448_strong_reduce ( - p448_t *a + gf_448_t a ) { uint64_t mask = (1ull<<56)-1; @@ -351,24 +351,24 @@ p448_strong_reduce ( void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ) { int i,j; - p448_t red; - p448_copy(&red, x); - p448_strong_reduce(&red); + gf_448_t red; + p448_copy(red, x); + p448_strong_reduce(red); for (i=0; i<8; i++) { for (j=0; j<7; j++) { - serial[7*i+j] = red.limb[i]; - red.limb[i] >>= 8; + serial[7*i+j] = red->limb[i]; + red->limb[i] >>= 8; } - assert(red.limb[i] == 0); + assert(red->limb[i] == 0); } } mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ) { int i,j; diff --git a/src/p448/arch_x86_64/f_impl.h b/src/p448/arch_x86_64/f_impl.h index 6e7c523..aa93b24 100644 --- a/src/p448/arch_x86_64/f_impl.h +++ b/src/p448/arch_x86_64/f_impl.h @@ -9,9 +9,12 @@ #include "word.h" -typedef struct p448_t { +#ifndef __DECAF_448_H__ // HACK FIXME +#define DECAF_WORD_BITS 64 +typedef struct gf_448_s { uint64_t limb[8]; -} __attribute__((aligned(32))) p448_t; +} __attribute__((aligned(32))) gf_448_s, gf_448_t[1]; +#endif #define LBITS 56 #define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}} @@ -22,69 +25,69 @@ extern "C" { static __inline__ void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused,always_inline)); static __inline__ void p448_sub_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) __attribute__((unused,always_inline)); static __inline__ void p448_copy ( - p448_t *out, - const p448_t *a + gf_448_t out, + const gf_448_t a ) __attribute__((unused,always_inline)); static __inline__ void p448_weak_reduce ( - p448_t *inout + gf_448_t inout ) __attribute__((unused,always_inline)); void p448_strong_reduce ( - p448_t *inout + gf_448_t inout ); static __inline__ void p448_bias ( - p448_t *inout, + gf_448_t inout, int amount ) __attribute__((unused,always_inline)); void p448_mul ( - p448_t *__restrict__ out, - const p448_t *a, - const p448_t *b + gf_448_s *__restrict__ out, + const gf_448_t a, + const gf_448_t b ); void p448_mulw ( - p448_t *__restrict__ out, - const p448_t *a, + gf_448_s *__restrict__ out, + const gf_448_t a, uint64_t b ); void p448_sqr ( - p448_t *__restrict__ out, - const p448_t *a + gf_448_s *__restrict__ out, + const gf_448_t a ); void p448_serialize ( uint8_t *serial, - const struct p448_t *x + const gf_448_t x ); mask_t p448_deserialize ( - p448_t *x, + gf_448_t x, const uint8_t serial[56] ); @@ -92,9 +95,9 @@ p448_deserialize ( void p448_add_RAW ( - p448_t *out, - const p448_t *a, - const p448_t *b + gf_448_t out, + const gf_448_t a, + const gf_448_t b ) { unsigned int i; for (i=0; i // MAGIC +#include // MAGIC #endif /* __DECAF_H__ */ diff --git a/src/public_include/decaf/decaf_255.hxx b/src/public_include/decaf/decaf_255.hxx index 39a43b0..e84a792 100644 --- a/src/public_include/decaf/decaf_255.hxx +++ b/src/public_include/decaf/decaf_255.hxx @@ -46,7 +46,13 @@ namespace decaf { /** * @brief Curve25519/Decaf instantiation of group. */ -struct Ed255 { +struct IsoEd25519 { + +/** The name of the curve */ +static inline const char *name() { return "IsoEd25519"; } + +/** The curve's cofactor (removed, but useful for testing) */ +static const int REMOVED_COFACTOR = 8; /** @cond internal */ class Point; @@ -533,17 +539,17 @@ public: /** @endcond */ }; -}; /* struct Ed255 */ +}; /* struct IsoEd25519 */ /** @cond internal */ -inline SecureBuffer Ed255::Scalar::direct_scalarmul ( +inline SecureBuffer IsoEd25519::Scalar::direct_scalarmul ( const Block &in, decaf_bool_t allow_identity, decaf_bool_t short_circuit ) const throw(CryptoException) { - SecureBuffer out(Ed255::Point::SER_BYTES); + SecureBuffer out(IsoEd25519::Point::SER_BYTES); if (!decaf_255_direct_scalarmul(out, in.data(), s, allow_identity, short_circuit)) throw CryptoException(); return out; diff --git a/src/public_include/decaf/decaf_448.h b/src/public_include/decaf/decaf_448.h index 6a7345e..bcf7c91 100644 --- a/src/public_include/decaf/decaf_448.h +++ b/src/public_include/decaf/decaf_448.h @@ -426,7 +426,7 @@ decaf_bool_t decaf_448_point_valid ( ) API_VIS WARN_UNUSED NONNULL1 NOINLINE; /** - * @brief 2-torque a point, for debugging purposes. + * @brief Torque a point, for debugging purposes. * * @param [out] q The point to torque. * @param [in] p The point to torque. @@ -436,6 +436,21 @@ void decaf_448_point_debugging_torque ( const decaf_448_point_t p ) API_VIS NONNULL2 NOINLINE; +/** + * @brief Projectively scale a point, for debugging purposes. + * The output will be equal to the input, and will be valid + * even if the factor is zero. + * + * @param [out] q The point to scale. + * @param [in] p The point to scale. + * @param [in] factor Serialized GF factor to scale. + */ +void decaf_448_point_debugging_pscale ( + decaf_448_point_t q, + const decaf_448_point_t p, + const unsigned char factor[DECAF_448_SER_BYTES] +) API_VIS NONNULL2 NOINLINE; + /** * @brief Almost-Elligator-like hash to curve. * diff --git a/src/public_include/decaf/decaf_448.hxx b/src/public_include/decaf/decaf_448.hxx index 8448a18..c043fcb 100644 --- a/src/public_include/decaf/decaf_448.hxx +++ b/src/public_include/decaf/decaf_448.hxx @@ -46,7 +46,13 @@ namespace decaf { /** * @brief Ed448-Goldilocks/Decaf instantiation of group. */ -struct Ed448 { +struct Ed448Goldilocks { + +/** The name of the curve */ +static inline const char *name() { return "Ed448-Goldilocks"; } + +/** The curve's cofactor (removed, but useful for testing) */ +static const int REMOVED_COFACTOR = 4; /** @cond internal */ class Point; diff --git a/test/bench_decaf.cxx b/test/bench_decaf.cxx index b28e1ac..9b35e8e 100644 --- a/test/bench_decaf.cxx +++ b/test/bench_decaf.cxx @@ -20,9 +20,6 @@ #include using namespace decaf; -typedef Ed255::Scalar Scalar; -typedef Ed255::Point Point; -typedef Ed255::Precomputed Precomputed; static __inline__ void __attribute__((unused)) ignore_result ( int result ) { (void)result; } @@ -140,6 +137,13 @@ public: double Benchmark::totalCy = 0, Benchmark::totalS = 0; + +template struct Benches { + +typedef typename Group::Scalar Scalar; +typedef typename Group::Point Point; +typedef typename Group::Precomputed Precomputed; + static void tdh ( SpongeRng &clientRng, SpongeRng &serverRng, @@ -274,6 +278,62 @@ static void spake2ee( server.respec(STROBE_KEYED_128); } +static void macro() { + printf("\nMacro-benchmarks for %s:\n", Group::name()); + printf("Protocol benchmarks:\n"); + SpongeRng clientRng(Block("client rng seed")); + SpongeRng serverRng(Block("server rng seed")); + SecureBuffer hashedPassword("hello world"); + for (Benchmark b("Spake2ee c+s",0.1); b.iter(); ) { + spake2ee(clientRng, serverRng, hashedPassword,false); + } + + for (Benchmark b("Spake2ee c+s aug",0.1); b.iter(); ) { + spake2ee(clientRng, serverRng, hashedPassword,true); + } + + Scalar x(clientRng); + SecureBuffer gx(Precomputed::base() * x); + Scalar y(serverRng); + SecureBuffer gy(Precomputed::base() * y); + + for (Benchmark b("FHMQV c+s",0.1); b.iter(); ) { + fhmqv(clientRng, serverRng,x,gx,y,gy); + } + + for (Benchmark b("TripleDH anon c+s",0.1); b.iter(); ) { + tdh(clientRng, serverRng, x,gx,y,gy); + } +} + +static void micro() { + SpongeRng rng(Block("per-curve-benchmarks")); + Precomputed pBase; + Point p,q; + Scalar s,t; + SecureBuffer ep, ep2(Point::SER_BYTES*2); + + printf("\nMicro-benchmarks for %s:\n", Group::name()); + for (Benchmark b("Scalar add", 1000); b.iter(); ) { s+=t; } + for (Benchmark b("Scalar times", 100); b.iter(); ) { s*=t; } + for (Benchmark b("Scalar inv", 1); b.iter(); ) { s.inverse(); } + for (Benchmark b("Point add", 100); b.iter(); ) { p += q; } + for (Benchmark b("Point double", 100); b.iter(); ) { p.double_in_place(); } + for (Benchmark b("Point scalarmul"); b.iter(); ) { p * s; } + for (Benchmark b("Point encode"); b.iter(); ) { ep = SecureBuffer(p); } + for (Benchmark b("Point decode"); b.iter(); ) { p = Point(ep); } + for (Benchmark b("Point create/destroy"); b.iter(); ) { Point r; } + for (Benchmark b("Point hash nonuniform"); b.iter(); ) { Point::from_hash(ep); } + for (Benchmark b("Point hash uniform"); b.iter(); ) { Point::from_hash(ep2); } + for (Benchmark b("Point unhash nonuniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep,0)); } + for (Benchmark b("Point unhash uniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep2,0)); } + for (Benchmark b("Point steg"); b.iter(); ) { p.steg_encode(rng); } + for (Benchmark b("Point double scalarmul"); b.iter(); ) { Point::double_scalarmul(p,s,q,t); } + for (Benchmark b("Point precmp scalarmul"); b.iter(); ) { pBase * s; } +} + +}; /* template struct Benches */ + int main(int argc, char **argv) { bool micro = false; if (argc >= 2 && !strcmp(argv[1], "--micro")) @@ -293,10 +353,6 @@ int main(int argc, char **argv) { if (micro) { - Precomputed pBase; - Point p,q; - Scalar s,t; - SecureBuffer ep, ep2(Point::SER_BYTES*2); SpongeRng rng(Block("micro-benchmarks")); printf("\nMicro-benchmarks:\n"); @@ -325,25 +381,12 @@ int main(int argc, char **argv) { for (Benchmark b("STROBEk256 1kiB", 10); b.iter(); ) { strobe.encrypt_no_auth(TmpBuffer(b1024,1024),TmpBuffer(b1024,1024),b.i>1); } - for (Benchmark b("Scalar add", 1000); b.iter(); ) { s+=t; } - for (Benchmark b("Scalar times", 100); b.iter(); ) { s*=t; } - for (Benchmark b("Scalar inv", 1); b.iter(); ) { s.inverse(); } - for (Benchmark b("Point add", 100); b.iter(); ) { p += q; } - for (Benchmark b("Point double", 100); b.iter(); ) { p.double_in_place(); } - for (Benchmark b("Point scalarmul"); b.iter(); ) { p * s; } - for (Benchmark b("Point encode"); b.iter(); ) { ep = SecureBuffer(p); } - for (Benchmark b("Point decode"); b.iter(); ) { p = Point(ep); } - for (Benchmark b("Point create/destroy"); b.iter(); ) { Point r; } - for (Benchmark b("Point hash nonuniform"); b.iter(); ) { Point::from_hash(ep); } - for (Benchmark b("Point hash uniform"); b.iter(); ) { Point::from_hash(ep2); } - for (Benchmark b("Point unhash nonuniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep,0)); } - for (Benchmark b("Point unhash uniform"); b.iter(); ) { ignore_result(p.invert_elligator(ep2,0)); } - for (Benchmark b("Point steg"); b.iter(); ) { p.steg_encode(rng); } - for (Benchmark b("Point double scalarmul"); b.iter(); ) { Point::double_scalarmul(p,s,q,t); } - for (Benchmark b("Point precmp scalarmul"); b.iter(); ) { pBase * s; } /* TODO: scalarmul for verif, etc */ + Benches::micro(); + Benches::micro(); } + /* TODO: 255->448 */ printf("\nMacro-benchmarks:\n"); for (Benchmark b("Keygen"); b.iter(); ) { decaf_255_derive_private_key(s1,r1); @@ -369,31 +412,9 @@ int main(int argc, char **argv) { umessage[1]^=umessage[0]; ignore_result(ret); } - - printf("\nProtocol benchmarks:\n"); - SpongeRng clientRng(Block("client rng seed")); - SpongeRng serverRng(Block("server rng seed")); - SecureBuffer hashedPassword("hello world"); - for (Benchmark b("Spake2ee c+s",0.1); b.iter(); ) { - spake2ee(clientRng, serverRng, hashedPassword,false); - } - for (Benchmark b("Spake2ee c+s aug",0.1); b.iter(); ) { - spake2ee(clientRng, serverRng, hashedPassword,true); - } - - Scalar x(clientRng); - SecureBuffer gx(Precomputed::base() * x); - Scalar y(serverRng); - SecureBuffer gy(Precomputed::base() * y); - - for (Benchmark b("FHMQV c+s",0.1); b.iter(); ) { - fhmqv(clientRng, serverRng,x,gx,y,gy); - } - - for (Benchmark b("TripleDH anon c+s",0.1); b.iter(); ) { - tdh(clientRng, serverRng, x,gx,y,gy); - } + Benches::macro(); + Benches::macro(); printf("\n"); Benchmark::calib(); diff --git a/test/test_decaf.cxx b/test/test_decaf.cxx index 9e98222..2a9103a 100644 --- a/test/test_decaf.cxx +++ b/test/test_decaf.cxx @@ -164,7 +164,7 @@ static void test_elligator() { decaf::SpongeRng rng(decaf::Block("test_elligator")); Test test("Elligator"); - const int NHINTS = 1<<4; + const int NHINTS = Group::REMOVED_COFACTOR * 2; decaf::SecureBuffer *alts[NHINTS]; bool successes[NHINTS]; decaf::SecureBuffer *alts2[NHINTS]; @@ -312,7 +312,7 @@ static void test_ec() { }; // template - +// FIXME cross-field static void test_decaf() { Test test("Sample crypto"); decaf::SpongeRng rng(decaf::Block("test_decaf")); @@ -350,11 +350,18 @@ static void test_decaf() { int main(int argc, char **argv) { (void) argc; (void) argv; - Tests::test_arithmetic(); - Tests::test_elligator(); - Tests::test_ec(); + printf("Testing %s:\n", decaf::IsoEd25519::name()); + Tests::test_arithmetic(); + Tests::test_elligator(); + Tests::test_ec(); test_decaf(); + printf("\n"); + printf("Testing %s:\n", decaf::Ed448Goldilocks::name()); + Tests::test_arithmetic(); + Tests::test_elligator(); + Tests::test_ec(); + if (passing) printf("Passed all tests.\n"); return passing ? 0 : 1;