diff --git a/Makefile b/Makefile index 95ba7c9..54156ea 100644 --- a/Makefile +++ b/Makefile @@ -44,7 +44,7 @@ FIELD ?= p25519 WARNFLAGS = -pedantic -Wall -Wextra -Werror -Wunreachable-code \ -Wmissing-declarations -Wunused-function -Wno-overlength-strings $(EXWARN) -INCFLAGS = -Isrc/include -Isrc/public_include -Isrc/$(FIELD) -Isrc/$(FIELD)/$(ARCH) +INCFLAGS = -Isrc/include -Isrc/public_include LANGFLAGS = -std=c99 -fno-strict-aliasing LANGXXFLAGS = -fno-strict-aliasing GENFLAGS = -ffunction-sections -fdata-sections -fvisibility=hidden -fomit-frame-pointer -fPIC @@ -83,10 +83,16 @@ BUILDPYS= $(SAGES:test/%.sage=$(BUILD_PY)/%.py) HEADERS= Makefile $(shell find src test -name "*.h") $(shell find . -name "*.hxx") $(BUILD_OBJ)/timestamp -DECAFCOMPONENTS= $(BUILD_OBJ)/$(DECAF).o $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/decaf_crypto.o \ - $(BUILD_OBJ)/$(FIELD).o $(BUILD_OBJ)/f_arithmetic.o $(BUILD_OBJ)/utils.o +# components needed by the table generators +GENCOMPONENTS= \ + $(BUILD_OBJ)/$(DECAF)_ed25519.o $(BUILD_OBJ)/p25519_impl.o $(BUILD_OBJ)/p25519_arithmetic.o \ + $(BUILD_OBJ)/utils.o \ + #$(BUILD_OBJ)/p448_impl.o $(BUILD_OBJ)/p448_arithmetic.o + +# components needed by the lib +DECAFCOMPONENTS= $(BUILD_OBJ)/shake.o $(BUILD_OBJ)/decaf_crypto.o $(GENCOMPONENTS) ifeq ($(DECAF),decaf_fast) -DECAFCOMPONENTS += $(BUILD_OBJ)/decaf_tables.o +DECAFCOMPONENTS += $(BUILD_OBJ)/decaf_tables_ed25519.o endif BENCHCOMPONENTS = $(BUILD_OBJ)/bench.o $(BUILD_OBJ)/shake.o @@ -143,15 +149,39 @@ $(BUILD_OBJ)/timestamp: $(BUILD_OBJ)/%.o: $(BUILD_ASM)/%.s $(ASM) $(ASFLAGS) -c -o $@ $< -$(BUILD_IBIN)/decaf_gen_tables: $(BUILD_OBJ)/decaf_gen_tables.o \ - $(BUILD_OBJ)/$(DECAF).o $(BUILD_OBJ)/$(FIELD).o $(BUILD_OBJ)/f_arithmetic.o $(BUILD_OBJ)/utils.o +# I don't know why this rule is necessary... bug in make, or obscure pattern matching rule? +$(BUILD_OBJ)/decaf_gen_tables_%.o: $(BUILD_ASM)/decaf_gen_tables_%.s + $(ASM) $(ASFLAGS) -c -o $@ $< + +$(BUILD_IBIN)/decaf_gen_tables_%: $(BUILD_OBJ)/decaf_gen_tables_%.o $(GENCOMPONENTS) $(LD) $(LDFLAGS) -o $@ $^ -$(BUILD_C)/decaf_tables.c: $(BUILD_IBIN)/decaf_gen_tables +$(BUILD_C)/decaf_tables_%.c: $(BUILD_IBIN)/decaf_gen_tables_% ./$< > $@ -$(BUILD_ASM)/decaf_tables.s: $(BUILD_C)/decaf_tables.c $(HEADERS) - $(CC) $(CFLAGS) -S -c -o $@ $< +$(BUILD_ASM)/decaf_tables_%.s: $(BUILD_C)/decaf_tables_%.c $(HEADERS) + $(CC) $(CFLAGS) -S -c -o $@ $< \ + -I src/curve_$*/ -I src/curve_$*/field -I src/curve_$*/field/$(ARCH) \ + +$(BUILD_ASM)/decaf_gen_tables_%.s: src/decaf_gen_tables.c $(HEADERS) + $(CC) $(CFLAGS) \ + -I src/curve_$*/ -I src/curve_$*/field -I src/curve_$*/field/$(ARCH) \ + -S -c -o $@ $< + +$(BUILD_ASM)/decaf_fast_%.s: src/decaf_fast.c $(HEADERS) + $(CC) $(CFLAGS) \ + -I src/curve_$*/ -I src/curve_$*/field -I src/curve_$*/field/$(ARCH) \ + -S -c -o $@ $< + +$(BUILD_ASM)/%_arithmetic.s: src/%/f_arithmetic.c $(HEADERS) + $(CC) $(CFLAGS) \ + -I src/$* -I src/$*/$(ARCH) \ + -S -c -o $@ $< + +$(BUILD_ASM)/%_impl.s: src/%/$(ARCH)/f_impl.c $(HEADERS) + $(CC) $(CFLAGS) \ + -I src/$* -I src/$*/$(ARCH) \ + -S -c -o $@ $< $(BUILD_ASM)/%.s: src/%.c $(HEADERS) $(CC) $(CFLAGS) -S -c -o $@ $< @@ -165,12 +195,6 @@ $(BUILD_ASM)/%.s: test/%.c $(HEADERS) $(BUILD_ASM)/%.s: test/%.cxx $(HEADERS) $(CXX) $(CXXFLAGS) -S -c -o $@ $< -$(BUILD_ASM)/%.s: src/$(FIELD)/$(ARCH)/%.c $(HEADERS) - $(CC) $(CFLAGS) -S -c -o $@ $< - -$(BUILD_ASM)/%.s: src/$(FIELD)/%.c $(HEADERS) - $(CC) $(CFLAGS) -S -c -o $@ $< - # The sage test scripts sage: $(BUILDPYS) @@ -191,29 +215,29 @@ $(BUILDPYS): $(SAGES) $(BUILD_OBJ)/timestamp $(BUILD_DOC)/timestamp: mkdir -p `dirname $@` touch $@ - -doc: Doxyfile $(BUILD_OBJ)/timestamp $(HEADERS) src/*.c src/$(FIELD)/$(ARCH)/*.c src/$(FIELD)/$(ARCH)/*.h - doxygen > /dev/null - -# The eBATS benchmarking script -bat: $(BATNAME) - -$(BATNAME): include/* src/* src/*/* test/batarch.map $(BUILD_C)/decaf_tables.c # TODO tables some other way - rm -fr $@ - for prim in dh sign; do \ - targ="$@/crypto_$$prim/ed448goldilocks_decaf"; \ - (while read arch where; do \ - mkdir -p $$targ/`basename $$arch`; \ - cp include/*.h $(BUILD_C)/decaf_tables.c src/decaf_fast.c src/decaf_crypto.c src/shake.c src/include/*.h src/bat/$$prim.c src/p448/$$where/*.c src/p448/$$where/*.h src/p448/*.c src/p448/*.h $$targ/`basename $$arch`; \ - cp src/bat/api_$$prim.h $$targ/`basename $$arch`/api.h; \ - perl -p -i -e 's/SYSNAME/'`basename $(BATNAME)`_`basename $$arch`'/g' $$targ/`basename $$arch`/api.h; \ - perl -p -i -e 's/__TODAY__/'$(TODAY)'/g' $$targ/`basename $$arch`/api.h; \ - done \ - ) < test/batarch.map; \ - echo 'Mike Hamburg' > $$targ/designers; \ - echo 'Ed448-Goldilocks Decaf sign and dh' > $$targ/description; \ - done - (cd $(BATNAME)/.. && tar czf $(BATBASE).tgz $(BATBASE) ) +# +# doc: Doxyfile $(BUILD_OBJ)/timestamp $(HEADERS) src/*.c src/$(FIELD)/$(ARCH)/*.c src/$(FIELD)/$(ARCH)/*.h +# doxygen > /dev/null + +# # The eBATS benchmarking script +# bat: $(BATNAME) +# +# $(BATNAME): include/* src/* src/*/* test/batarch.map $(BUILD_C)/decaf_tables.c # TODO tables some other way +# rm -fr $@ +# for prim in dh sign; do \ +# targ="$@/crypto_$$prim/ed448goldilocks_decaf"; \ +# (while read arch where; do \ +# mkdir -p $$targ/`basename $$arch`; \ +# cp include/*.h $(BUILD_C)/decaf_tables.c src/decaf_fast.c src/decaf_crypto.c src/shake.c src/include/*.h src/bat/$$prim.c src/p448/$$where/*.c src/p448/$$where/*.h src/p448/*.c src/p448/*.h $$targ/`basename $$arch`; \ +# cp src/bat/api_$$prim.h $$targ/`basename $$arch`/api.h; \ +# perl -p -i -e 's/SYSNAME/'`basename $(BATNAME)`_`basename $$arch`'/g' $$targ/`basename $$arch`/api.h; \ +# perl -p -i -e 's/__TODAY__/'$(TODAY)'/g' $$targ/`basename $$arch`/api.h; \ +# done \ +# ) < test/batarch.map; \ +# echo 'Mike Hamburg' > $$targ/designers; \ +# echo 'Ed448-Goldilocks Decaf sign and dh' > $$targ/description; \ +# done +# (cd $(BATNAME)/.. && tar czf $(BATBASE).tgz $(BATBASE) ) # Finds todo items in .h and .c files TODO_TYPES ?= HACK TODO FIXME BUG XXX PERF FUTURE REMOVE MAGIC diff --git a/src/curve_ed25519/curve_data.inc.c b/src/curve_ed25519/curve_data.inc.c new file mode 100644 index 0000000..b669fb0 --- /dev/null +++ b/src/curve_ed25519/curve_data.inc.c @@ -0,0 +1,35 @@ +/* Rename table for eventual factoring into .c.inc, MSR ECC style */ +#define SCALAR_LIMBS DECAF_255_SCALAR_LIMBS +#define SCALAR_BITS DECAF_255_SCALAR_BITS +#define NLIMBS DECAF_255_LIMBS +#define API_NS(_id) decaf_255_##_id +#define API_NS2(_pref,_id) _pref##_decaf_255_##_id +#define scalar_t decaf_255_scalar_t +#define point_t decaf_255_point_t +#define precomputed_s decaf_255_precomputed_s +#define SER_BYTES DECAF_255_SER_BYTES +#define IMAGINE_TWIST 1 +#define P_MOD_8 5 +#define COFACTOR 8 + +static const int EDWARDS_D = -121665; + +static const scalar_t sc_p = {{{ + SC_LIMB(0x5812631a5cf5d3ed), + SC_LIMB(0x14def9dea2f79cd6), + SC_LIMB(0), + SC_LIMB(0x1000000000000000) +}}}; + +/* sqrt(9) = 3 from the curve spec. Not exported, but used by pregen tool. */ +const unsigned char base_point_ser_for_pregen[SER_BYTES] = { + 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 +}; + +const gf SQRT_ONE_MINUS_D = {FIELD_LITERAL( + 0x6db8831bbddec, + 0x38d7b56c9c165, + 0x016b221394bdc, + 0x7540f7816214a, + 0x0a0d85b4032b1 +)}; diff --git a/src/curve_ed25519/field b/src/curve_ed25519/field new file mode 120000 index 0000000..5333fc7 --- /dev/null +++ b/src/curve_ed25519/field @@ -0,0 +1 @@ +../p25519/ \ No newline at end of file diff --git a/src/curve_ed448goldilocks/curve_data.inc.c b/src/curve_ed448goldilocks/curve_data.inc.c new file mode 100644 index 0000000..80a674d --- /dev/null +++ b/src/curve_ed448goldilocks/curve_data.inc.c @@ -0,0 +1,29 @@ +#define SCALAR_LIMBS DECAF_448_SCALAR_LIMBS +#define SCALAR_BITS DECAF_448_SCALAR_BITS +#define NLIMBS DECAF_448_LIMBS +#define API_NS(_id) decaf_448_##_id +#define API_NS2(_pref,_id) _pref##_decaf_448_##_id +#define scalar_t decaf_448_scalar_t +#define point_t decaf_448_point_t +#define precomputed_s decaf_448_precomputed_s +#define SER_BYTES DECAF_448_SER_BYTES +#define IMAGINE_TWIST 0 +#define P_MOD_8 7 +#define COFACTOR 4 + +static const int EDWARDS_D = -39081; + +static const scalar_t sc_p = {{{ + SC_LIMB(0x2378c292ab5844f3), + SC_LIMB(0x216cc2728dc58f55), + SC_LIMB(0xc44edb49aed63690), + SC_LIMB(0xffffffff7cca23e9), + SC_LIMB(0xffffffffffffffff), + SC_LIMB(0xffffffffffffffff), + SC_LIMB(0x3fffffffffffffff) +}}}; + +/* sqrt(5) = 2phi-1 from the curve spec. Not exported, but used by pregen tool. */ +const unsigned char base_point_ser_for_pregen[SER_BYTES] = { + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,1 +}; diff --git a/src/curve_ed448goldilocks/field b/src/curve_ed448goldilocks/field new file mode 120000 index 0000000..7efdcab --- /dev/null +++ b/src/curve_ed448goldilocks/field @@ -0,0 +1 @@ +../p448/ \ No newline at end of file diff --git a/src/decaf_fast.c b/src/decaf_fast.c index 7377881..a7d2b93 100644 --- a/src/decaf_fast.c +++ b/src/decaf_fast.c @@ -16,26 +16,35 @@ #include "decaf_config.h" #define WBITS DECAF_WORD_BITS - -/* Rename table for eventual factoring into .c.inc, MSR ECC style */ -#define SCALAR_LIMBS DECAF_255_SCALAR_LIMBS -#define SCALAR_BITS DECAF_255_SCALAR_BITS -#define NLIMBS DECAF_255_LIMBS -#define API_NS(_id) decaf_255_##_id -#define API_NS2(_pref,_id) _pref##_decaf_255_##_id -#define scalar_t decaf_255_scalar_t -#define point_t decaf_255_point_t -#define precomputed_s decaf_255_precomputed_s -#define SER_BYTES DECAF_255_SER_BYTES - #if WBITS == 64 -typedef __int128_t decaf_sdword_t; -#define SC_LIMB(x) (x##ull) + typedef __int128_t decaf_sdword_t; + #define SC_LIMB(x) (x##ull) #elif WBITS == 32 -typedef int64_t decaf_sdword_t; -#define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32 + typedef int64_t decaf_sdword_t; + #define SC_LIMB(x) (x##ull)&((1ull<<32)-1), (x##ull)>>32 #else -#error "Only supporting 32- and 64-bit platforms right now" + #error "Only supporting 32- and 64-bit platforms right now" +#endif + + +/* Include the curve data here */ +#include "curve_data.inc.c" + + +#if IMAGINE_TWIST && (P_MOD_8 != 5) +#error "Cannot use IMAGINE_TWIST except for p == 5 mod 8" +#endif + +#if (COFACTOR != 8) && (COFACTOR != 4) +#error "COFACTOR must be 4 or 8" +#endif + +#if IMAGINE_TWIST +extern const gf SQRT_MINUS_ONE; +#endif + +#if COFACTOR == 8 +extern const gf SQRT_ONE_MINUS_D; /* TODO: Intern this? */ #endif #define sv static void @@ -43,23 +52,9 @@ typedef int64_t decaf_sdword_t; #define siv static inline void __attribute__((always_inline)) static const gf ZERO = {{{0}}}, ONE = {{{1}}}; -static const int EDWARDS_D = -121665; - -static const scalar_t sc_p = {{{ - SC_LIMB(0x5812631a5cf5d3ed), - SC_LIMB(0x14def9dea2f79cd6), - SC_LIMB(0), - SC_LIMB(0x1000000000000000) -}}}; - const scalar_t API_NS(scalar_one) = {{{1}}}, API_NS(scalar_zero) = {{{0}}}; -extern const scalar_t sc_r2; -extern const decaf_word_t MONTGOMERY_FACTOR; - -/* sqrt(9) = 3 from the curve spec. Not exported, but used by pregen tool. */ -const unsigned char base_point_ser_for_pregen[SER_BYTES] = { - 3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 -}; +extern const scalar_t API_NS(sc_r2); +extern const decaf_word_t API_NS(MONTGOMERY_FACTOR); extern const point_t API_NS(point_base); @@ -77,6 +72,7 @@ const precomputed_s *API_NS(precomputed_base) = const size_t API_NS2(sizeof,precomputed_s) = sizeof(precomputed_s); const size_t API_NS2(alignof,precomputed_s) = 32; +/* FIXME PERF: Vectorize vs unroll */ #ifdef __clang__ #if 100*__clang_major__ + __clang_minor__ > 305 #define UNROLL _Pragma("clang loop unroll(full)") // PERF FIXME: vectorize? @@ -222,7 +218,7 @@ snv sc_montmul ( } accum[j] = chain; - mand = accum[0] * MONTGOMERY_FACTOR; + mand = accum[0] * API_NS(MONTGOMERY_FACTOR); chain = 0; mier = sc_p->limb; for (j=0; j=0; i--) { sc_montsqr(b,b); @@ -403,8 +399,6 @@ const point_t API_NS(point_identity) = {{{{{0}}},{{{1}}},{{{1}}},{{{0}}}}}; static void gf_encode ( unsigned char ser[SER_BYTES], gf a ) { gf_serialize(ser, (gf_s *)a); } - -extern const gf SQRT_MINUS_ONE, SQRT_ONE_MINUS_D; /* Intern this? */ static void deisogenize ( gf_s *__restrict__ s, @@ -416,11 +410,13 @@ static void deisogenize ( ) { gf c, d, x, t; gf_s *b = s, *a = minus_t_over_s; - + +#if IMAGINE_TWIST gf_mul ( x, p->x, SQRT_MINUS_ONE); gf_mul ( t, p->t, SQRT_MINUS_ONE); gf_sub ( x, ZERO, x ); gf_sub ( t, ZERO, t ); +#endif gf DEBUG; gf_add ( a, p->z, x ); @@ -437,6 +433,7 @@ static void deisogenize ( gf_mul ( b, a, d ); /* 1/tz */ decaf_bool_t rotate; +#if (COFACTOR == 8) { gf e; gf_sqr(e, p->z); @@ -447,7 +444,9 @@ static void deisogenize ( cond_sel ( a, a, SQRT_ONE_MINUS_D, rotate ); cond_sel ( x, p->y, x, rotate ); } - +#else + rotate = 0; +#endif gf_mul ( c, a, d ); // new "osx" gf_mul ( a, c, p->z ); @@ -503,24 +502,45 @@ decaf_bool_t API_NS(point_decode) ( gf_add ( p->z, ONE, a); /* Z = 1+s^2 */ succ &= ~gf_eq( p->z, ZERO ); /* FUTURE: unnecessary? */ - + +#if COFACTOR == 8 gf_mul ( a, p->z, d); /* t(1+s^2) / s(1-s^2) = 2/xy */ succ &= ~lobit(a); /* = ~hibit(a/2), since hibit(x) = lobit(2x) */ +#endif gf_mul ( a, f, b ); /* y = (1-s^2) / t */ gf_mul ( p->y, p->z, a ); /* Y = yZ */ - gf_add ( a, s, s ); - gf_mul(p->x, a, SQRT_MINUS_ONE); /* Curve25519 */ +#if IMAGINE_TWIST + gf_add ( b, s, s ); + gf_mul(p->x, b, SQRT_MINUS_ONE); /* Curve25519 */ +#else + gf_add ( p->x, s, s ); +#endif gf_mul ( p->t, p->x, a ); /* T = 2s (1-as^2)/t */ p->y->limb[0] -= zero; - /* Curve25519 */ assert(API_NS(point_valid)(p) | ~succ); return succ; } +#if IMAGINE_TWIST +#define TWISTED_D (-(EDWARDS_D)) +#else +#define TWISTED_D ((EDWARDS_D)-1) +#endif + +#if TWISTED_D < 0 +#define EFF_D (-(TWISTED_D)) +#define NEG_D 1 +#else +#define EFF_D TWISTED_D +#define NEG_D 0 +#endif + + + void API_NS(point_sub) ( point_t p, const point_t q, @@ -534,13 +554,18 @@ void API_NS(point_sub) ( gf_add_nr ( b, q->y, q->x ); gf_mul ( p->y, d, b ); gf_mul ( b, r->t, q->t ); - gf_mulw_sgn ( p->x, b, -2*EDWARDS_D ); + gf_mulw_sgn ( p->x, b, 2*EFF_D ); gf_add_nr ( b, a, p->y ); gf_sub_nr ( c, p->y, a ); gf_mul ( a, q->z, r->z ); gf_add_nr ( a, a, a ); +#if NEG_D + gf_sub_nr ( p->y, a, p->x ); + gf_add_nr ( a, a, p->x ); +#else gf_add_nr ( p->y, a, p->x ); gf_sub_nr ( a, a, p->x ); +#endif gf_mul ( p->z, a, p->y ); gf_mul ( p->x, p->y, c ); gf_mul ( p->y, a, b ); @@ -560,13 +585,18 @@ void API_NS(point_add) ( gf_add_nr ( b, q->y, q->x ); gf_mul ( p->y, d, b ); gf_mul ( b, r->t, q->t ); - gf_mulw_sgn ( p->x, b, -2*EDWARDS_D ); + gf_mulw_sgn ( p->x, b, 2*EFF_D ); gf_add_nr ( b, a, p->y ); gf_sub_nr ( c, p->y, a ); gf_mul ( a, q->z, r->z ); gf_add_nr ( a, a, a ); +#if NEG_D + gf_add_nr ( p->y, a, p->x ); + gf_sub_nr ( a, a, p->x ); +#else gf_sub_nr ( p->y, a, p->x ); gf_add_nr ( a, a, p->x ); +#endif gf_mul ( p->z, a, p->y ); gf_mul ( p->x, p->y, c ); gf_mul ( p->y, a, b ); @@ -678,7 +708,7 @@ void API_NS(scalar_decode_long)( while (i) { i -= SER_BYTES; - sc_montmul(t1,t1,sc_r2); + sc_montmul(t1,t1,API_NS(sc_r2)); ignore_result( API_NS(scalar_decode)(t2, ser+i) ); API_NS(scalar_add)(t1, t1, t2); } @@ -715,7 +745,7 @@ static void pt_to_pniels ( ) { gf_sub ( b->n->a, a->y, a->x ); gf_add ( b->n->b, a->x, a->y ); - gf_mulw_sgn ( b->n->c, a->t, -2*EDWARDS_D ); + gf_mulw_sgn ( b->n->c, a->t, 2*EFF_D ); gf_add ( b->z, a->z, a->z ); } @@ -967,13 +997,21 @@ decaf_bool_t API_NS(point_eq) ( const point_t p, const point_t q ) { gf_mul ( b, q->y, p->x ); decaf_bool_t succ = gf_eq(a,b); - /* Interesting note: the 4tor would normally be rotation. - * But because of the *i twist, it's actually - * (x,y) <-> (iy,ix) - */ - gf_mul ( a, p->y, q->y ); - gf_mul ( b, q->x, p->x ); - succ |= gf_eq(a,b); + #if (COFACTOR == 8) && IMAGINE_TWIST + gf_mul ( a, p->y, q->y ); + gf_mul ( b, q->x, p->x ); + #if !(IMAGINE_TWIST) + gf_sub ( a, ZERO, a ); + #else + /* Interesting note: the 4tor would normally be rotation. + * But because of the *i twist, it's actually + * (x,y) <-> (iy,ix) + */ + + /* No code, just a comment. */ + #endif + succ |= gf_eq(a,b); + #endif return succ; } @@ -987,8 +1025,12 @@ void API_NS(point_from_hash_nonuniform) ( gf_deser(r0,ser); gf_strong_reduce(r0); gf_sqr(a,r0); - //gf_sub(r,ZERO,a); /*gf_mulw_sgn(r,a,QUADRATIC_NONRESIDUE);*/ - gf_mul(r,a,SQRT_MINUS_ONE); +#if P_MOD_8 == 5 + /* r = QNR * a */ + gf_mul(r,a,SQRT_MINUS_ONE); +#else + gf_sub(r,ZERO,a); +#endif gf_mulw_sgn(dee,ONE,EDWARDS_D); gf_mulw_sgn(c,r,EDWARDS_D); @@ -1044,8 +1086,10 @@ void API_NS(point_from_hash_nonuniform) ( cond_sel(b,c,ONE,gf_eq(c,ZERO)); /* 0,0 -> 1,0 */ /* isogenize */ +#if IMAGINE_TWIST gf_mul(c,a,SQRT_MINUS_ONE); gf_cpy(a,c); // TODO rename +#endif gf_sqr(c,a); /* s^2 */ gf_add(a,a,a); /* 2s */ @@ -1061,7 +1105,7 @@ void API_NS(point_from_hash_nonuniform) ( decaf_bool_t API_NS(invert_elligator_nonuniform) ( - unsigned char recovered_hash[DECAF_255_SER_BYTES], + unsigned char recovered_hash[SER_BYTES], const point_t p, uint16_t hint_ ) { @@ -1087,17 +1131,23 @@ API_NS(invert_elligator_nonuniform) ( } gf_mulw_sgn(d,c,2*EDWARDS_D-1); /* $d = (2d-a)s^2 */ - gf_add(a,d,b); /* num? */ + gf_add(a,b,d); /* num? */ gf_sub(d,d,b); /* den? */ gf_mul(b,a,d); /* n*d */ cond_sel(a,d,a,sgn_s); +#if P_MOD_8 == 5 gf_mul(d,b,SQRT_MINUS_ONE); +#else + gf_sub(d,ZERO,b); +#endif decaf_bool_t succ = gf_isqrt_chk(c,d,DECAF_TRUE); gf_mul(b,a,c); cond_neg(b, sgn_r0^hibit(b)); succ &= ~(gf_eq(b,ZERO) & sgn_r0); +#if COFACTOR == 8 succ &= ~(is_identity & sgn_ed_T); /* NB: there are no preimages of rotated identity. */ +#endif gf_encode(recovered_hash, b); /* TODO: deal with overflow flag */ @@ -1137,7 +1187,7 @@ decaf_bool_t API_NS(point_valid) ( gf_sqr(b,p->y); gf_sub(a,b,a); gf_sqr(b,p->t); - gf_mulw_sgn(c,b,-EDWARDS_D); + gf_mulw_sgn(c,b,TWISTED_D); gf_sqr(b,p->z); gf_add(b,b,c); out &= gf_eq(a,b); @@ -1149,18 +1199,18 @@ void API_NS(point_debugging_torque) ( point_t q, const point_t p ) { -#if 0 - gf_sub(q->x,ZERO,p->x); - gf_sub(q->y,ZERO,p->y); - gf_cpy(q->z,p->z); - gf_cpy(q->t,p->t); -#else +#if COFACTOR == 8 gf tmp; gf_mul(tmp,p->x,SQRT_MINUS_ONE); gf_mul(q->x,p->y,SQRT_MINUS_ONE); gf_cpy(q->y,tmp); gf_cpy(q->z,p->z); gf_sub(q->t,ZERO,p->t); +#else + gf_sub(q->x,ZERO,p->x); + gf_sub(q->y,ZERO,p->y); + gf_cpy(q->z,p->z); + gf_cpy(q->t,p->t); #endif } diff --git a/src/decaf_gen_tables.c b/src/decaf_gen_tables.c index b23e1c7..c0aaa29 100644 --- a/src/decaf_gen_tables.c +++ b/src/decaf_gen_tables.c @@ -22,8 +22,8 @@ const gf API_NS(precomputed_base_as_fe)[1]; const API_NS(scalar_t) API_NS(precomputed_scalarmul_adjustment); const API_NS(scalar_t) API_NS(point_scalarmul_adjustment); -const API_NS(scalar_t) sc_r2 = {{{0}}}; -const decaf_word_t MONTGOMERY_FACTOR = 0; +const API_NS(scalar_t) API_NS(sc_r2) = {{{0}}}; +const decaf_word_t API_NS(MONTGOMERY_FACTOR) = 0; const unsigned char base_point_ser_for_pregen[DECAF_255_SER_BYTES]; const API_NS(point_t) API_NS(point_base); @@ -147,7 +147,7 @@ int main(int argc, char **argv) { for (i=0; i -#include "p25519.h" +#include "f_impl.h" #define GF_LIT_LIMB_BITS 51 #define GF_BITS 255 #define gf gf_25519_t diff --git a/src/p448/arch_32/p448.c b/src/p448/arch_32/f_impl.c similarity index 99% rename from src/p448/arch_32/p448.c rename to src/p448/arch_32/f_impl.c index 6921c2f..f842b5f 100644 --- a/src/p448/arch_32/p448.c +++ b/src/p448/arch_32/f_impl.c @@ -3,7 +3,7 @@ */ #include "word.h" -#include "p448.h" +#include "f_impl.h" static inline mask_t __attribute__((always_inline)) is_zero ( diff --git a/src/p448/arch_32/p448.h b/src/p448/arch_32/f_impl.h similarity index 100% rename from src/p448/arch_32/p448.h rename to src/p448/arch_32/f_impl.h diff --git a/src/p448/arch_arm_32/p448.c b/src/p448/arch_arm_32/f_impl.c similarity index 99% rename from src/p448/arch_arm_32/p448.c rename to src/p448/arch_arm_32/f_impl.c index fc526f8..e0edfb9 100644 --- a/src/p448/arch_arm_32/p448.c +++ b/src/p448/arch_arm_32/f_impl.c @@ -3,7 +3,7 @@ */ #include "word.h" -#include "p448.h" +#include "f_impl.h" static inline mask_t __attribute__((always_inline)) is_zero ( diff --git a/src/p448/arch_arm_32/p448.h b/src/p448/arch_arm_32/f_impl.h similarity index 100% rename from src/p448/arch_arm_32/p448.h rename to src/p448/arch_arm_32/f_impl.h diff --git a/src/p448/arch_neon_experimental/p448.c b/src/p448/arch_neon_experimental/f_impl.c similarity index 99% rename from src/p448/arch_neon_experimental/p448.c rename to src/p448/arch_neon_experimental/f_impl.c index 8b4c9bc..6e57b8f 100644 --- a/src/p448/arch_neon_experimental/p448.c +++ b/src/p448/arch_neon_experimental/f_impl.c @@ -3,7 +3,7 @@ */ #include "word.h" -#include "p448.h" +#include "f_impl.h" static inline mask_t __attribute__((always_inline)) is_zero ( diff --git a/src/p448/arch_neon_experimental/p448.h b/src/p448/arch_neon_experimental/f_impl.h similarity index 100% rename from src/p448/arch_neon_experimental/p448.h rename to src/p448/arch_neon_experimental/f_impl.h diff --git a/src/p448/arch_ref64/p448.c b/src/p448/arch_ref64/f_impl.c similarity index 99% rename from src/p448/arch_ref64/p448.c rename to src/p448/arch_ref64/f_impl.c index 76fb300..bf08f49 100644 --- a/src/p448/arch_ref64/p448.c +++ b/src/p448/arch_ref64/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "p448.h" +#include "f_impl.h" static __inline__ __uint128_t widemul( const uint64_t a, diff --git a/src/p448/arch_ref64/p448.h b/src/p448/arch_ref64/f_impl.h similarity index 100% rename from src/p448/arch_ref64/p448.h rename to src/p448/arch_ref64/f_impl.h diff --git a/src/p448/arch_x86_64/p448.c b/src/p448/arch_x86_64/f_impl.c similarity index 99% rename from src/p448/arch_x86_64/p448.c rename to src/p448/arch_x86_64/f_impl.c index 120bbbb..9df771e 100644 --- a/src/p448/arch_x86_64/p448.c +++ b/src/p448/arch_x86_64/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "p448.h" +#include "f_impl.h" #include "x86-64-arith.h" void diff --git a/src/p448/arch_x86_64/p448.h b/src/p448/arch_x86_64/f_impl.h similarity index 100% rename from src/p448/arch_x86_64/p448.h rename to src/p448/arch_x86_64/f_impl.h diff --git a/src/p448/f_arithmetic.c b/src/p448/f_arithmetic.c index d73832a..afb6792 100644 --- a/src/p448/f_arithmetic.c +++ b/src/p448/f_arithmetic.c @@ -12,10 +12,10 @@ void gf_isr ( - gf_a_t a, - const gf_a_t x + gf a, + const gf x ) { - gf_a_t L0, L1, L2; + gf L0, L1, L2; gf_sqr ( L1, x ); gf_mul ( L2, x, L1 ); gf_sqr ( L1, L2 ); diff --git a/src/p448/f_field.h b/src/p448/f_field.h index 29188e7..1f55490 100644 --- a/src/p448/f_field.h +++ b/src/p448/f_field.h @@ -12,7 +12,7 @@ #include "constant_time.h" #include -#include "p448.h" +#include "f_impl.h" #define GF_LIT_LIMB_BITS 56 #define GF_BITS 448 #define gf p448_t diff --git a/src/p480/arch_x86_64/p480.c b/src/p480/arch_x86_64/f_impl.c similarity index 99% rename from src/p480/arch_x86_64/p480.c rename to src/p480/arch_x86_64/f_impl.c index ec7345f..db7823f 100644 --- a/src/p480/arch_x86_64/p480.c +++ b/src/p480/arch_x86_64/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "p480.h" +#include "f_impl.h" #include "x86-64-arith.h" void diff --git a/src/p480/arch_x86_64/p480.h b/src/p480/arch_x86_64/f_impl.h similarity index 100% rename from src/p480/arch_x86_64/p480.h rename to src/p480/arch_x86_64/f_impl.h diff --git a/src/p480/f_field.h b/src/p480/f_field.h index 471e90d..01b166f 100644 --- a/src/p480/f_field.h +++ b/src/p480/f_field.h @@ -12,7 +12,7 @@ #include "constant_time.h" #include -#include "p480.h" +#include "f_impl.h" #define GF_LIT_LIMB_BITS 60 #define GF_BITS 480 #define gf p480_t diff --git a/src/p521/arch_ref64/p521.c b/src/p521/arch_ref64/f_impl.c similarity index 99% rename from src/p521/arch_ref64/p521.c rename to src/p521/arch_ref64/f_impl.c index fbcc149..0bff961 100644 --- a/src/p521/arch_ref64/p521.c +++ b/src/p521/arch_ref64/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "p521.h" +#include "f_impl.h" static __inline__ __uint128_t widemul( const uint64_t a, diff --git a/src/p521/arch_ref64/p521.h b/src/p521/arch_ref64/f_impl.h similarity index 100% rename from src/p521/arch_ref64/p521.h rename to src/p521/arch_ref64/f_impl.h diff --git a/src/p521/arch_x86_64_r12/p521.c b/src/p521/arch_x86_64_r12/f_impl.c similarity index 99% rename from src/p521/arch_x86_64_r12/p521.c rename to src/p521/arch_x86_64_r12/f_impl.c index 98fae11..ba3e77b 100644 --- a/src/p521/arch_x86_64_r12/p521.c +++ b/src/p521/arch_x86_64_r12/f_impl.c @@ -2,7 +2,7 @@ * Released under the MIT License. See LICENSE.txt for license information. */ -#include "p521.h" +#include "f_impl.h" typedef struct { uint64x3_t lo, hi, hier; diff --git a/src/p521/arch_x86_64_r12/p521.h b/src/p521/arch_x86_64_r12/f_impl.h similarity index 100% rename from src/p521/arch_x86_64_r12/p521.h rename to src/p521/arch_x86_64_r12/f_impl.h diff --git a/src/p521/f_field.h b/src/p521/f_field.h index 6a72ea7..9128417 100644 --- a/src/p521/f_field.h +++ b/src/p521/f_field.h @@ -12,7 +12,7 @@ #include #include "constant_time.h" -#include "p521.h" +#include "f_impl.h" #define GF_LIT_LIMB_BITS 58 #define GF_BITS 521 #define gf p521_t